package Sisimai::ARF;
use feature ':5.10';
use strict;
use warnings;
use Sisimai::Bite::Email;

# Parser for ARF (Abuse Reporting Format) feedback-loop messages.
#
# http://tools.ietf.org/html/rfc5965
# http://en.wikipedia.org/wiki/Feedback_loop_(email)
# http://en.wikipedia.org/wiki/Abuse_Reporting_Format
#
# Netease DMARC uses:   This is a spf/dkim authentication-failure report for an email message received from IP
# OpenDMARC 1.3.0 uses: This is an authentication failure report for an email message received from IP
# Abusix ARF uses:      this is an autogenerated email abuse complaint regarding your network.
#
# NOTE(review): Sisimai::RFC5322 and Sisimai::Address are used below but are
# not loaded by this file; presumably Sisimai::Bite::Email loads them - verify.

# Line prefixes that open the part carrying the original message.
my $StartingOf = {
    'rfc822' => ['Content-Type: message/rfc822', 'Content-Type: text/rfc822-headers'],
};

# Pattern of the human readable sentence that introduces a feedback report.
my $MarkingsOf = {
    'message' => qr{\A(?>
         [Tt]his[ ]is[ ].+[ ]email[ ]abuse[ ]report
        |[Tt]his[ ]is[ ](?:
             an[ ]autogenerated[ ]email[ ]abuse[ ]complaint
            |an?[ ].+[ ]report[ ]for
            |a[ ].+[ ]authentication[ -]failure[ ]report[ ]for
            )
        )
    }x,
};

my $Indicators = Sisimai::Bite::Email->INDICATORS;  # Bit flags used for $readcursor in scan()
my $LongFields = Sisimai::RFC5322->LONGFIELDS;      # Looked up by field name for folded lines
my $RFC822Head = Sisimai::RFC5322->HEADERFIELDS;    # Looked up by field name to pick headers to keep

sub description { return 'Abuse Feedback Reporting Format' }

# NOTE(review): 'Feeback-Loop' looks like a misspelling of 'Feedback-Loop'. It
# is kept as is because the value is emitted as the "agent" of parsed results
# and consumers may compare against it.
sub smtpagent   { 'Feeback-Loop' }
sub headerlist  { return [] }

sub is_arf {
    # Email is a Feedback-Loop message or not
    # @param    [Hash] heads    Email header including "Content-Type", "From",
    #                           and "Subject" field
    # @return   [Integer]       1: Feedback Loop
    #                           0: is not Feedback loop
    my $class = shift;
    my $heads = shift || return 0;

    # Any of the headers may be missing: fall back to an empty string so that
    # the checks below do not raise "uninitialized value" warnings.
    my $ctype = $heads->{'content-type'} // '';
    my $hfrom = $heads->{'from'}         // '';
    my $hsubj = $heads->{'subject'}      // '';

    # Content-Type: multipart/report; report-type=feedback-report; ...
    return 1 if $ctype =~ /report-type=["]?feedback-report["]?/;
    return 0 unless index($ctype, 'multipart/mixed') > -1;

    # Microsoft (Hotmail, MSN, Live, Outlook) uses its own report format.
    # Amazon SES Complaints bounces
    my $title = 'complaint about message from ';
    my $mfrom = qr{(?:
         staff[@]hotmail[.]com
        |complaints[@]email-abuse[.]amazonses[.]com
        )\z
    }x;

    # From: staff@hotmail.com
    # From: complaints@email-abuse.amazonses.com
    # Subject: complaint about message from 192.0.2.1
    return 1 if $hfrom =~ $mfrom && index($hsubj, $title) > -1;
    return 0;
}

sub scan {
    # Detect an error for Feedback Loop
    # @param         [Hash] mhead       Message header of a bounce email
    # @options mhead [String] from      From header
    # @options mhead [String] date      Date header
    # @options mhead [String] subject   Subject header
    # @options mhead [Array]  received  Received headers
    # @options mhead [String] others    Other required headers
    # @param         [String] mbody     Message body of a bounce email
    #                                   (dereferenced as a scalar reference)
    # @return        [Hash, Undef]      Bounce data list and message/rfc822 part
    #                                   or Undef if it failed to parse or the
    #                                   arguments are missing
    my $class = shift;
    my $mhead = shift // return undef;
    my $mbody = shift // return undef;

    return undef unless is_arf(undef, $mhead);

    my $dscontents = [Sisimai::Bite::Email->DELIVERYSTATUS];
    my @hasdivided = split("\n", $$mbody);
    my $rfc822part = '';    # (String) message/rfc822-headers part
    my $previousfn = '';    # (String) Previous field name
    my $readcursor = 0;     # (Integer) Points the current cursor position
    my $recipients = 0;     # (Integer) The number of 'Final-Recipient' header
    my $rcptintext = '';    # (String) Recipient address in the message body
    my $commondata = {
        'diagnosis' => '',  # Error message
        'from'      => '',  # Original-Mail-From:
        'rhost'     => '',  # Reporting-MTA:
    };
    my $arfheaders = {
        'feedbacktype' => '',   # Feedback-Type:
        'rhost'        => '',   # Source-IP:
        'agent'        => '',   # User-Agent:
        'date'         => '',   # Arrival-Date:
        'authres'      => '',   # Authentication-Results:
    };
    my $v = undef;

    # 3.1.  Required Fields
    #
    #   The following report header fields MUST appear exactly once:
    #
    #   o  "Feedback-Type" contains the type of feedback report (as defined
    #      in the corresponding IANA registry and later in this memo).  This
    #      is intended to let report parsers distinguish among different
    #      types of reports.
    #
    #   o  "User-Agent" indicates the name and version of the software
    #      program that generated the report.  The format of this field MUST
    #      follow section 14.43 of [HTTP].  This field is for documentation
    #      only; there is no registry of user agent names or versions, and
    #      report receivers SHOULD NOT expect user agent names to belong to a
    #      known set.
    #
    #   o  "Version" indicates the version of specification that the report
    #      generator is using to generate the report.  The version number in
    #      this specification is set to "1".
    #
    for my $e ( @hasdivided ) {
        # Read each line between the start of the message and the start of rfc822 part.
        unless( $readcursor ) {
            # Beginning of the bounce message or delivery status part
            if( $e =~ $MarkingsOf->{'message'} ) {
                $readcursor |= $Indicators->{'deliverystatus'};
                next;
            }
        }

        unless( $readcursor & $Indicators->{'message-rfc822'} ) {
            # Beginning of the original message part
            if( index($e, $StartingOf->{'rfc822'}->[0]) == 0 ||
                index($e, $StartingOf->{'rfc822'}->[1]) == 0 ) {
                $readcursor |= $Indicators->{'message-rfc822'};
                next;
            }
        }

        if( $readcursor & $Indicators->{'message-rfc822'} ) {
            # After "message/rfc822"
            if( $e =~ /X-HmXmrOriginalRecipient:[ ]*(.+)\z/ ) {
                # Microsoft ARF: original recipient.
                $dscontents->[-1]->{'recipient'} = Sisimai::Address->s3s4($1);
                $recipients++;

                # The "X-HmXmrOriginalRecipient" header appears only once so
                # we take this opportunity to hard-code ARF headers missing in
                # Microsoft's implementation.
                $arfheaders->{'feedbacktype'} = 'abuse';
                $arfheaders->{'agent'}        = 'Microsoft Junk Mail Reporting Program';

            } elsif( $e =~ /\AFrom:[ ]*(.+)\z/ ) {
                # Microsoft ARF: original sender.
                $commondata->{'from'} ||= Sisimai::Address->s3s4($1);

            } elsif( $e =~ /\A([-0-9A-Za-z]+?)[:][ ]*(.+)\z/ ) {
                # Get required headers only
                my $lhs = lc $1;
                my $rhs = $2;

                $previousfn = '';
                next unless exists $RFC822Head->{ $lhs };

                $previousfn  = $lhs;
                $rfc822part .= $e."\n";
                $rcptintext  = $rhs if $lhs eq 'to';

            } elsif( $e =~ /\A[ \t]+/ ) {
                # Continued line from the previous line
                #
                # Only the first line of a folded "To:" header ends up in
                # $rcptintext: the statement that used to append continuation
                # lines was guarded by "next if length $e", which is always
                # true for a line matching the pattern above, so it never ran.
                # That dead code is dropped and the behaviour is unchanged.
                $rfc822part .= $e."\n" if exists $LongFields->{ $previousfn };
            }
        } else {
            # Before "message/rfc822"
            next unless $readcursor & $Indicators->{'deliverystatus'};
            next unless length $e;

            # Feedback-Type: abuse
            # User-Agent: SomeGenerator/1.0
            # Version: 0.1
            # Original-Mail-From: <somespammer@example.net>
            # Original-Rcpt-To: <user@example.com>
            # Received-Date: Thu, 29 Apr 2009 00:00:00 JST
            # Source-IP: 192.0.2.1
            $v = $dscontents->[-1];

            if( $e =~ /\AOriginal-Rcpt-To:[ ]+[<]?(.+)[>]?\z/ ||
                $e =~ /\ARedacted-Address:[ ]([^ ].+[@])\z/ ) {
                # Original-Rcpt-To header field is optional and may appear any
                # number of times as appropriate:
                # Original-Rcpt-To: <user@example.com>
                # Redacted-Address: localpart@
                if( $v->{'recipient'} ) {
                    # There are multiple recipient addresses in the message body.
                    push @$dscontents, Sisimai::Bite::Email->DELIVERYSTATUS;
                    $v = $dscontents->[-1];
                }
                $v->{'recipient'} = Sisimai::Address->s3s4($1);
                $recipients++;

            } elsif( $e =~ /\AFeedback-Type:[ ]*([^ ]+)\z/ ) {
                # The header field MUST appear exactly once.
                # Feedback-Type: abuse
                $arfheaders->{'feedbacktype'} = $1;

            } elsif( $e =~ /\AAuthentication-Results:[ ]*(.+)\z/ ) {
                # "Authentication-Results" indicates the result of one or more
                # authentication checks run by the report generator.
                #
                # Authentication-Results: mail.example.com;
                #   spf=fail smtp.mail=somespammer@example.com
                $arfheaders->{'authres'} = $1;

            } elsif( $e =~ /\AUser-Agent:[ ]*(.+)\z/ ) {
                # The header field MUST appear exactly once.
                # User-Agent: SomeGenerator/1.0
                $arfheaders->{'agent'} = $1;

            } elsif( $e =~ /\A(?:Received|Arrival)-Date:[ ]*(.+)\z/ ) {
                # Arrival-Date header is optional and MUST NOT appear more than
                # once.
                # Received-Date: Thu, 29 Apr 2010 00:00:00 JST
                # Arrival-Date: Thu, 29 Apr 2010 00:00:00 +0000
                $arfheaders->{'date'} = $1;

            } elsif( $e =~ /\AReporting-MTA:[ ]*dns;[ ]*(.+)\z/ ) {
                # The header is optional and MUST NOT appear more than once.
                # Reporting-MTA: dns; mx.example.jp
                $commondata->{'rhost'} = $1;

            } elsif( $e =~ /\ASource-IP:[ ]*(.+)\z/ ) {
                # The header is optional and MUST NOT appear more than once.
                # Source-IP: 192.0.2.45
                $arfheaders->{'rhost'} = $1;

            } elsif( $e =~ /\AOriginal-Mail-From:[ ]*(.+)\z/ ) {
                # the header is optional and MUST NOT appear more than once.
                # Original-Mail-From: <somespammer@example.net>
                $commondata->{'from'} ||= Sisimai::Address->s3s4($1);

            } elsif( $e =~ $MarkingsOf->{'message'} ) {
                # This is an email abuse report for an email message with the
                #   message-id of 0000-000000000000000000000000000000000@mx
                #   received from IP address 192.0.2.1 on
                #   Thu, 29 Apr 2010 00:00:00 +0900 (JST)
                $commondata->{'diagnosis'} = $e;
            }
        } # End of if: rfc822
    }

    if( ($arfheaders->{'feedbacktype'} eq 'auth-failure' ) && $arfheaders->{'authres'} ) {
        # Append the value of Authentication-Results header
        $commondata->{'diagnosis'} .= ' '.$arfheaders->{'authres'}
    }

    unless( $recipients ) {
        # Insert pseudo recipient address when there is no valid recipient
        # address in the message.
        $dscontents->[-1]->{'recipient'} = Sisimai::Address->undisclosed('r');
        $recipients = 1;
    }

    unless( $rfc822part =~ /\bFrom: [^ ]+[@][^ ]+\b/ ) {
        # There is no "From:" header in the original message
        if( $commondata->{'from'} ) {
            # Append the value of "Original-Mail-From" value as a sender address.
            $rfc822part .= 'From: '.$commondata->{'from'}."\n";
        }
    }

    # The Subject and Date headers may be missing from $mhead: default them to
    # an empty string to avoid "uninitialized value" warnings below.
    my $subjectstr = $mhead->{'subject'} // '';
    if( $subjectstr =~ /complaint about message from (\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3})/ ) {
        # Microsoft ARF: remote host address.
        $arfheaders->{'rhost'} = $1;
        $commondata->{'diagnosis'} = sprintf(
            "This is a Microsoft email abuse report for an email message received from IP %s on %s",
            $arfheaders->{'rhost'}, $mhead->{'date'} // '');
    }

    for my $e ( @$dscontents ) {
        if( $e->{'recipient'} =~ /\A[^ ]+[@]\z/ ) {
            # AOL = http://forums.cpanel.net/f43/aol-brutal-work-71473.html
            # The recipient is redacted ("localpart@"): use the address found
            # in the "To:" header of the original message instead.
            $e->{'recipient'} = Sisimai::Address->s3s4($rcptintext);
        }

        # Fill each empty field with the value picked from the ARF headers.
        for my $f ( keys %$arfheaders ) {
            $e->{ $f } ||= $arfheaders->{ $f };
        }
        delete $e->{'authres'};

        $e->{'softbounce'} = -1;
        $e->{'diagnosis'} ||= $commondata->{'diagnosis'};
        $e->{'date'}      ||= $mhead->{'date'};

        unless( $e->{'rhost'} ) {
            # Get the remote IP address from the message body
            if( $commondata->{'rhost'} ) {
                # The value of "Reporting-MTA" header
                $e->{'rhost'} = $commondata->{'rhost'};

            } elsif( $e->{'diagnosis'} =~ /\breceived from IP address ([^ ]+)/ ) {
                # This is an email abuse report for an email message received
                # from IP address 24.64.1.1 on Thu, 29 Apr 2010 00:00:00 +0000
                $e->{'rhost'} = $1;
            }
        }

        $e->{'reason'}  = 'feedback';
        $e->{'command'} = '';
        $e->{'action'}  = '';
        $e->{'agent'} ||= __PACKAGE__->smtpagent;
    }
    return { 'ds' => $dscontents, 'rfc822' => $rfc822part };
}

1;
__END__

=encoding utf-8

=head1 NAME

Sisimai::ARF - Parser class for detecting ARF: Abuse Feedback Reporting Format.

=head1 SYNOPSIS

Do not use this class directly, use Sisimai::ARF.

    use Sisimai::ARF;
    my $v = Sisimai::ARF->scan($header, $body);

=head1 DESCRIPTION

Sisimai::ARF is a parser for email returned as a Feedback Loop report message.

=head1 FEEDBACK TYPES

=head2 B<abuse>

Unsolicited email or some other kind of email abuse.

=head2 B<fraud>

Indicates some kind of C<fraud> or C<phishing> activity.

=head2 B<other>

Any other feedback that does not fit into other registered types.

=head2 B<virus>

Report of a virus found in the originating message.

=head1 SEE ALSO

L<http://tools.ietf.org/html/rfc5965>

=head1 AUTHOR

azumakuniyuki

=head1 COPYRIGHT

Copyright (C) 2014-2018 azumakuniyuki, All rights reserved.

=head1 LICENSE

This software is distributed under The BSD 2-Clause License.

=cut