370 lines
14 KiB
Perl
370 lines
14 KiB
Perl
package Sisimai::ARF;
|
|
use feature ':5.10';
|
|
use strict;
|
|
use warnings;
|
|
use Sisimai::Bite::Email;
|
|
|
|
# http://tools.ietf.org/html/rfc5965
|
|
# http://en.wikipedia.org/wiki/Feedback_loop_(email)
|
|
# http://en.wikipedia.org/wiki/Abuse_Reporting_Format
|
|
#
|
|
# Netease DMARC uses: This is a spf/dkim authentication-failure report for an email message received from IP
|
|
# OpenDMARC 1.3.0 uses: This is an authentication failure report for an email message received from IP
|
|
# Abusix ARF uses this is an autogenerated email abuse complaint regarding your network.
|
|
my $StartingOf = { 'rfc822' => ['Content-Type: message/rfc822', 'Content-Type: text/rfc822-headers'] };
|
|
my $MarkingsOf = {
|
|
'message' => qr{\A(?>
|
|
[Tt]his[ ]is[ ].+[ ]email[ ]abuse[ ]report
|
|
|[Tt]his[ ]is[ ](?:
|
|
an[ ]autogenerated[ ]email[ ]abuse[ ]complaint
|
|
|an?[ ].+[ ]report[ ]for
|
|
|a[ ].+[ ]authentication[ -]failure[ ]report[ ]for
|
|
)
|
|
)
|
|
}x,
|
|
};
|
|
my $Indicators = Sisimai::Bite::Email->INDICATORS;
|
|
my $LongFields = Sisimai::RFC5322->LONGFIELDS;
|
|
my $RFC822Head = Sisimai::RFC5322->HEADERFIELDS;
|
|
|
|
sub description { return 'Abuse Feedback Reporting Format' }
|
|
sub smtpagent { 'Feeback-Loop' }
|
|
sub headerlist { return [] }
|
|
sub is_arf {
|
|
# Email is a Feedback-Loop message or not
|
|
# @param [Hash] heads Email header including "Content-Type", "From",
|
|
# and "Subject" field
|
|
# @return [Integer] 1: Feedback Loop
|
|
# 0: is not Feedback loop
|
|
my $class = shift;
|
|
my $heads = shift || return 0;
|
|
my $match = 0;
|
|
|
|
if( $heads->{'content-type'} =~ /report-type=["]?feedback-report["]?/ ) {
|
|
# Content-Type: multipart/report; report-type=feedback-report; ...
|
|
$match = 1;
|
|
|
|
} elsif( index($heads->{'content-type'}, 'multipart/mixed') > -1 ) {
|
|
# Microsoft (Hotmail, MSN, Live, Outlook) uses its own report format.
|
|
# Amazon SES Complaints bounces
|
|
my $title = 'complaint about message from ';
|
|
my $mfrom = qr{(?:
|
|
staff[@]hotmail[.]com
|
|
|complaints[@]email-abuse[.]amazonses[.]com
|
|
)\z
|
|
}x;
|
|
if( $heads->{'from'} =~ $mfrom && index($heads->{'subject'}, $title) > -1) {
|
|
# From: staff@hotmail.com
|
|
# From: complaints@email-abuse.amazonses.com
|
|
# Subject: complaint about message from 192.0.2.1
|
|
$match = 1;
|
|
}
|
|
}
|
|
return $match;
|
|
}
|
|
|
|
sub scan {
|
|
# Detect an error for Feedback Loop
|
|
# @param [Hash] mhead Message header of a bounce email
|
|
# @options mhead [String] from From header
|
|
# @options mhead [String] date Date header
|
|
# @options mhead [String] subject Subject header
|
|
# @options mhead [Array] received Received headers
|
|
# @options mhead [String] others Other required headers
|
|
# @param [String] mbody Message body of a bounce email
|
|
# @return [Hash, Undef] Bounce data list and message/rfc822 part
|
|
# or Undef if it failed to parse or the
|
|
# arguments are missing
|
|
my $class = shift;
|
|
my $mhead = shift // return undef;
|
|
my $mbody = shift // return undef;
|
|
return undef unless is_arf(undef, $mhead);
|
|
|
|
my $dscontents = [Sisimai::Bite::Email->DELIVERYSTATUS];
|
|
my @hasdivided = split("\n", $$mbody);
|
|
my $rfc822part = ''; # (String) message/rfc822-headers part
|
|
my $previousfn = ''; # (String) Previous field name
|
|
my $readcursor = 0; # (Integer) Points the current cursor position
|
|
my $recipients = 0; # (Integer) The number of 'Final-Recipient' header
|
|
my $rcptintext = ''; # (String) Recipient address in the message body
|
|
my $commondata = {
|
|
'diagnosis' => '', # Error message
|
|
'from' => '', # Original-Mail-From:
|
|
'rhost' => '', # Reporting-MTA:
|
|
};
|
|
my $arfheaders = {
|
|
'feedbacktype' => '', # Feedback-Type:
|
|
'rhost' => '', # Source-IP:
|
|
'agent' => '', # User-Agent:
|
|
'date' => '', # Arrival-Date:
|
|
'authres' => '', # Authentication-Results:
|
|
};
|
|
my $v = undef;
|
|
|
|
# 3.1. Required Fields
|
|
#
|
|
# The following report header fields MUST appear exactly once:
|
|
#
|
|
# o "Feedback-Type" contains the type of feedback report (as defined
|
|
# in the corresponding IANA registry and later in this memo). This
|
|
# is intended to let report parsers distinguish among different
|
|
# types of reports.
|
|
#
|
|
# o "User-Agent" indicates the name and version of the software
|
|
# program that generated the report. The format of this field MUST
|
|
# follow section 14.43 of [HTTP]. This field is for documentation
|
|
# only; there is no registry of user agent names or versions, and
|
|
# report receivers SHOULD NOT expect user agent names to belong to a
|
|
# known set.
|
|
#
|
|
# o "Version" indicates the version of specification that the report
|
|
# generator is using to generate the report. The version number in
|
|
# this specification is set to "1".
|
|
#
|
|
for my $e ( @hasdivided ) {
|
|
# Read each line between the start of the message and the start of rfc822 part.
|
|
unless( $readcursor ) {
|
|
# Beginning of the bounce message or delivery status part
|
|
if( $e =~ $MarkingsOf->{'message'} ) {
|
|
$readcursor |= $Indicators->{'deliverystatus'};
|
|
next;
|
|
}
|
|
}
|
|
|
|
unless( $readcursor & $Indicators->{'message-rfc822'} ) {
|
|
# Beginning of the original message part
|
|
if( index($e, $StartingOf->{'rfc822'}->[0]) == 0 ||
|
|
index($e, $StartingOf->{'rfc822'}->[1]) == 0 ) {
|
|
$readcursor |= $Indicators->{'message-rfc822'};
|
|
next;
|
|
}
|
|
}
|
|
|
|
if( $readcursor & $Indicators->{'message-rfc822'} ) {
|
|
# After "message/rfc822"
|
|
if( $e =~ /X-HmXmrOriginalRecipient:[ ]*(.+)\z/ ) {
|
|
# Microsoft ARF: original recipient.
|
|
$dscontents->[-1]->{'recipient'} = Sisimai::Address->s3s4($1);
|
|
$recipients++;
|
|
# The "X-HmXmrOriginalRecipient" header appears only once so
|
|
# we take this opportunity to hard-code ARF headers missing in
|
|
# Microsoft's implementation.
|
|
$arfheaders->{'feedbacktype'} = 'abuse';
|
|
$arfheaders->{'agent'} = 'Microsoft Junk Mail Reporting Program';
|
|
|
|
} elsif( $e =~ /\AFrom:[ ]*(.+)\z/ ) {
|
|
# Microsoft ARF: original sender.
|
|
$commondata->{'from'} ||= Sisimai::Address->s3s4($1);
|
|
|
|
} elsif( $e =~ /\A([-0-9A-Za-z]+?)[:][ ]*(.+)\z/ ) {
|
|
# Get required headers only
|
|
my $lhs = lc $1;
|
|
my $rhs = $2;
|
|
|
|
$previousfn = '';
|
|
next unless exists $RFC822Head->{ $lhs };
|
|
|
|
$previousfn = $lhs;
|
|
$rfc822part .= $e."\n";
|
|
$rcptintext = $rhs if $lhs eq 'to';
|
|
|
|
} elsif( $e =~ /\A[ \t]+/ ) {
|
|
# Continued line from the previous line
|
|
$rfc822part .= $e."\n" if exists $LongFields->{ $previousfn };
|
|
next if length $e;
|
|
$rcptintext .= $e if $previousfn eq 'to';
|
|
}
|
|
} else {
|
|
# Before "message/rfc822"
|
|
next unless $readcursor & $Indicators->{'deliverystatus'};
|
|
next unless length $e;
|
|
|
|
# Feedback-Type: abuse
|
|
# User-Agent: SomeGenerator/1.0
|
|
# Version: 0.1
|
|
# Original-Mail-From: <somespammer@example.net>
|
|
# Original-Rcpt-To: <kijitora@example.jp>
|
|
# Received-Date: Thu, 29 Apr 2009 00:00:00 JST
|
|
# Source-IP: 192.0.2.1
|
|
$v = $dscontents->[-1];
|
|
|
|
if( $e =~ /\AOriginal-Rcpt-To:[ ]+[<]?(.+)[>]?\z/ ||
|
|
$e =~ /\ARedacted-Address:[ ]([^ ].+[@])\z/ ) {
|
|
# Original-Rcpt-To header field is optional and may appear any
|
|
# number of times as appropriate:
|
|
# Original-Rcpt-To: <user@example.com>
|
|
# Redacted-Address: localpart@
|
|
if( $v->{'recipient'} ) {
|
|
# There are multiple recipient addresses in the message body.
|
|
push @$dscontents, Sisimai::Bite::Email->DELIVERYSTATUS;
|
|
$v = $dscontents->[-1];
|
|
}
|
|
$v->{'recipient'} = Sisimai::Address->s3s4($1);
|
|
$recipients++;
|
|
|
|
} elsif( $e =~ /\AFeedback-Type:[ ]*([^ ]+)\z/ ) {
|
|
# The header field MUST appear exactly once.
|
|
# Feedback-Type: abuse
|
|
$arfheaders->{'feedbacktype'} = $1;
|
|
|
|
} elsif( $e =~ /\AAuthentication-Results:[ ]*(.+)\z/ ) {
|
|
# "Authentication-Results" indicates the result of one or more
|
|
# authentication checks run by the report generator.
|
|
#
|
|
# Authentication-Results: mail.example.com;
|
|
# spf=fail smtp.mail=somespammer@example.com
|
|
$arfheaders->{'authres'} = $1;
|
|
|
|
} elsif( $e =~ /\AUser-Agent:[ ]*(.+)\z/ ) {
|
|
# The header field MUST appear exactly once.
|
|
# User-Agent: SomeGenerator/1.0
|
|
$arfheaders->{'agent'} = $1;
|
|
|
|
} elsif( $e =~ /\A(?:Received|Arrival)-Date:[ ]*(.+)\z/ ) {
|
|
# Arrival-Date header is optional and MUST NOT appear more than
|
|
# once.
|
|
# Received-Date: Thu, 29 Apr 2010 00:00:00 JST
|
|
# Arrival-Date: Thu, 29 Apr 2010 00:00:00 +0000
|
|
$arfheaders->{'date'} = $1;
|
|
|
|
} elsif( $e =~ /\AReporting-MTA:[ ]*dns;[ ]*(.+)\z/ ) {
|
|
# The header is optional and MUST NOT appear more than once.
|
|
# Reporting-MTA: dns; mx.example.jp
|
|
$commondata->{'rhost'} = $1;
|
|
|
|
} elsif( $e =~ /\ASource-IP:[ ]*(.+)\z/ ) {
|
|
# The header is optional and MUST NOT appear more than once.
|
|
# Source-IP: 192.0.2.45
|
|
$arfheaders->{'rhost'} = $1;
|
|
|
|
} elsif( $e =~ /\AOriginal-Mail-From:[ ]*(.+)\z/ ) {
|
|
# the header is optional and MUST NOT appear more than once.
|
|
# Original-Mail-From: <somespammer@example.net>
|
|
$commondata->{'from'} ||= Sisimai::Address->s3s4($1);
|
|
|
|
} elsif( $e =~ $MarkingsOf->{'message'} ) {
|
|
# This is an email abuse report for an email message with the
|
|
# message-id of 0000-000000000000000000000000000000000@mx
|
|
# received from IP address 192.0.2.1 on
|
|
# Thu, 29 Apr 2010 00:00:00 +0900 (JST)
|
|
$commondata->{'diagnosis'} = $e;
|
|
}
|
|
} # End of if: rfc822
|
|
}
|
|
|
|
if( ($arfheaders->{'feedbacktype'} eq 'auth-failure' ) && $arfheaders->{'authres'} ) {
|
|
# Append the value of Authentication-Results header
|
|
$commondata->{'diagnosis'} .= ' '.$arfheaders->{'authres'}
|
|
}
|
|
|
|
unless( $recipients ) {
|
|
# Insert pseudo recipient address when there is no valid recipient
|
|
# address in the message.
|
|
$dscontents->[-1]->{'recipient'} = Sisimai::Address->undisclosed('r');
|
|
$recipients = 1;
|
|
}
|
|
|
|
unless( $rfc822part =~ /\bFrom: [^ ]+[@][^ ]+\b/ ) {
|
|
# There is no "From:" header in the original message
|
|
if( $commondata->{'from'} ) {
|
|
# Append the value of "Original-Mail-From" value as a sender address.
|
|
$rfc822part .= 'From: '.$commondata->{'from'}."\n";
|
|
}
|
|
}
|
|
|
|
if( $mhead->{'subject'} =~ /complaint about message from (\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3})/ ) {
|
|
# Microsoft ARF: remote host address.
|
|
$arfheaders->{'rhost'} = $1;
|
|
$commondata->{'diagnosis'} = sprintf(
|
|
"This is a Microsoft email abuse report for an email message received from IP %s on %s",
|
|
$arfheaders->{'rhost'}, $mhead->{'date'});
|
|
}
|
|
|
|
for my $e ( @$dscontents ) {
|
|
if( $e->{'recipient'} =~ /\A[^ ]+[@]\z/ ) {
|
|
# AOL = http://forums.cpanel.net/f43/aol-brutal-work-71473.html
|
|
$e->{'recipient'} = Sisimai::Address->s3s4($rcptintext);
|
|
}
|
|
map { $e->{ $_ } ||= $arfheaders->{ $_ } } keys %$arfheaders;
|
|
delete $e->{'authres'};
|
|
|
|
$e->{'softbounce'} = -1;
|
|
$e->{'diagnosis'} ||= $commondata->{'diagnosis'};
|
|
$e->{'date'} ||= $mhead->{'date'};
|
|
|
|
unless( $e->{'rhost'} ) {
|
|
# Get the remote IP address from the message body
|
|
if( $commondata->{'rhost'} ) {
|
|
# The value of "Reporting-MTA" header
|
|
$e->{'rhost'} = $commondata->{'rhost'};
|
|
|
|
} elsif( $e->{'diagnosis'} =~ /\breceived from IP address ([^ ]+)/ ) {
|
|
# This is an email abuse report for an email message received
|
|
# from IP address 24.64.1.1 on Thu, 29 Apr 2010 00:00:00 +0000
|
|
$e->{'rhost'} = $1;
|
|
}
|
|
}
|
|
$e->{'reason'} = 'feedback';
|
|
$e->{'command'} = '';
|
|
$e->{'action'} = '';
|
|
$e->{'agent'} ||= __PACKAGE__->smtpagent;
|
|
}
|
|
return { 'ds' => $dscontents, 'rfc822' => $rfc822part };
|
|
}
|
|
|
|
1;
|
|
__END__
|
|
|
|
=encoding utf-8
|
|
|
|
=head1 NAME
|
|
|
|
Sisimai::ARF - Parser class for detecting ARF: Abuse Feedback Reporting Format.
|
|
|
|
=head1 SYNOPSIS
|
|
|
|
Do not use this class directly, use Sisimai::ARF.
|
|
|
|
use Sisimai::ARF;
|
|
my $v = Sisimai::ARF->scan($header, $body);
|
|
|
|
=head1 DESCRIPTION
|
|
|
|
Sisimai::ARF is a parser for email returned as a Feedback Loop report message.
|
|
|
|
=head1 FEEDBACK TYPES
|
|
|
|
=head2 B<abuse>
|
|
|
|
Unsolicited email or some other kind of email abuse.
|
|
|
|
=head2 B<fraud>
|
|
|
|
Indicates some kind of C<fraud> or C<phishing> activity.
|
|
|
|
=head2 B<other>
|
|
|
|
Any other feedback that does not fit into other registered types.
|
|
|
|
=head2 B<virus>
|
|
|
|
Report of a virus found in the originating message.
|
|
|
|
=head1 SEE ALSO
|
|
|
|
L<http://tools.ietf.org/html/rfc5965>
|
|
|
|
=head1 AUTHOR
|
|
|
|
azumakuniyuki
|
|
|
|
=head1 COPYRIGHT
|
|
|
|
Copyright (C) 2014-2018 azumakuniyuki, All rights reserved.
|
|
|
|
=head1 LICENSE
|
|
|
|
This software is distributed under The BSD 2-Clause License.
|
|
|
|
=cut
|