Files
scripts/Perl OTRS/Kernel/cpan-lib/Sisimai/ARF.pm
2024-10-14 00:08:40 +02:00

370 lines
14 KiB
Perl

package Sisimai::ARF;
use feature ':5.10';
use strict;
use warnings;
use Sisimai::Bite::Email;
# http://tools.ietf.org/html/rfc5965
# http://en.wikipedia.org/wiki/Feedback_loop_(email)
# http://en.wikipedia.org/wiki/Abuse_Reporting_Format
#
# Netease DMARC uses: This is a spf/dkim authentication-failure report for an email message received from IP
# OpenDMARC 1.3.0 uses: This is an authentication failure report for an email message received from IP
# Abusix ARF uses: This is an autogenerated email abuse complaint regarding your network.
# Line prefixes that mark where the original-message part of a report begins;
# scan() switches to its "message-rfc822" reading mode when a line starts with
# either of them.
my $StartingOf = { 'rfc822' => ['Content-Type: message/rfc822', 'Content-Type: text/rfc822-headers'] };
# Pattern for the human-readable sentence that opens a feedback report (see the
# sample sentences in the comments above). scan() uses it both to start reading
# the report fields and to capture the sentence as the diagnostic message.
my $MarkingsOf = {
'message' => qr{\A(?>
[Tt]his[ ]is[ ].+[ ]email[ ]abuse[ ]report
|[Tt]his[ ]is[ ](?:
an[ ]autogenerated[ ]email[ ]abuse[ ]complaint
|an?[ ].+[ ]report[ ]for
|a[ ].+[ ]authentication[ -]failure[ ]report[ ]for
)
)
}x,
};
# Lookup tables fetched once at load time from sibling classes:
#   $Indicators - flag values combined with "|=" and tested with "&" in scan();
#                 the keys read here are 'deliverystatus' and 'message-rfc822'.
#   $LongFields - tested with "exists" against a lower-cased header field name
#                 to decide whether a folded continuation line is kept.
#   $RFC822Head - tested with "exists" against a lower-cased header field name
#                 to decide whether an original-message header line is kept.
# NOTE(review): Sisimai::RFC5322 is not loaded by a "use" line in this file;
# presumably Sisimai::Bite::Email pulls it in - confirm.
my $Indicators = Sisimai::Bite::Email->INDICATORS;
my $LongFields = Sisimai::RFC5322->LONGFIELDS;
my $RFC822Head = Sisimai::RFC5322->HEADERFIELDS;
sub description {
    # Human-readable name of the report format handled by this class.
    # @return   [String]    Always 'Abuse Feedback Reporting Format'
    my $label = 'Abuse Feedback Reporting Format';
    return $label;
}
sub smtpagent {
    # Default agent name: scan() stores it in the 'agent' slot of an entry when
    # no other agent name was collected for that entry.
    # NOTE(review): 'Feeback-Loop' looks like a misspelling of "Feedback-Loop".
    # It is kept byte-for-byte because the string ends up in the parsed result
    # and consumers may compare against it - confirm before correcting.
    # @return   [String]    Always 'Feeback-Loop'
    return 'Feeback-Loop';
}
sub headerlist {
    # Returns a new, empty array reference on every call.
    # NOTE(review): presumably the list of extra header fields this parser wants
    # from its caller - confirm against the caller.
    # @return   [Array]     Empty array reference
    my @fields = ();
    return \@fields;
}
sub is_arf {
    # Email is a Feedback-Loop message or not
    # @param    [Hash] heads    Email header including "Content-Type", "From",
    #                           and "Subject" field (keys are lower-cased:
    #                           'content-type', 'from', 'subject')
    # @return   [Integer]       1: Feedback Loop
    #                           0: is not Feedback loop, or no header was given
    my $class = shift;
    my $heads = shift || return 0;

    # Header fields that are absent are read as empty strings, so a partial
    # header hash simply fails to match instead of raising "uninitialized
    # value" warnings.
    my $ctype = $heads->{'content-type'} // '';

    # Content-Type: multipart/report; report-type=feedback-report; ...
    return 1 if $ctype =~ /report-type=["]?feedback-report["]?/;

    # Microsoft (Hotmail, MSN, Live, Outlook) uses its own report format.
    # Amazon SES Complaints bounces
    return 0 unless index($ctype, 'multipart/mixed') > -1;

    my $title = 'complaint about message from ';
    my $mfrom = qr{(?:
         staff[@]hotmail[.]com
        |complaints[@]email-abuse[.]amazonses[.]com
        )\z
    }x;
    my $sender  = $heads->{'from'}    // '';
    my $subject = $heads->{'subject'} // '';

    # From: staff@hotmail.com
    # From: complaints@email-abuse.amazonses.com
    # Subject: complaint about message from 192.0.2.1
    return 1 if $sender =~ $mfrom && index($subject, $title) > -1;
    return 0;
}
sub scan {
    # Detect an error for Feedback Loop
    # @param         [Hash] mhead       Message header of a bounce email
    # @options mhead [String] from      From header
    # @options mhead [String] date      Date header
    # @options mhead [String] subject   Subject header
    # @options mhead [Array]  received  Received headers
    # @options mhead [String] others    Other required headers
    # @param         [String] mbody     Message body of a bounce email, passed
    #                                   as a reference (it is read as $$mbody)
    # @return        [Hash, Undef]      { 'ds' => bounce data list,
    #                                     'rfc822' => message/rfc822 part }
    #                                   or Undef if the arguments are missing
    #                                   or the message is not a feedback report
    my $class = shift;
    my $mhead = shift // return undef;
    my $mbody = shift // return undef;

    return undef unless is_arf(undef, $mhead);

    my $dscontents = [Sisimai::Bite::Email->DELIVERYSTATUS];
    my @hasdivided = split("\n", $$mbody);
    my $rfc822part = '';    # (String) message/rfc822-headers part
    my $previousfn = '';    # (String) Previous field name
    my $readcursor = 0;     # (Integer) Points the current cursor position
    my $recipients = 0;     # (Integer) The number of 'Final-Recipient' header
    my $rcptintext = '';    # (String) Recipient address in the message body
    my $commondata = {
        'diagnosis' => '',  # Error message
        'from'      => '',  # Original-Mail-From:
        'rhost'     => '',  # Reporting-MTA:
    };
    my $arfheaders = {
        'feedbacktype' => '',   # Feedback-Type:
        'rhost'        => '',   # Source-IP:
        'agent'        => '',   # User-Agent:
        'date'         => '',   # Arrival-Date:
        'authres'      => '',   # Authentication-Results:
    };
    my $v = undef;

    # 3.1. Required Fields
    #
    #   The following report header fields MUST appear exactly once:
    #
    #   o  "Feedback-Type" contains the type of feedback report (as defined
    #      in the corresponding IANA registry and later in this memo).  This
    #      is intended to let report parsers distinguish among different
    #      types of reports.
    #
    #   o  "User-Agent" indicates the name and version of the software
    #      program that generated the report.  The format of this field MUST
    #      follow section 14.43 of [HTTP].  This field is for documentation
    #      only; there is no registry of user agent names or versions, and
    #      report receivers SHOULD NOT expect user agent names to belong to a
    #      known set.
    #
    #   o  "Version" indicates the version of specification that the report
    #      generator is using to generate the report.  The version number in
    #      this specification is set to "1".
    #
    for my $e ( @hasdivided ) {
        # Read each line between the start of the message and the start of rfc822 part.
        unless( $readcursor ) {
            # Beginning of the bounce message or delivery status part
            if( $e =~ $MarkingsOf->{'message'} ) {
                $readcursor |= $Indicators->{'deliverystatus'};
                next;
            }
        }

        unless( $readcursor & $Indicators->{'message-rfc822'} ) {
            # Beginning of the original message part
            if( index($e, $StartingOf->{'rfc822'}->[0]) == 0 ||
                index($e, $StartingOf->{'rfc822'}->[1]) == 0 ) {
                $readcursor |= $Indicators->{'message-rfc822'};
                next;
            }
        }

        if( $readcursor & $Indicators->{'message-rfc822'} ) {
            # After "message/rfc822"
            if( $e =~ /X-HmXmrOriginalRecipient:[ ]*(.+)\z/ ) {
                # Microsoft ARF: original recipient.
                $dscontents->[-1]->{'recipient'} = Sisimai::Address->s3s4($1);
                $recipients++;

                # The "X-HmXmrOriginalRecipient" header appears only once so
                # we take this opportunity to hard-code ARF headers missing in
                # Microsoft's implementation.
                $arfheaders->{'feedbacktype'} = 'abuse';
                $arfheaders->{'agent'} = 'Microsoft Junk Mail Reporting Program';

            } elsif( $e =~ /\AFrom:[ ]*(.+)\z/ ) {
                # Microsoft ARF: original sender.
                $commondata->{'from'} ||= Sisimai::Address->s3s4($1);

            } elsif( $e =~ /\A([-0-9A-Za-z]+?)[:][ ]*(.+)\z/ ) {
                # Get required headers only
                my $lhs = lc $1;
                my $rhs = $2;

                # Reset first so that a skipped field never lends its name to
                # the continuation lines that follow it.
                $previousfn = '';
                next unless exists $RFC822Head->{ $lhs };

                $previousfn  = $lhs;
                $rfc822part .= $e."\n";
                $rcptintext  = $rhs if $lhs eq 'to';

            } elsif( $e =~ /\A[ \t]+/ ) {
                # Continued line from the previous line
                $rfc822part .= $e."\n" if exists $LongFields->{ $previousfn };

                # Collect the folded part of a "To:" field as well. A line that
                # reaches this branch is never empty, so no length check is
                # needed before appending.
                $rcptintext .= $e if $previousfn eq 'to';
            }
        } else {
            # Before "message/rfc822"
            next unless $readcursor & $Indicators->{'deliverystatus'};
            next unless length $e;

            # Feedback-Type: abuse
            # User-Agent: SomeGenerator/1.0
            # Version: 0.1
            # Original-Mail-From: <somespammer@example.net>
            # Original-Rcpt-To: <kijitora@example.jp>
            # Received-Date: Thu, 29 Apr 2009 00:00:00 JST
            # Source-IP: 192.0.2.1
            $v = $dscontents->[-1];

            if( $e =~ /\AOriginal-Rcpt-To:[ ]+[<]?(.+)[>]?\z/ ||
                $e =~ /\ARedacted-Address:[ ]([^ ].+[@])\z/ ) {
                # Original-Rcpt-To header field is optional and may appear any
                # number of times as appropriate:
                # Original-Rcpt-To: <user@example.com>
                # Redacted-Address: localpart@
                if( $v->{'recipient'} ) {
                    # There are multiple recipient addresses in the message body.
                    push @$dscontents, Sisimai::Bite::Email->DELIVERYSTATUS;
                    $v = $dscontents->[-1];
                }
                $v->{'recipient'} = Sisimai::Address->s3s4($1);
                $recipients++;

            } elsif( $e =~ /\AFeedback-Type:[ ]*([^ ]+)\z/ ) {
                # The header field MUST appear exactly once.
                # Feedback-Type: abuse
                $arfheaders->{'feedbacktype'} = $1;

            } elsif( $e =~ /\AAuthentication-Results:[ ]*(.+)\z/ ) {
                # "Authentication-Results" indicates the result of one or more
                # authentication checks run by the report generator.
                #
                # Authentication-Results: mail.example.com;
                #   spf=fail smtp.mail=somespammer@example.com
                $arfheaders->{'authres'} = $1;

            } elsif( $e =~ /\AUser-Agent:[ ]*(.+)\z/ ) {
                # The header field MUST appear exactly once.
                # User-Agent: SomeGenerator/1.0
                $arfheaders->{'agent'} = $1;

            } elsif( $e =~ /\A(?:Received|Arrival)-Date:[ ]*(.+)\z/ ) {
                # Arrival-Date header is optional and MUST NOT appear more than
                # once.
                # Received-Date: Thu, 29 Apr 2010 00:00:00 JST
                # Arrival-Date: Thu, 29 Apr 2010 00:00:00 +0000
                $arfheaders->{'date'} = $1;

            } elsif( $e =~ /\AReporting-MTA:[ ]*dns;[ ]*(.+)\z/ ) {
                # The header is optional and MUST NOT appear more than once.
                # Reporting-MTA: dns; mx.example.jp
                $commondata->{'rhost'} = $1;

            } elsif( $e =~ /\ASource-IP:[ ]*(.+)\z/ ) {
                # The header is optional and MUST NOT appear more than once.
                # Source-IP: 192.0.2.45
                $arfheaders->{'rhost'} = $1;

            } elsif( $e =~ /\AOriginal-Mail-From:[ ]*(.+)\z/ ) {
                # the header is optional and MUST NOT appear more than once.
                # Original-Mail-From: <somespammer@example.net>
                $commondata->{'from'} ||= Sisimai::Address->s3s4($1);

            } elsif( $e =~ $MarkingsOf->{'message'} ) {
                # This is an email abuse report for an email message with the
                #   message-id of 0000-000000000000000000000000000000000@mx
                #   received from IP address 192.0.2.1 on
                #   Thu, 29 Apr 2010 00:00:00 +0900 (JST)
                $commondata->{'diagnosis'} = $e;
            }
        } # End of if: rfc822
    }

    if( ($arfheaders->{'feedbacktype'} eq 'auth-failure' ) && $arfheaders->{'authres'} ) {
        # Append the value of Authentication-Results header
        $commondata->{'diagnosis'} .= ' '.$arfheaders->{'authres'}
    }

    unless( $recipients ) {
        # Insert pseudo recipient address when there is no valid recipient
        # address in the message.
        $dscontents->[-1]->{'recipient'} = Sisimai::Address->undisclosed('r');
        $recipients = 1;
    }

    unless( $rfc822part =~ /\bFrom: [^ ]+[@][^ ]+\b/ ) {
        # There is no "From:" header in the original message
        if( $commondata->{'from'} ) {
            # Append the value of "Original-Mail-From" value as a sender address.
            $rfc822part .= 'From: '.$commondata->{'from'}."\n";
        }
    }

    # A missing Subject or Date header is read as an empty string here so that
    # it cannot raise an "uninitialized value" warning.
    if( ($mhead->{'subject'} // '') =~ /complaint about message from (\d{1,3}[.]\d{1,3}[.]\d{1,3}[.]\d{1,3})/ ) {
        # Microsoft ARF: remote host address.
        $arfheaders->{'rhost'} = $1;
        $commondata->{'diagnosis'} = sprintf(
            "This is a Microsoft email abuse report for an email message received from IP %s on %s",
            $arfheaders->{'rhost'}, $mhead->{'date'} // '');
    }

    for my $e ( @$dscontents ) {
        if( $e->{'recipient'} =~ /\A[^ ]+[@]\z/ ) {
            # AOL = http://forums.cpanel.net/f43/aol-brutal-work-71473.html
            # The recipient is a bare "localpart@" (redacted address): fall
            # back to the address collected from the original "To:" header.
            $e->{'recipient'} = Sisimai::Address->s3s4($rcptintext);
        }

        # Fill every slot that is still empty with the report-level value.
        for my $f ( keys %$arfheaders ) {
            $e->{ $f } ||= $arfheaders->{ $f };
        }
        delete $e->{'authres'};

        $e->{'softbounce'} = -1;
        $e->{'diagnosis'} ||= $commondata->{'diagnosis'};
        $e->{'date'}      ||= $mhead->{'date'};

        unless( $e->{'rhost'} ) {
            # Get the remote IP address from the message body
            if( $commondata->{'rhost'} ) {
                # The value of "Reporting-MTA" header
                $e->{'rhost'} = $commondata->{'rhost'};

            } elsif( $e->{'diagnosis'} =~ /\breceived from IP address ([^ ]+)/ ) {
                # This is an email abuse report for an email message received
                # from IP address 24.64.1.1 on Thu, 29 Apr 2010 00:00:00 +0000
                $e->{'rhost'} = $1;
            }
        }

        $e->{'reason'}  = 'feedback';
        $e->{'command'} = '';
        $e->{'action'}  = '';
        $e->{'agent'} ||= __PACKAGE__->smtpagent;
    }
    return { 'ds' => $dscontents, 'rfc822' => $rfc822part };
}
1;
__END__
=encoding utf-8

=head1 NAME

Sisimai::ARF - Parser class for detecting ARF: Abuse Feedback Reporting Format.

=head1 SYNOPSIS

Do not use this class directly, use Sisimai::ARF.

    use Sisimai::ARF;
    my $v = Sisimai::ARF->scan($header, $body);

=head1 DESCRIPTION

Sisimai::ARF is a parser for email returned as a Feedback Loop report message.

=head1 FEEDBACK TYPES

=head2 B<abuse>

Unsolicited email or some other kind of email abuse.

=head2 B<fraud>

Indicates some kind of C<fraud> or C<phishing> activity.

=head2 B<other>

Any other feedback that does not fit into other registered types.

=head2 B<virus>

Report of a virus found in the originating message.

=head1 SEE ALSO

L<http://tools.ietf.org/html/rfc5965>

=head1 AUTHOR

azumakuniyuki

=head1 COPYRIGHT

Copyright (C) 2014-2018 azumakuniyuki, All rights reserved.

=head1 LICENSE

This software is distributed under The BSD 2-Clause License.

=cut