Files
scripts/Perl OTRS/Kernel/System/EmailParser.pm
2024-10-14 00:08:40 +02:00

1119 lines
32 KiB
Perl

# --
# Copyright (C) 2001-2019 OTRS AG, https://otrs.com/
# --
# This software comes with ABSOLUTELY NO WARRANTY. For details, see
# the enclosed file COPYING for license information (GPL). If you
# did not receive this file, see https://www.gnu.org/licenses/gpl-3.0.txt.
# --
package Kernel::System::EmailParser;
use strict;
use warnings;
use Mail::Internet;
use MIME::Parser;
use MIME::QuotedPrint;
use MIME::Base64;
use MIME::Words qw(:all);
use Mail::Address;
our $ObjectManagerDisabled = 1;
=head1 NAME
Kernel::System::EmailParser - parse and encode an email
=head1 DESCRIPTION
A module to parse and encode an email.
=head1 PUBLIC INTERFACE
=head2 new()
create an object. Do not use it directly, instead use:
use Kernel::System::EmailParser;
# as string (takes more memory!)
my $ParserObject = Kernel::System::EmailParser->new(
Email => $EmailString,
Debug => 0,
);
# as stand alone mode, without parsing emails
my $ParserObject = Kernel::System::EmailParser->new(
Mode => 'Standalone',
Debug => 0,
);
=cut
sub new {
my ( $Type, %Param ) = @_;
# allocate new hash for object
my $Self = {};
bless( $Self, $Type );
# get debug level from parent
$Self->{Debug} = $Param{Debug} || 0;
if ( $Param{Mode} && $Param{Mode} eq 'Standalone' ) {
return $Self;
}
# check needed objects
if ( !$Param{Email} && !$Param{Entity} ) {
die "Need Email or Entity!";
}
# if email is given
if ( $Param{Email} ) {
# check if Email is an array ref
if ( ref $Param{Email} eq 'SCALAR' ) {
my @Content = split /\n/, ${ $Param{Email} };
for my $Line (@Content) {
$Line .= "\n";
}
$Param{Email} = \@Content;
}
# check if Email is an array ref
if ( ref $Param{Email} eq '' ) {
my @Content = split /\n/, $Param{Email};
for my $Line (@Content) {
$Line .= "\n";
}
$Param{Email} = \@Content;
}
$Self->{OriginalEmail} = join( '', @{ $Param{Email} } );
# create Mail::Internet object
$Self->{Email} = Mail::Internet->new( $Param{Email} );
# create a Mail::Header object with email
$Self->{HeaderObject} = $Self->{Email}->head();
# create MIME::Parser object and get message body or body of first attachment
my $Parser = MIME::Parser->new();
$Parser->output_to_core('ALL');
# Keep nested messages as attachments (see bug#1970).
$Parser->extract_nested_messages(0);
$Self->{ParserParts} = $Parser->parse_data( $Self->{Email}->as_string() );
}
else {
$Self->{ParserParts} = $Param{Entity};
$Self->{HeaderObject} = $Param{Entity}->head();
$Self->{EntityMode} = 1;
}
# get NoHTMLChecks param
if ( $Param{NoHTMLChecks} ) {
$Self->{NoHTMLChecks} = $Param{NoHTMLChecks};
}
# parse email at first
$Self->GetMessageBody();
return $Self;
}
=head2 GetPlainEmail()
To get a email as a string back (plain email).
my $Email = $ParserObject->GetPlainEmail();
=cut
sub GetPlainEmail {
my $Self = shift;
return $Self->{OriginalEmail} || $Self->{Email}->as_string();
}
=head2 GetParam()
To get a header (e. g. Subject, To, ContentType, ...) of an email
(mime is already done!).
my $To = $ParserObject->GetParam( WHAT => 'To' );
=cut
sub GetParam {
my ( $Self, %Param ) = @_;
my $What = $Param{WHAT} || return;
if ( !$Self->{HeaderObject} ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'error',
Message => 'HeaderObject is needed!',
);
return;
}
$Self->{HeaderObject}->unfold();
$Self->{HeaderObject}->combine($What);
my $Line = $Self->{HeaderObject}->get($What) || '';
chomp($Line);
my $ReturnLine;
# We need to split address lists before decoding; see "6.2. Display of 'encoded-word's"
# in RFC 2047. Mail::Address routines will quote stuff if necessary (i.e. comma
# or semicolon found in phrase).
if ( $What =~ /^(From|To|Cc)/ ) {
for my $Address ( Mail::Address->parse($Line) ) {
$Address->phrase( $Self->_DecodeString( String => $Address->phrase() ) );
$Address->address( $Self->_DecodeString( String => $Address->address() ) );
$Address->comment( $Self->_DecodeString( String => $Address->comment() ) );
$ReturnLine .= ', ' if $ReturnLine;
$ReturnLine .= $Address->format();
}
}
else {
$ReturnLine = $Self->_DecodeString( String => $Line );
}
$ReturnLine //= '';
# debug
if ( $Self->{Debug} > 1 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message => "Get: $What; ReturnLine: $ReturnLine; OrigLine: $Line",
);
}
return $ReturnLine;
}
=head2 GetEmailAddress()
To get the senders email address back.
my $SenderEmail = $ParserObject->GetEmailAddress(
Email => 'Juergen Weber <juergen.qeber@air.com>',
);
=cut
sub GetEmailAddress {
my ( $Self, %Param ) = @_;
my $Email = '';
for my $EmailSplit ( $Self->_MailAddressParse( Email => $Param{Email} ) ) {
$Email = $EmailSplit->address();
}
# return if no email address is there
return if $Email !~ /@/;
# return email address
return $Email;
}
=head2 GetRealname()
to get the sender's C<RealName>.
my $Realname = $ParserObject->GetRealname(
Email => 'Juergen Weber <juergen.qeber@air.com>',
);
=cut
sub GetRealname {
my ( $Self, %Param ) = @_;
my $Realname = '';
# find "NamePart, NamePart" <some@example.com> (get not recognized by Mail::Address)
if ( $Param{Email} =~ /"(.+?)"\s+?\<.+?@.+?\..+?\>/ ) {
$Realname = $1;
# removes unnecessary blank spaces, if the string has quotes.
# This is because of bug 6059
$Realname =~ s/"\s+?(.+?)\s+?"/"$1"/g;
return $Realname;
}
# fallback of Mail::Address
for my $EmailSplit ( $Self->_MailAddressParse( Email => $Param{Email} ) ) {
$Realname = $EmailSplit->phrase();
}
return $Realname;
}
=head2 SplitAddressLine()
To get an array of email addresses of an To, Cc or Bcc line back.
my @Addresses = $ParserObject->SplitAddressLine(
Line => 'Juergen Weber <juergen.qeber@air.com>, me@example.com, hans@example.com (Hans Huber)',
);
This returns an array with ('Juergen Weber <juergen.qeber@air.com>', 'me@example.com', 'hans@example.com (Hans Huber)').
=cut
sub SplitAddressLine {
my ( $Self, %Param ) = @_;
my @GetParam;
for my $Line ( $Self->_MailAddressParse( Email => $Param{Line} ) ) {
push @GetParam, $Line->format();
}
return @GetParam;
}
=head2 GetContentType()
Returns the message body (or from the first attachment) "ContentType" header.
my $ContentType = $ParserObject->GetContentType();
(e. g. 'text/plain; charset="iso-8859-1"')
=cut
sub GetContentType {
my $Self = shift;
return $Self->{ContentType} if $Self->{ContentType};
return $Self->GetParam( WHAT => 'Content-Type' ) || 'text/plain';
}
=head2 GetContentDisposition()
Returns the message body (or from the first attachment) "ContentDisposition" header.
my $ContentDisposition = $ParserObject->GetContentDisposition();
(e. g. 'Content-Disposition: attachment; filename="test-123"')
=cut
sub GetContentDisposition {
my $Self = shift;
return $Self->{ContentDisposition} if $Self->{ContentDisposition};
return $Self->GetParam( WHAT => 'Content-Disposition' );
}
=head2 GetCharset()
Returns the message body (or from the first attachment) "charset".
my $Charset = $ParserObject->GetCharset();
(e. g. iso-8859-1, utf-8, ...)
=cut
sub GetCharset {
my $Self = shift;
# return charset of already defined
if ( defined $Self->{Charset} ) {
# debug
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message => "Got charset from mime body: $Self->{Charset}",
);
}
return $Self->{Charset};
}
if ( !$Self->{HeaderObject} ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'error',
Message => 'HeaderObject is needed!',
);
return;
}
# find charset
$Self->{HeaderObject}->unfold();
my $Line = $Self->{HeaderObject}->get('Content-Type') || '';
chomp $Line;
my %Data = $Self->GetContentTypeParams( ContentType => $Line );
# check content type (only do charset decode if no Content-Type or ContentType
# with text/* exists) if it's not a text content type (e. g. pdf, png, ...),
# return no charset
if ( $Data{ContentType} && $Data{ContentType} !~ /text/i ) {
# debug
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message =>
"Got no charset from email body because of ContentType ($Data{ContentType})!",
);
}
# remember charset
$Self->{Charset} = '';
# return charset
return '';
}
# return charset if it can be detected
if ( $Data{Charset} ) {
# debug
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message => "Got charset from email body: $Data{Charset}",
);
}
# remember charset
$Self->{Charset} = $Data{Charset};
# return charset
return $Data{Charset};
}
# if there is no available header for charset and content type, use
# iso-8859-1 as charset
# debug
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message => 'Got no charset from email body! Take iso-8859-1!',
);
}
# remember charset
$Self->{Charset} = 'ISO-8859-1';
# return charset
return 'ISO-8859-1';
}
=head2 GetReturnContentType()
Returns the new message body (or from the first attachment) "ContentType" header
(maybe the message is converted to utf-8).
my $ContentType = $ParserObject->GetReturnContentType();
(e. g. 'text/plain; charset="utf-8"')
=cut
sub GetReturnContentType {
my $Self = shift;
my $ContentType = $Self->GetContentType();
$ContentType =~ s/(charset=)(.*)/$1utf-8/ig;
# debug
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message => "Changed ContentType from '"
. $Self->GetContentType()
. "' to '$ContentType'.",
);
}
return $ContentType;
}
=head2 GetReturnCharset()
Returns the charset of the new message body "Charset"
(maybe the message is converted to utf-8).
my $Charset = $ParserObject->GetReturnCharset();
(e. g. 'text/plain; charset="utf-8"')
=cut
sub GetReturnCharset {
my $Self = shift;
return 'utf-8';
}
=head2 GetMessageBody()
Returns the message body (or from the first attachment) from the email.
my $Body = $ParserObject->GetMessageBody();
=cut
sub GetMessageBody {
my ( $Self, %Param ) = @_;
# check if message body is already there
return $Self->{MessageBody} if defined $Self->{MessageBody};
# get encode object
my $EncodeObject = $Kernel::OM->Get('Kernel::System::Encode');
if ( !$Self->{EntityMode} && $Self->{ParserParts}->parts() == 0 ) {
$Self->{MimeEmail} = 0;
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message => 'It\'s a plain (not mime) email!',
);
}
my $BodyStrg = join( '', @{ $Self->{Email}->body() } );
# quoted printable!
if ( $Self->GetParam( WHAT => 'Content-Transfer-Encoding' ) =~ /quoted-printable/i ) {
$BodyStrg = MIME::QuotedPrint::decode($BodyStrg);
}
# base64 decode
elsif ( $Self->GetParam( WHAT => 'Content-Transfer-Encoding' ) =~ /base64/i ) {
$BodyStrg = decode_base64($BodyStrg);
}
# charset decode
if ( $Self->GetCharset() ) {
$Self->{MessageBody} = $EncodeObject->Convert2CharsetInternal(
Text => $BodyStrg,
From => $Self->GetCharset(),
Check => 1,
);
}
else {
$Self->{MessageBody} = $BodyStrg;
}
# check if the mail contains only HTML (store it as attachment and add text/plain)
$Self->CheckMessageBody();
# return message body
return $Self->{MessageBody};
}
else {
$Self->{MimeEmail} = 1;
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message => 'It\'s a mime email!',
);
}
# Check if there is a valid attachment there, if yes, return
# the first attachment (normally text/plain) as message body.
# For multipart/mixed emails, PartsAttachments() will concatenate subsequent
# body MIME parts into just one attachment.
my @Attachments = $Self->GetAttachments();
if ( @Attachments > 0 ) {
$Self->{Charset} = $Attachments[0]->{Charset};
$Self->{ContentType} = $Attachments[0]->{ContentType};
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message => "First attachment ContentType: $Self->{ContentType}",
);
}
# check if charset is given, set iso-8859-1 if content is text
if ( !$Self->{Charset} && $Self->{ContentType} =~ /\btext\b/ ) {
$Self->{Charset} = 'iso-8859-1';
}
# check if charset exists
if ( $Self->GetCharset() ) {
$Self->{MessageBody} = $EncodeObject->Convert2CharsetInternal(
Text => $Attachments[0]->{Content},
From => $Self->GetCharset(),
Check => 1,
);
}
else {
$Self->{Charset} = 'us-ascii';
$Self->{ContentType} = 'text/plain';
$Self->{MessageBody} = '- no text message => see attachment -';
}
# check if it's a html-only email (store it as attachment and add text/plain)
$Self->CheckMessageBody();
# return message body
return $Self->{MessageBody};
}
else {
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message =>
'No attachments returned from GetAttachments(), just an empty attachment!?',
);
}
# return empty attachment
$Self->{Charset} = 'iso-8859-1';
$Self->{ContentType} = 'text/plain';
return '-';
}
}
return;
}
=head2 GetAttachments()
Returns an array of the email attachments.
my @Attachments = $ParserObject->GetAttachments();
for my $Attachment (@Attachments) {
print $Attachment->{Filename};
print $Attachment->{Charset};
print $Attachment->{MimeType};
print $Attachment->{ContentType};
print $Attachment->{Content};
# optional
print $Attachment->{ContentID};
print $Attachment->{ContentAlternative};
print $Attachment->{ContentMixed};
}
=cut
sub GetAttachments {
my ( $Self, %Param ) = @_;
# return if it's no mime email
return if !$Self->{MimeEmail};
# return if it is already parsed
return @{ $Self->{Attachments} } if $Self->{Attachments};
# parse email
$Self->PartsAttachments( Part => $Self->{ParserParts} );
# return if no attachments are found
return if !$Self->{Attachments};
# return attachments
return @{ $Self->{Attachments} };
}
# just for internal
sub PartsAttachments {
my ( $Self, %Param ) = @_;
my $Part = $Param{Part} || $Self->{ParserParts};
my $PartCounter = $Param{PartCounter} || 0;
my $SubPartCounter = $Param{SubPartCounter} || 0;
my $ContentAlternative = $Param{ContentAlternative} || '';
my $ContentMixed = $Param{ContentMixed} || '';
$Self->{PartCounter}++;
if ( $Part->parts() > 0 ) {
# check if it's an alternative part
$Part->head()->unfold();
$Part->head()->combine('Content-Type');
my $ContentType = $Part->head()->get('Content-Type');
if ( $ContentType && $ContentType =~ /multipart\/alternative;/i ) {
$ContentAlternative = 1;
}
if ( $ContentType && $ContentType =~ /multipart\/mixed;/i ) {
$ContentMixed = 1;
}
$PartCounter++;
for my $Part ( $Part->parts() ) {
$SubPartCounter++;
if ( $Self->{Debug} > 0 ) {
print STDERR "Sub part($PartCounter/$SubPartCounter)!\n";
}
$Self->PartsAttachments(
Part => $Part,
PartCounter => $PartCounter,
ContentAlternative => $ContentAlternative,
ContentMixed => $ContentMixed,
);
}
return 1;
}
# get attachment meta stuff
my %PartData;
if ($ContentAlternative) {
$PartData{ContentAlternative} = $ContentAlternative;
}
# get ContentType
$Part->head()->unfold();
$Part->head()->combine('Content-Type');
# get Content-Type, use text/plain if no content type is given
$PartData{ContentType} = $Part->head()->get('Content-Type') || 'text/plain;';
chomp $PartData{ContentType};
# Fix for broken content type headers, see bug#7913 or DuplicatedContentTypeHeader.t.
$PartData{ContentType} =~ s{\r?\n}{}smxg;
# get mime type
$PartData{MimeType} = $Part->head()->mime_type();
# get charset
my %Data = $Self->GetContentTypeParams( ContentType => $PartData{ContentType} );
if ( $Data{Charset} ) {
$PartData{Charset} = $Data{Charset};
}
else {
$PartData{Charset} = '';
}
# get content (if possible)
if ( $Part->bodyhandle() ) {
$PartData{Content} = $Part->bodyhandle()->as_string();
if ( !$PartData{Content} ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'notice',
Message => "Empty attachment part ($PartCounter)",
);
}
}
# log error if there is an corrupt MIME email
else {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'notice',
Message =>
"Was not able to parse corrupt MIME email! Skipped attachment ($PartCounter)",
);
return;
}
# check if there is no recommended_filename or subject -> add file-NoFilenamePartCounter
if ( $Part->head()->recommended_filename() ) {
$PartData{Filename} = $Self->_DecodeString(
String => $Part->head()->recommended_filename(),
Encode => 'utf-8',
);
# cleanup filename
$PartData{Filename} = $Kernel::OM->Get('Kernel::System::Main')->FilenameCleanUp(
Filename => $PartData{Filename},
Type => 'Local',
);
$PartData{ContentDisposition} = $Part->head()->get('Content-Disposition');
if ( $PartData{ContentDisposition} ) {
my %Data = $Self->GetContentTypeParams(
ContentType => $PartData{ContentDisposition},
);
if ( $Data{Charset} ) {
$PartData{Charset} = $Data{Charset};
}
}
else {
$PartData{Charset} = '';
}
# check if reserved filename file-1 or file-2 is already used
COUNT:
for my $Count ( 1 .. 2 ) {
if ( $PartData{Filename} eq "file-$Count" ) {
$PartData{Filename} = "File-$Count";
last COUNT;
}
}
}
# Guess the filename for nested messages (see bug#1970).
elsif ( $PartData{ContentType} eq 'message/rfc822' ) {
my ($SubjectString) = $Part->as_string() =~ m/^Subject: ([^\n]*(\n[ \t][^\n]*)*)/m;
my $Subject = $Self->_DecodeString( String => $SubjectString );
# cleanup filename
$Subject = $Kernel::OM->Get('Kernel::System::Main')->FilenameCleanUp(
Filename => $Subject,
Type => 'Local',
);
if ( $Subject eq '' ) {
$Self->{NoFilenamePartCounter}++;
$Subject = "Unbenannt-$Self->{NoFilenamePartCounter}";
}
$PartData{Filename} = $Subject . '.eml';
}
else {
$Self->{NoFilenamePartCounter}++;
$PartData{Filename} = "file-$Self->{NoFilenamePartCounter}";
}
# parse/get Content-Id, Content-Location and Disposition for html email attachments
$PartData{ContentID} = $Part->head()->get('Content-Id');
$PartData{ContentLocation} = $Part->head()->get('Content-Location');
$PartData{Disposition} = $Part->head()->get('Content-Disposition');
if ( $PartData{ContentID} ) {
chomp $PartData{ContentID};
}
elsif ( $PartData{ContentLocation} ) {
chomp $PartData{ContentLocation};
$PartData{ContentID} = $PartData{ContentLocation};
}
if ( $PartData{Disposition} ) {
chomp $PartData{Disposition};
$PartData{Disposition} = lc $PartData{Disposition};
}
# get attachment size
$PartData{Filesize} = bytes::length( $PartData{Content} );
# debug
if ( $Self->{Debug} > 0 ) {
print STDERR
"->GotArticle::Atm: '$PartData{Filename}' '$PartData{ContentType}' ($PartData{Filesize})\n";
}
# For multipart/mixed emails, we check for all text/plain or text/html MIME parts which are
# body elements, and concatenate them into the first relevant attachment, to stay in line
# with OTRS file-1 and file-2 attachment handling.
#
# HTML parts will just be concatenated, so that the attachment has two complete HTML documents
# inside. Browsers tolerate this.
#
# The first found body part determines the content type to be used. So if it is text/plain, subsequent
# text/html body parts will be converted to plain text, and vice versa. In case of multipart/alternative,
# a text/plain and a text/html body attachment can coexist.
if (
$ContentMixed
&& ( !$PartData{Disposition} || $PartData{Disposition} eq 'inline' )
&& ( $PartData{ContentType} =~ /text\/(?:html|plain)/i )
)
{
# Is it a plain or HTML body?
my $MimeType = $PartData{ContentType} =~ /text\/html/i ? 'text/html' : 'text/plain';
my $TargetMimeType = $MimeType;
my $BodyAttachmentKey = "MultipartMixedBodyAttachment$MimeType";
if ( !$Self->{FirstBodyAttachmentKey} ) {
# Remember the first found attachment.
$Self->{FirstBodyAttachmentKey} = $BodyAttachmentKey;
$Self->{FirstBodyAttachmentMimeType} = $MimeType;
}
elsif ( !$ContentAlternative ) {
# For multipart/alternative, we allow both text/plain and text/html. Otherwise, concatenate
# all subsequent elements to the first found body element.
$BodyAttachmentKey = $Self->{FirstBodyAttachmentKey};
$TargetMimeType = $Self->{FirstBodyAttachmentMimeType};
}
# For concatenating multipart/mixed text parts, we have to convert all of them to utf-8 to be sure that
# the contents fit together and that all characters can be displayed.
$PartData{Content} = $Kernel::OM->Get('Kernel::System::Encode')->Convert2CharsetInternal(
Text => $PartData{Content},
From => $PartData{Charset},
Check => 1,
);
$PartData{ContentType} = "$MimeType; charset=utf-8";
my $OldCharset = $PartData{Charset};
$PartData{Charset} = "utf-8";
# Also replace charset in meta tags of HTML emails.
if ( $MimeType eq 'text/html' ) {
$PartData{Content} =~ s/(<meta[^>]+charset=("|'|))\Q$OldCharset\E/$1utf-8/gi;
}
$PartData{Filesize} = bytes::length( $PartData{Content} );
# Is it a subsequent body element? Then concatenate it to the first one and skip it as attachment.
if ( $Self->{$BodyAttachmentKey} ) {
# This concatenation only works if all parts have the utf-8 flag on (from Convert2CharsetInternal).
if ( $MimeType ne $TargetMimeType ) {
my $HTMLUtilsObject = $Kernel::OM->Get('Kernel::System::HTMLUtils');
if ( $TargetMimeType eq 'text/html' ) {
my $HTMLContent = $HTMLUtilsObject->ToHTML(
String => $PartData{Content},
);
$PartData{Content} = $HTMLUtilsObject->DocumentComplete(
String => $HTMLContent,
Charset => 'utf-8',
);
}
else {
$PartData{Content} = $HTMLUtilsObject->ToAscii(
String => $PartData{Content},
);
}
$PartData{Filesize} = bytes::length( $PartData{Content} );
}
$Self->{$BodyAttachmentKey}->{Content} .= $PartData{Content};
$Self->{$BodyAttachmentKey}->{Filesize} += $PartData{Filesize};
# Don't create an attachment for this part, as it was concatenated to the first body element.
return 1;
}
# Remember the first found body element for possible later concatenation.
$Self->{$BodyAttachmentKey} = \%PartData;
}
push @{ $Self->{Attachments} }, \%PartData;
return 1;
}
=head2 GetReferences()
To get an array of reference ids of the parsed email
my @References = $ParserObject->GetReferences();
This returns an array with ('fasfda@host.de', '4124.2313.1231@host.com').
=cut
sub GetReferences {
my ( $Self, %Param ) = @_;
# get references ids
my @ReferencesAll;
my $ReferencesString = $Self->GetParam( WHAT => 'References' );
if ($ReferencesString) {
push @ReferencesAll, ( $ReferencesString =~ /<([^>]+)>/g );
}
# get in reply to id
my $InReplyToString = $Self->GetParam( WHAT => 'In-Reply-To' );
if ($InReplyToString) {
chomp $InReplyToString;
$InReplyToString =~ s/.*?<([^>]+)>.*/$1/;
push @ReferencesAll, $InReplyToString;
}
# get uniq
my %Checked;
my @References;
for ( reverse @ReferencesAll ) {
if ( !$Checked{$_} ) {
push @References, $_;
}
$Checked{$_} = 1;
}
return @References;
}
# just for internal
sub GetContentTypeParams {
my ( $Self, %Param ) = @_;
my $ContentType = $Param{ContentType} || return;
if ( $Param{ContentType} =~ /charset\s*=.+?/i ) {
$Param{Charset} = $Param{ContentType};
$Param{Charset} =~ s/.*?charset\s*=\s*(.*?)/$1/i;
$Param{Charset} =~ s/"|'//g;
$Param{Charset} =~ s/(.+?)(;|\s).*/$1/g;
}
if ( !$Param{Charset} ) {
if (
$Param{ContentType}
=~ /\?(iso-\d{3,4}-(\d{1,2}|[A-z]{1,2})|utf(-8|8)|windows-\d{3,5}|koi8-.+?|cp(-|)\d{2,4}|big5(|.+?)|shift(_|-)jis|euc-.+?|tcvn|visii|vps|gb.+?)\?/i
)
{
$Param{Charset} = $1;
}
elsif ( $Param{ContentType} =~ /name\*0\*=(utf-8|utf8)/i ) {
$Param{Charset} = $1;
}
elsif (
$Param{ContentType}
=~ /filename\*=(iso-\d{3,4}-(\d{1,2}|[A-z]{1,2})|utf(-8|8)|windows-\d{3,5}|koi8-.+?|cp(-|)\d{2,4}|big5(|.+?)|shift(_|-)jis|euc-.+?|tcvn|visii|vps|gb.+?)''/i
)
{
$Param{Charset} = $1;
}
}
if ( $Param{ContentType} =~ /Content-Type:\s{0,1}(.+?\/.+?)(;|'|"|\s)/i ) {
$Param{MimeType} = $1;
$Param{MimeType} =~ s/"|'//g;
}
return %Param;
}
# just for internal
sub CheckMessageBody {
my ( $Self, %Param ) = @_;
# if already checked, just return
return if $Self->{MessageChecked};
# return if no auto convert from html2text is needed
return if !$Kernel::OM->Get('Kernel::Config')->Get('PostmasterAutoHTML2Text');
# return if no auto convert from html2text is needed
return if $Self->{NoHTMLChecks};
# check if it's just a html email (store it as attachment and add text/plain)
if ( $Self->GetReturnContentType() =~ /text\/html/i ) {
$Self->{MessageChecked} = 1;
# add html email as attachment (if needed)
if ( !$Self->{MimeEmail} ) {
push(
@{ $Self->{Attachments} },
{
Charset => $Self->GetCharset(),
ContentType => $Self->GetReturnContentType(),
Content => $Self->{MessageBody},
Filename => 'file-1',
}
);
}
# add .html suffix to filename if not aleady there
else {
if ( $Self->{Attachments}->[0]->{Filename} ) {
if ( $Self->{Attachments}->[0]->{Filename} !~ /\.(htm|html)/i ) {
$Self->{Attachments}->[0]->{Filename} .= '.html';
}
}
}
# remember to be a mime email now
$Self->{MimeEmail} = 1;
# convert from html to ascii
$Self->{MessageBody} = $Kernel::OM->Get('Kernel::System::HTMLUtils')->ToAscii(
String => $Self->{MessageBody},
);
$Self->{ContentType} = 'text/plain';
if ( $Self->{Debug} > 0 ) {
$Kernel::OM->Get('Kernel::System::Log')->Log(
Priority => 'debug',
Message =>
'It\'s an html only email, added ascii dump, attached html email as attachment.',
);
}
}
return;
}
=begin Internal:
=head2 _DecodeString()
Decode all encoded substrings.
my $Result = $Self->_DecodeString(
String => 'some text',
);
=cut
sub _DecodeString {
my ( $Self, %Param ) = @_;
# get encode object
my $EncodeObject = $Kernel::OM->Get('Kernel::System::Encode');
my $DecodedString;
my $BufferedString;
my $PrevEncoding;
$BufferedString = '';
# call MIME::Words::decode_mimewords()
for my $Entry ( decode_mimewords( $Param{String} ) ) {
if (
$BufferedString ne ''
&& ( !$PrevEncoding || !$Entry->[1] || lc($PrevEncoding) ne lc( $Entry->[1] ) )
)
{
my $Encoding = $EncodeObject->FindAsciiSupersetEncoding(
Encodings => [ $PrevEncoding, $Param{Encode}, $Self->GetCharset() ],
);
$DecodedString .= $EncodeObject->Convert2CharsetInternal(
Text => $BufferedString,
From => $Encoding,
Check => 1,
);
$BufferedString = '';
}
$BufferedString .= $Entry->[0];
$PrevEncoding = $Entry->[1];
}
if ( $BufferedString ne '' ) {
my $Encoding = $EncodeObject->FindAsciiSupersetEncoding(
Encodings => [ $PrevEncoding, $Param{Encode}, $Self->GetCharset() ],
);
$DecodedString .= $EncodeObject->Convert2CharsetInternal(
Text => $BufferedString,
From => $Encoding,
Check => 1,
);
}
return $DecodedString;
}
=head2 _MailAddressParse()
my @Chunks = $ParserObject->_MailAddressParse(Email => $Email);
Wrapper for C<Mail::Address->parse($Email)>, but cache it, since it's
not too fast, and often called.
=cut
sub _MailAddressParse {
my ( $Self, %Param ) = @_;
my $Email = $Param{Email};
my $Cache = $Self->{EmailCache};
if ( $Self->{EmailCache}->{$Email} ) {
return @{ $Self->{EmailCache}->{$Email} };
}
my @Chunks = Mail::Address->parse($Email);
$Self->{EmailCache}->{$Email} = \@Chunks;
return @Chunks;
}
=end Internal:
=head1 TERMS AND CONDITIONS
This software is part of the OTRS project (L<https://otrs.org/>).
This software comes with ABSOLUTELY NO WARRANTY. For details, see
the enclosed file COPYING for license information (GPL). If you
did not receive this file, see L<https://www.gnu.org/licenses/gpl-3.0.txt>.
=cut
1;