move get_part_details() to Util so it can be used by other plugins
git-svn-id: https://svn.apache.org/repos/asf/spamassassin/trunk@1910147 13f79535-47bb-0310-9956-ffa450edef68
diff --git a/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm b/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm
index f4a8905..09d1eaa 100644
--- a/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm
+++ b/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm
@@ -81,7 +81,7 @@
use warnings;
use Mail::SpamAssassin::Plugin;
-use Mail::SpamAssassin::Util qw(compile_regexp);
+use Mail::SpamAssassin::Util qw(compile_regexp get_part_details);
use constant HAS_ARCHIVE_ZIP => eval { require Archive::Zip; };
use constant HAS_IO_STRING => eval { require IO::String; };
@@ -550,7 +550,7 @@
foreach my $part ($pms->{msg}->find_parts(qr/./, 1)) {
next if $part->{type} =~ /$conf->{olemacro_skip_ctypes}/i;
- my ($ctt, $ctd, $cte, $name) = _get_part_details($pms, $part);
+ my ($ctt, $ctd, $cte, $name) = get_part_details($pms, $part, $conf->{olemacro_prefer_contentdisposition});
next unless defined $ctt;
next if $name eq '';
@@ -771,53 +771,6 @@
return 1;
}
-sub _get_part_details {
- my ($pms, $part) = @_;
- #https://en.wikipedia.org/wiki/MIME#Content-Disposition
- #https://github.com/mikel/mail/pull/464
-
- my $ctt = $part->get_header('content-type');
- return undef unless defined $ctt; ## no critic (ProhibitExplicitReturnUndef)
-
- my $cte = lc($part->get_header('content-transfer-encoding') || '');
- return undef unless ($cte =~ /^(?:base64|quoted\-printable)$/); ## no critic (ProhibitExplicitReturnUndef)
-
- $ctt = _decode_part_header($part, $ctt || '');
-
- my $name = '';
- my $cttname = '';
- my $ctdname = '';
-
- if ($ctt =~ m/name\s*=\s*["']?([^"';]*)/is) {
- $cttname = $1;
- $cttname =~ s/\s+$//;
- }
-
- my $ctd = $part->get_header('content-disposition');
- $ctd = _decode_part_header($part, $ctd || '');
-
- if ($ctd =~ m/filename\s*=\s*["']?([^"';]*)/is) {
- $ctdname = $1;
- $ctdname =~ s/\s+$//;
- }
-
- if (lc $ctdname eq lc $cttname) {
- $name = $ctdname;
- } elsif ($ctdname eq '') {
- $name = $cttname;
- } elsif ($cttname eq '') {
- $name = $ctdname;
- } else {
- if ($pms->{conf}->{olemacro_prefer_contentdisposition}) {
- $name = $ctdname;
- } else {
- $name = $cttname;
- }
- }
-
- return $ctt, $ctd, $cte, $name;
-}
-
sub _open_zip_handle {
my ($data) = @_;
@@ -1093,35 +1046,6 @@
1;
}
-sub _decode_part_header {
- my($part, $header_field_body) = @_;
-
- return '' unless defined $header_field_body && $header_field_body ne '';
-
- # deal with folding and cream the newlines and such
- $header_field_body =~ s/\n[ \t]+/\n /g;
- $header_field_body =~ s/\015?\012//gs;
-
- local($1,$2,$3);
-
- # Multiple encoded sections must ignore the interim whitespace.
- # To avoid possible FPs with (\s+(?==\?))?, look for the whole RE
- # separated by whitespace.
- 1 while $header_field_body =~
- s{ ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) \s+
- ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) }
- {$1$2}xsg;
-
- # transcode properly encoded RFC 2047 substrings into UTF-8 octets,
- # leave everything else unchanged as it is supposed to be UTF-8 (RFC 6532)
- # or plain US-ASCII
- $header_field_body =~
- s{ (?: = \? ([A-Za-z0-9_-]+) \? ([bqBQ]) \? ([^?]*) \? = ) }
- { $part->__decode_header($1, uc($2), $3) }xsge;
-
- return $header_field_body;
-}
-
# Version features
sub has_olemacro_redirect_uri { 1 }
sub has_olemacro_mhtml_uri { 1 }
diff --git a/lib/Mail/SpamAssassin/Util.pm b/lib/Mail/SpamAssassin/Util.pm
index 064fa02..2414466 100644
--- a/lib/Mail/SpamAssassin/Util.pm
+++ b/lib/Mail/SpamAssassin/Util.pm
@@ -60,7 +60,7 @@
&parse_rfc822_date &idn_to_ascii &is_valid_utf_8
&get_user_groups &compile_regexp &qr_to_string
&is_fqdn_valid &parse_header_addresses &force_die
- &domain_to_search_list);
+ &domain_to_search_list &get_part_details);
our $AM_TAINTED;
@@ -2555,6 +2555,82 @@
return @results;
}
+# Extract MIME details of $part: returns ($ctt, $ctd, $cte, $name), i.e. the
+# decoded Content-Type and Content-Disposition header bodies, the lower-cased
+# Content-Transfer-Encoding and the attachment file name.  Returns undef when
+# the part has no Content-Type or is neither base64 nor quoted-printable.
+# When the two headers give different names, $prefer_contentdisposition
+# selects the Content-Disposition one.  $pms is accepted but not used here.
+# https://en.wikipedia.org/wiki/MIME#Content-Disposition
+# https://github.com/mikel/mail/pull/464
+sub get_part_details {
+  my ($pms, $part, $prefer_contentdisposition) = @_;
+
+  my $ctt = $part->get_header('content-type');
+  return undef unless defined $ctt; ## no critic (ProhibitExplicitReturnUndef)
+
+  my $cte = lc($part->get_header('content-transfer-encoding') || '');
+  return undef unless ($cte =~ /^(?:base64|quoted\-printable)$/); ## no critic (ProhibitExplicitReturnUndef)
+
+  $ctt = _decode_part_header($part, $ctt || '');
+  my $ctd = _decode_part_header($part, $part->get_header('content-disposition') || '');
+
+  # Both names default to '' so the comparisons below never see undef.
+  my ($cttname, $ctdname) = ('', '');
+  if ($ctt =~ m/name\s*=\s*["']?([^"';]*)/is) {
+    $cttname = $1;
+    $cttname =~ s/\s+$//;
+  }
+  if ($ctd =~ m/filename\s*=\s*["']?([^"';]*)/is) {
+    $ctdname = $1;
+    $ctdname =~ s/\s+$//;
+  }
+
+  my $name;
+  if (lc $ctdname eq lc $cttname) {
+    $name = $ctdname;          # same name (ignoring case) in both headers
+  } elsif ($ctdname eq '') {
+    $name = $cttname;          # only Content-Type names the file
+  } elsif ($cttname eq '') {
+    $name = $ctdname;          # only Content-Disposition names the file
+  } else {
+    # The headers disagree; $ctdname is always defined at this point, so
+    # the caller's preference alone decides which name wins.
+    $name = $prefer_contentdisposition ? $ctdname : $cttname;
+  }
+
+  return $ctt, $ctd, $cte, $name;
+}
+
+# Unfold a raw header field body and decode its RFC 2047 encoded-words via
+# $part->__decode_header(); an undefined or empty body yields ''.
+sub _decode_part_header {
+  my ($part, $body) = @_;
+  return '' if !defined $body || $body eq '';
+
+  # normalise folded continuation lines, then drop every line ending
+  $body =~ s/\n[ \t]+/\n /g;
+  $body =~ s/\015?\012//gs;
+
+  local($1,$2,$3);
+
+  # Whitespace between two adjacent encoded-words is not significant, so
+  # remove it.  Both words are matched in full rather than with a lookahead
+  # such as (\s+(?==\?))? to avoid possible false positives.
+  1 while $body =~
+    s{ ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) \s+
+       ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) }
+     {$1$2}xsg;
+
+  # Replace each well-formed encoded-word with whatever __decode_header()
+  # returns for its charset, upper-cased encoding letter and payload; any
+  # other text is left untouched.
+  $body =~
+    s{ (?: = \? ([A-Za-z0-9_-]+) \? ([bqBQ]) \? ([^?]*) \? = ) }
+     { $part->__decode_header($1, uc($2), $3) }xsge;
+  return $body;
+}
+
# Check some basic parsing mistakes
sub _valid_parsed_address {
return 0 if !defined $_[0];