move get_part_details() to Util so it can be used by other plugins

git-svn-id: https://svn.apache.org/repos/asf/spamassassin/trunk@1910147 13f79535-47bb-0310-9956-ffa450edef68

commit: 1d79774202a26c50070e1896b5d5f72f15f4ac0f [log] [tgz]
author: Giovanni Bechis <gbechis@apache.org> Wed May 31 13:11:16 2023 +0000
committer: Giovanni Bechis <gbechis@apache.org> Wed May 31 13:11:16 2023 +0000
tree: f3ddf22c4f50d6b66876591de87a9d805e827440
parent: a1afeca64331ad506fdf9dcf130d027b6e7bb068 [diff]
diff --git a/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm b/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm
index f4a8905..09d1eaa 100644
--- a/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm
+++ b/lib/Mail/SpamAssassin/Plugin/OLEVBMacro.pm

@@ -81,7 +81,7 @@
 use warnings;
 
 use Mail::SpamAssassin::Plugin;
-use Mail::SpamAssassin::Util qw(compile_regexp);
+use Mail::SpamAssassin::Util qw(compile_regexp get_part_details);
 
 use constant HAS_ARCHIVE_ZIP => eval { require Archive::Zip; };
 use constant HAS_IO_STRING => eval { require IO::String; };
@@ -550,7 +550,7 @@
   foreach my $part ($pms->{msg}->find_parts(qr/./, 1)) {
     next if $part->{type} =~ /$conf->{olemacro_skip_ctypes}/i;
 
-    my ($ctt, $ctd, $cte, $name) = _get_part_details($pms, $part);
+    my ($ctt, $ctd, $cte, $name) = get_part_details($pms, $part, $conf->{olemacro_prefer_contentdisposition});
     next unless defined $ctt;
     next if $name eq '';
 
@@ -771,53 +771,6 @@
   return 1;
 }
 
-sub _get_part_details {
-    my ($pms, $part) = @_;
-    #https://en.wikipedia.org/wiki/MIME#Content-Disposition
-    #https://github.com/mikel/mail/pull/464
-
-    my $ctt = $part->get_header('content-type');
-    return undef unless defined $ctt; ## no critic (ProhibitExplicitReturnUndef)
-
-    my $cte = lc($part->get_header('content-transfer-encoding') || '');
-    return undef unless ($cte =~ /^(?:base64|quoted\-printable)$/); ## no critic (ProhibitExplicitReturnUndef)
-
-    $ctt = _decode_part_header($part, $ctt || '');
-
-    my $name = '';
-    my $cttname = '';
-    my $ctdname = '';
-
-    if ($ctt =~ m/name\s*=\s*["']?([^"';]*)/is) {
-      $cttname = $1;
-      $cttname =~ s/\s+$//;
-    }
-
-    my $ctd = $part->get_header('content-disposition');
-    $ctd = _decode_part_header($part, $ctd || '');
-
-    if ($ctd =~ m/filename\s*=\s*["']?([^"';]*)/is) {
-      $ctdname = $1;
-      $ctdname =~ s/\s+$//;
-    }
-
-    if (lc $ctdname eq lc $cttname) {
-      $name = $ctdname;
-    } elsif ($ctdname eq '') {
-      $name = $cttname;
-    } elsif ($cttname eq '') {
-      $name = $ctdname;
-    } else {
-      if ($pms->{conf}->{olemacro_prefer_contentdisposition}) {
-        $name = $ctdname;
-      } else {
-        $name = $cttname;
-      }
-    }
-
-    return $ctt, $ctd, $cte, $name;
-}
-
 sub _open_zip_handle {
   my ($data) = @_;
 
@@ -1093,35 +1046,6 @@
   1;
 }
 
-sub _decode_part_header {
-  my($part, $header_field_body) = @_;
-
-  return '' unless defined $header_field_body && $header_field_body ne '';
-
-  # deal with folding and cream the newlines and such
-  $header_field_body =~ s/\n[ \t]+/\n /g;
-  $header_field_body =~ s/\015?\012//gs;
-
-  local($1,$2,$3);
-
-  # Multiple encoded sections must ignore the interim whitespace.
-  # To avoid possible FPs with (\s+(?==\?))?, look for the whole RE
-  # separated by whitespace.
-  1 while $header_field_body =~
-            s{ ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) \s+
-               ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) }
-             {$1$2}xsg;
-
-  # transcode properly encoded RFC 2047 substrings into UTF-8 octets,
-  # leave everything else unchanged as it is supposed to be UTF-8 (RFC 6532)
-  # or plain US-ASCII
-  $header_field_body =~
-    s{ (?: = \? ([A-Za-z0-9_-]+) \? ([bqBQ]) \? ([^?]*) \? = ) }
-     { $part->__decode_header($1, uc($2), $3) }xsge;
-
-  return $header_field_body;
-}
-
 # Version features
 sub has_olemacro_redirect_uri { 1 }
 sub has_olemacro_mhtml_uri { 1 }

diff --git a/lib/Mail/SpamAssassin/Util.pm b/lib/Mail/SpamAssassin/Util.pm
index 064fa02..2414466 100644
--- a/lib/Mail/SpamAssassin/Util.pm
+++ b/lib/Mail/SpamAssassin/Util.pm

@@ -60,7 +60,7 @@
                   &parse_rfc822_date &idn_to_ascii &is_valid_utf_8
                   &get_user_groups &compile_regexp &qr_to_string
                   &is_fqdn_valid &parse_header_addresses &force_die
-                  &domain_to_search_list);
+                  &domain_to_search_list &get_part_details);
 
 our $AM_TAINTED;
 
@@ -2555,6 +2555,82 @@
   return @results;
 }
 
+sub get_part_details {
+    my ($pms, $part, $prefer_contentdisposition) = @_;
+    #https://en.wikipedia.org/wiki/MIME#Content-Disposition
+    #https://github.com/mikel/mail/pull/464
+
+    my $ctt = $part->get_header('content-type');
+    return undef unless defined $ctt; ## no critic (ProhibitExplicitReturnUndef)
+
+    my $cte = lc($part->get_header('content-transfer-encoding') || '');
+    return undef unless ($cte =~ /^(?:base64|quoted\-printable)$/); ## no critic (ProhibitExplicitReturnUndef)
+
+    $ctt = _decode_part_header($part, $ctt || '');
+
+    my $name = '';
+    my $cttname = '';
+    my $ctdname = '';
+
+    if ($ctt =~ m/name\s*=\s*["']?([^"';]*)/is) {
+      $cttname = $1;
+      $cttname =~ s/\s+$//;
+    }
+
+    my $ctd = $part->get_header('content-disposition');
+    $ctd = _decode_part_header($part, $ctd || '');
+
+    if ($ctd =~ m/filename\s*=\s*["']?([^"';]*)/is) {
+      $ctdname = $1;
+      $ctdname =~ s/\s+$//;
+    }
+
+    if (lc $ctdname eq lc $cttname) {
+      $name = $ctdname;
+    } elsif ($ctdname eq '') {
+      $name = $cttname;
+    } elsif ($cttname eq '') {
+      $name = $ctdname;
+    } else {
+      if ((defined $ctdname) and $prefer_contentdisposition) {
+        $name = $ctdname;
+      } else {
+        $name = $cttname;
+      }
+    }
+
+    return $ctt, $ctd, $cte, $name;
+}
+
+sub _decode_part_header {
+  my($part, $header_field_body) = @_;
+
+  return '' unless defined $header_field_body && $header_field_body ne '';
+
+  # deal with folding and cream the newlines and such
+  $header_field_body =~ s/\n[ \t]+/\n /g;
+  $header_field_body =~ s/\015?\012//gs;
+
+  local($1,$2,$3);
+
+  # Multiple encoded sections must ignore the interim whitespace.
+  # To avoid possible FPs with (\s+(?==\?))?, look for the whole RE
+  # separated by whitespace.
+  1 while $header_field_body =~
+            s{ ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) \s+
+               ( = \? [A-Za-z0-9_-]+ \? [bqBQ] \? [^?]* \? = ) }
+             {$1$2}xsg;
+
+  # transcode properly encoded RFC 2047 substrings into UTF-8 octets,
+  # leave everything else unchanged as it is supposed to be UTF-8 (RFC 6532)
+  # or plain US-ASCII
+  $header_field_body =~
+    s{ (?: = \? ([A-Za-z0-9_-]+) \? ([bqBQ]) \? ([^?]*) \? = ) }
+     { $part->__decode_header($1, uc($2), $3) }xsge;
+
+  return $header_field_body;
+}
+
 # Check some basic parsing mistakes
 sub _valid_parsed_address {
   return 0 if !defined $_[0];
commit	1d79774202a26c50070e1896b5d5f72f15f4ac0f	[log] [tgz]
author	Giovanni Bechis <gbechis@apache.org>	Wed May 31 13:11:16 2023 +0000
committer	Giovanni Bechis <gbechis@apache.org>	Wed May 31 13:11:16 2023 +0000
tree	f3ddf22c4f50d6b66876591de87a9d805e827440
parent	a1afeca64331ad506fdf9dcf130d027b6e7bb068 [diff]