| #!/usr/bin/perl -w -T |
| |
| # test URIs as grabbed from text/plain messages |
| |
BEGIN {
  # Started from the directory above t/: step into t/ so the relative
  # paths used below resolve the same way in both cases.
  chdir 't' if -e 't/test_dir';

  # With test_dir visible in the current directory, put the built modules
  # under ../blib/lib at the front of the module search path.
  unshift @INC, '../blib/lib' if -e 'test_dir';
}
| |
# Path prefix used to build file names below: '..' when test_dir exists in
# the current directory (i.e. we are inside the test directory), '.' otherwise.
my $prefix = (-e 'test_dir') ? '..' : '.';
| |
| use strict; |
| use lib '.'; use lib 't'; |
| use SATest; sa_t_init("uri_text"); |
| use Test::More tests => 168; |
| use Mail::SpamAssassin; |
| use vars qw(%patterns %anti_patterns); |
| |
# Build the SpamAssassin object used by this test.  The rule and site-rule
# locations are derived from $prefix.
my %sa_args = (
  require_rules       => 0,
  site_rules_filename => "$prefix/t/log/localrules.tmp",
  rules_filename      => "$prefix/rules",
  local_tests_only    => 1,
  dont_copy_prefs     => 1,
);
my $sa = create_saobj(\%sa_args);
$sa->init(0); # parse rules

# Start from empty expectation tables; write_mail() fills them while it
# assembles the message text.
%patterns      = ();
%anti_patterns = ();
my $message = write_mail();

# Parse the message and wrap it in a per-message status object.
my $mail = $sa->parse($message);
my $msg  = Mail::SpamAssassin::PerMsgStatus->new($sa, $mail);

# One URI per line, with a trailing newline after the last one, so that the
# ^ and $ anchors in the expectations can be applied per URI under /m.
my @uri_list = $msg->get_uri_list();
my $uris = join("\n", @uri_list, "");
| |
# run patterns and anti-patterns
#
# Every key of %patterns must match somewhere in the newline-joined URI list
# ($uris) and every key of %anti_patterns must not.  Keys are visited in
# sorted order so that test numbers stay the same from run to run (the order
# returned by a plain keys() is not stable), and each check carries a
# description naming the pattern it exercised.
my $failures = 0;
for my $pattern (sort keys %patterns) {
  if (!ok($uris =~ /${pattern}/m, "found /$pattern/")) {
    diag("failure: did not find /$pattern/");
    $failures++;
  }
}

for my $anti_pattern (sort keys %anti_patterns) {
  if (!ok($uris !~ /${anti_pattern}/m, "did not find /$anti_pattern/")) {
    diag("failure: did find /$anti_pattern/");
    $failures++;
  }
}

# On any failure, dump what was actually extracted.  diag() is used instead
# of print so the dump does not end up in the TAP stream on STDOUT.
if ($failures) {
  diag("URIs found:\n$uris");
}
| |
# function to write test email
#
# Builds the text of the test message and records the expectations for it.
#
# The fixed header block below is followed by one body line per usable entry
# read from the DATA handle.  Each entry has the form
#   <string><tabs><matches>
# Lines starting with '#' and blank lines are skipped; a line without a tab
# separator is reported with warn() and otherwise ignored.
#
# Side effects: each whitespace-separated item of <matches> is stored as a
# key of %anti_patterns when it starts with '!' (the '!' is removed), or as
# a key of %patterns otherwise.
#
# Returns the complete message (headers, empty line, body) as one string.
sub write_mail {
  my $message = <<'EOF';
Message-ID: <clean.1010101@example.com>
Date: Mon, 07 Oct 2002 09:00:00 +0000
From: Sender <sender@example.com>
MIME-Version: 1.0
To: Recipient <recipient@example.com>
Subject: this is a trivial message
Content-Type: text/plain
Content-Transfer-Encoding: 7bit

EOF

  # A lexical line variable is used so the caller's $_ is left untouched.
  while (defined(my $line = <DATA>)) {
    chomp $line;
    next if $line =~ /^#/;
    next if $line =~ /^\s*$/;

    if ($line =~ /^(.*?)\t+(.*?)\s*$/) {
      my $string = $1;
      my @patterns = split(' ', $2);

      # length() rather than plain truth, so a body string of "0" is kept;
      # entries with an empty string or without any pattern are still skipped.
      if (length($string) && @patterns) {
        # magic, to avoid ^[ chars in source
        $string =~ s/\{ESC\}/\x1b/g;
        $message .= "$string\n";

        for my $pattern (@patterns) {
          if ($pattern =~ /^\!(.*)/) {
            $anti_patterns{$1} = 1;
          }
          else {
            $patterns{$pattern} = 1;
          }
        }
      }
    }
    else {
      warn "unparseable line: $line";
    }
  }

  return $message;
}
| |
| # <line> : <string><tabs><matches> |
| # <string> : string in the body |
| # <tabs> : one or more tabs |
# <matches> : patterns expected to be found in URI output; a pattern preceded
#             by ! is an antipattern (it must not be found); patterns are
#             separated by whitespace
| __DATA__ |
| www5.poh6feib.com poh6feib |
| vau6yaer.com vau6yaer |
| www5.poh6feib.info poh6feib |
| Haegh3de.co.uk Haegh3de |
| |
| ftp.yeinaix3.co.uk ftp://ftp\.yeinaix3\.co\.uk !http://ftp\.yeinaix3\.co\.uk |
| ftp5.riexai5r.co.uk http://ftp5\.riexai5r\.co\.uk !ftp://ftp5\.riexai5r\.co\.uk |
| |
| 10.1.1.1 !10\.1\.1\.1 |
| 10.1.2.1/ !10\.1\.2\.1 |
| http://10.1.3.1/ 10\.1\.3\.1 |
| |
| quau0wig.quau0wig !quau0wig |
| foo.Cahl1goo.php !Cahl1goo |
| www5.mi1coozu.php !mi1coozu |
| www.mezeel0P.php !mezeel0P |
| bar.neih6fee.com.php !neih6fee |
| www.zai6Vuwi.com.invalid !zai6Vuwi |
| |
| =www.deiJ1pha.com www\.deiJ1pha\.com |
| @www.Te0xohxu.com www\.Te0xohxu\.com |
| .www.kuiH5sai.com www\.kuiH5sai\.com |
| |
| a=www.zaiNgoo7.com www\.zaiNgoo7\.com |
| b@www.vohWais0.com mailto:b\@www\.vohWais0\.com !http://www\.vohWais0\.com |
| c.www.moSaoga8.com www\.moSaoga8\.com |
| |
| xyz..geifoza0.com !geifoza0 |
| xyz.geifoza1.com/..xyz xyz\.geifoza1\.com/\.\.xyz |
| xyz.geifoza2.CoM xyz\.geifoza2\.CoM |
| http://xyz..geifoza3.com !geifoza3 |
| http://xyz.geifoza4.com/..xyz xyz\.geifoza4\.com/\.\.xyz |
| http://xyz.geifoza5.CoM xyz\.geifoza5\.CoM |
| |
| joe@koja3fui.koja3fui !koja3fui |
| |
| <xuq@dsj.x.thriyi.com> mailto:xuq\@dsj\.x\.thriyi\.com !http\S*thriyi |
| |
| http://www.example.com/about/wahfah7d.html wahfah7d |
| http://www.example.com?xa1kaLuo \?xa1kaLuo |
| http://www.example.com#xa1kaLup \#xa1kaLup |
| http://www.lap7thob.com/ ^http://www\.lap7thob\.com/$ |
| |
| www.phoh1Koh.com/ ^http://www\.phoh1Koh\.com/$ |
| www.Tar4caeg.com:80 ^http://www\.Tar4caeg\.com:80 |
| www.Coo4mowe.com:80/foo/foo.html ^http://www\.Coo4mowe\.com:80/foo/foo\.html |
| www.Nee2quae.com:80/ ^http://www\.Nee2quae\.com:80/$ |
| www.foo@Qii3mafs.com:80 ^http://www\.foo\@Qii3mafs\.com:80$ |
| www.foo:bar@Qii3maft.com:80 ^http://www\.foo:bar\@Qii3maft\.com:80$ |
| |
| HAETEI3D.com HAETEI3D |
| CUK3VEIZ.us CUK3VEIZ |
| CHAI7SAI.biz CHAI7SAI |
| VU4YAPHU.info VU4YAPHU |
| NAUVE1PH.net NAUVE1PH |
| LEIX6QUU.org LEIX6QUU |
| LOT1GOHV.ws LOT1GOHV |
| LI4JAIZI.name LI4JAIZI |
| BA1LOOXU.tv BA1LOOXU |
| yiez7too.CC yiez7too |
| huwaroo1.DE huwaroo1 |
| chohza7t.JP chohza7t |
| the7zuum.BE the7zuum |
| sai6bahg.AT sai6bahg |
| leow3del.UK leow3del |
| ba5keinu.NZ ba5keinu |
| chae2shi.CN chae2shi |
| roo7kiey.TW roo7kiey |
| |
| www.Chiew0ch.COM ^http://www\.Chiew0ch\.COM |
| www.thohY2qu.US ^http://www\.thohY2qu\.US |
| www.teiP7gei.BIZ ^http://www\.teiP7gei\.BIZ |
| www.xohThai8.INFO ^http://www\.xohThai8\.INFO |
| www.haik7Ram.NET ^http://www\.haik7Ram\.NET |
| www.Quaes3se.ORG ^http://www\.Quaes3se\.ORG |
| www.Chai6tah.WS ^http://www\.Chai6tah\.WS |
| www.Thuoth1y.NAME ^http://www\.Thuoth1y\.NAME |
| www.Chieb8ge.TV ^http://www\.Chieb8ge\.TV |
| WWW.quus4Rok.cc ^http://WWW\.quus4Rok\.cc |
| WWW.maic6Hei.de ^http://WWW\.maic6Hei\.de |
| WWW.he4Hiize.jp ^http://WWW\.he4Hiize\.jp |
| WWW.Soh1toob.be ^http://WWW\.Soh1toob\.be |
| WWW.chahMee5.at ^http://WWW\.chahMee5\.at |
| WWW.peepooN0.uk ^http://WWW\.peepooN0\.uk |
| WWW.Kiox3phi.nz ^http://WWW\.Kiox3phi\.nz |
| WWW.jong3Xou.cn ^http://WWW\.jong3Xou\.cn |
| WWW.waeShoe0.tw ^http://WWW\.waeShoe0\.tw |
| |
| invalid_ltd.notword !invalid_tld |
| invalid_ltd.invalid !invalid_tld |
| invalid_ltd.xyzzy !invalid_tld |
| invalid_ltd.co.zz !invalid_tld |
| |
| www.invalid_ltd.notword !invalid_tld |
| www.invalid_ltd.invalid !invalid_tld |
| www.invalid_ltd.xyzzy !invalid_tld |
| www.invalid_ltd.co.zz !invalid_tld |
| |
| # underscores allowed, but not at 1st-2nd level |
| uctest.zyb2n2ef.c_om !zyb2n2ef |
| uctest.zyb2_n2ef.com !zyb2_n2ef |
| uc_test.u8uwe8qu.com ^http://uc_test\.u8uwe8qu\.com |
| |
| # invalid hostnames with - |
| http://-sdfisiz2e.com !sdfisiz2e |
| ESRYnSeM7s-.com !ESRYnSeM7s |
| foo-.CgPcASgHNa.com !CgPcASgHNa |
| |
| # valid hostnames with - |
| www.eZxdy-TWA4z.com ^http://www\.eZxdy-TWA4z\.com |
| www-3.WV7jujA10G.com ^http://www-3\.WV7jujA10G\.com |
| |
| command.com command\.com |
| cmd.exe !cmd\.exe |
| |
| commander !commander |
| aaacomaaa !aaacomaaa |
| com.foo.web !com\.foo\.web |
| |
| # IPs for www.yahoo.com |
| 66.94.230.32 !66\.94\.230\.32 |
| http://66.94.230.33 ^http://66\.94\.230\.33 |
| http://1113515555 ^http://66\.94\.230\.35 |
| |
| gooboo4k@xieyohy0.com ^mailto:gooboo4k\@xieyohy0\.com |
| mailto:baeb1fai@quo6puyo.com ^mailto:baeb1fai\@quo6puyo\.com |
| |
| http://www.luzoop5k.com ^http://www\.luzoop5k\.com |
| https://www.luzoop5k.com ^https://www\.luzoop5k\.com |
| ftp://www.luzoop5k.com ^ftp://www\.luzoop5k\.com |
| |
| Mailto:aaeb1fai@quo6puyo.com ^Mailto:aaeb1fai\@quo6puyo\.com |
| Http://www.auzoop5k.com ^Http://www\.auzoop5k\.com |
| Https://www.auzoop5k.com ^Https://www\.auzoop5k\.com |
| Ftp://www.auzoop5k.com ^Ftp://www\.auzoop5k\.com |
| |
| mailto:www.luzoop5k.com !^mailto:www\.luzoop5k\.com |
| # no longer accept file: scheme |
| file://www.luzoop5k.com !^file://www\.luzoop5k\.com |
| |
| # //<user>:<password>@<host>:<port>/<url-path> |
| http://user:pass@jiefeet4.com:80/x/y ^http://user:pass\@jiefeet4\.com:80/x/y |
| |
| www.liy8quei:80 www\.liy8quei\.com |
| www.veibi6cu:443 !veibi6cu |
| www.puahi9si.com:80 puahi9si\.com:80 |
| www.puahi9si2.com:80 puahi9si2\.com$ |
| www.chop9tan.com:443 chop9tan\.com:443 |
| |
| ftp://name@su5queib.ca//etc/motd ^ftp://name\@su5queib\.ca//etc/motd |
| ftp://name@faikaj4t.dom/%2Fetc/motd !^ftp://name\@faikaj4t\.dom//etc/motd |
| ftp://name@faikaj4t.com/%2Fetc/motd ^ftp://name\@faikaj4t\.com//etc/motd |
| |
| keyword:sportscar !sportscar |
| |
| # questionable tests |
| mailto://cah3neun@thaihe4d.com ^mailto://cah3neun\@thaihe4d\.com |
| |
| mailto://jicu8vah@another@jicu8vah !jicu8vah\@another\@jicu8vah |
| baeb1fai@@example.com !baeb1fai\@\@example\.com |
| mailto://yie6xuna !yie6xuna |
| mailto://yie6xuna@nottld !yie6xuna\@nottld |
| |
| <sentto-4934-foo=addr.com@verper.com> !^http://.*addr\.com\@verper\.com |
| <sentto-4934-foo=addr.com@verper.com> ^mailto:sentto-4934-foo=addr\.com\@verper\.com |
| |
| http://foo23498.com/{ESC}(B ^http://foo23498\.com/$ |
| {ESC}(Bhttp://foo23499.com/ ^http://foo23499\.com/$ |
| http://foo23500.com{ESC}(B/ ^http://foo23500\.com(?:/?)$ |
| |
| M0"-AE/9Y.KN:_0D2F:95^H*:I,8 !9Y\.KN |
| >delimtest1.com ^http://delimtest1\.com |
| <delimtest2.com ^http://delimtest2\.com |
| "delimtest3.com ^http://delimtest3\.com |
| \delimtest4.com ^http://delimtest4\.com |
| 'delimtest5.com ^http://delimtest5\.com |
| `delimtest6.com ^http://delimtest6\.com |
| ,delimtest7.com ^http://delimtest7\.com |
| {delimtest8.com ^http://delimtest8\.com |
| [delimtest9.com ^http://delimtest9\.com |
| (delimtest10.com ^http://delimtest10\.com |
| |delimtest11.com ^http://delimtest11\.com |
| delimtest12.com ^http://delimtest12\.com |
| ignorethishttp://delimtest13.org ^http://delimtest13\.org |
| donotignorethiswww.delimtest14.com donotignorethiswww\.delimtest14\.com |
| <www.delimtest15.com/foo-~!@#^&*()_+=:;'?,.xyz-~!@#^&*()_+=:;'?,.> ^http://www\.delimtest15\.com/foo-~!\@#\^&\*\(\)_\+=:;'\?,\.xyz$ |
| .....www.delimtest16.com.......... ^http://www\.delimtest16\.com$ |
| -----www.delimtest17.com---------- ^http://www\.delimtest17\.com$ |
| .....http://www.delimtest18.com.......... ^http://www\.delimtest18\.com$ |
| -----http://www.delimtest19.com---------- ^http://www\.delimtest19\.com$ |
| |
| # emails with a comma at the end |
| test@delimtest20.com,stuff stuff delimtest20\.com |
| |
| # check some TLDs, no point testing all here |
| # the inactive TLDs have negative checks |
| |
| # first confirm that it will not match on not a TLD |
| example.invalid !^http://example\.invalid$ |
| example.zzf !^http://example\.zzf$ |
| |
| example.ac ^http://example\.ac$ |
| example.eu ^http://example\.eu$ |
| example.fi ^http://example\.fi$ |
| example.tp !^http://example\.tp$ |
| example.travel ^http://example\.travel$ |
| example.um !^http://example\.um$ |
| example.us ^http://example\.us$ |
| |
| # with www. prefix tests a different table of TLDs |
| |
| www.example.foo ^http://www\.example\.foo$ |
| www.example.zzf !^http://www\.example\.zzf$ |
| |
| www.example.ac ^http://www\.example\.ac$ |
| www.example.an !^http://www\.example\.an$ |
| www.example.ao ^http://www\.example\.ao$ |
| www.example.arpa ^http://www\.example\.arpa$ |
| www.example.ci ^http://www\.example\.ci$ |
| www.example.edu ^http://www\.example\.edu$ |
| www.example.tp !^http://www\.example\.tp$ |
| www.example.ws ^http://www\.example\.ws$ |
| www.example.yu !^http://www\.example\.yu$ |
| www.example.za ^http://www\.example\.za$ |