blob: dfae8b05772cb3dd55a2df68e8ffb01097e3b33b [file] [log] [blame]
# The default URL filter.
# Better suited to whole-internet crawling.
# Each non-comment, non-blank line contains a regular expression
# prefixed by '+' (include the URL) or '-' (ignore the URL). The first
# matching pattern in the file determines whether a URL is included or
# ignored, so the order of the rules matters. If no pattern matches,
# the URL is ignored.
# Skip file:, ftp: and mailto: URLs.
-(file|ftp|mailto):.*
# Skip image and other file suffixes we can't yet parse.
# NOTE(review): only gif, jpg, ico and mov are listed in both lower and
# upper case; every other suffix appears in lower case only -- confirm
# whether that asymmetry is intentional.
# NOTE(review): the pattern has no trailing '$' or '.*', so whether it only
# rejects URLs that END in one of these suffixes depends on whether the
# filter engine matches the whole URL or searches within it -- confirm.
-.*\.(gif|GIF|jpg|JPG|ico|ICO|css|sit|eps|wmf|zip|ppt|mpg|xls|gz|rpm|tgz|mov|MOV|exe)
# Skip URLs containing any of the characters ? * ! @ = anywhere in the URL,
# as these are probably queries, etc.
-.*[?*!@=].*
# Accept anything else: a URL that none of the preceding '-' rules matched
# is included here rather than being ignored by default.
+.*