| <?xml version="1.0"?> |
| <!-- This is the configuration file for the RegexUrlNormalize class. |
| It lets users specify substitutions to be applied to URLs. |
| The regex engine that is used is Perl5 compatible. |
| The rules are applied to URLs in the order they occur in this file. --> |
| |
| <!-- WATCH OUT: an XML parser reads this file, so ampersands must be |
| written as &amp; --> |
| |
| <!-- |
| The following rule shows how to reduce URLs so that |
| URLs from the same domain become identical. This is useful, |
| e.g., when calculating host counts or splitting fetchlists. |
| --> |
| <regex-normalize> |
| <!-- Rule: rewrite a URL to its scheme plus the last two dot-separated |
| labels of the host, followed by a single slash. |
| $1 = scheme and "://" (3 to 5 lowercase letters), |
| $2 = leading host labels, matched lazily and discarded, |
| $3 = the final two labels (word characters only). |
| The tail accepts either a path starting with "/" or the end of the |
| URL, so a bare host with no trailing slash is normalized as well. |
| A "$" placed inside a bracketed character class is a literal dollar |
| sign, not an anchor, which is why the anchor sits outside any class. |
| NOTE(review): hosts containing a port or a hyphen do not match and are |
| left untouched; confirm that this is the desired behavior. --> |
| <regex> |
| <pattern>(^[a-z]{3,5}://)(\w+\.)*?(\w+\.\w+)(?:/.*)?$</pattern> |
| <substitution>$1$3/</substitution> |
| </regex> |
| </regex-normalize> |
| |