blob: 369896878f4938a42f79aa47a56be2dbe34bc9b7 [file] [log] [blame]
<?xml version="1.0"?>
<!-- This is the configuration file for the RegexUrlNormalize Class.
This is intended so that users can specify substitutions to be
done on URLs. The regex engine that is used is Perl5 compatible.
The rules are applied to URLs in the order they occur in this file. -->
<!-- WATCH OUT: an xml parser reads this file an ampersands must be
expanded to &amp; -->
<!--
The following rules show how to reduce urls so that
urls from the same domain are identical. This is useful
e.g. when calculating host counts, or splitting fetchlists.
-->
<regex-normalize>
  <!--
    Reduce a URL to its scheme, the last two dot-separated labels of its
    host, and a single trailing slash. Examples:
      http://www.example.com/a/b.html  becomes  http://example.com/
      http://www.example.com           becomes  http://example.com/

    Capture groups:
      $1  the scheme including "://" (3 to 5 lowercase letters)
      $2  any leading host labels (matched lazily, then discarded)
      $3  the last two host labels

    The host must be followed either by a "/" path or by the end of the URL.
    Inside a character class "$" is a literal dollar sign rather than an
    anchor, so the end-of-URL case is written as an optional path group
    followed by a real "$" anchor.

    Host labels are matched with \w only, so URLs whose host contains "-"
    or is followed by a ":port" suffix do not match and are left unchanged.
  -->
  <regex>
    <pattern>(^[a-z]{3,5}://)([\w]+\.)*?(\w+\.\w+)(?:/.*)?$</pattern>
    <substitution>$1$3/</substitution>
  </regex>
</regex-normalize>