NUTCH-2858 urlnormalizer-protocol: URL port is lost during normalization
- add note in config file that URLs including a port are left
unchanged
diff --git a/conf/protocols.txt.template b/conf/protocols.txt.template
index 14d48ff..ad71c9c 100644
--- a/conf/protocols.txt.template
+++ b/conf/protocols.txt.template
@@ -4,4 +4,6 @@
# protocol. Useful in cases where a host accepts both http and https, doubling
# the site's size.
#
+# Note: if the URL includes a port number, the protocol is left unchanged.
+#
# format: <host>\t<protocol>\n