Sign in
apache
/
nutch
/
ac9c435db2c9b1317fc195a762fa84d4e79fd97c
/
.
/
src
/
plugin
/
urlfilter-regex
/
sample
/
IntranetCrawling.urls
blob: b1ad9b7d38324bde999f33b6e038bee354e8b2af [
file
] [
log
] [
blame
]
-
file
:
//home/jc/nutch/index.html
-
ftp
:
//ftp.apache.org/nutch.html
-
mailto
:
jerome
.
charron@gmail
.
com
-
news
:
//any.news.server/comp.lang.java
-
whois
:/
nutch
.
org
+
http
:
//MY.DOMAIN.NAME/
+
http
:
//MY.DOMAIN.NAME/nutch
+
http
:
//www.MY.DOMAIN.NAME/