Merge pull request #567 from sebastian-nagel/NUTCH-2847-http-date-format-new-api

NUTCH-2847 HttpDateFormat: Simplify based on new Java 8 DateTime API
diff --git a/.gitignore b/.gitignore
index 249ca77..0612a99 100644
--- a/.gitignore
+++ b/.gitignore
@@ -11,6 +11,7 @@
 ivy/ivy-2.4.0.jar
 ivy/ivy-2.5.0-rc1.jar
 ivy/ivy-2.5.0.jar
+ivy/spotbugs-*/
 naivebayes-model
 .naivebayes-model.crc
 .gitconfig
@@ -24,3 +25,5 @@
 *.iml
 *.swp
 csvindexwriter
+lib/spotbugs-*
+ivy/dependency-check-ant/*
diff --git a/build.xml b/build.xml
index 68a0f44..57ec4fa 100644
--- a/build.xml
+++ b/build.xml
@@ -37,12 +37,14 @@
   <property name="maven-javadoc-jar" value="${release.dir}/${artifactId}-${version}-javadoc.jar" />
   <property name="maven-sources-jar" value="${release.dir}/${artifactId}-${version}-sources.jar" />
 
+  <property name="dependency-check-ant.version" value="6.1.0" />
+  <property name="dependency-check-ant.home" value="${ivy.dir}/dependency-check-ant" />
+  <property name="dependency-check-ant.jar" value="${dependency-check-ant.home}/dependency-check-ant.jar" />
+
   <property environment="env"/>
 
-  <property name="dependency-check.home" value="${ivy.dir}/dependency-check-ant/"/>
-
-  <property name="spotbugs.version" value="4.1.1" />
-  <property name="spotbugs.home" value="${basedir}/lib/spotbugs-${spotbugs.version}" />
+  <property name="spotbugs.version" value="4.2.0" />
+  <property name="spotbugs.home" value="${ivy.dir}/spotbugs-${spotbugs.version}" />
   <property name="spotbugs.jar" value="${spotbugs.home}/lib/spotbugs-ant.jar" />
 
   <property name="apache-rat.version" value="0.13" />
@@ -241,6 +243,7 @@
       <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
       <packageset dir="${plugins.dir}/scoring-orphan/src/java"/>
       <packageset dir="${plugins.dir}/scoring-similarity/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-metadata/src/java"/>
       <packageset dir="${plugins.dir}/subcollection/src/java"/>
       <packageset dir="${plugins.dir}/tld/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
@@ -646,24 +649,38 @@
   </target>
 
   <!-- Check dependencies for security vulnerabilities                                    -->
-  <!-- requires installation of OWASP dependency check tool, see                          -->
-  <!--   https://jeremylong.github.io/DependencyCheck/dependency-check-ant/index.html     -->
-  <!-- get http://dl.bintray.com/jeremy-long/owasp/dependency-check-ant-3.3.2-release.zip -->
-  <!-- and unzip in directory ./ivy/                                                      -->
-  <path id="dependency-check.path">
-    <pathelement location="${dependency-check.home}/dependency-check-ant.jar"/>
-    <fileset dir="${dependency-check.home}/lib" erroronmissingdir="false">
+  <target name="dependency-check-ant-download" description="--> download dependency-check-ant jar">
+    <available file="${dependency-check-ant.jar}" property="dependency-check-ant.jar.found"/>
+    <antcall target="dependency-check-ant-download-unchecked"/>
+  </target>
+
+  <target name="dependency-check-ant-download-unchecked" unless="dependency-check-ant.jar.found"
+          description="--> downloads the dependency-check-ant binary (dependency-check-ant-*.zip).">
+    <get src="https://github.com/jeremylong/DependencyCheck/releases/download/v${dependency-check-ant.version}/dependency-check-ant-${dependency-check-ant.version}-release.zip"
+         dest="${ivy.dir}/dependency-check-ant-${dependency-check-ant.version}-release.zip" usetimestamp="false" />
+
+    <unzip src="${ivy.dir}/dependency-check-ant-${dependency-check-ant.version}-release.zip"
+           dest="${ivy.dir}">
+    </unzip>
+
+    <delete file="${ivy.dir}/dependency-check-ant-${dependency-check-ant.version}-release.zip" />
+  </target>
+
+  <path id="dependency-check-ant.path">
+    <pathelement location="${dependency-check-ant.home}/dependency-check-ant.jar"/>
+    <fileset dir="${dependency-check-ant.home}/lib">
       <include name="*.jar"/>
     </fileset>
   </path>
-  <taskdef resource="dependency-check-taskdefs.properties" onerror="ignore">
-    <classpath refid="dependency-check.path" />
-  </taskdef>
-  <target name="report-vulnerabilities" description="--> check dependencies for security vulnerabilities">
+
+  <target name="report-vulnerabilities" depends="jar, compile-plugins, dependency-check-ant-download" description="--> check dependencies for security vulnerabilities">
+    <taskdef resource="dependency-check-taskdefs.properties">
+      <classpath refid="dependency-check-ant.path" />
+    </taskdef>
     <dependency-check projectname="${name}"
-                      reportoutputdirectory="${build.dir}"
+                      reportoutputdirectory="${dependency-check-ant.home}"
                       reportformat="ALL">
-        <suppressionfile path="${dependency-check.home}/dependency-check-suppressions.xml" />
+        <suppressionfile path="${dependency-check-ant.home}/dependency-check-suppressions.xml" />
         <retirejsFilter regex="copyright.*jeremy long" />
         <fileset dir="${build.dir}">
           <include name="lib/*.jar"/>
@@ -754,6 +771,7 @@
       <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
       <packageset dir="${plugins.dir}/scoring-orphan/src/java"/>
       <packageset dir="${plugins.dir}/scoring-similarity/src/java"/>
+      <packageset dir="${plugins.dir}/scoring-metadata/src/java"/>
       <packageset dir="${plugins.dir}/subcollection/src/java"/>
       <packageset dir="${plugins.dir}/tld/src/java"/>
       <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
@@ -1066,20 +1084,19 @@
   <target name="spotbugs-download-unchecked" unless="spotbugs.jar.found"
           description="--> downloads the spotbugs binary (spotbugs-*.tgz).">
     <get src="https://github.com/spotbugs/spotbugs/releases/download/${spotbugs.version}/spotbugs-${spotbugs.version}.tgz "
-         dest="${basedir}/lib/spotbugs-${spotbugs.version}.tgz" usetimestamp="false" />
+         dest="${ivy.dir}/spotbugs-${spotbugs.version}.tgz" usetimestamp="false" />
 
-    <untar src="${basedir}/lib/spotbugs-${spotbugs.version}.tgz"
-           dest="${basedir}/lib/" compression="gzip">
+    <untar src="${ivy.dir}/spotbugs-${spotbugs.version}.tgz"
+           dest="${ivy.dir}" compression="gzip">
     </untar>
 
-    <delete file="${basedir}/lib/spotbugs-${spotbugs.version}.tgz" />
+    <delete file="${ivy.dir}/spotbugs-${spotbugs.version}.tgz" />
   </target>
 
-  <taskdef
-    resource="edu/umd/cs/findbugs/anttask/tasks.properties"
-    classpath="${spotbugs.jar}" />
-
   <target name="spotbugs" depends="jar, compile-plugins, spotbugs-download" description="--> runs spotbugs source code analysis.">
+    <taskdef
+        resource="edu/umd/cs/findbugs/anttask/tasks.properties"
+        classpath="${spotbugs.jar}" />
     <spotbugs home="${spotbugs.home}"
             output="html"
             outputFile="${build.dir}/nutch-spotbugs.html"
@@ -1262,6 +1279,8 @@
         <source path="${plugins.dir}/scoring-orphan/src/java"/>
         <source path="${plugins.dir}/scoring-orphan/src/test"/>
         <source path="${plugins.dir}/scoring-similarity/src/java/" />
+        <source path="${plugins.dir}/scoring-metadata/src/java/" />
+        <source path="${plugins.dir}/scoring-metadata/src/test" />
         <source path="${plugins.dir}/subcollection/src/java/" />
         <source path="${plugins.dir}/subcollection/src/test/" />
         <source path="${plugins.dir}/tld/src/java/" />
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 36c6f86..5548a30 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -1871,6 +1871,37 @@
   </description>
 </property>
 
+
+<!-- scoring metadata properties
+Add scoring-metadata to the list of active plugins
+ in the parameter 'plugin.includes' in order to use it.
+ -->
+<property>
+  <name>scoring.db.md</name>
+  <value></value>
+  <description> 
+  Comma-separated list of keys to be copied from the crawldb metadata of a url to the fetched content metadata.
+  </description>
+</property>
+
+<property>
+  <name>scoring.content.md</name>
+  <value></value>
+  <description> 
+  Comma-separated list of keys to be taken from content metadata of a url and put as metadata in the parse data.
+  </description>
+</property>
+
+<property>
+  <name>scoring.parse.md</name>
+  <value></value>
+  <description> 
+  Comma-separated list of keys to be taken from metadata of the parse data of a url and propagated as metadata to the url outlinks.
+  </description>
+</property>
+
+
+
 <!-- language-identifier plugin properties -->
 
 <property>
diff --git a/conf/suffix-urlfilter.txt.template b/conf/suffix-urlfilter.txt.template
index 6f02aed..e329f3c 100644
--- a/conf/suffix-urlfilter.txt.template
+++ b/conf/suffix-urlfilter.txt.template
@@ -19,13 +19,18 @@
 ### prohibit these
 # pictures
 .gif
+.gifv
 .jpg
 .jpeg
+.jp2
+.jpf
+.jpx
 .bmp
 .png
 .tif
 .tiff
 .ico
+.icns
 .eps
 .ps
 .wmf
@@ -38,13 +43,19 @@
 .psp
 .psd
 .tga
+.webp
 .xbm
 .xpm
+.kdc
+.svg
+.svgz
 
 # web-formats
 .css
+.js
 
 # archives/packages
+.apk
 .arj
 .arc
 .7z
@@ -52,14 +63,25 @@
 .lzw
 .lha
 .lzh
+.mar
 .zip
 .gz
 .tar
 .tgz
+.rar
 .sit
 .rpm
 .deb
+.udeb
 .pkg
+.bz2
+.dmg
+.lzma
+.xz
+.ipk
+.whl
+.egg
+.crx
 
 # audio/video
 .mid
@@ -68,11 +90,19 @@
 .mpeg
 .mpg
 .mpe
+.mp4
 .mp3
 .mp2
 .aac
 .mov
+.m4a
+.m4r
+.m4v
+.mp4a
+.mpga
+.f4v
 .fla
+.flac
 .flv
 .ra
 .ram
@@ -82,14 +112,41 @@
 .wmv
 .wav
 .wave
+.oga
 .ogg
+.webm
 .avi
+.avif
 .au
 .snd
+.3gp
+.3g2
+.qt
+.mka
+.mks
+.mkv
+.mk3d
+.opus
+.xm
+.m3u8
+.movie
+.aif
+.aiff
+.gblorb
+.xhr
 
-# executables
+# fonts
+.ttf
+.otf
+.pfb
+.afm
+.woff
+.woff2
+
+# executables and shared libraries
 .exe
 .com
+.dll
 
 # windows links
 .lnk
diff --git a/default.properties b/default.properties
index a675853..48bdb43 100644
--- a/default.properties
+++ b/default.properties
@@ -134,7 +134,8 @@
    org.apache.nutch.scoring.orphan*:\
    org.apache.nutch.scoring.similarity*:\
    org.apache.nutch.scoring.tld*:\
-   org.apache.nutch.scoring.urlmeta*
+   org.apache.nutch.scoring.urlmeta*:\
+   org.apache.nutch.scoring.metadata*
    
 #
 # Parse Plugins
diff --git a/ivy/dependency-check-ant/lib/.gitignore b/ivy/dependency-check-ant/lib/.gitignore
new file mode 100644
index 0000000..e2dec72
--- /dev/null
+++ b/ivy/dependency-check-ant/lib/.gitignore
@@ -0,0 +1,19 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Ignore everything in this directory
+*
+# Except this file
+!.gitignore
diff --git a/src/java/org/apache/nutch/crawl/CrawlDatum.java b/src/java/org/apache/nutch/crawl/CrawlDatum.java
index 5159bdb..b18eda3 100644
--- a/src/java/org/apache/nutch/crawl/CrawlDatum.java
+++ b/src/java/org/apache/nutch/crawl/CrawlDatum.java
@@ -524,7 +524,8 @@
     int res = 0;
     if (signature != null) {
       for (int i = 0; i < signature.length / 4; i += 4) {
-        res ^= (signature[i] << 24 + signature[i + 1] << 16 + signature[i + 2] << 8 + signature[i + 3]);
+        res ^= ((signature[i] << 24) + (signature[i + 1] << 16)
+            + (signature[i + 2] << 8) + signature[i + 3]);
       }
     }
     if (metaData != null) {
diff --git a/src/java/org/apache/nutch/fetcher/Fetcher.java b/src/java/org/apache/nutch/fetcher/Fetcher.java
index 6d4c195..568bf8e 100644
--- a/src/java/org/apache/nutch/fetcher/Fetcher.java
+++ b/src/java/org/apache/nutch/fetcher/Fetcher.java
@@ -337,7 +337,7 @@
 
             int averageBdwPerThread = 0;
             if (activeThreads.get() > 0)
-              averageBdwPerThread = Math.round(bpsSinceLastCheck
+              averageBdwPerThread = (int) (bpsSinceLastCheck
                   / activeThreads.get());
 
             LOG.info("averageBdwPerThread : {} kbps", (averageBdwPerThread / 1000));
diff --git a/src/java/org/apache/nutch/fetcher/FetcherThread.java b/src/java/org/apache/nutch/fetcher/FetcherThread.java
index 6cd1772..40b7201 100644
--- a/src/java/org/apache/nutch/fetcher/FetcherThread.java
+++ b/src/java/org/apache/nutch/fetcher/FetcherThread.java
@@ -386,7 +386,7 @@
               if (pstatus != null && pstatus.isSuccess()
                   && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
                 String newUrl = pstatus.getMessage();
-                int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
+                int refreshTime = Integer.parseInt(pstatus.getArgs()[1]);
                 Text redirUrl = handleRedirect(fit, newUrl,
                     refreshTime < Fetcher.PERM_REFRESH_TIME,
                     Fetcher.CONTENT_REDIR);
diff --git a/src/java/org/apache/nutch/parse/ParseOutputFormat.java b/src/java/org/apache/nutch/parse/ParseOutputFormat.java
index fcaa1d1..d47043c 100644
--- a/src/java/org/apache/nutch/parse/ParseOutputFormat.java
+++ b/src/java/org/apache/nutch/parse/ParseOutputFormat.java
@@ -271,7 +271,7 @@
         if (pstatus != null && pstatus.isSuccess()
             && pstatus.getMinorCode() == ParseStatus.SUCCESS_REDIRECT) {
           String newUrl = pstatus.getMessage();
-          int refreshTime = Integer.valueOf(pstatus.getArgs()[1]);
+          int refreshTime = Integer.parseInt(pstatus.getArgs()[1]);
           newUrl = filterNormalize(fromUrl, newUrl, origin,
               ignoreInternalLinks, ignoreExternalLinks, ignoreExternalLinksMode, filters, exemptionFilters, normalizers,
               URLNormalizers.SCOPE_FETCHER);
diff --git a/src/java/org/apache/nutch/util/SitemapProcessor.java b/src/java/org/apache/nutch/util/SitemapProcessor.java
index 76dcef9..bcbacdd 100644
--- a/src/java/org/apache/nutch/util/SitemapProcessor.java
+++ b/src/java/org/apache/nutch/util/SitemapProcessor.java
@@ -487,7 +487,7 @@
         LOG.info("SitemapProcessor: sitemap urls dir: {}", urlDir);
       }
       else if (args[i].equals("-threads")) {
-        threads = Integer.valueOf(args[++i]);
+        threads = Integer.parseInt(args[++i]);
         LOG.info("SitemapProcessor: threads: {}", threads);
       }
       else if (args[i].equals("-noStrict")) {
diff --git a/src/plugin/build.xml b/src/plugin/build.xml
index dd2a507..95d7a16 100755
--- a/src/plugin/build.xml
+++ b/src/plugin/build.xml
@@ -84,6 +84,7 @@
     <ant dir="scoring-opic" target="deploy"/>
     <ant dir="scoring-orphan" target="deploy"/>
     <ant dir="scoring-similarity" target="deploy"/>
+    <ant dir="scoring-metadata"  target="deploy"/>
     <ant dir="subcollection" target="deploy"/>
     <ant dir="tld" target="deploy"/>
     <ant dir="urlfilter-automaton" target="deploy"/>
@@ -142,6 +143,7 @@
      <ant dir="protocol-httpclient" target="test"/>
      <ant dir="protocol-okhttp" target="test"/>
      <ant dir="scoring-orphan" target="test"/>
+     <ant dir="scoring-metadata" target="test"/>
      <ant dir="subcollection" target="test"/>
      <ant dir="urlfilter-automaton" target="test"/>
      <ant dir="urlfilter-domain" target="test"/>
@@ -230,6 +232,7 @@
     <ant dir="scoring-opic" target="clean"/>
     <ant dir="scoring-orphan" target="clean"/>
     <ant dir="scoring-similarity" target="clean"/>
+    <ant dir="scoring-metadata" target="clean"/>
     <ant dir="subcollection" target="clean"/>
     <ant dir="tld" target="clean"/>
     <ant dir="urlfilter-automaton" target="clean"/>
diff --git a/src/plugin/parsefilter-naivebayes/src/java/org/apache/nutch/parsefilter/naivebayes/Classify.java b/src/plugin/parsefilter-naivebayes/src/java/org/apache/nutch/parsefilter/naivebayes/Classify.java
index c98a843..9bad065 100644
--- a/src/plugin/parsefilter-naivebayes/src/java/org/apache/nutch/parsefilter/naivebayes/Classify.java
+++ b/src/plugin/parsefilter-naivebayes/src/java/org/apache/nutch/parsefilter/naivebayes/Classify.java
@@ -70,15 +70,15 @@
       BufferedReader bufferedReader = new BufferedReader(new InputStreamReader(
           fs.open(new Path("naivebayes-model"))));
 
-      uniquewords_size = Integer.valueOf(bufferedReader.readLine());
+      uniquewords_size = Integer.parseInt(bufferedReader.readLine());
       bufferedReader.readLine();
 
-      numof_ir = Integer.valueOf(bufferedReader.readLine());
-      numwords_ir = Integer.valueOf(bufferedReader.readLine());
+      numof_ir = Integer.parseInt(bufferedReader.readLine());
+      numwords_ir = Integer.parseInt(bufferedReader.readLine());
       wordfreq_ir = unflattenToHashmap(bufferedReader.readLine());
       bufferedReader.readLine();
-      numof_r = Integer.valueOf(bufferedReader.readLine());
-      numwords_r = Integer.valueOf(bufferedReader.readLine());
+      numof_r = Integer.parseInt(bufferedReader.readLine());
+      numwords_r = Integer.parseInt(bufferedReader.readLine());
       wordfreq_r = unflattenToHashmap(bufferedReader.readLine());
 
       ismodel = true;
diff --git a/src/plugin/scoring-metadata/build.xml b/src/plugin/scoring-metadata/build.xml
new file mode 100644
index 0000000..4f62ed1
--- /dev/null
+++ b/src/plugin/scoring-metadata/build.xml
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<project name="scoring-metadata" default="jar-core">
+
+  <import file="../build-plugin.xml"/>
+</project>
diff --git a/src/plugin/scoring-metadata/ivy.xml b/src/plugin/scoring-metadata/ivy.xml
new file mode 100644
index 0000000..24d7606
--- /dev/null
+++ b/src/plugin/scoring-metadata/ivy.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" ?>
+
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+-->
+
+<ivy-module version="1.0">
+  <info organisation="org.apache.nutch" module="${ant.project.name}">
+    <license name="Apache 2.0"/>
+    <ivyauthor name="Apache Nutch Team" url="http://nutch.apache.org"/>
+    <description>
+        Apache Nutch
+    </description>
+  </info>
+
+  <configurations>
+    <include file="../../../ivy/ivy-configurations.xml"/>
+  </configurations>
+
+  <publications>
+    <!--get the artifact from our module name-->
+    <artifact conf="master"/>
+  </publications>
+
+  <dependencies>
+  </dependencies>
+  
+</ivy-module>
diff --git a/src/plugin/scoring-metadata/plugin.xml b/src/plugin/scoring-metadata/plugin.xml
new file mode 100644
index 0000000..ca47e37
--- /dev/null
+++ b/src/plugin/scoring-metadata/plugin.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<plugin
+   id="scoring-metadata"
+   name="Metadata Scoring Filter"
+   version="1.0.0"
+   provider-name="nutch">
+
+
+   <runtime>
+      <library name="scoring-metadata.jar">
+         <export name="*"/>
+      </library>
+   </runtime>
+
+   <requires>
+      <import plugin="nutch-extensionpoints"/>
+   </requires>
+
+   <extension      id="org.apache.nutch.scoring.metadata"
+                    name="Metadata Scoring Filter"
+                    point="org.apache.nutch.scoring.ScoringFilter">
+   <implementation id="scoring-metadata"
+                    class="org.apache.nutch.scoring.metadata.MetadataScoringFilter" />
+   </extension>
+</plugin>
diff --git a/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/MetadataScoringFilter.java b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/MetadataScoringFilter.java
new file mode 100644
index 0000000..e3ad56e
--- /dev/null
+++ b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/MetadataScoringFilter.java
@@ -0,0 +1,143 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.scoring.metadata;
+
+import java.util.Collection;
+import java.util.Map.Entry;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.crawl.Inlinks;
+import org.apache.nutch.indexer.NutchDocument;
+import org.apache.nutch.parse.Parse;
+import org.apache.nutch.parse.ParseData;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.scoring.ScoringFilter;
+import org.apache.nutch.scoring.AbstractScoringFilter;
+import org.apache.nutch.scoring.ScoringFilterException;
+
+
+/**
+ * Scoring filter that copies configured metadata keys along the chain
+ * crawldb record, fetched content, parse data, outlinks. For documentation
+ * see {@link org.apache.nutch.scoring.metadata}.
+ */
+public class MetadataScoringFilter extends AbstractScoringFilter {
+
+  public static final String METADATA_DATUM   = "scoring.db.md";
+  public static final String METADATA_CONTENT = "scoring.content.md";
+  public static final String METADATA_PARSED  = "scoring.parse.md";
+
+  // Key lists are per-instance state: setConf() is an instance method, so
+  // static fields would let one filter overwrite the keys of another.
+  private String[] datumMetadata;
+  private String[] contentMetadata;
+  private String[] parseMetadata;
+
+  /**
+   * Copies every key listed in the "scoring.parse.md" property that has a
+   * value in the parseData object into the metadata of each entry of the
+   * 'targets' collection (the outlinks).
+   *
+   * @return the adjust datum passed in; it is not modified by this filter
+   * @see ScoringFilter#distributeScoreToOutlinks
+   */
+  @Override
+  public CrawlDatum distributeScoreToOutlinks(Text fromUrl,
+      ParseData parseData, Collection<Entry<Text, CrawlDatum>> targets,
+      CrawlDatum adjust, int allCount) throws ScoringFilterException {
+    if (parseMetadata == null || targets == null || parseData == null) {
+      return adjust;
+    }
+
+    for (Entry<Text, CrawlDatum> target : targets) {
+      for (String key : parseMetadata) {
+        String value = parseData.getMeta(key);
+        if (value != null) {
+          target.getValue().getMetaData()
+              .put(new Text(key), new Text(value));
+        }
+      }
+    }
+    return adjust;
+  }
+
+  /**
+   * Takes the metadata, specified in your "scoring.db.md" property, from the
+   * datum object and injects it into the content. Keys without a value in
+   * the datum are skipped.
+   *
+   * @see ScoringFilter#passScoreBeforeParsing
+   * @see MetadataScoringFilter#passScoreAfterParsing
+   */
+  @Override
+  public void passScoreBeforeParsing(Text url, CrawlDatum datum, Content content) {
+    if (datumMetadata == null || content == null || datum == null) {
+      return;
+    }
+
+    for (String key : datumMetadata) {
+      Text value = (Text) datum.getMetaData().get(new Text(key));
+      if (value != null) {
+        content.getMetadata().set(key, value.toString());
+      }
+    }
+  }
+
+  /**
+   * Takes the metadata listed in your "scoring.content.md" property from the
+   * content and replicates it within the parse metadata of your parse data.
+   *
+   * @see MetadataScoringFilter#passScoreBeforeParsing
+   * @see ScoringFilter#passScoreAfterParsing
+   */
+  @Override
+  public void passScoreAfterParsing(Text url, Content content, Parse parse) {
+    if (contentMetadata == null || content == null || parse == null) {
+      return;
+    }
+
+    for (String key : contentMetadata) {
+      String value = content.getMetadata().get(key);
+      if (value != null) {
+        parse.getData().getParseMeta().set(key, value);
+      }
+    }
+  }
+
+  /**
+   * Handles conf assignment and pulls the value assignment from the
+   * "scoring.db.md", "scoring.content.md" and "scoring.parse.md" properties.
+   * A null conf is passed on to the superclass and leaves the keys unchanged.
+   */
+  @Override
+  public void setConf(Configuration conf) {
+    super.setConf(conf);
+
+    if (conf == null) {
+      return;
+    }
+
+    datumMetadata = conf.getStrings(METADATA_DATUM);
+    contentMetadata = conf.getStrings(METADATA_CONTENT);
+    parseMetadata = conf.getStrings(METADATA_PARSED);
+  }
+}
diff --git a/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package.html b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package.html
new file mode 100644
index 0000000..0356152
--- /dev/null
+++ b/src/plugin/scoring-metadata/src/java/org/apache/nutch/scoring/metadata/package.html
@@ -0,0 +1,33 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+  <body>
+    <p>
+      Metadata Scoring Plugin
+    </p>
+    <p>
+      Propagates Metadata from an injected or outlink url in the crawldb to the url's different processed objects. In moving any metadata item, you need to copy metadata in three steps:
+    <ul>
+      <li>Crawldb to content: Copy a metadata entry stored in the crawldb record of the url to the url's fetched content object. You need to specify the entry in the <b>scoring.db.md</b> property</li>
+      <li>Content to parsedData: Copy a metadata entry stored in the Content object of a crawled url to its parsedData.  You need to specify the entry in the <b>scoring.content.md</b> property</li>
+      <li>ParsedData to outlink objects: Copy a metadata entry stored in the parsedData of a crawl item to the crawldb records of the url's outlinks. You need to specify the entry in the <b>scoring.parse.md</b> property</li>
+    </ul>
+
+    Note that you can not move data directly from a crawldb record to parseData or outlink objects. The sequence of moving the metadata should be crawldb -> content -> parsedData -> outlink objects.
+    </p>
+  </body>
+</html>
diff --git a/src/plugin/scoring-metadata/src/test/org/apache/nutch/scoring/metadata/TestMetadataScoringFilter.java b/src/plugin/scoring-metadata/src/test/org/apache/nutch/scoring/metadata/TestMetadataScoringFilter.java
new file mode 100644
index 0000000..8683cec
--- /dev/null
+++ b/src/plugin/scoring-metadata/src/test/org/apache/nutch/scoring/metadata/TestMetadataScoringFilter.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.scoring.metadata;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.Text;
+import org.apache.nutch.crawl.CrawlDatum;
+import org.apache.nutch.parse.*;
+import org.apache.nutch.protocol.Content;
+import org.apache.nutch.scoring.ScoringFilterException;
+import org.apache.nutch.util.NutchConfiguration;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.HashMap;
+
+/**
+ * Tests the three metadata propagation steps of {@link MetadataScoringFilter}.
+ */
+public class TestMetadataScoringFilter {
+
+  private static final String PARENT = "parent";
+  private static final String DEPTH = "depth";
+  private static final String PARENT_MD = "https://nutch.apache.org/";
+  private static final String DEPTH_MD = "1";
+  private static final String KEYS = PARENT + "," + DEPTH;
+
+  /** Creates a filter whose configuration sets each given property to both keys. */
+  private static MetadataScoringFilter createFilter(String... properties) {
+    Configuration conf = NutchConfiguration.create();
+    for (String property : properties) {
+      conf.set(property, KEYS);
+    }
+    MetadataScoringFilter filter = new MetadataScoringFilter();
+    filter.setConf(conf);
+    return filter;
+  }
+
+  /** Creates a crawldb record carrying both metadata entries. */
+  private static CrawlDatum createDatumWithMetadata() {
+    CrawlDatum datum = new CrawlDatum();
+    datum.getMetaData().put(new Text(PARENT), new Text(PARENT_MD));
+    datum.getMetaData().put(new Text(DEPTH), new Text(DEPTH_MD));
+    return datum;
+  }
+
+  @Test
+  public void distributeScoreToOutlinks() throws ScoringFilterException {
+    MetadataScoringFilter filter = createFilter(MetadataScoringFilter.METADATA_PARSED);
+
+    Text from = new Text("https://nutch.apache.org/");
+    ParseData parseData = new ParseData();
+    parseData.getParseMeta().add(PARENT, PARENT_MD);
+    parseData.getParseMeta().add(DEPTH, DEPTH_MD);
+
+    HashMap<Text, CrawlDatum> targets = new HashMap<>();
+    targets.put(new Text("https://nutch.apache.org/downloads.html"), new CrawlDatum());
+    targets.put(new Text("https://wiki.apache.org/nutch"), new CrawlDatum());
+
+    CrawlDatum adjust = new CrawlDatum();
+    CrawlDatum returned = filter.distributeScoreToOutlinks(from, parseData,
+        targets.entrySet(), adjust, targets.size());
+
+    // the filter only copies metadata and hands back the adjust datum it was given
+    Assert.assertSame(adjust, returned);
+    for (CrawlDatum outlink : targets.values()) {
+      Text parent = (Text) outlink.getMetaData().get(new Text(PARENT));
+      Text depth = (Text) outlink.getMetaData().get(new Text(DEPTH));
+
+      Assert.assertNotNull(parent);
+      Assert.assertNotNull(depth);
+      Assert.assertEquals(PARENT_MD, parent.toString());
+      Assert.assertEquals(DEPTH_MD, depth.toString());
+    }
+  }
+
+  @Test
+  public void passScoreBeforeParsing() {
+    MetadataScoringFilter filter = createFilter(MetadataScoringFilter.METADATA_DATUM);
+
+    Text from = new Text("https://nutch.apache.org/");
+    Content content = new Content();
+
+    filter.passScoreBeforeParsing(from, createDatumWithMetadata(), content);
+
+    Assert.assertEquals(PARENT_MD, content.getMetadata().get(PARENT));
+    Assert.assertEquals(DEPTH_MD, content.getMetadata().get(DEPTH));
+  }
+
+  @Test
+  public void passScoreAfterParsing() {
+    MetadataScoringFilter filter = createFilter(
+        MetadataScoringFilter.METADATA_DATUM, MetadataScoringFilter.METADATA_CONTENT);
+
+    Text from = new Text("https://nutch.apache.org/");
+    Content content = new Content();
+    filter.passScoreBeforeParsing(from, createDatumWithMetadata(), content);
+
+    ParseData parseData = new ParseData(ParseStatus.STATUS_SUCCESS, null, null,
+        content.getMetadata());
+    Parse parse = new ParseImpl(from.toString(), parseData);
+    filter.passScoreAfterParsing(from, content, parse);
+
+    // Check the parse metadata directly: that is the map the filter writes to.
+    Assert.assertEquals(PARENT_MD, parse.getData().getParseMeta().get(PARENT));
+    Assert.assertEquals(DEPTH_MD, parse.getData().getParseMeta().get(DEPTH));
+    Assert.assertEquals(PARENT_MD, parse.getData().getMeta(PARENT));
+    Assert.assertEquals(DEPTH_MD, parse.getData().getMeta(DEPTH));
+  }
+}
diff --git a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
index 5479882..48c4a66 100644
--- a/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
+++ b/src/plugin/urlnormalizer-basic/src/java/org/apache/nutch/net/urlnormalizer/basic/BasicURLNormalizer.java
@@ -150,9 +150,6 @@
   private boolean hostASCIItoIDN;
   private boolean hostTrimTrailingDot;
 
-  public void BasicUrlNormalizer() {
-  }
-
   @Override
   public Configuration getConf() {
     return conf;