Prepare for Nutch 1.19-SNAPSHOT development
diff --git a/CHANGES.txt b/CHANGES.txt
index e5c5984..9946bc9 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -1,10 +1,55 @@
# Nutch Change Log
-Nutch 1.18 Development
+Nutch 1.18 Release 14/01/2021 (dd/mm/yyyy)
+Release Report: https://s.apache.org/lqara
Breaking Changes
- - As part of NUTCH-2805, the plugin urlfilter-domainblacklist has been renamed to urlfilter-domaindenylist. And the fields required for the plugin urlfilter.domainblacklist.rules and urlfilter.domainblacklist.file has been replaced with urlfilter.domaindenylist.rules and urlfilter.domaindenylist.file respectively. See NUTCH-2802 for more details.
+ - As part of NUTCH-2805, the plugin urlfilter-domainblacklist has been renamed to urlfilter-domaindenylist. And the fields required for the plugin urlfilter.domainblacklist.rules and urlfilter.domainblacklist.file have been replaced with urlfilter.domaindenylist.rules and urlfilter.domaindenylist.file respectively. See NUTCH-2802 for more details.
+
+Sub-task
+
+ [NUTCH-2671] - Upgrade ant ivy library
+ [NUTCH-2672] - Ant build erroneously installs *-test.jar instead of *.jar for target "nightly"
+ [NUTCH-2805] - Rename plugin urlfilter-domainblacklist
+ [NUTCH-2809] - Upgrade any23 plugin dependency to 2.4
+ [NUTCH-2816] - Add Spotbugs target to ant build
+ [NUTCH-2817] - Avoid check for equality of URL path and file part using ==/!=
+ [NUTCH-2829] - Fix ant target "clean-cache"
+
+Bug
+
+ [NUTCH-2669] - Reliable solution for javax.ws packaging.type
+ [NUTCH-2697] - Upgrade Ivy to fix the issue of an unset packaging.type property
+ [NUTCH-2801] - RobotsRulesParser command-line checker to use http.robots.agents as fall-back
+ [NUTCH-2810] - FreeGenerator to actually apply configured number of fetch lists
+ [NUTCH-2813] - MoreIndexingFilter - can't parse erroneous date - 2019-07-03T10:28:14
+ [NUTCH-2814] - HttpDateFormat's internal time zone may change after parsing a date
+ [NUTCH-2818] - Ant build: upgrade Apache Rat report task
+ [NUTCH-2823] - IllegalStateException in IndexWriters.describe() when validating url param for SolrIndexer
+ [NUTCH-2824] - urlnormalizer-basic to unescape percent-encoded host names
+
+Improvement
+
+ [NUTCH-1190] - MoreIndexingFilter refactor: move data formats used to parse "lastModified" to a config file.
+ [NUTCH-2582] - Set pool size of XML SAX parsers used for MIME detection in Tika 1.19
+ [NUTCH-2730] - SitemapProcessor to treat sitemap URLs as Set instead of List
+ [NUTCH-2782] - protocol-http / lib-http: support TLSv1.3
+ [NUTCH-2796] - Upgrade to crawler-commons 1.1
+ [NUTCH-2799] - Add .asf.yaml file
+ [NUTCH-2833] - Upgrade to Tika 1.25
+ [NUTCH-2835] - Upgrade commons-jexl from 2 --> 3
+ [NUTCH-2836] - Upgrade various commons dependencies
+ [NUTCH-2837] - Update multiple dependencies
+ [NUTCH-2841] - Upgrade xercesImpl dependency
+
+Wish
+
+ [NUTCH-2834] - Deduplication mode via command line in crawl script
+
+Task
+
+ [NUTCH-2830] - Upgrade any23 to v2.4
Nutch 1.17 Release 18/06/2020 (dd/mm/yyyy)
Release Report: https://s.apache.org/ovhry
diff --git a/NOTICE.txt b/NOTICE.txt
index 71f29fa..1c9efd0 100644
--- a/NOTICE.txt
+++ b/NOTICE.txt
@@ -1,5 +1,5 @@
Apache Nutch
-Copyright 2020 The Apache Software Foundation
+Copyright 2021 The Apache Software Foundation
This product includes software developed by The Apache Software
Foundation (http://www.apache.org/).
diff --git a/build.xml b/build.xml
index 62ed5d1..68a0f44 100644
--- a/build.xml
+++ b/build.xml
@@ -37,6 +37,8 @@
<property name="maven-javadoc-jar" value="${release.dir}/${artifactId}-${version}-javadoc.jar" />
<property name="maven-sources-jar" value="${release.dir}/${artifactId}-${version}-sources.jar" />
+ <property environment="env"/>
+
<property name="dependency-check.home" value="${ivy.dir}/dependency-check-ant/"/>
<property name="spotbugs.version" value="4.1.1" />
@@ -311,8 +313,9 @@
</ivy:makepom>
<!-- sign and deploy the main artifact -->
- <artifact:mvn>
- <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" />
+ <artifact:mvn mavenHome="${env.MVN_HOME}" fork="true" failonerror="true">
+ <jvmarg value="-Dmaven.multiModuleProjectDirectory=false" />
+ <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.6:sign-and-deploy-file" />
<arg value="-Durl=${maven-repository-url}" />
<arg value="-DrepositoryId=${maven-repository-id}" />
<arg value="-DpomFile=pom.xml" />
@@ -321,8 +324,9 @@
</artifact:mvn>
<!-- sign and deploy the sources artifact -->
- <artifact:mvn>
- <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" />
+ <artifact:mvn mavenHome="${env.MVN_HOME}" fork="true" failonerror="true">
+ <jvmarg value="-Dmaven.multiModuleProjectDirectory=false" />
+ <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.6:sign-and-deploy-file" />
<arg value="-Durl=${maven-repository-url}" />
<arg value="-DrepositoryId=${maven-repository-id}" />
<arg value="-DpomFile=pom.xml" />
@@ -332,8 +336,9 @@
</artifact:mvn>
<!-- sign and deploy the javadoc artifact -->
- <artifact:mvn>
- <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.5:sign-and-deploy-file" />
+ <artifact:mvn mavenHome="${env.MVN_HOME}" fork="true" failonerror="true">
+ <jvmarg value="-Dmaven.multiModuleProjectDirectory=false" />
+ <arg value="org.apache.maven.plugins:maven-gpg-plugin:1.6:sign-and-deploy-file" />
<arg value="-Durl=${maven-repository-url}" />
<arg value="-DrepositoryId=${maven-repository-id}" />
<arg value="-DpomFile=pom.xml" />
@@ -362,10 +367,12 @@
</dependency>
</artifact:dependencies-->
- <artifact:mvn>
- <arg value="test"/>
+ <artifact:mvn mavenHome="${env.MVN_HOME}" fork="true" failonerror="true">
+ <jvmarg value="-Dmaven.multiModuleProjectDirectory=false" />
+ <arg value="package"/>
+ <arg value="-DskipTests"/>
<arg value="-e"/>
- <arg value="-o"/>
+ <!--arg value="-o"/-->
<!-- run offline (-o): must not download dependencies as this is
done from http://repo1.maven.org/ hardwired in
maven-ant-tasks-2.1.3.jar, see NUTCH-2722.
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 6932eb5..36c6f86 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -164,7 +164,7 @@
<property>
<name>http.agent.version</name>
- <value>Nutch-1.18-SNAPSHOT</value>
+ <value>Nutch-1.19-SNAPSHOT</value>
<description>A version string to advertise in the User-Agent
header.</description>
</property>
diff --git a/default.properties b/default.properties
index e4b9619..a675853 100644
--- a/default.properties
+++ b/default.properties
@@ -14,9 +14,9 @@
# limitations under the License.
name=apache-nutch
-version=1.18-SNAPSHOT
+version=1.19-SNAPSHOT
final.name=${name}-${version}
-year=2020
+year=2021
basedir = ./
src.dir = ./src/java
diff --git a/ivy/mvn.template b/ivy/mvn.template
index 6d22c84..edfb550 100644
--- a/ivy/mvn.template
+++ b/ivy/mvn.template
@@ -19,6 +19,11 @@
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache</groupId>
+ <artifactId>apache</artifactId>
+ <version>23</version>
+ </parent>
<groupId>${ivy.pom.groupId}</groupId>
<artifactId>${ivy.pom.artifactId}</artifactId>
<packaging>${ivy.pom.packaging}</packaging>
@@ -46,6 +51,10 @@
<name>MireDot Releases</name>
<url>http://nexus.qmino.com/content/repositories/miredot</url>
</pluginRepository>
+ <pluginRepository>
+ <id>maven2</id>
+ <url>https://repo.maven.apache.org/maven2/</url>
+ </pluginRepository>
</pluginRepositories>
<developers>
@@ -119,6 +128,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
+ <version>3.8.1</version>
<configuration>
<source>1.8</source>
<target>1.8</target>
@@ -130,7 +140,7 @@
<plugin>
<groupId>com.qmino</groupId>
<artifactId>miredot-plugin</artifactId>
- <version>2.0.1</version>
+ <version>2.4.0</version>
<executions>
<execution>
<goals>
diff --git a/src/bin/nutch b/src/bin/nutch
index 7d0d8ee..9240a9e 100755
--- a/src/bin/nutch
+++ b/src/bin/nutch
@@ -60,7 +60,7 @@
# if no args specified, show usage
if [ $# = 0 ]; then
- echo "nutch 1.18-SNAPSHOT"
+ echo "nutch 1.19-SNAPSHOT"
echo "Usage: nutch COMMAND [-Dproperty=value]... [command-specific args]..."
echo "where COMMAND is one of:"
echo " readdb read / dump crawl db"