Merge pull request #551 from sebastian-nagel/NUTCH-2823
NUTCH-2823 IllegalStateException in IndexWriters.describe() when vali…
diff --git a/build.xml b/build.xml
index bbe4aaf..67a3aa2 100644
--- a/build.xml
+++ b/build.xml
@@ -15,7 +15,11 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
-<project name="${name}" default="runtime" xmlns:ivy="antlib:org.apache.ivy.ant" xmlns:artifact="antlib:org.apache.maven.artifact.ant">
+<project name="${name}" default="runtime"
+ xmlns:ivy="antlib:org.apache.ivy.ant"
+ xmlns:artifact="antlib:org.apache.maven.artifact.ant"
+ xmlns:rat="antlib:org.apache.rat.anttasks"
+ xmlns="antlib:org.apache.tools.ant">
<!-- Load all the default properties, and any the user wants -->
<!-- to contribute (without having to type -D or edit this file -->
@@ -33,10 +37,16 @@
<property name="maven-javadoc-jar" value="${release.dir}/${artifactId}-${version}-javadoc.jar" />
<property name="maven-sources-jar" value="${release.dir}/${artifactId}-${version}-sources.jar" />
+ <property name="dependency-check.home" value="${ivy.dir}/dependency-check-ant/"/>
+
<property name="spotbugs.version" value="4.1.1" />
<property name="spotbugs.home" value="${basedir}/lib/spotbugs-${spotbugs.version}" />
<property name="spotbugs.jar" value="${spotbugs.home}/lib/spotbugs-ant.jar" />
+ <property name="apache-rat.version" value="0.13" />
+ <property name="apache-rat.home" value="${ivy.dir}/apache-rat-${apache-rat.version}" />
+ <property name="apache-rat.jar" value="${apache-rat.home}/apache-rat-${apache-rat.version}.jar" />
+
<!-- the normal classpath -->
<path id="classpath">
<pathelement location="${build.classes}"/>
@@ -633,7 +643,6 @@
<!-- https://jeremylong.github.io/DependencyCheck/dependency-check-ant/index.html -->
<!-- get http://dl.bintray.com/jeremy-long/owasp/dependency-check-ant-3.3.2-release.zip -->
<!-- and unzip in directory ./ivy/ -->
- <property name="dependency-check.home" value="${ivy.dir}/dependency-check-ant/"/>
<path id="dependency-check.path">
<pathelement location="${dependency-check.home}/dependency-check-ant.jar"/>
<fileset dir="${dependency-check.home}/lib" erroronmissingdir="false">
@@ -1003,17 +1012,35 @@
<!-- ================================================================== -->
<!-- RAT targets -->
<!-- ================================================================== -->
- <target name="rat-sources-typedef" description="--> run RAT antlib task">
- <typedef resource="org/apache/rat/anttasks/antlib.xml" >
- <classpath>
- <fileset dir="." includes="rat*.jar"/>
- </classpath>
- </typedef>
+ <target name="apache-rat-download" description="--> download Apache Rat jar">
+ <available file="${apache-rat.jar}" property="apache-rat.jar.found"/>
+ <antcall target="apache-rat-download-unchecked"/>
</target>
- <target name="rat-sources" depends="rat-sources-typedef"
+ <target name="apache-rat-download-unchecked" unless="apache-rat.jar.found"
+ description="--> downloads the Apache Rat jar">
+ <get src="https://www.apache.org/dist/creadur/apache-rat-${apache-rat.version}/apache-rat-${apache-rat.version}-bin.tar.gz"
+ dest="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" usetimestamp="false" />
+
+ <untar src="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz"
+ dest="${ivy.dir}/" compression="gzip">
+ </untar>
+
+ <delete file="${ivy.dir}/apache-rat-${apache-rat.version}-bin.tar.gz" />
+ </target>
+
+ <taskdef
+ uri="antlib:org.apache.rat.anttasks"
+ resource="org/apache/rat/anttasks/antlib.xml">
+ <classpath>
+ <pathelement location="${apache-rat.jar}" />
+ </classpath>
+ </taskdef>
+
+ <target name="rat-sources" depends="init, apache-rat-download"
description="--> runs RAT tasks over src/java">
- <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
+ <rat:report
+ reportFile="${build.dir}/apache-rat-report.txt">
<fileset dir="src">
<include name="java/**/*"/>
<include name="plugin/**/src/**/*"/>
diff --git a/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java b/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
index fbd45a2..f30fb20 100644
--- a/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
+++ b/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
@@ -24,48 +24,35 @@
import java.text.ParseException;
/**
- * class to handle HTTP dates.
+ * Parse and format HTTP dates in HTTP headers, e.g., used to fill the
+ * "If-Modified-Since" request header field.
*
- * Modified from FastHttpDateFormat.java in jakarta-tomcat.
+ * HTTP dates use Greenwich Mean Time (GMT) as time zone and a date format like:
*
- * @author John Xing
+ * <pre>
+ * Sun, 06 Nov 1994 08:49:37 GMT
+ * </pre>
+ *
+ * See <a href=
+ * "https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1">sec. 3.3.1
+ * in RFC 2616</a> and
+ * <a href="https://tools.ietf.org/html/rfc7231#section-7.1.1.1">sec. 7.1.1.1 in
+ * RFC 7231</a>.
*/
public class HttpDateFormat {
protected static SimpleDateFormat format = new SimpleDateFormat(
"EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
+ protected static TimeZone gmt = TimeZone.getTimeZone("GMT");
+
/**
* HTTP date uses TimeZone GMT
*/
static {
- format.setTimeZone(TimeZone.getTimeZone("GMT"));
+ format.setTimeZone(gmt);
}
- // HttpDate (long t) {
- // }
-
- // HttpDate (String s) {
- // }
-
- // /**
- // * Get the current date in HTTP format.
- // */
- // public static String getCurrentDate() {
- //
- // long now = System.currentTimeMillis();
- // if ((now - currentDateGenerated) > 1000) {
- // synchronized (format) {
- // if ((now - currentDateGenerated) > 1000) {
- // currentDateGenerated = now;
- // currentDate = format.format(new Date(now));
- // }
- // }
- // }
- // return currentDate;
- //
- // }
-
/**
* Get the HTTP format of the specified date.
*/
@@ -97,6 +84,7 @@
Date date;
synchronized (format) {
date = format.parse(dateString);
+ format.setTimeZone(gmt);
}
return date;
}
@@ -105,6 +93,7 @@
long time;
synchronized (format) {
time = format.parse(dateString).getTime();
+ format.setTimeZone(gmt);
}
return time;
}
diff --git a/src/test/org/apache/nutch/net/protocols/TestHttpDateFormat.java b/src/test/org/apache/nutch/net/protocols/TestHttpDateFormat.java
new file mode 100644
index 0000000..94f30c3
--- /dev/null
+++ b/src/test/org/apache/nutch/net/protocols/TestHttpDateFormat.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.protocols;
+
+import java.text.ParseException;
+import java.util.Date;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestHttpDateFormat {
+
+ /**
+ * Test date as string and epoch milliseconds:
+ *
+ * <pre>
+ * $> date --date "Sun, 06 Nov 1994 08:49:37 GMT" '+%s'
+ * 784111777
+ * </pre>
+ */
+ private final String dateString = "Sun, 06 Nov 1994 08:49:37 GMT";
+ private long dateMillis = 784111777000L;
+
+ @Test
+ public void testHttpDateFormat() throws ParseException {
+
+ Assert.assertEquals(dateMillis, HttpDateFormat.toLong(dateString));
+ Assert.assertEquals(dateString, HttpDateFormat.toString(dateMillis));
+ Assert.assertEquals(new Date(dateMillis), HttpDateFormat.toDate(dateString));
+
+ String ds2 = "Sun, 6 Nov 1994 08:49:37 GMT";
+ Assert.assertEquals(dateMillis, HttpDateFormat.toLong(ds2));
+ }
+
+ @Test(expected = ParseException.class)
+ public void testHttpDateFormatException() throws ParseException {
+ String ds = "this is not a valid date";
+ HttpDateFormat.toLong(ds);
+ }
+
+ /**
+ * NUTCH-2814 - HttpDateFormat's internal time zone must not change when
+ * parsing a date using a different time zone
+ */
+ @Test
+ public void testHttpDateFormatTimeZone() throws ParseException {
+ String dateStringPDT = "Mon, 21 Oct 2019 03:18:16 PDT";
+ HttpDateFormat.toLong(dateStringPDT); // must not affect internal time zone
+ Assert.assertEquals(dateString, HttpDateFormat.toString(dateMillis));
+ }
+}