NUTCH-2814 HttpDateFormat's internal time zone may change after parsing a date
- reset time zone to GMT after parsing a date
diff --git a/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java b/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
index fbd45a2..f30fb20 100644
--- a/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
+++ b/src/java/org/apache/nutch/net/protocols/HttpDateFormat.java
@@ -24,48 +24,35 @@
 import java.text.ParseException;
 
 /**
- * class to handle HTTP dates.
+ * Parse and format dates used in HTTP headers, e.g., to fill the
+ * "If-Modified-Since" request header field.
  * 
- * Modified from FastHttpDateFormat.java in jakarta-tomcat.
+ * HTTP dates use Greenwich Mean Time (GMT) as time zone and a date format like:
  * 
- * @author John Xing
+ * <pre>
+ * Sun, 06 Nov 1994 08:49:37 GMT
+ * </pre>
+ * 
+ * See <a href=
+ * "https://www.w3.org/Protocols/rfc2616/rfc2616-sec3.html#sec3.3.1">sec. 3.3.1
+ * in RFC 2616</a> and
+ * <a href="https://tools.ietf.org/html/rfc7231#section-7.1.1.1">sec. 7.1.1.1 in
+ * RFC 7231</a>.
  */
 public class HttpDateFormat {
 
   protected static SimpleDateFormat format = new SimpleDateFormat(
       "EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
 
+  protected static TimeZone gmt = TimeZone.getTimeZone("GMT");
+
   /**
    * HTTP date uses TimeZone GMT
    */
   static {
-    format.setTimeZone(TimeZone.getTimeZone("GMT"));
+    format.setTimeZone(gmt);
   }
 
-  // HttpDate (long t) {
-  // }
-
-  // HttpDate (String s) {
-  // }
-
-  // /**
-  // * Get the current date in HTTP format.
-  // */
-  // public static String getCurrentDate() {
-  //
-  // long now = System.currentTimeMillis();
-  // if ((now - currentDateGenerated) > 1000) {
-  // synchronized (format) {
-  // if ((now - currentDateGenerated) > 1000) {
-  // currentDateGenerated = now;
-  // currentDate = format.format(new Date(now));
-  // }
-  // }
-  // }
-  // return currentDate;
-  //
-  // }
-
   /**
    * Get the HTTP format of the specified date.
    */
@@ -97,6 +84,7 @@
     Date date;
     synchronized (format) {
       date = format.parse(dateString);
+      format.setTimeZone(gmt);
     }
     return date;
   }
@@ -105,6 +93,7 @@
     long time;
     synchronized (format) {
       time = format.parse(dateString).getTime();
+      format.setTimeZone(gmt);
     }
     return time;
   }
diff --git a/src/test/org/apache/nutch/net/protocols/TestHttpDateFormat.java b/src/test/org/apache/nutch/net/protocols/TestHttpDateFormat.java
new file mode 100644
index 0000000..94f30c3
--- /dev/null
+++ b/src/test/org/apache/nutch/net/protocols/TestHttpDateFormat.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.net.protocols;
+
+import java.text.ParseException;
+import java.util.Date;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class TestHttpDateFormat {
+
+  /**
+   * Test date as string and as epoch milliseconds:
+   * 
+   * <pre>
+   *   $> date --date "Sun, 06 Nov 1994 08:49:37 GMT" '+%s'
+   *   784111777
+   * </pre>
+   */
+  private final String dateString = "Sun, 06 Nov 1994 08:49:37 GMT";
+  private long dateMillis = 784111777000L;
+
+  @Test
+  public void testHttpDateFormat() throws ParseException {
+
+    Assert.assertEquals(dateMillis, HttpDateFormat.toLong(dateString));
+    Assert.assertEquals(dateString, HttpDateFormat.toString(dateMillis));
+    Assert.assertEquals(new Date(dateMillis), HttpDateFormat.toDate(dateString));
+
+    String ds2 = "Sun, 6 Nov 1994 08:49:37 GMT";
+    Assert.assertEquals(dateMillis, HttpDateFormat.toLong(ds2));
+  }
+
+  @Test(expected = ParseException.class)
+  public void testHttpDateFormatException() throws ParseException {
+    String ds = "this is not a valid date";
+    HttpDateFormat.toLong(ds);
+  }
+
+  /**
+   * NUTCH-2814 - HttpDateFormat's internal time zone must not change when
+   * parsing a date using a different time zone
+   */
+  @Test
+  public void testHttpDateFormatTimeZone() throws ParseException {
+    String dateStringPDT = "Mon, 21 Oct 2019 03:18:16 PDT";
+    HttpDateFormat.toLong(dateStringPDT); // must not affect internal time zone
+    Assert.assertEquals(dateString, HttpDateFormat.toString(dateMillis));
+  }
+}