Merge pull request #495 from sebastian-nagel/NUTCH-2672-build-docs-use-https

NUTCH-2762 Replace http:// URLs by https:// (build files and documentation)
diff --git a/src/bin/crawl b/src/bin/crawl
index 81d30cc..56bb237 100755
--- a/src/bin/crawl
+++ b/src/bin/crawl
@@ -34,8 +34,9 @@
 #   --hostdbupdate                        Boolean indicator if we call hostdbupdate or not
 #   --hostdbgenerate                      Boolean indicator if we use hostdb in generate or not
 #
-#   --num-slaves <num_slaves>             Number of slave nodes [default: 1]
-#                                         Note: This can only be set when running in distribution mode
+#   --num-fetchers <num_fetchers>         Number of tasks used for fetching (fetcher map tasks) [default: 1]
+#                                         Note: This can only be set when running in distributed mode and
+#                                               should correspond to the number of worker nodes in the cluster.
 #   --num-tasks <num_tasks>               Number of reducer tasks [default: 2]
 #   --size-fetchlist <size_fetchlist>     Number of URLs to fetch in one iteration [default: 50000]
 #   --time-limit-fetch <time_limit_fetch> Number of minutes allocated to the fetching [default: 180]
@@ -83,10 +84,11 @@
   echo -e "  \t\t\t\t\tspecified second is used by default. [default: -1]"
   echo -e "  -s <seed_dir>\t\t\t\tPath to seeds file(s)"
   echo -e "  -sm <sitemap_dir>\t\t\tPath to sitemap URL file(s)"
-  echo -e "  --hostdbupdate\t\t\t\tBoolean flag showing if we either update or not update hostdb for each round"
+  echo -e "  --hostdbupdate\t\t\tBoolean flag showing if we either update or not update hostdb for each round"
   echo -e "  --hostdbgenerate\t\t\tBoolean flag showing if we use hostdb in generate or not"
-  echo -e "  --num-slaves <num_slaves>\t\tNumber of slave nodes [default: 1]"
-  echo -e "  \t\t\t\t\tNote: This can only be set when running in distribution mode"
+  echo -e "  --num-fetchers <num_fetchers>\t\tNumber of tasks used for fetching (fetcher map tasks) [default: 1]"
+  echo -e "  \t\t\t\t\tNote: This can only be set when running in distributed mode and"
+  echo -e "  \t\t\t\t\t      should correspond to the number of worker nodes in the cluster."
   echo -e "  --num-tasks <num_tasks>\t\tNumber of reducer tasks [default: 2]"
   echo -e "  --size-fetchlist <size_fetchlist>\tNumber of URLs to fetch in one iteration [default: 50000]"
   echo -e "  --time-limit-fetch <time_limit_fetch>\tNumber of minutes allocated to the fetching [default: 180]"
@@ -107,8 +109,8 @@
 JAVA_PROPERTIES=""
 WAIT=-1 # don't wait if there are no URLs to fetch
 SEEDDIR=""
-NUM_SLAVES=1
-NUM_TASKS=2 # 2 x NUM_SLAVES
+NUM_FETCHERS=1
+NUM_TASKS=2 # 2 x NUM_FETCHERS
 SIZE_FETCHLIST=50000 # 25K x NUM_TASKS
 TIME_LIMIT_FETCH=180
 NUM_THREADS=50
@@ -138,7 +140,12 @@
             shift 2
             ;;
         --num-slaves)
-            NUM_SLAVES="${2}"
+            # backward compatibility: NUTCH-2759 renamed option --num-slaves to --num-fetchers
+            NUM_FETCHERS="${2}"
+            shift 2
+            ;;
+        --num-fetchers)
+            NUM_FETCHERS="${2}"
             shift 2
             ;;
         --num-tasks)
@@ -203,7 +210,10 @@
   mode=distributed
 fi
 if [[ "$mode" = "local" ]]; then
-  NUM_SLAVES=1
+  if [[ "$NUM_FETCHERS" -ne 1 ]]; then
+    echo "Ignoring configured number of fetchers (--num-fetchers): a single fetcher task is used when running in local mode."
+  fi
+  NUM_FETCHERS=1 # local mode: force a single fetcher task, overriding --num-fetchers / --num-slaves
 fi
 
 # note that some of the options listed here could be set in the
@@ -296,9 +306,9 @@
 
   echo "Generating a new segment"
   if [[ "$HOSTDBGENERATE" == "true" ]] && __directory_exists "$CRAWL_PATH"/hostdb; then
-   generate_args=($commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments -topN $SIZE_FETCHLIST -numFetchers $NUM_SLAVES -noFilter -hostdb "$CRAWL_PATH"/hostdb)
+   generate_args=($commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments -topN $SIZE_FETCHLIST -numFetchers $NUM_FETCHERS -noFilter -hostdb "$CRAWL_PATH"/hostdb)
   else
-   generate_args=($commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments -topN $SIZE_FETCHLIST -numFetchers $NUM_SLAVES -noFilter)
+   generate_args=($commonOptions "$CRAWL_PATH"/crawldb "$CRAWL_PATH"/segments -topN $SIZE_FETCHLIST -numFetchers $NUM_FETCHERS -noFilter)
   fi
 
   echo "$bin/nutch generate ${generate_args[@]}"
diff --git a/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java b/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
index be56377..3d4f9c5 100644
--- a/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
+++ b/src/plugin/index-metadata/src/java/org/apache/nutch/indexer/metadata/MetadataIndexer.java
@@ -42,7 +42,7 @@
 public class MetadataIndexer implements IndexingFilter {
   private Configuration conf;
   private String[] dbFieldnames;
-  private Map<String, String> parseFieldnames;
+  private String[] parseFieldnames;
   private String[] contentFieldnames;
   private String separator;
   private Set<String> mvFields;
@@ -70,10 +70,10 @@
 
     // add the fields from parsemd
     if (parseFieldnames != null) {
-      for (String metatag : parseFieldnames.keySet()) {
+      for (String metatag : parseFieldnames) {
         for (String value : parse.getData().getParseMeta().getValues(metatag)) {
           if (value != null)
-            add(doc, parseFieldnames.get(metatag), value);
+            add(doc, metatag, value);
         }
       }
     }
@@ -111,14 +111,12 @@
   public void setConf(Configuration conf) {
     this.conf = conf;
     dbFieldnames = conf.getStrings(db_CONF_PROPERTY);
-    parseFieldnames = new HashMap<String, String>();
-    for (String metatag : conf.getStrings(parse_CONF_PROPERTY)) {
-      parseFieldnames.put(metatag.toLowerCase(Locale.ROOT), metatag);
-    }
+    parseFieldnames = conf.getStrings(parse_CONF_PROPERTY);
     contentFieldnames = conf.getStrings(content_CONF_PROPERTY);
     
     separator = conf.get(separator_CONF_PROPERTY, null);
-    mvFields = new HashSet(Arrays.asList(conf.getStrings(mvfields_CONF_PROPERTY, new String[0])));
+    mvFields = new HashSet<>(
+        Arrays.asList(conf.getStrings(mvfields_CONF_PROPERTY, new String[0])));
     // TODO check conflict between field names e.g. could have same label
     // from different sources
 
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/DummyX509TrustManager.java b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/DummyX509TrustManager.java
new file mode 100644
index 0000000..6092e78
--- /dev/null
+++ b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/DummyX509TrustManager.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.htmlunit;
+
+import java.security.KeyStore;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+
+import javax.net.ssl.TrustManagerFactory;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509TrustManager;
+
+/*
+ * Accepts every certificate chain unchecked. Based on EasyX509TrustManager from commons-httpclient.
+ */
+public class DummyX509TrustManager implements X509TrustManager {
+  private X509TrustManager standardTrustManager = null;
+
+  /**
+   * Constructor for DummyX509TrustManager; the keystore only feeds getAcceptedIssuers().
+   */
+  public DummyX509TrustManager(KeyStore keystore)
+      throws NoSuchAlgorithmException, KeyStoreException {
+    super();
+    String algo = TrustManagerFactory.getDefaultAlgorithm();
+    TrustManagerFactory factory = TrustManagerFactory.getInstance(algo);
+    factory.init(keystore);
+    TrustManager[] trustmanagers = factory.getTrustManagers();
+    if (trustmanagers.length == 0) {
+      throw new NoSuchAlgorithmException(algo + " trust manager not supported");
+    }
+    this.standardTrustManager = (X509TrustManager) trustmanagers[0];
+  }
+
+  /**
+   * Always returns true; the given certificates are not inspected.
+   * @see javax.net.ssl.X509TrustManager#checkClientTrusted(X509Certificate[], String)
+   */
+  public boolean isClientTrusted(X509Certificate[] certificates) {
+    return true;
+  }
+
+  /**
+   * Always returns true; the given certificates are not inspected.
+   * @see javax.net.ssl.X509TrustManager#checkServerTrusted(X509Certificate[], String)
+   */
+  public boolean isServerTrusted(X509Certificate[] certificates) {
+    return true;
+  }
+
+  /**
+   * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers()
+   */
+  public X509Certificate[] getAcceptedIssuers() {
+    return this.standardTrustManager.getAcceptedIssuers();
+  }
+
+  public void checkClientTrusted(X509Certificate[] arg0, String arg1)
+      throws CertificateException {
+    // do nothing: never throws, so any client certificate chain is accepted
+
+  }
+
+  public void checkServerTrusted(X509Certificate[] arg0, String arg1)
+      throws CertificateException {
+    // do nothing: never throws, so any server certificate chain is accepted
+
+  }
+}
diff --git a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java
index e76bc04..ced2e0f 100644
--- a/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java
+++ b/src/plugin/protocol-htmlunit/src/java/org/apache/nutch/protocol/htmlunit/HttpResponse.java
@@ -26,12 +26,17 @@
 import java.net.InetSocketAddress;
 import java.net.Socket;
 import java.net.URL;
+import java.security.KeyManagementException;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 
+import javax.net.ssl.SSLContext;
 import javax.net.ssl.SSLSocket;
 import javax.net.ssl.SSLSocketFactory;
+import javax.net.ssl.TrustManager;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.nutch.crawl.CrawlDatum;
@@ -129,10 +134,20 @@
       socket.connect(sockAddr, http.getTimeout());
 
       if (scheme == Scheme.HTTPS) {
-        SSLSocketFactory factory = (SSLSocketFactory) SSLSocketFactory
-            .getDefault();
-        SSLSocket sslsocket = (SSLSocket) factory
-            .createSocket(socket, sockHost, sockPort, true);
+
+        // Optionally skip TLS/SSL certificate validation
+        SSLSocketFactory factory;
+        if (http.isTlsCheckCertificates()) {
+          factory = (SSLSocketFactory) SSLSocketFactory.getDefault();
+        } else {
+          SSLContext sslContext = SSLContext.getInstance("TLS");
+          sslContext.init(null,
+              new TrustManager[] { new DummyX509TrustManager(null) }, null);
+          factory = sslContext.getSocketFactory();
+        }
+
+        SSLSocket sslsocket = (SSLSocket) factory.createSocket(socket, sockHost,
+            sockPort, true);
         sslsocket.setUseClientMode(true);
 
         // Get the protocols and ciphers supported by this JVM
@@ -199,8 +214,8 @@
       reqStr.append("\r\n");
 
       if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
-        reqStr.append("If-Modified-Since: " + HttpDateFormat
-            .toString(datum.getModifiedTime()));
+        reqStr.append("If-Modified-Since: "
+            + HttpDateFormat.toString(datum.getModifiedTime()));
         reqStr.append("\r\n");
       }
       reqStr.append("\r\n");
@@ -216,9 +231,8 @@
       req.flush();
 
       PushbackInputStream in = // process response
-          new PushbackInputStream(
-              new BufferedInputStream(socket.getInputStream(),
-                  Http.BUFFER_SIZE), Http.BUFFER_SIZE);
+          new PushbackInputStream(new BufferedInputStream(
+              socket.getInputStream(), Http.BUFFER_SIZE), Http.BUFFER_SIZE);
 
       StringBuffer line = new StringBuffer();
 
@@ -227,7 +241,8 @@
         httpHeaders = new StringBuffer();
       }
 
-      headers.add("nutch.fetch.time", Long.toString(System.currentTimeMillis()));
+      headers.add("nutch.fetch.time",
+          Long.toString(System.currentTimeMillis()));
 
       boolean haveSeenNonContinueStatus = false;
       while (!haveSeenNonContinueStatus) {
@@ -243,27 +258,30 @@
       // Get Content type header
       String contentType = getHeader(Response.CONTENT_TYPE);
 
-      // handle with HtmlUnit only if content type in HTML or XHTML 
+      // handle with HtmlUnit only if content type in HTML or XHTML
       if (contentType != null) {
-        if (contentType.contains("text/html") || contentType.contains("application/xhtml")) {
+        if (contentType.contains("text/html")
+            || contentType.contains("application/xhtml")) {
           readContentFromHtmlUnit(url);
         } else {
           String transferEncoding = getHeader(Response.TRANSFER_ENCODING);
-          if (transferEncoding != null && "chunked"
-              .equalsIgnoreCase(transferEncoding.trim())) {
+          if (transferEncoding != null
+              && "chunked".equalsIgnoreCase(transferEncoding.trim())) {
             readChunkedContent(in, line);
           } else {
             readPlainContent(in);
           }
 
           String contentEncoding = getHeader(Response.CONTENT_ENCODING);
-          if ("gzip".equals(contentEncoding) || "x-gzip".equals(contentEncoding)) {
+          if ("gzip".equals(contentEncoding)
+              || "x-gzip".equals(contentEncoding)) {
             content = http.processGzipEncoded(content, url);
           } else if ("deflate".equals(contentEncoding)) {
             content = http.processDeflateEncoded(content, url);
           } else {
             if (Http.LOG.isTraceEnabled()) {
-              Http.LOG.trace("fetched " + content.length + " bytes from " + url);
+              Http.LOG
+                  .trace("fetched " + content.length + " bytes from " + url);
             }
           }
         }
@@ -272,6 +290,8 @@
         }
       }
 
+    }catch(KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) {
+      throw new ProtocolException(e);
     } finally {
       if (socket != null)
         socket.close();
@@ -313,7 +333,7 @@
     String page = HtmlUnitWebDriver.getHtmlPage(url.toString(), conf);
     content = page.getBytes("UTF-8");
   }
-  
+
   private void readPlainContent(InputStream in)
       throws HttpException, IOException {
 
@@ -328,8 +348,7 @@
         throw new HttpException("bad content length: " + contentLengthString);
       }
     }
-    if (http.getMaxContent() >= 0 && contentLength > http
-        .getMaxContent()) // limit
+    if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) // limit
       // download
       // size
       contentLength = http.getMaxContent();
@@ -408,17 +427,17 @@
         break;
       }
 
-      if (http.getMaxContent() >= 0 && (contentBytesRead + chunkLen) > http
-          .getMaxContent())
+      if (http.getMaxContent() >= 0
+          && (contentBytesRead + chunkLen) > http.getMaxContent())
         chunkLen = http.getMaxContent() - contentBytesRead;
 
       // read one chunk
       int chunkBytesRead = 0;
       while (chunkBytesRead < chunkLen) {
 
-        int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE ?
-            (chunkLen - chunkBytesRead) :
-            Http.BUFFER_SIZE;
+        int toRead = (chunkLen - chunkBytesRead) < Http.BUFFER_SIZE
+            ? (chunkLen - chunkBytesRead)
+            : Http.BUFFER_SIZE;
         int len = in.read(bytes, 0, toRead);
 
         if (len == -1)
@@ -510,9 +529,9 @@
 
       // handle HTTP responses with missing blank line after headers
       int pos;
-      if (((pos = line.indexOf("<!DOCTYPE")) != -1) || (
-          (pos = line.indexOf("<HTML")) != -1) || ((pos = line.indexOf("<html"))
-          != -1)) {
+      if (((pos = line.indexOf("<!DOCTYPE")) != -1)
+          || ((pos = line.indexOf("<HTML")) != -1)
+          || ((pos = line.indexOf("<html")) != -1)) {
 
         in.unread(line.substring(pos).getBytes("UTF-8"));
         line.setLength(pos);
@@ -570,4 +589,4 @@
     return value;
   }
 
-}
+}
\ No newline at end of file
diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/DummyX509TrustManager.java b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/DummyX509TrustManager.java
new file mode 100644
index 0000000..ec1354f
--- /dev/null
+++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/DummyX509TrustManager.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.interactiveselenium;
+
+import java.security.KeyStore;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+
+import javax.net.ssl.TrustManagerFactory;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509TrustManager;
+
+/*
+ * Accepts every certificate chain unchecked. Based on EasyX509TrustManager from commons-httpclient.
+ */
+public class DummyX509TrustManager implements X509TrustManager {
+  private X509TrustManager standardTrustManager = null;
+
+  /**
+   * Constructor for DummyX509TrustManager; the keystore only feeds getAcceptedIssuers().
+   */
+  public DummyX509TrustManager(KeyStore keystore)
+      throws NoSuchAlgorithmException, KeyStoreException {
+    super();
+    String algo = TrustManagerFactory.getDefaultAlgorithm();
+    TrustManagerFactory factory = TrustManagerFactory.getInstance(algo);
+    factory.init(keystore);
+    TrustManager[] trustmanagers = factory.getTrustManagers();
+    if (trustmanagers.length == 0) {
+      throw new NoSuchAlgorithmException(algo + " trust manager not supported");
+    }
+    this.standardTrustManager = (X509TrustManager) trustmanagers[0];
+  }
+
+  /**
+   * Always returns true; the given certificates are not inspected.
+   * @see javax.net.ssl.X509TrustManager#checkClientTrusted(X509Certificate[], String)
+   */
+  public boolean isClientTrusted(X509Certificate[] certificates) {
+    return true;
+  }
+
+  /**
+   * Always returns true; the given certificates are not inspected.
+   * @see javax.net.ssl.X509TrustManager#checkServerTrusted(X509Certificate[], String)
+   */
+  public boolean isServerTrusted(X509Certificate[] certificates) {
+    return true;
+  }
+
+  /**
+   * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers()
+   */
+  public X509Certificate[] getAcceptedIssuers() {
+    return this.standardTrustManager.getAcceptedIssuers();
+  }
+
+  public void checkClientTrusted(X509Certificate[] arg0, String arg1)
+      throws CertificateException {
+    // do nothing: never throws, so any client certificate chain is accepted
+
+  }
+
+  public void checkServerTrusted(X509Certificate[] arg0, String arg1)
+      throws CertificateException {
+    // do nothing: never throws, so any server certificate chain is accepted
+
+  }
+}
diff --git a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java
index 8ebd898..a5793c6 100644
--- a/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java
+++ b/src/plugin/protocol-interactiveselenium/src/java/org/apache/nutch/protocol/interactiveselenium/HttpResponse.java
@@ -26,12 +26,17 @@
 import java.net.InetSocketAddress;
 import java.net.Socket;
 import java.net.URL;
+import java.security.KeyManagementException;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 
+import javax.net.ssl.SSLContext;
 import javax.net.ssl.SSLSocket;
 import javax.net.ssl.SSLSocketFactory;
+import javax.net.ssl.TrustManager;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
@@ -65,10 +70,12 @@
   protected enum Scheme {
     HTTP, HTTPS,
   }
+
   /** The nutch configuration */
   private Configuration conf = null;
 
-  public HttpResponse(Http http, URL url, CrawlDatum datum) throws ProtocolException, IOException {
+  public HttpResponse(Http http, URL url, CrawlDatum datum)
+      throws ProtocolException, IOException {
 
     this.conf = http.getConf();
     this.http = http;
@@ -122,33 +129,43 @@
       socket.connect(sockAddr, http.getTimeout());
 
       if (scheme == Scheme.HTTPS) {
-        SSLSocketFactory factory = (SSLSocketFactory) SSLSocketFactory
-                .getDefault();
-        SSLSocket sslsocket = (SSLSocket) factory
-                .createSocket(socket, sockHost, sockPort, true);
+
+        // Optionally skip TLS/SSL certificate validation
+        SSLSocketFactory factory;
+        if (http.isTlsCheckCertificates()) {
+          factory = (SSLSocketFactory) SSLSocketFactory.getDefault();
+        } else {
+          SSLContext sslContext = SSLContext.getInstance("TLS");
+          sslContext.init(null,
+              new TrustManager[] { new DummyX509TrustManager(null) }, null);
+          factory = sslContext.getSocketFactory();
+        }
+
+        SSLSocket sslsocket = (SSLSocket) factory.createSocket(socket, sockHost,
+            sockPort, true);
         sslsocket.setUseClientMode(true);
 
         // Get the protocols and ciphers supported by this JVM
         Set<String> protocols = new HashSet<String>(
-                Arrays.asList(sslsocket.getSupportedProtocols()));
+            Arrays.asList(sslsocket.getSupportedProtocols()));
         Set<String> ciphers = new HashSet<String>(
-                Arrays.asList(sslsocket.getSupportedCipherSuites()));
+            Arrays.asList(sslsocket.getSupportedCipherSuites()));
 
         // Intersect with preferred protocols and ciphers
         protocols.retainAll(http.getTlsPreferredProtocols());
         ciphers.retainAll(http.getTlsPreferredCipherSuites());
 
         sslsocket.setEnabledProtocols(
-                protocols.toArray(new String[protocols.size()]));
+            protocols.toArray(new String[protocols.size()]));
         sslsocket.setEnabledCipherSuites(
-                ciphers.toArray(new String[ciphers.size()]));
+            ciphers.toArray(new String[ciphers.size()]));
 
         sslsocket.startHandshake();
         socket = sslsocket;
       }
 
       if (sockAddr != null
-              && conf.getBoolean("store.ip.address", false) == true) {
+          && conf.getBoolean("store.ip.address", false) == true) {
         headers.add("_ip_", sockAddr.getAddress().getHostAddress());
       }
       // make request
@@ -203,17 +220,17 @@
       }
 
       if (http.isCookieEnabled()
-              && datum.getMetaData().containsKey(HttpBase.COOKIE)) {
+          && datum.getMetaData().containsKey(HttpBase.COOKIE)) {
         String cookie = ((Text) datum.getMetaData().get(HttpBase.COOKIE))
-                .toString();
+            .toString();
         reqStr.append("Cookie: ");
         reqStr.append(cookie);
         reqStr.append("\r\n");
       }
 
       if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
-        reqStr.append("If-Modified-Since: " + HttpDateFormat
-                .toString(datum.getModifiedTime()));
+        reqStr.append("If-Modified-Since: "
+            + HttpDateFormat.toString(datum.getModifiedTime()));
         reqStr.append("\r\n");
       }
       reqStr.append("\r\n");
@@ -223,25 +240,24 @@
         headers.add("_request_", reqStr.toString());
       }
 
-
       byte[] reqBytes = reqStr.toString().getBytes();
 
       req.write(reqBytes);
       req.flush();
 
       PushbackInputStream in = // process response
-          new PushbackInputStream(new BufferedInputStream(socket.getInputStream(), Http.BUFFER_SIZE),
-              Http.BUFFER_SIZE);
+          new PushbackInputStream(new BufferedInputStream(
+              socket.getInputStream(), Http.BUFFER_SIZE), Http.BUFFER_SIZE);
 
       StringBuffer line = new StringBuffer();
 
-
       // store the http headers verbatim
       if (conf.getBoolean("store.http.headers", false) == true) {
         httpHeaders = new StringBuffer();
       }
 
-      headers.add("nutch.fetch.time", Long.toString(System.currentTimeMillis()));
+      headers.add("nutch.fetch.time",
+          Long.toString(System.currentTimeMillis()));
 
       boolean haveSeenNonContinueStatus = false;
       while (!haveSeenNonContinueStatus) {
@@ -257,9 +273,10 @@
       // Get Content type header
       String contentType = getHeader(Response.CONTENT_TYPE);
 
-      // handle with Selenium only if content type in HTML or XHTML 
+      // handle with Selenium only if content type in HTML or XHTML
       if (contentType != null) {
-        if (contentType.contains("text/html") || contentType.contains("application/xhtml")) {
+        if (contentType.contains("text/html")
+            || contentType.contains("application/xhtml")) {
           readPlainContent(url);
         } else {
           try {
@@ -269,11 +286,13 @@
               try {
                 contentLength = Integer.parseInt(contentLengthString.trim());
               } catch (NumberFormatException ex) {
-                throw new HttpException("bad content length: " + contentLengthString);
+                throw new HttpException(
+                    "bad content length: " + contentLengthString);
               }
             }
 
-            if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
+            if (http.getMaxContent() >= 0
+                && contentLength > http.getMaxContent()) {
               contentLength = http.getMaxContent();
             }
 
@@ -299,17 +318,20 @@
             }
           }
         }
-      } 
+      }
 
+    }catch(KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) {
+        throw new ProtocolException(e);
     } finally {
       if (socket != null)
         socket.close();
     }
   }
 
-  /* ------------------------- *
-   * <implementation:Response> *
-   * ------------------------- */
+  /*
+   * ------------------------- * <implementation:Response> *
+   * -------------------------
+   */
 
   public URL getUrl() {
     return url;
@@ -331,56 +353,63 @@
     return content;
   }
 
-  /* ------------------------- *
-   * <implementation:Response> *
-   * ------------------------- */
+  /*
+   * ------------------------- * <implementation:Response> *
+   * -------------------------
+   */
   private void loadSeleniumHandlers() {
-    if (handlers != null) return;
+    if (handlers != null)
+      return;
 
-    String handlerConfig = this.conf.get("interactiveselenium.handlers", "DefaultHandler");
+    String handlerConfig = this.conf.get("interactiveselenium.handlers",
+        "DefaultHandler");
     String[] handlerNames = handlerConfig.split(",");
     handlers = new InteractiveSeleniumHandler[handlerNames.length];
     for (int i = 0; i < handlerNames.length; i++) {
+      try {
+        String classToLoad = this.getClass().getPackage().getName()
+            + ".handlers." + handlerNames[i];
         try {
-            String classToLoad = this.getClass().getPackage().getName() + ".handlers." + handlerNames[i];
-            try {
-              handlers[i] = InteractiveSeleniumHandler.class.cast(Class.forName(classToLoad).getConstructor().newInstance());
-            } catch (IllegalArgumentException | InvocationTargetException | NoSuchMethodException | SecurityException e) {
-              e.printStackTrace();
-            }
-            Http.LOG.info("Successfully loaded " + classToLoad);
-        } catch (ClassNotFoundException e) {
-            Http.LOG.info("Unable to load Handler class for: " + handlerNames[i]);
-        } catch (InstantiationException e) {
-            Http.LOG.info("Unable to instantiate Handler: " + handlerNames[i]);
-        } catch (IllegalAccessException e) {
-            Http.LOG.info("Illegal access with Handler: " + handlerNames[i]);
+          handlers[i] = InteractiveSeleniumHandler.class
+              .cast(Class.forName(classToLoad).getConstructor().newInstance());
+        } catch (IllegalArgumentException | InvocationTargetException
+            | NoSuchMethodException | SecurityException e) {
+          e.printStackTrace();
         }
+        Http.LOG.info("Successfully loaded " + classToLoad);
+      } catch (ClassNotFoundException e) {
+        Http.LOG.info("Unable to load Handler class for: " + handlerNames[i]);
+      } catch (InstantiationException e) {
+        Http.LOG.info("Unable to instantiate Handler: " + handlerNames[i]);
+      } catch (IllegalAccessException e) {
+        Http.LOG.info("Illegal access with Handler: " + handlerNames[i]);
+      }
     }
   }
 
   private void readPlainContent(URL url) throws IOException {
     if (handlers == null)
-        loadSeleniumHandlers();
+      loadSeleniumHandlers();
 
     String processedPage = "";
 
     for (InteractiveSeleniumHandler handler : this.handlers) {
-        if (! handler.shouldProcessURL(url.toString())) {
-            continue;
-        }
+      if (!handler.shouldProcessURL(url.toString())) {
+        continue;
+      }
 
-        WebDriver driver = HttpWebClient.getDriverForPage(url.toString(), conf);
+      WebDriver driver = HttpWebClient.getDriverForPage(url.toString(), conf);
 
-        processedPage += handler.processDriver(driver);
+      processedPage += handler.processDriver(driver);
 
-        HttpWebClient.cleanUpDriver(driver);
+      HttpWebClient.cleanUpDriver(driver);
     }
 
     content = processedPage.getBytes("UTF-8");
   }
 
-  private int parseStatusLine(PushbackInputStream in, StringBuffer line) throws IOException, HttpException {
+  private int parseStatusLine(PushbackInputStream in, StringBuffer line)
+      throws IOException, HttpException {
     readLine(in, line, false);
 
     int codeStart = line.indexOf(" ");
@@ -395,13 +424,15 @@
     try {
       code = Integer.parseInt(line.substring(codeStart + 1, codeEnd));
     } catch (NumberFormatException e) {
-      throw new HttpException("bad status line '" + line + "': " + e.getMessage(), e);
+      throw new HttpException(
+          "bad status line '" + line + "': " + e.getMessage(), e);
     }
 
     return code;
   }
 
-  private void processHeaderLine(StringBuffer line) throws IOException, HttpException {
+  private void processHeaderLine(StringBuffer line)
+      throws IOException, HttpException {
 
     int colonIndex = line.indexOf(":"); // key is up to colon
     if (colonIndex == -1) {
@@ -427,24 +458,26 @@
   }
 
   // Adds headers to our headers Metadata
-  private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException {
+  private void parseHeaders(PushbackInputStream in, StringBuffer line)
+      throws IOException, HttpException {
 
     while (readLine(in, line, true) != 0) {
 
       // handle HTTP responses with missing blank line after headers
       int pos;
-      if (((pos = line.indexOf("<!DOCTYPE")) != -1) || ((pos = line.indexOf("<HTML")) != -1)
+      if (((pos = line.indexOf("<!DOCTYPE")) != -1)
+          || ((pos = line.indexOf("<HTML")) != -1)
           || ((pos = line.indexOf("<html")) != -1)) {
 
         in.unread(line.substring(pos).getBytes("UTF-8"));
         line.setLength(pos);
 
         try {
-          //TODO: (CM) We don't know the header names here
-          //since we're just handling them generically. It would
-          //be nice to provide some sort of mapping function here
-          //for the returned header names to the standard metadata
-          //names in the ParseData class
+          // TODO: (CM) We don't know the header names here
+          // since we're just handling them generically. It would
+          // be nice to provide some sort of mapping function here
+          // for the returned header names to the standard metadata
+          // names in the ParseData class
           processHeaderLine(line);
         } catch (Exception e) {
           // fixme:
@@ -457,8 +490,8 @@
     }
   }
 
-  private static int readLine(PushbackInputStream in, StringBuffer line, boolean allowContinuedLine)
-      throws IOException {
+  private static int readLine(PushbackInputStream in, StringBuffer line,
+      boolean allowContinuedLine) throws IOException {
     line.setLength(0);
     for (int c = in.read(); c != -1; c = in.read()) {
       switch (c) {
@@ -491,4 +524,4 @@
     in.unread(value);
     return value;
   }
-}
+}
\ No newline at end of file
diff --git a/src/plugin/protocol-okhttp/ivy.xml b/src/plugin/protocol-okhttp/ivy.xml
index f529892..c5c09c8 100644
--- a/src/plugin/protocol-okhttp/ivy.xml
+++ b/src/plugin/protocol-okhttp/ivy.xml
@@ -36,7 +36,8 @@
   </publications>
 
   <dependencies>
-    <dependency org="com.squareup.okhttp3" name="okhttp" rev="3.14.2"/>
+    <dependency org="com.squareup.okhttp3" name="okhttp" rev="4.3.1"/>
+    <dependency org="com.squareup.okhttp3" name="okhttp-brotli" rev="4.3.1"/>
   </dependencies>
   
 </ivy-module>
diff --git a/src/plugin/protocol-okhttp/plugin.xml b/src/plugin/protocol-okhttp/plugin.xml
index b843736..bbeb5da 100755
--- a/src/plugin/protocol-okhttp/plugin.xml
+++ b/src/plugin/protocol-okhttp/plugin.xml
@@ -25,8 +25,13 @@
       <library name="protocol-okhttp.jar">
          <export name="*"/>
       </library>
-      <library name="okhttp-3.14.2.jar"/>
-      <library name="okio-1.17.2.jar"/>
+      <library name="okhttp-4.3.1.jar"/>
+      <library name="okio-2.4.1.jar"/>
+      <library name="kotlin-stdlib-1.3.61.jar"/>
+      <library name="kotlin-stdlib-common-1.3.61.jar"/>
+      <library name="annotations-13.0.jar"/>
+      <library name="okhttp-brotli-4.3.1.jar"/>
+      <library name="dec-0.1.2.jar"/>
    </runtime>
 
    <requires>
diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
index b4edb19..708863b 100644
--- a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
+++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
@@ -56,6 +56,7 @@
 import okhttp3.OkHttpClient;
 import okhttp3.Protocol;
 import okhttp3.Request;
+import okhttp3.brotli.BrotliInterceptor;
 
 public class OkHttp extends HttpBase {
 
@@ -216,6 +217,9 @@
       builder.addNetworkInterceptor(new HTTPHeadersInterceptor());
     }
 
+    // enable support for Brotli compression (Content-Encoding)
+    builder.addInterceptor(BrotliInterceptor.INSTANCE);
+
     client = builder.build();
   }
 
diff --git a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/DummyX509TrustManager.java b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/DummyX509TrustManager.java
new file mode 100644
index 0000000..1eea806
--- /dev/null
+++ b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/DummyX509TrustManager.java
@@ -0,0 +1,85 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.nutch.protocol.selenium;
+
+import java.security.KeyStore;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
+import java.security.cert.CertificateException;
+import java.security.cert.X509Certificate;
+
+import javax.net.ssl.TrustManagerFactory;
+import javax.net.ssl.TrustManager;
+import javax.net.ssl.X509TrustManager;
+
+/** Trust manager that accepts every client and server certificate unchecked.
+ * Based on EasyX509TrustManager from commons-httpclient.
+ */
+public class DummyX509TrustManager implements X509TrustManager {
+  private X509TrustManager standardTrustManager = null;
+
+  /** Wraps the first default trust manager for keystore (cast, not type-checked);
+   * only getAcceptedIssuers() uses it. Throws if the factory yields none.
+   */
+  public DummyX509TrustManager(KeyStore keystore)
+      throws NoSuchAlgorithmException, KeyStoreException {
+    super();
+    String algo = TrustManagerFactory.getDefaultAlgorithm();
+    TrustManagerFactory factory = TrustManagerFactory.getInstance(algo);
+    factory.init(keystore);
+    TrustManager[] trustmanagers = factory.getTrustManagers();
+    if (trustmanagers.length == 0) {
+      throw new NoSuchAlgorithmException(algo + " trust manager not supported");
+    }
+    this.standardTrustManager = (X509TrustManager) trustmanagers[0];
+  }
+
+  /** Always returns true; the certificates argument is ignored.
+   * @see javax.net.ssl.X509TrustManager#checkClientTrusted(X509Certificate[],
+   *      String)
+   */
+  public boolean isClientTrusted(X509Certificate[] certificates) {
+    return true;
+  }
+
+  /** Always returns true; the certificates argument is ignored.
+   * @see javax.net.ssl.X509TrustManager#checkServerTrusted(X509Certificate[],
+   *      String)
+   */
+  public boolean isServerTrusted(X509Certificate[] certificates) {
+    return true;
+  }
+
+  /** Returns the accepted issuers of the wrapped default trust manager.
+   * @see javax.net.ssl.X509TrustManager#getAcceptedIssuers()
+   */
+  public X509Certificate[] getAcceptedIssuers() {
+    return this.standardTrustManager.getAcceptedIssuers();
+  }
+
+  public void checkClientTrusted(X509Certificate[] arg0, String arg1)
+      throws CertificateException {
+    // Intentionally empty: never throws, so every client certificate
+    // chain is accepted without validation.
+  }
+
+  public void checkServerTrusted(X509Certificate[] arg0, String arg1)
+      throws CertificateException {
+    // Intentionally empty: never throws, so every server certificate
+    // chain is accepted without validation.
+  }
+}
diff --git a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
index 4a20b04..b394d02 100644
--- a/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
+++ b/src/plugin/protocol-selenium/src/java/org/apache/nutch/protocol/selenium/HttpResponse.java
@@ -25,13 +25,17 @@
 import java.net.InetSocketAddress;
 import java.net.Socket;
 import java.net.URL;
+import java.security.KeyManagementException;
+import java.security.KeyStoreException;
+import java.security.NoSuchAlgorithmException;
 import java.util.Arrays;
 import java.util.HashSet;
 import java.util.Set;
 
-
+import javax.net.ssl.SSLContext;
 import javax.net.ssl.SSLSocket;
 import javax.net.ssl.SSLSocketFactory;
+import javax.net.ssl.TrustManager;
 
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.io.Text;
@@ -61,10 +65,12 @@
   protected enum Scheme {
     HTTP, HTTPS,
   }
+
   /** The nutch configuration */
   private Configuration conf = null;
 
-  public HttpResponse(Http http, URL url, CrawlDatum datum) throws ProtocolException, IOException {
+  public HttpResponse(Http http, URL url, CrawlDatum datum)
+      throws ProtocolException, IOException {
 
     this.conf = http.getConf();
     this.http = http;
@@ -118,33 +124,43 @@
       socket.connect(sockAddr, http.getTimeout());
 
       if (scheme == Scheme.HTTPS) {
-        SSLSocketFactory factory = (SSLSocketFactory) SSLSocketFactory
-                .getDefault();
-        SSLSocket sslsocket = (SSLSocket) factory
-                .createSocket(socket, sockHost, sockPort, true);
+
+        // Optionally skip TLS/SSL certificate validation
+        SSLSocketFactory factory;
+        if (http.isTlsCheckCertificates()) {
+          factory = (SSLSocketFactory) SSLSocketFactory.getDefault();
+        } else {
+          SSLContext sslContext = SSLContext.getInstance("TLS");
+          sslContext.init(null,
+              new TrustManager[] { new DummyX509TrustManager(null) }, null);
+          factory = sslContext.getSocketFactory();
+        }
+
+        SSLSocket sslsocket = (SSLSocket) factory.createSocket(socket, sockHost,
+            sockPort, true);
         sslsocket.setUseClientMode(true);
 
         // Get the protocols and ciphers supported by this JVM
         Set<String> protocols = new HashSet<String>(
-                Arrays.asList(sslsocket.getSupportedProtocols()));
+            Arrays.asList(sslsocket.getSupportedProtocols()));
         Set<String> ciphers = new HashSet<String>(
-                Arrays.asList(sslsocket.getSupportedCipherSuites()));
+            Arrays.asList(sslsocket.getSupportedCipherSuites()));
 
         // Intersect with preferred protocols and ciphers
         protocols.retainAll(http.getTlsPreferredProtocols());
         ciphers.retainAll(http.getTlsPreferredCipherSuites());
 
         sslsocket.setEnabledProtocols(
-                protocols.toArray(new String[protocols.size()]));
+            protocols.toArray(new String[protocols.size()]));
         sslsocket.setEnabledCipherSuites(
-                ciphers.toArray(new String[ciphers.size()]));
+            ciphers.toArray(new String[ciphers.size()]));
 
         sslsocket.startHandshake();
         socket = sslsocket;
       }
 
       if (sockAddr != null
-              && conf.getBoolean("store.ip.address", false) == true) {
+          && conf.getBoolean("store.ip.address", false) == true) {
         headers.add("_ip_", sockAddr.getAddress().getHostAddress());
       }
       // make request
@@ -199,17 +215,17 @@
       }
 
       if (http.isCookieEnabled()
-              && datum.getMetaData().containsKey(HttpBase.COOKIE)) {
+          && datum.getMetaData().containsKey(HttpBase.COOKIE)) {
         String cookie = ((Text) datum.getMetaData().get(HttpBase.COOKIE))
-                .toString();
+            .toString();
         reqStr.append("Cookie: ");
         reqStr.append(cookie);
         reqStr.append("\r\n");
       }
 
       if (http.isIfModifiedSinceEnabled() && datum.getModifiedTime() > 0) {
-        reqStr.append("If-Modified-Since: " + HttpDateFormat
-                .toString(datum.getModifiedTime()));
+        reqStr.append("If-Modified-Since: "
+            + HttpDateFormat.toString(datum.getModifiedTime()));
         reqStr.append("\r\n");
       }
       reqStr.append("\r\n");
@@ -219,25 +235,24 @@
         headers.add("_request_", reqStr.toString());
       }
 
-
       byte[] reqBytes = reqStr.toString().getBytes();
 
       req.write(reqBytes);
       req.flush();
 
       PushbackInputStream in = // process response
-          new PushbackInputStream(new BufferedInputStream(socket.getInputStream(), Http.BUFFER_SIZE),
-              Http.BUFFER_SIZE);
+          new PushbackInputStream(new BufferedInputStream(
+              socket.getInputStream(), Http.BUFFER_SIZE), Http.BUFFER_SIZE);
 
       StringBuffer line = new StringBuffer();
 
-
       // store the http headers verbatim
       if (conf.getBoolean("store.http.headers", false) == true) {
         httpHeaders = new StringBuffer();
       }
 
-      headers.add("nutch.fetch.time", Long.toString(System.currentTimeMillis()));
+      headers.add("nutch.fetch.time",
+          Long.toString(System.currentTimeMillis()));
 
       boolean haveSeenNonContinueStatus = false;
       while (!haveSeenNonContinueStatus) {
@@ -253,9 +268,10 @@
       // Get Content type header
       String contentType = getHeader(Response.CONTENT_TYPE);
 
-      // handle with Selenium only if content type in HTML or XHTML 
+      // handle with Selenium only if content type is HTML or XHTML
       if (contentType != null) {
-        if (contentType.contains("text/html") || contentType.contains("application/xhtml")) {
+        if (contentType.contains("text/html")
+            || contentType.contains("application/xhtml")) {
           readPlainContent(url);
         } else {
           try {
@@ -265,11 +281,13 @@
               try {
                 contentLength = Integer.parseInt(contentLengthString.trim());
               } catch (NumberFormatException ex) {
-                throw new HttpException("bad content length: " + contentLengthString);
+                throw new HttpException(
+                    "bad content length: " + contentLengthString);
               }
             }
 
-            if (http.getMaxContent() >= 0 && contentLength > http.getMaxContent()) {
+            if (http.getMaxContent() >= 0
+                && contentLength > http.getMaxContent()) {
               contentLength = http.getMaxContent();
             }
 
@@ -298,17 +316,20 @@
         if (httpHeaders != null) {
           headers.add(Response.RESPONSE_HEADERS, httpHeaders.toString());
         }
-      } 
+      }
 
+    } catch(KeyManagementException | NoSuchAlgorithmException | KeyStoreException e) {
+        throw new ProtocolException(e);
     } finally {
       if (socket != null)
         socket.close();
     }
   }
 
-  /* ------------------------- *
-   * <implementation:Response> *
-   * ------------------------- */
+  /* ------------------------- *
+   * <implementation:Response> *
+   * ------------------------- *
+   */
 
   public URL getUrl() {
     return url;
@@ -330,9 +351,10 @@
     return content;
   }
 
-  /* ------------------------- *
-   * <implementation:Response> *
-   * ------------------------- */
+  /* ------------------------- *
+   * <implementation:Response> *
+   * ------------------------- *
+   */
 
   private void readPlainContent(URL url) throws IOException {
     String page = HttpWebClient.getHtmlPage(url.toString(), conf);
@@ -340,7 +362,8 @@
     content = page.getBytes("UTF-8");
   }
 
-  private int parseStatusLine(PushbackInputStream in, StringBuffer line) throws IOException, HttpException {
+  private int parseStatusLine(PushbackInputStream in, StringBuffer line)
+      throws IOException, HttpException {
     readLine(in, line, false);
 
     int codeStart = line.indexOf(" ");
@@ -355,13 +378,15 @@
     try {
       code = Integer.parseInt(line.substring(codeStart + 1, codeEnd));
     } catch (NumberFormatException e) {
-      throw new HttpException("bad status line '" + line + "': " + e.getMessage(), e);
+      throw new HttpException(
+          "bad status line '" + line + "': " + e.getMessage(), e);
     }
 
     return code;
   }
 
-  private void processHeaderLine(StringBuffer line) throws IOException, HttpException {
+  private void processHeaderLine(StringBuffer line)
+      throws IOException, HttpException {
 
     int colonIndex = line.indexOf(":"); // key is up to colon
     if (colonIndex == -1) {
@@ -387,24 +412,26 @@
   }
 
   // Adds headers to our headers Metadata
-  private void parseHeaders(PushbackInputStream in, StringBuffer line) throws IOException, HttpException {
+  private void parseHeaders(PushbackInputStream in, StringBuffer line)
+      throws IOException, HttpException {
 
     while (readLine(in, line, true) != 0) {
 
       // handle HTTP responses with missing blank line after headers
       int pos;
-      if (((pos = line.indexOf("<!DOCTYPE")) != -1) || ((pos = line.indexOf("<HTML")) != -1)
+      if (((pos = line.indexOf("<!DOCTYPE")) != -1)
+          || ((pos = line.indexOf("<HTML")) != -1)
           || ((pos = line.indexOf("<html")) != -1)) {
 
         in.unread(line.substring(pos).getBytes("UTF-8"));
         line.setLength(pos);
 
         try {
-          //TODO: (CM) We don't know the header names here
-          //since we're just handling them generically. It would
-          //be nice to provide some sort of mapping function here
-          //for the returned header names to the standard metadata
-          //names in the ParseData class
+          // TODO: (CM) We don't know the header names here
+          // since we're just handling them generically. It would
+          // be nice to provide some sort of mapping function here
+          // for the returned header names to the standard metadata
+          // names in the ParseData class
           processHeaderLine(line);
         } catch (Exception e) {
           // fixme:
@@ -417,8 +444,8 @@
     }
   }
 
-  private static int readLine(PushbackInputStream in, StringBuffer line, boolean allowContinuedLine)
-      throws IOException {
+  private static int readLine(PushbackInputStream in, StringBuffer line,
+      boolean allowContinuedLine) throws IOException {
     line.setLength(0);
     for (int c = in.read(); c != -1; c = in.read()) {
       switch (c) {
@@ -451,4 +478,4 @@
     in.unread(value);
     return value;
   }
-}
+}
\ No newline at end of file