Merge pull request #697 from sebastian-nagel/NUTCH-2896-okhttp-connection-pool

NUTCH-2896 Protocol-okhttp: make connection pool configurable
diff --git a/conf/nutch-default.xml b/conf/nutch-default.xml
index 6a3c828..1ad02a0 100644
--- a/conf/nutch-default.xml
+++ b/conf/nutch-default.xml
@@ -2133,7 +2133,8 @@
   'domainDatabase', 'ispDatabase' or 'insightsService'. If you wish to use any one of the 
   Database options, you should make one of GeoIP2-City.mmdb, GeoIP2-Connection-Type.mmdb, 
   GeoIP2-Domain.mmdb or GeoIP2-ISP.mmdb files respectively available on the classpath and
-  available at runtime.
+  available at runtime. Alternatively, also the GeoLite2 IP databases (GeoLite2-*.mmdb)
+  can be used.
   </description>
 </property>
 
diff --git a/ivy/ivy.xml b/ivy/ivy.xml
index a03bce4..175443e 100644
--- a/ivy/ivy.xml
+++ b/ivy/ivy.xml
@@ -36,10 +36,10 @@
 	</publications>
 
 	<dependencies>
-		<dependency org="org.apache.logging.log4j" name="log4j-api" rev="2.17.0" conf="*->master" />
-		<dependency org="org.apache.logging.log4j" name="log4j-core" rev="2.17.0" conf="*->master" />
-		<dependency org="org.apache.logging.log4j" name="log4j-slf4j-impl" rev="2.17.0" conf="*->master" />
-		<dependency org="org.slf4j" name="slf4j-api" rev="1.7.32" conf="*->master" />
+		<dependency org="org.apache.logging.log4j" name="log4j-api" rev="2.17.2" conf="*->master" />
+		<dependency org="org.apache.logging.log4j" name="log4j-core" rev="2.17.2" conf="*->master" />
+		<dependency org="org.apache.logging.log4j" name="log4j-slf4j-impl" rev="2.17.2" conf="*->master" />
+		<dependency org="org.slf4j" name="slf4j-api" rev="1.7.36" conf="*->master" />
 
 		<dependency org="org.apache.commons" name="commons-lang3" rev="3.12.0" conf="*->default" />
 		<dependency org="org.apache.commons" name="commons-collections4" rev="4.4" conf="*->master" />
@@ -50,7 +50,7 @@
 		<dependency org="com.tdunning" name="t-digest" rev="3.2" />
 
 		<!-- Hadoop Dependencies -->
-		<dependency org="org.apache.hadoop" name="hadoop-common" rev="3.1.3" conf="*->default">
+		<dependency org="org.apache.hadoop" name="hadoop-common" rev="3.3.3" conf="*->default">
 			<exclude org="hsqldb" name="hsqldb" />
 			<exclude org="net.sf.kosmosfs" name="kfs" />
 			<exclude org="net.java.dev.jets3t" name="jets3t" />
@@ -58,23 +58,23 @@
 			<exclude org="org.mortbay.jetty" name="jsp-*" />
 			<exclude org="ant" name="ant" />
 		</dependency>
-		<dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="3.1.3" conf="*->default" />
-		<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core" rev="3.1.3" conf="*->default" />
-		<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="3.1.3" conf="*->default" />
+		<dependency org="org.apache.hadoop" name="hadoop-hdfs" rev="3.3.3" conf="*->default" />
+		<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-core" rev="3.3.3" conf="*->default" />
+		<dependency org="org.apache.hadoop" name="hadoop-mapreduce-client-jobclient" rev="3.3.3" conf="*->default" />
 		<!-- End of Hadoop Dependencies -->
 
 		<dependency org="org.apache.tika" name="tika-core" rev="2.3.0" />
 
 		<dependency org="xml-apis" name="xml-apis" rev="1.4.01" /><!-- force this version as it is required by Tika -->
-		<dependency org="xerces" name="xercesImpl" rev="2.12.1" />
+		<dependency org="xerces" name="xercesImpl" rev="2.12.2" />
 
-		<dependency org="com.ibm.icu" name="icu4j" rev="68.2" />
+		<dependency org="com.ibm.icu" name="icu4j" rev="71.1" />
 
-		<dependency org="com.google.guava" name="guava" rev="30.1-jre" />
+		<dependency org="com.google.guava" name="guava" rev="31.1-jre" />
 
-		<dependency org="com.github.crawler-commons" name="crawler-commons" rev="1.2" />
+		<dependency org="com.github.crawler-commons" name="crawler-commons" rev="1.3" />
 
-		<dependency org="com.google.code.gson" name="gson" rev="2.8.9"/>
+		<dependency org="com.google.code.gson" name="gson" rev="2.9.0"/>
 		<dependency org="com.martinkl.warc" name="warc-hadoop" rev="0.1.0">
 			<exclude module="hadoop-client" />
 		</dependency>
@@ -84,10 +84,10 @@
 		<dependency org="org.apache.cxf" name="cxf-rt-transports-http" rev="3.4.1" conf="*->default" />
 		<dependency org="org.apache.cxf" name="cxf-rt-transports-http-jetty" rev="3.4.1" conf="*->default" />
 		<dependency org="org.apache.cxf" name="cxf-rt-rs-client" rev="3.4.1" conf="test->default" />
-		<dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.12.0" conf="*->default" />
-		<dependency org="com.fasterxml.jackson.core" name="jackson-annotations" rev="2.12.0" conf="*->default" />
-		<dependency org="com.fasterxml.jackson.dataformat" name="jackson-dataformat-cbor" rev="2.12.0" conf="*->default" />
-		<dependency org="com.fasterxml.jackson.jaxrs" name="jackson-jaxrs-json-provider" rev="2.12.0" conf="*->default" />
+		<dependency org="com.fasterxml.jackson.core" name="jackson-databind" rev="2.13.3" conf="*->default" />
+		<dependency org="com.fasterxml.jackson.core" name="jackson-annotations" rev="2.13.3" conf="*->default" />
+		<dependency org="com.fasterxml.jackson.dataformat" name="jackson-dataformat-cbor" rev="2.13.3" conf="*->default" />
+		<dependency org="com.fasterxml.jackson.jaxrs" name="jackson-jaxrs-json-provider" rev="2.13.3" conf="*->default" />
 
 		<!-- WARC artifacts needed -->
 		<dependency org="org.netpreserve.commons" name="webarchive-commons" rev="1.1.9" conf="*->default">
@@ -111,16 +111,12 @@
 			<artifact name="mrunit" ns0:classifier="hadoop2" />
 			<exclude org="log4j" module="log4j" />
 		</dependency>
-		<dependency org="org.mortbay.jetty" name="jetty-client" rev="6.1.26" conf="test->default" />
 
-		<!-- web app dependencies -->
-		<dependency org="org.mortbay.jetty" name="jetty" rev="6.1.26" />
+		<dependency org="org.mortbay.jetty" name="jetty-client" rev="6.1.26" conf="test->default" />
+		<dependency org="org.mortbay.jetty" name="jetty" rev="6.1.26" conf="test->default" />
 
 		<dependency org="org.apache.commons" name="commons-collections4" rev="4.1" conf="*->default" />
 
-		<!-- RabbitMQ dependencies -->
-		<dependency org="com.rabbitmq" name="amqp-client" rev="5.2.0" conf="*->default" />
-
 		<!--Added Because of Elasticsearch JEST client-->
 		<!--TODO refactor these to indexer-elastic-rest plugin somehow, currently doesn't resolve correctly-->
 		<dependency org="org.apache.httpcomponents" name="httpcore-nio" rev="4.4.9" />
@@ -139,4 +135,4 @@
 
 	</dependencies>
 
-</ivy-module>
\ No newline at end of file
+</ivy-module>
diff --git a/src/java/org/apache/nutch/crawl/Generator.java b/src/java/org/apache/nutch/crawl/Generator.java
index 15fedbf..0fce6b3 100644
--- a/src/java/org/apache/nutch/crawl/Generator.java
+++ b/src/java/org/apache/nutch/crawl/Generator.java
@@ -238,10 +238,11 @@
       LongWritable oldGenTime = (LongWritable) crawlDatum.getMetaData()
           .get(Nutch.WRITABLE_GENERATE_TIME_KEY);
       if (oldGenTime != null) { // awaiting fetch & update
-        if (oldGenTime.get() + genDelay > curTime) // still wait for
+        if (oldGenTime.get() + genDelay > curTime) { // still wait for
           // update
           context.getCounter("Generator", "WAIT_FOR_UPDATE").increment(1);
-        return;
+          return;
+        }
       }
       float sort = 1.0f;
       try {
diff --git a/src/java/org/apache/nutch/plugin/Extension.java b/src/java/org/apache/nutch/plugin/Extension.java
index 246e8ff..e949ea3 100644
--- a/src/java/org/apache/nutch/plugin/Extension.java
+++ b/src/java/org/apache/nutch/plugin/Extension.java
@@ -143,15 +143,15 @@
    * Return an instance of the extension implementation. Before we create a
    * extension instance we startup the plugin if it is not already done. The
    * plugin instance and the extension instance use the same
-   * {@link org.apache.nutch.plugin.PluginClassLoader}.
-   * Each Plugin use its own classloader. The
-   * {@link org.apache.nutch.plugin.PluginClassLoader} knows only its own
-   * <i>plugin runtime libraries</i> defined
-   * in the <code>plugin.xml</code> manifest file and exported libraries
-   * of the dependent plugins.
+   * {@link org.apache.nutch.plugin.PluginClassLoader}. Each Plugin uses its own
+   * classloader. The {@link org.apache.nutch.plugin.PluginClassLoader} knows
+   * only its own <i>plugin runtime libraries</i> defined in the
+   * <code>plugin.xml</code> manifest file and exported libraries of the
+   * dependent plugins.
    * 
    * @return Object An instance of the extension implementation
-   * @throws PluginRuntimeException if there is a fatal runtime error
+   * @throws PluginRuntimeException
+   *           if there is a fatal runtime error
    */
   public Object getExtensionInstance() throws PluginRuntimeException {
     // Must synchronize here to make sure creation and initialization
diff --git a/src/java/org/apache/nutch/plugin/Plugin.java b/src/java/org/apache/nutch/plugin/Plugin.java
index 314a866..306ada3 100644
--- a/src/java/org/apache/nutch/plugin/Plugin.java
+++ b/src/java/org/apache/nutch/plugin/Plugin.java
@@ -27,7 +27,7 @@
  * provide a API and invoke one or a set of installed extensions.
  * 
  * Each plugin may extend the base <code>Plugin</code>. <code>Plugin</code>
- * instances are used as the point of life cycle managemet of plugin related
+ * instances are used as the point of life cycle management of plugin related
  * functionality.
  * 
  * The <code>Plugin</code> will be started up and shutdown by the nutch plugin
diff --git a/src/java/org/apache/nutch/plugin/PluginRepository.java b/src/java/org/apache/nutch/plugin/PluginRepository.java
index 3c55409..d80f971 100644
--- a/src/java/org/apache/nutch/plugin/PluginRepository.java
+++ b/src/java/org/apache/nutch/plugin/PluginRepository.java
@@ -38,11 +38,11 @@
 import org.slf4j.LoggerFactory;
 
 /**
- * <p>The plugin repositority is a registry of all plugins.</p>
+ * <p>The plugin repository is a registry of all plugins.</p>
  * 
- * <p>At system boot up a repositority is built by parsing the mainifest files of
+ * <p>At system boot up a repository is built by parsing the manifest files of
  * all plugins. Plugins that require other plugins which do not exist are not
- * registed. For each plugin a plugin descriptor instance will be created. The
+ * registered. For each plugin a plugin descriptor instance will be created. The
  * descriptor represents all meta information about a plugin. So a plugin
  * instance will be created later when it is required, this allow lazy plugin
  * loading.</p>
@@ -64,8 +64,7 @@
 
   private HashMap<String, Plugin> fActivatedPlugins;
 
-  @SuppressWarnings("rawtypes")
-  private static final Map<String, Map<PluginClassLoader, Class>> CLASS_CACHE = new HashMap<>();
+  private static final Map<String, Map<PluginClassLoader, Class<?>>> CLASS_CACHE = new HashMap<>();
 
   private Configuration conf;
 
@@ -267,14 +266,14 @@
   }
 
   /**
-   * <p>Returns a instance of a plugin. Plugin instances are cached. So a plugin
-   * exist only as one instance. This allow a central management of plugin own
+   * <p>Returns an instance of a plugin. Plugin instances are cached. So a plugin
+   * exists only as one instance. This allows central management of the plugin's own
    * resources.</p>
    * 
    * <p>After creating the plugin instance the startUp() method is invoked. The
    * plugin use a own classloader that is used as well by all instance of
    * extensions of the same plugin. This class loader use all exported libraries
-   * from the dependend plugins and all plugin libraries.</p>
+   * from the dependent plugins and all plugin libraries.</p>
    * 
    * @param pDescriptor a {@link PluginDescriptor} for which to retrieve a 
    * {@link Plugin} instance
@@ -337,16 +336,15 @@
     }
   }
 
-  @SuppressWarnings("rawtypes")
-  public static Class getCachedClass(PluginDescriptor pDescriptor, String className)
+  public Class<?> getCachedClass(PluginDescriptor pDescriptor, String className)
           throws ClassNotFoundException {
-    Map<PluginClassLoader, Class> descMap = CLASS_CACHE.get(className);
+    Map<PluginClassLoader, Class<?>> descMap = CLASS_CACHE.get(className);
     if (descMap == null) {
       descMap = new HashMap<>();
       CLASS_CACHE.put(className, descMap);
     }
     PluginClassLoader loader = pDescriptor.getClassLoader();
-    Class clazz = descMap.get(loader);
+    Class<?> clazz = descMap.get(loader);
     if (clazz == null) {
       clazz = loader.loadClass(className);
       descMap.put(loader, clazz);
@@ -543,8 +541,8 @@
 
   /**
    * Registers this PluginRepository to be invoked whenever URLs have to be
-   * parsed. This allows to check the registered protocol plugins for uncommon
-   * protocols.
+   * parsed. This allows to check the registered protocol plugins for custom
+   * protocols not covered by standard {@link URLStreamHandler}s of the JVM.
    */
   private void registerURLStreamHandlerFactory() {
     org.apache.nutch.plugin.URLStreamHandlerFactory.getInstance().registerPluginRepository(this);
diff --git a/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java b/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
index 5aed76a..bd7e377 100644
--- a/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
+++ b/src/java/org/apache/nutch/plugin/URLStreamHandlerFactory.java
@@ -20,6 +20,9 @@
 import java.net.URL;
 import java.net.URLStreamHandler;
 import java.util.ArrayList;
+import java.util.Map;
+import java.util.Optional;
+import java.util.concurrent.ConcurrentHashMap;
 
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -35,80 +38,126 @@
  */
 public class URLStreamHandlerFactory
     implements java.net.URLStreamHandlerFactory {
-  
+
   protected static final Logger LOG = LoggerFactory
       .getLogger(URLStreamHandlerFactory.class);
-  
+
   /** The singleton instance. */
   private static URLStreamHandlerFactory instance;
-  
-  /** Here we register all PluginRepositories.
-   * In this class we do not know why several instances of PluginRepository
-   * are kept, nor do we know how long they will be used. To prevent
-   * a memory leak, this class must not keep references to PluginRepository
-   * but use WeakReference which allows PluginRepository to still be
-   * garbage collected. The prize is we need to clean the list for
-   * outdated references which is done in the {@link #removeInvalidRefs()} method.
+
+  /**
+   * Here we register all PluginRepositories. In this class we do not know why
+   * several instances of PluginRepository are kept, nor do we know how long
+   * they will be used. To prevent a memory leak, this class must not keep
+   * references to PluginRepository but use WeakReference which allows
+   * PluginRepository to still be garbage collected. The price is we need to
+   * clean the list for outdated references which is done in the
+   * {@link #removeInvalidRefs()} method.
    */
   private ArrayList<WeakReference<PluginRepository>> prs;
-  
+
+  /**
+   * Cache of URLStreamHandlers for each protocol supported by
+   * <ul>
+   * <li>one of the registered and active plugins</li>
+   * <li>or by the JVM</li>
+   * </ul>
+   * Using the cache avoids that {@link URLStreamHandler} instances are created
+   * multiple times anew. The cache is also pre-populated with protocols handled
+   * obligatorily by the JVM, see {@link #SYSTEM_PROTOCOLS}.
+   */
+  private Map<String, Optional<URLStreamHandler>> cache;
+
+  /**
+   * Protocols covered by standard JVM URL handlers. These protocols must not be
+   * handled by Nutch plugins, in order to avoid that basic actions (eg. loading
+   * of classes and configuration files) break.
+   */
+  public static final String[] SYSTEM_PROTOCOLS = { //
+      "http", "https", "file", "jar" };
+
   static {
     instance = new URLStreamHandlerFactory();
     URL.setURLStreamHandlerFactory(instance);
     LOG.debug("Registered URLStreamHandlerFactory with the JVM.");
   }
-  
+
   private URLStreamHandlerFactory() {
     this.prs = new ArrayList<>();
+    initCache();
+  }
+
+  /** Reset and initialize cache (protocol -> URLStreamHandler) */
+  private synchronized void initCache() {
+    cache = new ConcurrentHashMap<>();
+    // pre-populate cache with protocols to be handled by the JVM
+    for (String protocol : SYSTEM_PROTOCOLS) {
+      cache.put(protocol, Optional.empty());
+    }
   }
 
   /** 
    * Get the singleton instance of this class.
-   * @return a {@link org.apache.nutch.plugin.URLStreamHandlerFactory} instance 
+   * @return a {@link org.apache.nutch.plugin.URLStreamHandlerFactory} instance
    */
   public static URLStreamHandlerFactory getInstance() {
     return instance;
   }
-  
+
   /** Use this method once a new PluginRepository was created to register it.
    * 
    * @param pr The PluginRepository to be registered.
    */
   public void registerPluginRepository(PluginRepository pr) {
     this.prs.add(new WeakReference<PluginRepository>(pr));
-    
+
+    // reset the cache, so that the new PluginRepository is used from now on
+    initCache();
+
     removeInvalidRefs();
   }
 
   @Override
   public URLStreamHandler createURLStreamHandler(String protocol) {
+
+    Optional<URLStreamHandler> cached = cache.get(protocol);
+    if (cached != null) {
+      // cached entry; an empty one means: use the JVM's standard handler
+      return cached.orElse(null);
+    }
+
     LOG.debug("Creating URLStreamHandler for protocol: {}", protocol);
-    
+
     removeInvalidRefs();
-    
+
     // find the 'correct' PluginRepository. For now we simply take the first.
     // then ask it to return the URLStreamHandler
-    for(WeakReference<PluginRepository> ref: this.prs) {
+    for (WeakReference<PluginRepository> ref : this.prs) {
       PluginRepository pr = ref.get();
-      if(pr != null) {
+      if (pr != null) {
         // found PluginRepository. Let's get the URLStreamHandler...
-        return pr.createURLStreamHandler(protocol);
+        URLStreamHandler handler = pr.createURLStreamHandler(protocol);
+        cache.put(protocol, Optional.ofNullable(handler)); // may be null
+        return handler;
       }
     }
+
+    cache.put(protocol, Optional.empty());
     return null;
   }
 
-  /** Maintains the list of PluginRepositories by
-   * removing the references whose referents have been
-   * garbage collected meanwhile.
+  /**
+   * Maintains the list of PluginRepositories by removing the references whose
+   * referents have been garbage collected meanwhile.
    */
   private void removeInvalidRefs() {
     ArrayList<WeakReference<PluginRepository>> copy = new ArrayList<>(this.prs);
-    for(WeakReference<PluginRepository> ref: copy) {
-      if(ref.get() == null) {
+    for (WeakReference<PluginRepository> ref : copy) {
+      if (ref.get() == null) {
         this.prs.remove(ref);
       }
     }
-    LOG.debug("Removed '{}' invalid references. '{}' remaining.", copy.size()-this.prs.size(), this.prs.size());
+    LOG.debug("Removed '{}' invalid references. '{}' remaining.",
+        copy.size() - this.prs.size(), this.prs.size());
   }
 }
diff --git a/src/plugin/index-geoip/ivy.xml b/src/plugin/index-geoip/ivy.xml
index 4fa6f71..2eda5a6 100644
--- a/src/plugin/index-geoip/ivy.xml
+++ b/src/plugin/index-geoip/ivy.xml
@@ -36,12 +36,11 @@
   </publications>
 
   <dependencies>
-    <dependency org="com.maxmind.geoip2" name="geoip2" rev="2.12.0" >
-      <!-- Exlude due to classpath issues -->
-      <exclude org="org.apache.httpcomponents" name="httpclient" />
-      <exclude org="org.apache.httpcomponents" name="httpcore" />
-      <exclude org="commons-codec" name="commons-codec" />
-      <exclude org="commons-logging" name="commons-logging" />
+    <dependency org="com.maxmind.geoip2" name="geoip2" rev="3.0.1">
+      <!-- Exclude libs provided in Nutch core -->
+      <exclude org="com.fasterxml.jackson.core" name="jackson-annotations" />
+      <exclude org="com.fasterxml.jackson.core" name="jackson-databind" />
+      <exclude org="com.fasterxml.jackson.core" name="jackson-core" />
     </dependency>
   </dependencies>
   
diff --git a/src/plugin/index-geoip/plugin.xml b/src/plugin/index-geoip/plugin.xml
index 6148f59..c4efadf 100644
--- a/src/plugin/index-geoip/plugin.xml
+++ b/src/plugin/index-geoip/plugin.xml
@@ -25,11 +25,8 @@
       <library name="index-geoip.jar">
          <export name="*"/>
       </library>
-      <library name="geoip2-2.12.0.jar"/>
-      <library name="jackson-annotations-2.9.5.jar"/>
-      <library name="jackson-core-2.9.5.jar"/>
-      <library name="jackson-databind-2.9.5.jar"/>
-      <library name="maxmind-db-1.2.2.jar"/>
+      <library name="geoip2-3.0.1.jar"/>
+      <library name="maxmind-db-2.0.0.jar"/>
    </runtime>
 
    <requires>
diff --git a/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPDocumentCreator.java b/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPDocumentCreator.java
index 1c697a2..64b3862 100644
--- a/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPDocumentCreator.java
+++ b/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPDocumentCreator.java
@@ -17,13 +17,17 @@
 package org.apache.nutch.indexer.geoip;
 
 import java.io.IOException;
+import java.lang.invoke.MethodHandles;
 import java.net.InetAddress;
 import java.net.UnknownHostException;
 
 import org.apache.nutch.indexer.NutchDocument;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 import com.maxmind.geoip2.DatabaseReader;
 import com.maxmind.geoip2.WebServiceClient;
+import com.maxmind.geoip2.exception.AddressNotFoundException;
 import com.maxmind.geoip2.exception.GeoIp2Exception;
 import com.maxmind.geoip2.model.InsightsResponse;
 import com.maxmind.geoip2.model.CityResponse;
@@ -54,6 +58,9 @@
  */
 public class GeoIPDocumentCreator {
 
+  private static final Logger LOG = LoggerFactory
+      .getLogger(MethodHandles.lookup().lookupClass());
+
   /**
    * Add field to document but only if value isn't null
    * @param doc the {@link NutchDocument} to augment
@@ -61,21 +68,7 @@
    * @param value the String value to associate with the target field
    */
   public static void addIfNotNull(NutchDocument doc, String name,
-      String value) {
-    if (value != null) {
-      doc.add(name, value);
-    }
-  }
-
-  /**
-   * Add field to document but only if value isn't null
-   * @param doc the {@link NutchDocument} to augment
-   * @param name the name of the target field
-   * @param value the {@link java.lang.Integer} value to 
-   * associate with the target field
-   */
-  public static void addIfNotNull(NutchDocument doc, String name,
-      Integer value) {
+      Object value) {
     if (value != null) {
       doc.add(name, value);
     }
@@ -87,7 +80,6 @@
     addIfNotNull(doc, "ip", serverIp);
     InsightsResponse response = client
         .insights(InetAddress.getByName(serverIp));
-    // CityResponse response = client.city(InetAddress.getByName(serverIp));
 
     City city = response.getCity();
     addIfNotNull(doc, "cityName", city.getName()); // 'Minneapolis'
@@ -103,7 +95,7 @@
     addIfNotNull(doc, "countryIsoCode", country.getIsoCode()); // 'US'
     addIfNotNull(doc, "countryName", country.getName()); // 'United States'
     addIfNotNull(doc, "countryConfidence", country.getConfidence()); // 99
-    addIfNotNull(doc, "countryGeoName", country.getGeoNameId());
+    addIfNotNull(doc, "countryGeoNameId", country.getGeoNameId());
 
     Location location = response.getLocation();
     addIfNotNull(doc, "latLon", location.getLatitude() + "," + location.getLongitude()); // 44.9733,
@@ -121,7 +113,7 @@
 
     Subdivision subdivision = response.getMostSpecificSubdivision();
     addIfNotNull(doc, "subDivName", subdivision.getName()); // 'Minnesota'
-    addIfNotNull(doc, "subDivIdoCode", subdivision.getIsoCode()); // 'MN'
+    addIfNotNull(doc, "subDivIsoCode", subdivision.getIsoCode()); // 'MN'
     addIfNotNull(doc, "subDivConfidence", subdivision.getConfidence()); // 90
     addIfNotNull(doc, "subDivGeoNameId", subdivision.getGeoNameId());
 
@@ -169,7 +161,13 @@
   public static NutchDocument createDocFromDomainDb(String serverIp,
       NutchDocument doc, DatabaseReader reader) throws UnknownHostException,
       IOException, GeoIp2Exception {
-    DomainResponse response = reader.domain(InetAddress.getByName(serverIp));
+    DomainResponse response;
+    try {
+      response = reader.domain(InetAddress.getByName(serverIp));
+    } catch (AddressNotFoundException e) {
+      LOG.debug("IP address not found: {}", serverIp);
+      return doc;
+    }
     addIfNotNull(doc, "ip", serverIp);
     addIfNotNull(doc, "domain", response.getDomain());
     return doc;
@@ -189,7 +187,14 @@
       NutchDocument doc, DatabaseReader reader) throws UnknownHostException,
       IOException, GeoIp2Exception {
     addIfNotNull(doc, "ip", serverIp);
-    CityResponse response = reader.city(InetAddress.getByName(serverIp));
+
+    CityResponse response;
+    try {
+      response = reader.city(InetAddress.getByName(serverIp));
+    } catch (AddressNotFoundException e) {
+      LOG.debug("IP address not found: {}", serverIp);
+      return doc;
+    }
 
     City city = response.getCity();
     addIfNotNull(doc, "cityName", city.getName()); // 'Minneapolis'
@@ -206,7 +211,7 @@
     addIfNotNull(doc, "countryIsoCode", country.getIsoCode()); // 'US'
     addIfNotNull(doc, "countryName", country.getName()); // 'United States'
     addIfNotNull(doc, "countryConfidence", country.getConfidence()); // 99
-    addIfNotNull(doc, "countryGeoName", country.getGeoNameId());
+    addIfNotNull(doc, "countryGeoNameId", country.getGeoNameId());
 
     Location location = response.getLocation();
     addIfNotNull(doc, "latLon", location.getLatitude() + "," + location.getLongitude()); // 44.9733,
@@ -224,7 +229,7 @@
 
     Subdivision subdivision = response.getMostSpecificSubdivision();
     addIfNotNull(doc, "subDivName", subdivision.getName()); // 'Minnesota'
-    addIfNotNull(doc, "subDivIdoCode", subdivision.getIsoCode()); // 'MN'
+    addIfNotNull(doc, "subDivIsoCode", subdivision.getIsoCode()); // 'MN'
     addIfNotNull(doc, "subDivConfidence", subdivision.getConfidence()); // 90
     addIfNotNull(doc, "subDivGeoNameId", subdivision.getGeoNameId());
     return doc;
diff --git a/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPIndexingFilter.java b/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPIndexingFilter.java
index 4e21273..ea30b8c 100644
--- a/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPIndexingFilter.java
+++ b/src/plugin/index-geoip/src/java/org/apache/nutch/indexer/geoip/GeoIPIndexingFilter.java
@@ -87,7 +87,8 @@
  *   'domainDatabase', 'ispDatabase' or 'insightsService'. If you wish to use any one of the 
  *   Database options, you should make one of GeoIP2-City.mmdb, GeoIP2-Connection-Type.mmdb, 
  *   GeoIP2-Domain.mmdb or GeoIP2-ISP.mmdb files respectively available on the Hadoop classpath 
- *   and available at runtime. This can be achieved by adding it to $NUTCH_HOME/conf
+ *   and available at runtime. This can be achieved by adding it to `$NUTCH_HOME/conf`.
+ *   Alternatively, also the GeoLite2 IP databases (GeoLite2-*.mmdb) can be used.
  *   </description>
  * </property>
  * 
@@ -152,24 +153,29 @@
           conf.getInt("index.geoip.userid", 12345),
           conf.get("index.geoip.licensekey")).build();
     } else {
-      String db = null;
+      String dbSuffix = null;
       if (usage.equalsIgnoreCase("cityDatabase")) {
-        db = "GeoIP2-City.mmdb";
+        dbSuffix = "-City.mmdb";
       } else if (usage.equalsIgnoreCase("connectionTypeDatabase")) {
-        db = "GeoIP2-Connection-Type.mmdb";
+        dbSuffix = "-Connection-Type.mmdb";
       } else if (usage.equalsIgnoreCase("domainDatabase")) {
-        db = "GeoIP2-Domain.mmdb";
+        dbSuffix = "-Domain.mmdb";
       } else if (usage.equalsIgnoreCase("ispDatabase")) {
-        db = "GeoIP2-ISP.mmdb";
+        dbSuffix = "-ISP.mmdb";
       }
-      URL dbFileUrl = conf.getResource(db);
-      if (dbFileUrl == null) {
-        LOG.error("GeoDb file {} not found on classpath", db);
-      } else {
-        try {
-          buildDb(new File(dbFileUrl.getFile()));
-        } catch (Exception e) {
-          LOG.error("Failed to read geoDb file {}: ", db, e);
+      String[] dbPrefixes = {"GeoIP2", "GeoLite2"};
+      for (String dbPrefix : dbPrefixes) {
+        String db = dbPrefix + dbSuffix;
+        URL dbFileUrl = conf.getResource(db);
+        if (dbFileUrl == null) {
+          LOG.error("GeoDb file {} not found on classpath", db);
+        } else {
+          try {
+            LOG.info("Reading GeoDb file {}", db);
+            buildDb(new File(dbFileUrl.getFile()));
+          } catch (Exception e) {
+            LOG.error("Failed to read geoDb file {}: ", db, e);
+          }
         }
       }
     }
diff --git a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
index 7885a52..053bfd6 100644
--- a/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
+++ b/src/plugin/indexer-elastic/src/java/org/apache/nutch/indexwriter/elastic/ElasticIndexWriter.java
@@ -25,14 +25,20 @@
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
 
+import javax.net.ssl.SSLContext;
+
 import org.apache.commons.lang.StringUtils;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.http.HttpHost;
 import org.apache.http.auth.AuthScope;
 import org.apache.http.auth.UsernamePasswordCredentials;
 import org.apache.http.client.CredentialsProvider;
+import org.apache.http.conn.ssl.NoopHostnameVerifier;
+import org.apache.http.conn.ssl.TrustSelfSignedStrategy;
 import org.apache.http.impl.client.BasicCredentialsProvider;
 import org.apache.http.impl.nio.client.HttpAsyncClientBuilder;
+import org.apache.http.ssl.SSLContextBuilder;
+import org.apache.http.ssl.SSLContexts;
 import org.apache.nutch.indexer.IndexWriter;
 import org.apache.nutch.indexer.IndexWriterParams;
 import org.apache.nutch.indexer.NutchDocument;
@@ -181,6 +187,7 @@
         hostsList[i++] = new HttpHost(host, port, scheme);
       }
       RestClientBuilder restClientBuilder = RestClient.builder(hostsList);
+
       if (auth) {
         restClientBuilder
             .setHttpClientConfigCallback(new HttpClientConfigCallback() {
@@ -191,6 +198,28 @@
               }
             });
       }
+
+      // In case of HTTPS, set the client up to trust self-signed certificates
+      // and to skip hostname verification
+      if ("https".equals(scheme)) {
+        try {
+          SSLContextBuilder sslBuilder = SSLContexts.custom();
+          sslBuilder.loadTrustMaterial(null, new TrustSelfSignedStrategy());
+          final SSLContext sslContext = sslBuilder.build();
+
+          restClientBuilder.setHttpClientConfigCallback(new HttpClientConfigCallback() {
+            @Override
+            public HttpAsyncClientBuilder customizeHttpClient(HttpAsyncClientBuilder httpClientBuilder) {
+              // ignore issues with self-signed certificates
+              httpClientBuilder.setSSLHostnameVerifier(NoopHostnameVerifier.INSTANCE);
+              return httpClientBuilder.setSSLContext(sslContext);
+            }
+          });
+        } catch (Exception e) {
+          LOG.error("Error setting up SSLContext because: " + e.getMessage(), e);
+        }
+      }
+
       client = new RestHighLevelClient(restClientBuilder);
     } else {
       throw new IOException(
@@ -344,4 +373,4 @@
   public Configuration getConf() {
     return config;
   }
-}
\ No newline at end of file
+}
diff --git a/src/plugin/indexer-solr/schema.xml b/src/plugin/indexer-solr/schema.xml
index 6865eb0..ba71fe1 100644
--- a/src/plugin/indexer-solr/schema.xml
+++ b/src/plugin/indexer-solr/schema.xml
@@ -356,7 +356,7 @@
     <field name="cityGeoNameId" type="int" stored="true" indexed="true" />
     <field name="continentCode" type="string" stored="true" indexed="true" />
     <field name="continentGeoNameId" type="int" stored="true" indexed="true" />
-    <field name="contentName" type="string" stored="true" indexed="true" />
+    <field name="continentName" type="string" stored="true" indexed="true" />
     <field name="countryIsoCode" type="string" stored="true" indexed="true"/>
     <field name="countryName" type="string" stored="true" indexed="true" />
     <field name="countryConfidence" type="int" stored="true" indexed="true"/>
@@ -379,7 +379,6 @@
     <field name="org" type="string" stored="true" indexed="true" />
     <field name="userType" type="string" stored="true" indexed="true" />
     <field name="isAnonProxy" type="boolean" stored="true" indexed="true" />
-    <field name="isSatelitteProv" type="boolean" stored="true" indexed="true" />
     <field name="connType" type="string" stored="true" indexed="true" />
     <field name="location" type="location" stored="true" indexed="true" />
 
diff --git a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
index 9fbcda7..63fa328 100644
--- a/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
+++ b/src/plugin/protocol-okhttp/src/java/org/apache/nutch/protocol/okhttp/OkHttp.java
@@ -90,21 +90,6 @@
         }
       } };
 
-  private static final SSLContext trustAllSslContext;
-
-  static {
-    try {
-      trustAllSslContext = SSLContext.getInstance("SSL");
-      trustAllSslContext.init(null, trustAllCerts,
-          new java.security.SecureRandom());
-    } catch (Exception e) {
-      throw new RuntimeException(e);
-    }
-  }
-
-  private static final SSLSocketFactory trustAllSslSocketFactory = trustAllSslContext
-      .getSocketFactory();
-
   public OkHttp() {
     super(LOG);
   }
@@ -129,8 +114,18 @@
         .readTimeout(this.timeout, TimeUnit.MILLISECONDS);
 
     if (!this.tlsCheckCertificate) {
-      builder.sslSocketFactory(trustAllSslSocketFactory,
-          (X509TrustManager) trustAllCerts[0]);
+      try {
+        SSLContext trustAllSslContext = SSLContext.getInstance("TLS");
+        trustAllSslContext.init(null, trustAllCerts, null);
+        SSLSocketFactory trustAllSslSocketFactory = trustAllSslContext
+            .getSocketFactory();
+        builder.sslSocketFactory(trustAllSslSocketFactory,
+            (X509TrustManager) trustAllCerts[0]);
+      } catch (Exception e) {
+        LOG.error(
+            "Failed to disable TLS certificate verification (property http.tls.certificates.check)",
+            e);
+      }
       builder.hostnameVerifier(new HostnameVerifier() {
         @Override
         public boolean verify(String hostname, SSLSession session) {
diff --git a/src/plugin/publish-rabbitmq/ivy.xml b/src/plugin/publish-rabbitmq/ivy.xml
index dd450cf..7b5e3dd 100644
--- a/src/plugin/publish-rabbitmq/ivy.xml
+++ b/src/plugin/publish-rabbitmq/ivy.xml
@@ -34,5 +34,5 @@
     <!--get the artifact from our module name-->
     <artifact conf="master"/>
   </publications>
-  
+
 </ivy-module>