OPENNLP-614
Fixed a bug in the GeoEntityLinker: no gazetteer lookup was performed when no country context was found in the document.
diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java
index 541d042..05fe749 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContext.java
@@ -18,11 +18,6 @@
 import java.io.BufferedReader;

 import java.io.FileReader;

 import java.io.IOException;

-import java.sql.CallableStatement;

-import java.sql.Connection;

-import java.sql.DriverManager;

-import java.sql.ResultSet;

-import java.sql.SQLException;

 import java.util.ArrayList;

 import java.util.HashMap;

 import java.util.HashSet;

@@ -42,12 +37,23 @@
  */

 public class CountryContext {

 

-  private Connection con;

+ 

   private List<CountryContextEntry> countrydata;

   private Map<String, Set<String>> nameCodesMap = new HashMap<String, Set<String>>();

   private Map<String, Set<Integer>> countryMentions = new HashMap<String, Set<Integer>>();

   private Set<CountryContextEntry> countryHits = new HashSet<>();

 

+  public CountryContext() {

+  }

+

+  public Map<String, Set<Integer>> getCountryMentions() {

+    return countryMentions;

+  }

+

+  public Set<CountryContextEntry> getCountryHits() {

+    return countryHits;

+  }

+

   public Map<String, Set<String>> getNameCodesMap() {

     return nameCodesMap;

   }

@@ -56,10 +62,6 @@
     this.nameCodesMap = nameCodesMap;

   }

 

-  public CountryContext() {

-  }

-

-

   /**

    * Finds mentions of countries based on a list from MySQL stored procedure

    * called getCountryList. This method finds country mentions in documents,

@@ -71,15 +73,13 @@
    * @return

    */

   public Map<String, Set<Integer>> regexfind(String docText, EntityLinkerProperties properties) {

-    countryMentions = new HashMap<String, Set<Integer>>();

+    countryMentions = new HashMap<>();

     nameCodesMap.clear();

     try {

-//      if (con == null) {

-//        con = getMySqlConnection(properties);

-//      }

+

       if (countrydata == null) {

-         countrydata = getCountryContextFromFile(properties);

-     //   countrydata = getCountryData(properties);

+        countrydata = getCountryContextFromFile(properties);

+        //   countrydata = getCountryData(properties);

       }

       for (CountryContextEntry entry : countrydata) {

         Pattern regex = Pattern.compile(entry.getFull_name_nd_ro().trim(), Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

@@ -122,95 +122,6 @@
     return countryMentions;

   }

 

-  /**

-   * returns a unique list of country codes

-   *

-   * @param countryMentions the countryMentions discovered

-   * @return

-   */

-  public static Set<String> getCountryCodes(List<CountryContextHit> hits) {

-    Set<String> ccs = new HashSet<String>();

-    for (CountryContextHit hit : hits) {

-      ccs.add(hit.getCountryCode().toLowerCase());

-    }

-    return ccs;

-  }

-

-  public static String getCountryCodeCSV(Set<String> hits) {

-    String csv = "";

-    if (hits.isEmpty()) {

-      return csv;

-    }

-

-    for (String code : hits) {

-      csv += "," + code;

-    }

-    return csv.substring(1);

-  }

-

-  private Connection getMySqlConnection(EntityLinkerProperties properties) throws Exception {

-

-    String driver = properties.getProperty("db.driver", "org.gjt.mm.mysql.Driver");

-    String url = properties.getProperty("db.url", "jdbc:mysql://localhost:3306/world");

-    String username = properties.getProperty("db.username", "root");

-    String password = properties.getProperty("db.password", "?");

-

-    Class.forName(driver);

-    Connection conn = DriverManager.getConnection(url, username, password);

-    return conn;

-  }

-

-  /**

-   * reads the list from the database by calling a stored procedure

-   * getCountryList

-   *

-   * @param properties

-   * @return

-   * @throws SQLException

-   */

-  private List<CountryContextEntry> getCountryData(EntityLinkerProperties properties) throws SQLException {

-    List<CountryContextEntry> entries = new ArrayList<CountryContextEntry>();

-    try {

-      if (con == null) {

-        con = getMySqlConnection(properties);

-      }

-      CallableStatement cs;

-      cs = con.prepareCall("CALL `getCountryList`()");

-      ResultSet rs;

-      rs = cs.executeQuery();

-      if (rs == null) {

-        return entries;

-      }

-      while (rs.next()) {

-        CountryContextEntry s = new CountryContextEntry();

-        //rc,cc1, full_name_nd_ro,dsg

-        s.setRc(rs.getString(1));

-        s.setCc1(rs.getString(2));

-//a.district, 

-        s.setFull_name_nd_ro(rs.getString(3));

-//b.name as countryname, 

-        s.setDsg(rs.getString(4));

-        entries.add(s);

-      }

-

-    } catch (SQLException ex) {

-      System.err.println(ex);

-    } catch (Exception e) {

-      System.err.println(e);

-    } finally {

-      con.close();

-    }

-    return entries;

-  }

-

-  public Map<String, Set<Integer>> getCountryMentions() {

-    return countryMentions;

-  }

-

-  public Set<CountryContextEntry> getCountryHits() {

-    return countryHits;

-  }

-

   private List<CountryContextEntry> getCountryContextFromFile(EntityLinkerProperties properties) {

     List<CountryContextEntry> entries = new ArrayList<>();

     String path = "";// properties.getProperty("geoentitylinker.countrycontext.filepath", "");

diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java
index 827ec77..a32642b 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryContextEntry.java
@@ -30,7 +30,7 @@
   private String cc1;

   private String full_name_nd_ro;

   private String dsg;

-

+  private String provCode;

   public CountryContextEntry() {

   }

 

@@ -41,6 +41,14 @@
     this.dsg = dsg;

   }

 

+  public String getProvCode() {

+    return provCode;

+  }

+

+  public void setProvCode(String provCode) {

+    this.provCode = provCode;

+  }

+

   public String getRc() {

     return rc;

   }

diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java
index 3198650..36bfb86 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/CountryProximityScorer.java
@@ -36,7 +36,7 @@
   String dominantCode = "";

 

   @Override

-  public void score(List<LinkedSpan> linkedSpans, String docText, Span[] sentenceSpans,EntityLinkerProperties properties, CountryContext additionalContext) {

+  public void score(List<LinkedSpan> linkedSpans, String docText, Span[] sentenceSpans, EntityLinkerProperties properties, CountryContext additionalContext) {

 

     score(linkedSpans, additionalContext.getCountryMentions(), additionalContext.getNameCodesMap(), docText, sentenceSpans, 1000);

 

@@ -134,10 +134,10 @@
 

     /**

      * the gaz matches that have a country code that have mentions in the doc

-     * that are closest to the Named Entity should return the best score Analyze

-     * map generates a likelihood score that the toponym from the gaz is

-     * referring to one of the countries Map<countrycode, prob that this span is

-     * referring to the toponym form this code key>

+     * that are closest to the Named Entity should return the best score.

+     * analyzeMap generates a likelihood score that the toponym from the gaz is

+     * referring to one of the countries, i.e., Map<countrycode, prob that this

+     * span is referring to the toponym from this code key>

      */

     Map<String, Double> scoreMap = analyzeMap(distancesFromCodeMap, sentences, span);

     for (BaseLink link : span.getLinkedEntries()) {

@@ -148,21 +148,16 @@
         score = scoreMap.get(spanCountryCode);

         ///does the name extracted match a country name?

         if (nameCodesMap.containsKey(link.getItemName().toLowerCase())) {

-          //if so, is it the correct country code for that name

+          //if so, is it the correct country code for that name?

           if (nameCodesMap.get(link.getItemName().toLowerCase()).contains(link.getItemParentID())) {

             //boost the score becuase it is likely that this is the location in the text, so add 50% to the score or set to 1

             //TODO: make this multiplier configurable

-            //TODO: improve this with a geographic/geometry based clustering (linear binning to be more precise) of points returned from the gaz

             score = (score + .75) > 1.0 ? 1d : (score + .75);

-            //boost the score if the hit is from the dominant country context

 

             if (link.getItemParentID().equals(dominantCode)) {

               score = (score + .25) > 1.0 ? 1d : (score + .25);

             }

-

-

           }

-

         }

       }

       link.getScoreMap().put("countrycontext", score);

@@ -184,7 +179,7 @@
   private Map<String, Double> analyzeMap(Map<String, Set<Integer>> distanceMap, Span[] sentences, LinkedSpan<BaseLink> span) {

 

     Map<String, Double> scoreMap = new HashMap<String, Double>();

-    if(distanceMap.isEmpty()){

+    if (distanceMap.isEmpty()) {

       return scoreMap;

     }

     TreeSet<Integer> all = new TreeSet<Integer>();

@@ -195,8 +190,8 @@
 

     Integer min = all.first();

     Integer max = all.last();

-    if(min==max){

-      min=0;

+    if (min == max) {

+      min = 0;

     }

     for (String key : distanceMap.keySet()) {

 

diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
index 5429590..ca9b93f 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
@@ -72,7 +72,9 @@
       /**

        * build the search string

        */

-      String luceneQueryString = "FULL_NAME_ND_RO:" + searchString.toLowerCase().trim() + " AND CC1:" + code.toLowerCase() + "^10000";

+      String luceneQueryString = !code.equals("")

+              ? "FULL_NAME_ND_RO:" + searchString.toLowerCase().trim() + " AND CC1:" + code.toLowerCase() + "^1000"

+              : "FULL_NAME_ND_RO:" + searchString.toLowerCase().trim();

       /**

        * check the cache and go no further if the records already exist

        */

@@ -82,7 +84,7 @@
       }

       if (geonamesIndex == null) {

         String indexloc = properties.getProperty("opennlp.geoentitylinker.gaz.geonames", "");

-        String cutoff = properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", ".75");

+        String cutoff = properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", ".60");

         scoreCutoff = Double.valueOf(cutoff);

         geonamesIndex = new MMapDirectory(new File(indexloc));

         geonamesReader = DirectoryReader.open(geonamesIndex);

diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java
index 1404ce9..05c63d7 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinker.java
@@ -68,10 +68,15 @@
         ArrayList<BaseLink> geoNamesEntries = new ArrayList<BaseLink>();

         if (!(countryMentions.keySet().contains("us") && countryMentions.keySet().size() == 1) || countryMentions.keySet().size() > 1 || countryMentions.keySet().isEmpty()) {

           // geoNamesEntries = geoNamesGaz.find(matches[i], names[i], countryMentions, linkerProperties);

-          for (String code : countryMentions.keySet()) {

-            if (!code.equals("us")) {

-              geoNamesEntries.addAll(gazateerSearcher.geonamesFind(matches[i], 10, code, linkerProperties));

+          if (!countryMentions.keySet().isEmpty()) {

+            for (String code : countryMentions.keySet()) {

+              if (!code.equals("us")) {

+                geoNamesEntries.addAll(gazateerSearcher.geonamesFind(matches[i], 10, code, linkerProperties));

+              }

             }

+          } else {

+            geoNamesEntries.addAll(gazateerSearcher.geonamesFind(matches[i], 10, "", linkerProperties));

+

           }

 

         }