OPENNLP-630
Fixed toString() in LinkedSpan and BaseLink to be more friendly to the CLI tool (and others).

diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
index 0770474..a208d78 100644
--- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
+++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
@@ -122,7 +122,7 @@
   @Override

   public String toString() {

 

-    return super.toString() + "\n GazateerEntry{\n" + "\tlatitude=" + latitude + ", \n\tlongitude=" + longitude + ", \n\tsource=" + source + ", \n\tindexID=" + indexID + ", \n\tindexData=" + indexData + "\n}";

+    return super.toString() + "\n\t\tGazateerEntry\n" + "\t\tlatitude=" + latitude + ", \n\t\tlongitude=" + longitude + ", \n\t\tsource=" + source + ", \n\t\tindexID=" + indexID + ",\n\t\tindexData=" + indexData + "\n";

   }

 

   @Override

diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java
index 34724e1..3b01c90 100644
--- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java
+++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerIndexer.java
@@ -40,13 +40,9 @@
 public class GazetteerIndexer {

 

   public GazetteerIndexer() {

-    // loadAnalyzerMap();

+

   }

 

-  /**

-   * build this into a future release, causing problems at query time

-   */

-  // Map<String, Analyzer> languageAnalyzerMap = new HashMap<>();

 

   public static interface Separable {

 

@@ -82,15 +78,15 @@
   /**

    * indexes the USGS or Geonames gazateers.

    *

-   * @param outputIndexDir    a DIRECTORY path where you would like to store the

-   *                          output lucene indexes

-   * @param gazateerInputData the file, "as is" that was downloaded from the

-   *                          USGS and GEONAMES website

-   * @param type              indicates whether the data is USGS or GEONAMES

-   *                          format

+   * @param outputIndexDir     a DIRECTORY path where you would like to store

+   *                           the output lucene indexes

+   * @param gazetteerInputData the file, "as is" that was downloaded from the

+   *                           USGS and GEONAMES website

+   * @param type               indicates whether the data is USGS or GEONAMES

+   *                           format

    * @throws Exception

    */

-  public void index(File outputIndexDir, File gazateerInputData, GazType type) throws Exception {

+  public void index(File outputIndexDir, File gazetteerInputData, GazType type) throws Exception {

     if (!outputIndexDir.isDirectory()) {

       throw new IllegalArgumentException("outputIndexDir must be a directory.");

     }

@@ -103,7 +99,7 @@
 

     IndexWriter w = new IndexWriter(index, config);

 

-    readFile(gazateerInputData, w, type);

+    readFile(gazetteerInputData, w, type);

     w.commit();

     w.close();

 

@@ -114,31 +110,24 @@
     List<String> fields = new ArrayList<String>();

     int counter = 0;

     // int langCodeIndex = 0;

-    System.out.println("reading gazateer data from file...........");

+    System.out.println("reading gazetteer data from file...........");

     while (reader.read() != -1) {

       String line = reader.readLine();

       String[] values = line.split(type.getSeparator());

       if (counter == 0) {

-        // build fields

-        for (int i = 0; i < values.length; i++) {

-          String columnName = values[i];

+        for (String columnName : values) {

           fields.add(columnName.replace("»¿", "").trim());

-         

         }

 

       } else {

         Document doc = new Document();

-        for (int i = 0; i < fields.size() - 1; i++) {

-

-          doc.add(new TextField(fields.get(i), values[i], Field.Store.YES));

-

-        }

-      

-          w.addDocument(doc);

-        

+        for (int i = 0; i < fields.size() - 1; i++) {         

+          doc.add(new TextField(fields.get(i), values[i].trim(), Field.Store.YES));

+        }     

+        w.addDocument(doc);

       }

       counter++;

-      if (counter % 10000 == 0) {

+      if (counter % 100000 == 0) {

         w.commit();

         System.out.println(counter + " .........committed to index..............");

       }

diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
index c25695b..b5f9817 100644
--- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
+++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
@@ -38,7 +38,7 @@
 

 /**

  *

- * Searches Gazateers stored in a MMapDirectory Lucene index. The structure of

+ * Searches Gazetteers stored in a MMapDirectory Lucene index. The structure of

  * these indices are based on loading the indexes using the

  * GeoEntityLinkerSetupUtils

  *

@@ -67,12 +67,13 @@
    * @param searchString the named entity to look up in the lucene index

    * @param rowsReturned how many rows to allow lucene to return

    * @param code         the country code

-

+   *

    * @return

    */

   public ArrayList<GazetteerEntry> geonamesFind(String searchString, int rowsReturned, String code) {

     ArrayList<GazetteerEntry> linkedData = new ArrayList<>();

-    if(code.toLowerCase().equals("in") && searchString.toLowerCase().equals("india")){

+    if (code.toLowerCase().equals("in") && searchString.toLowerCase().equals("india")) {

+      rowsReturned=100;

       System.out.println("india");

     }

     String luceneQueryString = "";

@@ -82,7 +83,7 @@
        * case the code variable will be an empty string

        */

       luceneQueryString = !code.equals("")

-              ? "FULL_NAME_ND_RO:" + searchString.toLowerCase().trim() + " AND CC1:\""+code.toLowerCase()+"\"" //[\"" + code.toLowerCase()+"\" TO \"" + code.toLowerCase() + "\"]"

+              ? "FULL_NAME_ND_RO:" + searchString.toLowerCase().trim() + " AND CC1:" + code.toLowerCase()+"^90000" //[\"" + code.toLowerCase()+"\" TO \"" + code.toLowerCase() + "\"]"

               : "FULL_NAME_ND_RO:" + searchString.toLowerCase().trim();

       /**

        * check the cache and go no further if the records already exist

@@ -93,10 +94,10 @@
         return get;

       }

 

-

       QueryParser parser = new QueryParser(Version.LUCENE_45, luceneQueryString, geonamesAnalyzer);

       Query q = parser.parse(luceneQueryString);

 

+

       TopDocs search = geonamesSearcher.search(q, rowsReturned);

 

       for (int i = 0; i < search.scoreDocs.length; ++i) {

@@ -105,8 +106,6 @@
         double sc = search.scoreDocs[i].score;

 

         entry.getScoreMap().put("lucene", sc);

-

-

         entry.setIndexID(docId + "");

         entry.setSource("geonames");

 

@@ -136,8 +135,8 @@
               break;

             case 12:

               entry.setItemParentID(value);

-              if(entry.getItemParentID().equals("in")){

-                System.out.println("");

+              if(!value.toLowerCase().equals(code.toLowerCase())){

+                continue;

               }

               break;

             case 23:

@@ -191,7 +190,6 @@
     String luceneQueryString = "FEATURE_NAME:" + searchString.toLowerCase().trim() + " OR MAP_NAME: " + searchString.toLowerCase().trim();

     try {

 

-

       /**

        * hit the cache

        */

diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java
index 872e2e5..854ca73 100644
--- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java
+++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java
@@ -42,7 +42,7 @@
    * Flag for deciding whether to search gaz only for toponyms within countries

    * that are mentioned in the document

    */

-  private Boolean filterCountryContext = true;

+ // private Boolean filterCountryContext = true;

 

   public GeoEntityLinker() throws Exception {

   }