OPENNLP-698
Fixed the cleanInput() method so that it handles multi-token names. A new property, opennlp.geoentitylinker.gaz.doublequote, can now be added to the entitylinker.properties file to control whether search terms are wrapped in double quotes (defaults to false).
diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
index 1d49277..1f976d6 100644
--- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
+++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerSearcher.java
@@ -19,6 +19,7 @@
 import java.io.IOException;

 import java.util.ArrayList;

 import java.util.List;

+import java.util.logging.Level;

 import org.apache.lucene.analysis.Analyzer;

 import org.apache.lucene.analysis.standard.StandardAnalyzer;

 import org.apache.lucene.document.Document;

@@ -49,7 +50,8 @@
 

   private final String REGEX_CLEAN = "[^\\p{L}\\p{Nd}]";

   private static final Logger LOGGER = Logger.getLogger(GazetteerSearcher.class);

-  private double scoreCutoff = .90;

+  private double scoreCutoff = .70;

+  private boolean doubleQuoteAllSearchTerms = false;

   private Directory geonamesIndex;//= new MMapDirectory(new File(indexloc));

   private IndexReader geonamesReader;// = DirectoryReader.open(geonamesIndex);

   private IndexSearcher geonamesSearcher;// = new IndexSearcher(geonamesReader);

@@ -61,7 +63,17 @@
   private Analyzer usgsAnalyzer;

   private EntityLinkerProperties properties;

 

+  public static void main(String[] args) {

+    try {

+      boolean b = Boolean.valueOf("true");

 

+      new GazetteerSearcher(new EntityLinkerProperties(new File("c:\\temp\\entitylinker.properties"))).geonamesFind("townsville, queensland", 5, "");

+    } catch (IOException ex) {

+      java.util.logging.Logger.getLogger(GazetteerSearcher.class.getName()).log(Level.SEVERE, null, ex);

+    } catch (Exception ex) {

+      java.util.logging.Logger.getLogger(GazetteerSearcher.class.getName()).log(Level.SEVERE, null, ex);

+    }

+  }

 

   public GazetteerSearcher(EntityLinkerProperties properties) throws Exception {

     this.properties = properties;

@@ -272,14 +284,19 @@
   }

 

   /**

-   * Replaces any noise chars with 

+   * Replaces any noise chars with a space, and depending on configuration adds double quotes to the string

+   *

    * @param input

-   * @return 

+   * @return

    */

   private String cleanInput(String input) {

     String output = input.replaceAll(REGEX_CLEAN, " ").trim();

-    System.out.println(output);

-    return "\"" + output + "\"";

+    if (doubleQuoteAllSearchTerms) {

+      return "\"" + output + "\"";

+    } else {

+      return output;

+    }

+

   }

 

   private void init() throws Exception {

@@ -290,7 +307,10 @@
         LOGGER.error(new Exception("USGS Gaz location not found"));

       }

       String cutoff = properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", String.valueOf(scoreCutoff));

+

       scoreCutoff = Double.valueOf(cutoff);

+      String doubleQuote = properties.getProperty("opennlp.geoentitylinker.gaz.doublequote", String.valueOf(doubleQuoteAllSearchTerms));

+      doubleQuoteAllSearchTerms = Boolean.valueOf(doubleQuote);

       usgsIndex = new MMapDirectory(new File(indexloc));

       usgsReader = DirectoryReader.open(usgsIndex);

       usgsSearcher = new IndexSearcher(usgsReader);