OPENNLP-579
Added simple caching to improve performance.
diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearchCache.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearchCache.java
new file mode 100644
index 0000000..d4470d9
--- /dev/null
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearchCache.java
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2013 The Apache Software Foundation.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp.addons.tools.entitylinker.geoentitylinker;
+
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ *
+ * Caches gazateer query results statically
+ */
+public class GazateerSearchCache {
+
+ private static Map<String, ArrayList<GazateerEntry>> gazCache = new HashMap<>();
+
+
+ public static synchronized ArrayList<GazateerEntry> get(String searchString) {
+ return gazCache.get(searchString);
+ }
+
+ public static synchronized void put(String searchString, ArrayList<GazateerEntry> hits) {
+ if (gazCache.size() > 10000) {
+ gazCache.clear();
+ }
+ if (!gazCache.containsKey(searchString)) {
+ gazCache.put(searchString, hits);
+ }
+ }
+
+
+}
diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
index 88f3bd7..5429590 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
@@ -36,13 +36,13 @@
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.Version;
import opennlp.tools.entitylinker.EntityLinkerProperties;
+
/**
*
* Searches Gazateers stored in a MMapDirectory lucene index
*/
public class GazateerSearcher {
- //private FuzzyStringMatchScorer diceScorer = new FuzzyStringMatchScorer();
private double scoreCutoff = .75;
private Directory geonamesIndex;//= new MMapDirectory(new File(indexloc));
private IndexReader geonamesReader;// = DirectoryReader.open(geonamesIndex);
@@ -57,11 +57,29 @@
public GazateerSearcher() {
}
+ /**
+ *
+ * @param searchString the nameed entity to look up in the lucene index
+ * @param rowsReturned how many rows to allow lucene to return
+ * @param code the country code
+ * @param properties properties file that states where the lucene indexes
+ * are
+ * @return
+ */
public ArrayList<GazateerEntry> geonamesFind(String searchString, int rowsReturned, String code, EntityLinkerProperties properties) {
ArrayList<GazateerEntry> linkedData = new ArrayList<>();
try {
-
-
+ /**
+ * build the search string
+ */
+ String luceneQueryString = "FULL_NAME_ND_RO:" + searchString.toLowerCase().trim() + " AND CC1:" + code.toLowerCase() + "^10000";
+ /**
+ * check the cache and go no further if the records already exist
+ */
+ ArrayList<GazateerEntry> get = GazateerSearchCache.get(searchString);
+ if (get != null) {
+ return get;
+ }
if (geonamesIndex == null) {
String indexloc = properties.getProperty("opennlp.geoentitylinker.gaz.geonames", "");
String cutoff = properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", ".75");
@@ -73,7 +91,8 @@
}
- String luceneQueryString = "FULL_NAME_ND_RO:" + searchString + " AND CC1:" + code.toLowerCase() + "^10000";
+
+
QueryParser parser = new QueryParser(Version.LUCENE_45, luceneQueryString, geonamesAnalyzer);
Query q = parser.parse(luceneQueryString);
@@ -87,7 +106,7 @@
double sc = search.scoreDocs[i].score;
entry.getScoreMap().put("lucene", sc);
-
+
entry.getScoreMap().put("rawlucene", sc);
entry.setIndexID(docId + "");
entry.setSource("geonames");
@@ -130,14 +149,35 @@
} catch (IOException | ParseException ex) {
System.err.println(ex);
}
+ /**
+ * add the records to the cache for this query
+ */
+ GazateerSearchCache.put(searchString, linkedData);
return linkedData;
}
+ /**
+ * Looks up the name in the USGS gazateer, after checking the cache
+ *
+ * @param searchString the nameed entity to look up in the lucene index
+ * @param rowsReturned how many rows to allow lucene to return
+ *
+ * @param properties properties file that states where the lucene indexes
+ * @return
+ */
public ArrayList<GazateerEntry> usgsFind(String searchString, int rowsReturned, EntityLinkerProperties properties) {
ArrayList<GazateerEntry> linkedData = new ArrayList<>();
try {
-
+ String luceneQueryString = "FEATURE_NAME:" + searchString.toLowerCase().trim() + " OR MAP_NAME: " + searchString.toLowerCase().trim();
+ /**
+ * hit the cache
+ */
+ ArrayList<GazateerEntry> get = GazateerSearchCache.get(searchString);
+ if (get != null) {
+ //if the name is already there, return the list of cavhed results
+ return get;
+ }
if (usgsIndex == null) {
String indexloc = properties.getProperty("opennlp.geoentitylinker.gaz.usgs", "");
String cutoff = properties.getProperty("opennlp.geoentitylinker.gaz.lucenescore.min", ".75");
@@ -148,7 +188,7 @@
usgsAnalyzer = new StandardAnalyzer(Version.LUCENE_45);
}
- String luceneQueryString = "FEATURE_NAME:" + searchString + " OR MAP_NAME: " + searchString;
+
QueryParser parser = new QueryParser(Version.LUCENE_45, luceneQueryString, usgsAnalyzer);
Query q = parser.parse(luceneQueryString);
@@ -204,7 +244,10 @@
} catch (IOException | ParseException ex) {
System.err.println(ex);
}
-
+ /**
+ * add the records to the cache for this query
+ */
+ GazateerSearchCache.put(searchString, linkedData);
return linkedData;
}