OPENNLP-579
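Fixed a bug in the GazateerIndexer: the GEONAMES gazetteer type now returns its own index name instead of the USGS one, and each gazetteer type supplies its own field separator. Refined the GeoEntityLinkerSetupUtils.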
diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
index 5ea08ad..e2409fa 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerIndexer.java
@@ -38,12 +38,22 @@
*/
public class GazateerIndexer {
- public enum GazType {
+ public static interface Separable {
+
+ String getSeparator();
+ }
+
+ public enum GazType implements Separable {
GEONAMES {
@Override
public String toString() {
- return "/opennlp_geoentitylinker_usgsgaz_idx";
+ return "/opennlp_geoentitylinker_geonames_idx";
+ }
+
+ @Override
+ public String getSeparator() {
+ return "\t";
}
},
USGS {
@@ -51,6 +61,11 @@
public String toString() {
return "/opennlp_geoentitylinker_usgsgaz_idx";
}
+
+ @Override
+ public String getSeparator() {
+ return "\\|";
+ }
}
}
@@ -67,24 +82,24 @@
IndexWriter w = new IndexWriter(index, config);
- readFile(gazateerInputData, w);
+ readFile(gazateerInputData, w, type);
w.commit();
w.close();
}
- public void readFile(File gazateerInputData, IndexWriter w) throws Exception {
+ public void readFile(File gazateerInputData, IndexWriter w, GazType type) throws Exception {
BufferedReader reader = new BufferedReader(new FileReader(gazateerInputData));
List<String> fields = new ArrayList<String>();
int counter = 0;
System.out.println("reading gazateer data from file...........");
while (reader.read() != -1) {
String line = reader.readLine();
- String[] values = line.split("\\|");//nga format
+ String[] values = line.split(type.getSeparator());
if (counter == 0) {
// build fields
for (String columnName : values) {
- fields.add(columnName.replace("»¿", ""));
+ fields.add(columnName.replace("»¿", "").trim());
}
@@ -102,6 +117,7 @@
}
}
-
+ w.commit();
+ System.out.println("Completed indexing gaz! index name is: " + type.toString());
}
}
diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
index 13cce97..88f3bd7 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GazateerSearcher.java
@@ -70,9 +70,10 @@
geonamesReader = DirectoryReader.open(geonamesIndex);
geonamesSearcher = new IndexSearcher(geonamesReader);
geonamesAnalyzer = new StandardAnalyzer(Version.LUCENE_45);
+
}
- String luceneQueryString = "FULL_NAME_ND_RO:" + searchString + " AND CC1:" + code.toLowerCase() + "^100";
+ String luceneQueryString = "FULL_NAME_ND_RO:" + searchString + " AND CC1:" + code.toLowerCase() + "^10000";
QueryParser parser = new QueryParser(Version.LUCENE_45, luceneQueryString, geonamesAnalyzer);
Query q = parser.parse(luceneQueryString);
diff --git a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java
index 05fe374..b1b9d11 100644
--- a/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java
+++ b/apache-opennlp-addons/src/main/java/org/apache/opennlp/addons/tools/entitylinker/geoentitylinker/GeoEntityLinkerSetupUtils.java
@@ -83,7 +83,7 @@
}
}
}
- System.out.println("Document processing complete. Writing traininf data to file");
+ System.out.println("Document processing complete. Writing training data to "+ annotationOutFile.getAbsolutePath());
writer.close();
System.out.println("Building Doccat model...");
DoccatModel model = null;
@@ -116,7 +116,7 @@
* @param radius
* @return
*/
- public static Map<String, ArrayList<String>> modelCountryContext(String docText, CountryContext additionalContext, int radius) {
+ private static Map<String, ArrayList<String>> modelCountryContext(String docText, CountryContext additionalContext, int radius) {
Map<String, ArrayList< String>> featureBags = new HashMap<>();
Map<String, Set<Integer>> countryMentions = additionalContext.getCountryMentions();
/**