OPENNLP-637
The equals and hashCode methods in GazetteerEntry incorrectly included latitude and longitude in the comparison; they now use only source and indexID. Also added checks in GeoEntityLinker to ensure no duplicate entries are added across where clauses.
diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
index 0c37eee..6f3ac87 100644
--- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
+++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GazetteerEntry.java
@@ -130,11 +130,9 @@
@Override
public int hashCode() {
- int hash = 7;
- hash = 29 * hash + Objects.hashCode(this.latitude);
- hash = 29 * hash + Objects.hashCode(this.longitude);
- hash = 29 * hash + Objects.hashCode(this.source);
- hash = 29 * hash + Objects.hashCode(this.indexID);
+ int hash = 5;
+ hash = 71 * hash + Objects.hashCode(this.source);
+ hash = 71 * hash + Objects.hashCode(this.indexID);
return hash;
}
@@ -147,12 +145,6 @@
return false;
}
final GazetteerEntry other = (GazetteerEntry) obj;
- if (!Objects.equals(this.latitude, other.latitude)) {
- return false;
- }
- if (!Objects.equals(this.longitude, other.longitude)) {
- return false;
- }
if (!Objects.equals(this.source, other.source)) {
return false;
}
@@ -162,6 +154,7 @@
return true;
}
+
public String getCountryCode() {
return countryCode;
}
diff --git a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java
index e8ab244..c9edc64 100644
--- a/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java
+++ b/geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/GeoEntityLinker.java
@@ -35,8 +35,8 @@
import opennlp.tools.entitylinker.EntityLinker;
/**
- * Links location entities to the USGS and GeoNames gazatteers, and uses several
- * scoring techniques to enable resolution. The gazateers are stored in lucene
+ * Links location entities to the USGS and GeoNames gazetteers, and uses several
+ * scoring techniques to enable resolution. The gazetteers are stored in lucene
* indexes. The indexes can be built using the GeoEntityLinkerSetupUtils class
* in this same package.
*/
@@ -67,16 +67,28 @@
ArrayList<BaseLink> geoNamesEntries = new ArrayList<>();
if (!context.getWhereClauses().isEmpty()) {
for (String whereclause : context.getWhereClauses()) {
- geoNamesEntries.addAll(gazateerSearcher.find(matches[i], topN, whereclause));
+ ArrayList<GazetteerEntry> find = gazateerSearcher.find(matches[i], topN, whereclause);
+ for (GazetteerEntry gazetteerEntry : find) {
+ if (!geoNamesEntries.contains(gazetteerEntry)) {
+ geoNamesEntries.add(gazetteerEntry);
+ }
+ }
+
}
} else {//this means there were no where clauses generated so the where clause will default to look at the entire index
- geoNamesEntries.addAll(gazateerSearcher.find(matches[i], topN, " gaztype:usgs geonames regions "));
+ ArrayList<GazetteerEntry> find = gazateerSearcher.find(matches[i], topN, " gaztype:usgs geonames regions ");
+ for (GazetteerEntry gazetteerEntry : find) {
+ if (!geoNamesEntries.contains(gazetteerEntry)) {
+ geoNamesEntries.add(gazetteerEntry);
+ }
+ }
}
if (geoNamesEntries.isEmpty()) {
continue;
}
/**
- * Normalize the returned scores for this name... this will assist the sort
+ * Normalize the returned scores for this name... this will assist the
+ * sort
*/
if (!spans.isEmpty()) {