blob: beca793c338402b0d39b852bff493c13679e7883 [file] [log] [blame]
/*
* Copyright 2013 The Apache Software Foundation.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package opennlp.addons.geoentitylinker;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import opennlp.tools.entitylinker.EntityLinkerProperties;
import opennlp.tools.entitylinker.domain.BaseLink;
import opennlp.tools.entitylinker.domain.LinkedSpan;
import opennlp.tools.util.Span;
/**
* Scores toponyms based on geographic point binning. Based on the heuristic
* that documents are generally about a small number of locations, so one can
* detect outliers by finding those points that are not near the majority.
*/
public class GeoHashBinningScorer implements LinkedEntityScorer<CountryContext> {

  /**
   * Precision value handed to {@link PointClustering#cluster} on every call
   * (presumably the geohash precision used for binning; confirm against
   * PointClustering). It never changes, so it is a true constant.
   */
  private static final int PRECISION = 4;

  /**
   * Performs the clustering and the cluster-based scoring. Kept per instance
   * rather than static because PointClustering is not visible here and may
   * hold state.
   */
  private final PointClustering clusterer = new PointClustering();

  /**
   * Gathers every {@link GazateerEntry} linked to the given spans, clusters
   * them with {@link PointClustering#cluster}, and passes the clusters to
   * {@link PointClustering#scoreClusters}.
   *
   * <p>Links that are not {@code GazateerEntry} instances are ignored. A
   * {@code null} span list, {@code null} spans, and spans whose linked-entry
   * list is {@code null} are skipped instead of raising a
   * {@link NullPointerException}.
   *
   * @param linkedSpans spans whose linked entries are candidates for scoring;
   * may be {@code null} or empty
   * @param docText not used by this scorer
   * @param sentenceSpans not used by this scorer
   * @param properties not used by this scorer
   * @param additionalContext not used by this scorer
   */
  @Override
  public void score(List<LinkedSpan> linkedSpans, String docText, Span[] sentenceSpans, EntityLinkerProperties properties, CountryContext additionalContext) {
    List<GazateerEntry> allGazEntries = new ArrayList<>();

    // Collect all gazetteer entries across every span. A null list is treated
    // like an empty one so the clusterer sees the same call sequence.
    if (linkedSpans != null) {
      for (LinkedSpan<BaseLink> ls : linkedSpans) {
        if (ls == null) {
          continue;
        }
        List<BaseLink> links = ls.getLinkedEntries();
        if (links == null) {
          continue;
        }
        for (BaseLink bl : links) {
          if (bl instanceof GazateerEntry) {
            allGazEntries.add((GazateerEntry) bl);
          }
        }
      }
    }

    // Use the point clustering to score each hit.
    Map<String, List<GazateerEntry>> cluster = clusterer.cluster(allGazEntries, PRECISION);
    clusterer.scoreClusters(cluster);
  }
}