blob: 789b227bc4f3c6e24d309694ad96543ec9d882c1 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.entityhublinking;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.CASE_SENSITIVE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_CASE_SENSITIVE_MATCHING_STATE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_DEREFERENCE_ENTITIES_STATE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_MATCHING_LANGUAGE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.DEFAULT_MIN_SEARCH_TOKEN_LENGTH;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_MIN_TOKEN_SCORE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEFAULT_SUGGESTIONS;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEREFERENCE_ENTITIES;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.DEREFERENCE_ENTITIES_FIELDS;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.MIN_SEARCH_TOKEN_LENGTH;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.ENTITY_TYPES;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.MIN_TOKEN_SCORE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.NAME_FIELD;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.REDIRECT_FIELD;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.REDIRECT_MODE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.SUGGESTIONS;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.TYPE_FIELD;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.TYPE_MAPPINGS;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.PROCESSED_LANGUAGES;
import static org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig.PROCESS_ONLY_PROPER_NOUNS_STATE;
import static org.apache.stanbol.enhancer.servicesapi.EnhancementEngine.PROPERTY_NAME;
import static org.osgi.framework.Constants.SERVICE_RANKING;
import java.util.Dictionary;
import java.util.Hashtable;
import java.util.NavigableSet;
import java.util.NoSuchElementException;
import java.util.TreeSet;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.PropertyOption;
import org.apache.felix.scr.annotations.Reference;
import org.apache.stanbol.commons.namespaceprefix.NamespacePrefixService;
import org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer;
import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
import org.apache.stanbol.enhancer.engines.entitylinking.engine.EntityLinkingEngine;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.entityhub.servicesapi.Entityhub;
import org.osgi.framework.BundleContext;
import org.osgi.framework.Constants;
import org.osgi.framework.ServiceReference;
import org.osgi.framework.ServiceRegistration;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.osgi.util.tracker.ServiceTracker;
import org.osgi.util.tracker.ServiceTrackerCustomizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The EntityhubLinkingEngine is NOT an {@link EnhancementEngine} but only an
 * OSGI {@link Component} that allows to configure instances of the
 * {@link EntityLinkingEngine} using an {@link ReferencedSiteSearcher} or
 * {@link EntityhubSearcher} to link entities.
 * <p>
 * This component passes itself as {@link ServiceTrackerCustomizer} to the
 * entity searcher it creates. The configured {@link EntityLinkingEngine} is
 * registered as OSGI service when the first service is reported via
 * {@link #addingService(ServiceReference)} and unregistered as soon as the
 * last one is reported as removed.
 * @author Rupert Westenthaler
 *
 */
@Component(
    configurationFactory = true,
    policy = ConfigurationPolicy.REQUIRE, // the baseUri is required!
    specVersion = "1.1",
    metatype = true,
    immediate = true,
    inherit = true)
@org.apache.felix.scr.annotations.Properties(value={
    @Property(name=PROPERTY_NAME),
    @Property(name=EntityhubLinkingEngine.SITE_ID),
    @Property(name=NAME_FIELD,value="rdfs:label"),
    @Property(name=CASE_SENSITIVE,boolValue=DEFAULT_CASE_SENSITIVE_MATCHING_STATE),
    @Property(name=TYPE_FIELD,value="rdf:type"),
    @Property(name=ENTITY_TYPES,cardinality=Integer.MAX_VALUE),
    @Property(name=REDIRECT_FIELD,value="rdfs:seeAlso"),
    @Property(name=REDIRECT_MODE,options={
        @PropertyOption(
            value='%'+REDIRECT_MODE+".option.ignore",
            name="IGNORE"),
        @PropertyOption(
            value='%'+REDIRECT_MODE+".option.addValues",
            name="ADD_VALUES"),
        @PropertyOption(
            value='%'+REDIRECT_MODE+".option.follow",
            name="FOLLOW")
        },value="IGNORE"),
    @Property(name=MIN_SEARCH_TOKEN_LENGTH, intValue=DEFAULT_MIN_SEARCH_TOKEN_LENGTH),
    @Property(name=MIN_TOKEN_SCORE,floatValue=DEFAULT_MIN_TOKEN_SCORE),
    @Property(name=SUGGESTIONS, intValue=DEFAULT_SUGGESTIONS),
    @Property(name=PROCESS_ONLY_PROPER_NOUNS_STATE, boolValue=DEFAULT_PROCESS_ONLY_PROPER_NOUNS_STATE),
    @Property(name=PROCESSED_LANGUAGES,
        cardinality=Integer.MAX_VALUE,
        value={"*;lmmtip;uc=LINK;prob=0.75;pprob=0.75", // link multiple matchable tokens in chunks; link upper case words
               "de;uc=MATCH", //in German all Nouns are upper case
               "es;lc=Noun", //the OpenNLP POS tagger for Spanish does not support ProperNouns
               "nl;lc=Noun"}), //same for Dutch
    @Property(name=DEFAULT_MATCHING_LANGUAGE,value=""),
    @Property(name=TYPE_MAPPINGS,cardinality=Integer.MAX_VALUE, value={
        "dbp-ont:Organisation; dbp-ont:Newspaper; schema:Organization > dbp-ont:Organisation",
        "dbp-ont:Person; foaf:Person; schema:Person > dbp-ont:Person",
        "dbp-ont:Place; schema:Place > dbp-ont:Place",
        "dbp-ont:Work; schema:CreativeWork > dbp-ont:Work",
        "dbp-ont:Event; schema:Event > dbp-ont:Event",
        "schema:Product > schema:Product",
        "skos:Concept > skos:Concept"}),
    @Property(name=DEREFERENCE_ENTITIES, boolValue=DEFAULT_DEREFERENCE_ENTITIES_STATE),
    @Property(name=DEREFERENCE_ENTITIES_FIELDS,cardinality=Integer.MAX_VALUE,
        value={"rdfs:comment","geo:lat","geo:long","foaf:depiction","dbp-ont:thumbnail"}),
    @Property(name=SERVICE_RANKING,intValue=0)
})
public class EntityhubLinkingEngine implements ServiceTrackerCustomizer {

    private static final Logger log = LoggerFactory.getLogger(EntityhubLinkingEngine.class);

    /**
     * Injected by the component runtime; passed to
     * {@link EntityLinkerConfig#createInstance} when the configuration is parsed.
     */
    @Reference
    NamespacePrefixService prefixService;

    /**
     * The id of the Entityhub Site (Referenced or Managed Site) used for matching. <p>
     * To match against the Entityhub use "entityhub" as value.
     */
    public static final String SITE_ID = "enhancer.engines.linking.entityhub.siteId";
    /**
     * The engine initialised based on the configuration of this component
     */
    protected EntityLinkingEngine entityLinkingEngine;
    /**
     * The service properties (engine name and service ranking) used when the
     * {@link #entityLinkingEngine} is registered as OSGI service
     */
    protected Dictionary<String,Object> engineMetadata;
    /**
     * The service registration for the {@link #entityLinkingEngine}.
     * <code>null</code> while the engine is not registered.
     */
    protected ServiceRegistration engineRegistration;
    /**
     * The EntitySearcher used for the {@link #entityLinkingEngine}
     */
    private TrackingEntitySearcher<?> entitySearcher;
    /**
     * Number of services reported via {@link #addingService(ServiceReference)}
     * that have not yet been reported as removed. The engine is registered
     * while this is greater than zero.
     */
    int trackedServiceCount = 0;
    /**
     * the ServiceTracker for the {@link LabelTokenizer}
     */
    private ServiceTracker labelTokenizerTracker;
    /**
     * References of all currently known {@link LabelTokenizer} services. The
     * {@link NavigableSet#last() last} element is the one used for the engine.
     * Also used as lock by the {@link LabelTokenizer} tracking callbacks.
     */
    private NavigableSet<ServiceReference> labelTokenizersRefs = new TreeSet<ServiceReference>();
    /**
     * The name of the reference site ('local' or 'entityhub') if the
     * Entityhub is used for enhancing
     */
    protected String siteName;
    private BundleContext bundleContext;

    /**
     * Default constructor as used by OSGI. This expects that
     * {@link #activate(ComponentContext)} is called before usage
     */
    public EntityhubLinkingEngine() {
    }

    /**
     * Parses the component configuration, creates the {@link EntityLinkingEngine}
     * and starts tracking the {@link LabelTokenizer} services as well as the
     * services required by the entity searcher.
     * @param ctx the component context providing the configuration
     * @throws ConfigurationException if the {@link #SITE_ID} or the engine name
     * ({@link EnhancementEngine#PROPERTY_NAME}) is missing or empty
     */
    @Activate
    @SuppressWarnings("unchecked")
    protected void activate(ComponentContext ctx) throws ConfigurationException {
        Dictionary<String,Object> properties = ctx.getProperties();
        bundleContext = ctx.getBundleContext();
        EntityLinkerConfig linkerConfig = EntityLinkerConfig.createInstance(properties,prefixService);
        TextProcessingConfig textProcessingConfig = TextProcessingConfig.createInstance(properties);
        Object value = properties.get(SITE_ID);
        //init the EntitySource
        if (value == null) {
            throw new ConfigurationException(SITE_ID,
                "The ID of the Referenced Site is a required Parameter and MUST NOT be NULL!");
        }
        siteName = value.toString();
        if (siteName.isEmpty()) {
            throw new ConfigurationException(SITE_ID,
                "The ID of the Referenced Site is a required Parameter and MUST NOT be an empty String!");
        }
        //get the metadata later set to the enhancement engine
        engineMetadata = new Hashtable<String,Object>();
        value = properties.get(PROPERTY_NAME);
        if(value == null || value.toString().isEmpty()){
            throw new ConfigurationException(PROPERTY_NAME, "The EnhancementEngine name MUST BE configured!");
        }
        String engineName = value.toString();
        engineMetadata.put(PROPERTY_NAME, value);
        value = properties.get(Constants.SERVICE_RANKING);
        engineMetadata.put(Constants.SERVICE_RANKING, value == null ? Integer.valueOf(0) : value);
        //init the tracking entity searcher
        trackedServiceCount = 0;
        //Locale.ROOT: the lower case conversion MUST NOT depend on the default
        //locale of the JVM (e.g. 'I' is not converted to 'i' for Turkish)
        if(Entityhub.ENTITYHUB_IDS.contains(siteName.toLowerCase(java.util.Locale.ROOT))){
            entitySearcher = new EntityhubSearcher(bundleContext, 10, this);
        } else {
            entitySearcher = new ReferencedSiteSearcher(bundleContext,siteName,10,this);
        }
        labelTokenizerTracker = new ServiceTracker(bundleContext, LabelTokenizer.class.getName(),
            new ServiceTrackerCustomizer() {

                @Override
                public Object addingService(ServiceReference reference) {
                    Object service = bundleContext.getService(reference);
                    if(service == null){
                        return service;
                    }
                    synchronized (labelTokenizersRefs) {
                        labelTokenizersRefs.add(reference);
                        //the added reference is not yet known to the tracker
                        //so the service needs to be parsed explicitly
                        refreshLabelTokenizer(reference, service);
                    }
                    return service;
                }

                @Override
                public void removedService(ServiceReference reference, Object service) {
                    try {
                        synchronized (labelTokenizersRefs) {
                            labelTokenizersRefs.remove(reference);
                            refreshLabelTokenizer(null, null);
                        }
                    } finally {
                        //release the service even if updating the engine failed
                        bundleContext.ungetService(reference);
                    }
                }

                @Override
                public void modifiedService(ServiceReference reference, Object service) {
                    synchronized (labelTokenizersRefs) {
                        //NOTE(review): the order of the set presumably depends on
                        //the service ranking. If the ranking was modified the
                        //remove might not find the old entry - TODO confirm
                        labelTokenizersRefs.remove(reference); //override
                        labelTokenizersRefs.add(reference);
                        refreshLabelTokenizer(reference, service);
                    }
                }
            });
        //create the engine
        entityLinkingEngine = new EntityLinkingEngine(engineName,
            entitySearcher, //the searcher might not be available
            textProcessingConfig, linkerConfig,
            null);
        //start tracking
        labelTokenizerTracker.open();
        entitySearcher.open();
    }

    /**
     * Sets the {@link LabelTokenizer} of the {@link #entityLinkingEngine} to the
     * service of the last reference in {@link #labelTokenizersRefs}. If no
     * reference is left the tokenizer of the engine is set to <code>null</code>.
     * Does nothing if the component was already deactivated.<p>
     * Callers MUST hold the lock on {@link #labelTokenizersRefs}.
     * @param reference the reference the calling callback was notified about
     * or <code>null</code> if that reference is no longer available
     * @param service the service for the parsed reference. Used if the parsed
     * reference is the last one in {@link #labelTokenizersRefs} and as fallback
     * if the tracker does not provide a service for the last reference.
     */
    private void refreshLabelTokenizer(ServiceReference reference, Object service) {
        EntityLinkingEngine engine = entityLinkingEngine;
        ServiceTracker tracker = labelTokenizerTracker;
        if(engine == null || tracker == null){
            return; //deactivate was already called
        }
        LabelTokenizer tokenizer = (LabelTokenizer) service;
        if(!labelTokenizersRefs.isEmpty()){
            ServiceReference highest = labelTokenizersRefs.last();
            if(!highest.equals(reference)){
                //getService may return null if the tracker does not (or no
                //longer) track the reference. Keep the parsed service then
                Object tracked = tracker.getService(highest);
                if(tracked != null){
                    tokenizer = (LabelTokenizer) tracked;
                }
            }
        }
        if(tokenizer == null){
            log.info(" ... setting LabelTokenizer of Engine '{}' to null",
                engine.getName());
            engine.setLabelTokenizer(null);
        } else if(!tokenizer.equals(engine.getLabelTokenizer())){
            log.info(" ... setting LabelTokenizer of Engine '{}' to {}",
                engine.getName(),tokenizer);
            engine.setLabelTokenizer(tokenizer);
        }
    }

    /**
     * Deactivates this components: unregisters the engine (if registered),
     * resets the engine state and closes the {@link LabelTokenizer} tracker
     * as well as the entity searcher.
     */
    @Deactivate
    protected void deactivate(ComponentContext context) {
        //* unregister service
        ServiceRegistration reg = engineRegistration;
        if(reg != null){
            engineRegistration = null;
            reg.unregister();
        }
        //* reset engine
        entityLinkingEngine = null;
        engineMetadata = null;
        //* close the LabelTokenizer tracker
        ServiceTracker tokenizerTracker = labelTokenizerTracker;
        if(tokenizerTracker != null){
            tokenizerTracker.close();
            labelTokenizerTracker = null;
        }
        //* close the tracking EntitySearcher
        TrackingEntitySearcher<?> searcher = entitySearcher;
        if(searcher != null){
            searcher.close();
            entitySearcher = null;
        }
    }

    /**
     * Gets the service for the parsed reference and - if this is the first
     * service counted by {@link #trackedServiceCount} - registers the
     * {@link #entityLinkingEngine} with the {@link #engineMetadata}.
     * @return the service or <code>null</code> if the service is not available
     * or no {@link BundleContext} is set
     */
    @Override
    public Object addingService(ServiceReference reference) {
        BundleContext bc = this.bundleContext;
        if(bc == null){
            return null;
        }
        Object service = bc.getService(reference);
        if(service != null){
            if(trackedServiceCount == 0){
                //register the service
                engineRegistration = bc.registerService(
                    new String[]{EnhancementEngine.class.getName(),
                                 ServiceProperties.class.getName()},
                    entityLinkingEngine,
                    engineMetadata);
            }
            trackedServiceCount++;
        }
        return service;
    }

    /**
     * Modifications of tracked services are ignored.
     */
    @Override
    public void modifiedService(ServiceReference reference, Object service) {
    }

    /**
     * Decreases the {@link #trackedServiceCount} and unregisters the
     * {@link #entityLinkingEngine} if no service is left. The
     * {@link #engineRegistration} is reset so that the engine is not
     * unregistered a second time by {@link #deactivate(ComponentContext)}.
     */
    @Override
    public void removedService(ServiceReference reference, Object service) {
        BundleContext bc = this.bundleContext;
        if(bc != null){
            trackedServiceCount--;
            if(trackedServiceCount == 0){
                ServiceRegistration reg = engineRegistration;
                if(reg != null){
                    //reset the field: calling unregister() twice on the same
                    //registration results in an IllegalStateException
                    engineRegistration = null;
                    reg.unregister();
                }
            }
            bc.ungetService(reference);
        }
    }
}