/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.refactor;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Dictionary;
import java.util.Enumeration;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.access.TcProvider;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
import org.apache.stanbol.commons.owl.transformation.OWLAPIToClerezzaConverter;
import org.apache.stanbol.enhancer.engines.refactor.dereferencer.Dereferencer;
import org.apache.stanbol.enhancer.engines.refactor.dereferencer.DereferencerImpl;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.entityhub.core.utils.OsgiUtils;
import org.apache.stanbol.entityhub.model.clerezza.RdfRepresentation;
import org.apache.stanbol.entityhub.model.clerezza.RdfValueFactory;
import org.apache.stanbol.entityhub.servicesapi.model.Entity;
import org.apache.stanbol.entityhub.servicesapi.site.SiteManager;
import org.apache.stanbol.ontologymanager.servicesapi.collector.DuplicateIDException;
import org.apache.stanbol.ontologymanager.servicesapi.collector.UnmodifiableOntologyCollectorException;
import org.apache.stanbol.ontologymanager.servicesapi.io.OntologyInputSource;
import org.apache.stanbol.ontologymanager.servicesapi.io.Origin;
import org.apache.stanbol.ontologymanager.servicesapi.ontology.OntologyProvider;
import org.apache.stanbol.ontologymanager.servicesapi.scope.OntologySpace;
import org.apache.stanbol.ontologymanager.servicesapi.scope.Scope;
import org.apache.stanbol.ontologymanager.servicesapi.scope.ScopeManager;
import org.apache.stanbol.ontologymanager.servicesapi.session.Session;
import org.apache.stanbol.ontologymanager.servicesapi.session.SessionLimitException;
import org.apache.stanbol.ontologymanager.servicesapi.session.SessionManager;
import org.apache.stanbol.ontologymanager.sources.clerezza.GraphContentInputSource;
import org.apache.stanbol.ontologymanager.sources.clerezza.GraphSource;
import org.apache.stanbol.ontologymanager.sources.owlapi.RootOntologyIRISource;
import org.apache.stanbol.rules.base.api.AlreadyExistingRecipeException;
import org.apache.stanbol.rules.base.api.NoSuchRecipeException;
import org.apache.stanbol.rules.base.api.Recipe;
import org.apache.stanbol.rules.base.api.RecipeConstructionException;
import org.apache.stanbol.rules.base.api.RecipeEliminationException;
import org.apache.stanbol.rules.base.api.RuleStore;
import org.apache.stanbol.rules.refactor.api.Refactorer;
import org.apache.stanbol.rules.refactor.api.RefactoringException;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.osgi.service.component.ComponentFactory;
import org.osgi.service.component.ComponentInstance;
import org.semanticweb.owlapi.apibinding.OWLManager;
import org.semanticweb.owlapi.model.IRI;
import org.semanticweb.owlapi.model.OWLOntology;
import org.semanticweb.owlapi.model.OWLOntologyCreationException;
import org.semanticweb.owlapi.model.OWLOntologyID;
import org.semanticweb.owlapi.model.OWLOntologyManager;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
* An engine that post-processes the enhancements produced by the other engines in the chain. Its main
* goal is to refactor the RDF produced by the enhancement process, applying a vocabulary suited to a
* specific task.
*
* To do so, it exploits a Refactor recipe and an OntoNet ontology scope.
*
* The first implementation targets the SEO use case. It:
* <ul>
* <li>retrieves data by dereferencing the entities,</li>
* <li>includes the DBpedia ontology,</li>
* <li>refactors the data using the Google Rich Snippets vocabulary.</li>
* </ul>
*
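* <p>
* For illustration only, a minimal sketch of how such a factory configuration could be assembled
* programmatically, using the property constants declared on this class (the values shown are the
* defaults declared below, not the only valid ones):
*
* <pre>
* Dictionary&lt;String,Object&gt; props = new Hashtable&lt;String,Object&gt;();
* props.put(EnhancementEngine.PROPERTY_NAME, &quot;seo_refactoring&quot;);
* props.put(RefactorEnhancementEngine.SCOPE, &quot;seo&quot;);
* props.put(RefactorEnhancementEngine.RECIPE_ID, &quot;google_rich_snippet_rules&quot;);
* props.put(RefactorEnhancementEngine.USE_ENTITY_HUB, Boolean.TRUE);
* </pre>
*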
* @author anuzzolese, alberto.musetti
*
*/
@Component(configurationFactory = true, policy = ConfigurationPolicy.REQUIRE, specVersion = "1.1", metatype = true, immediate = true, inherit = true)
@Service
@Properties(value = {@Property(name = EnhancementEngine.PROPERTY_NAME, value = "seo_refactoring")
})
public class RefactorEnhancementEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException>
implements EnhancementEngine, ServiceProperties {
/**
* A special input source that allows binding a physical IRI to an ontology parsed from an input stream.
* Due to its unconventional nature it is kept private.
*
* @author alexdma
*
*/
private class GraphContentSourceWithPhysicalIRI extends GraphContentInputSource {
public GraphContentSourceWithPhysicalIRI(InputStream content, IRI physicalIri) {
super(content);
bindPhysicalOrigin(Origin.create(physicalIri));
}
}
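/*
* Engine configuration properties. The property keys are defined by RefactorEnhancementEngineConf; the
* values declared here are only the defaults presented to the OSGi configuration.
*/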
@Property(boolValue = true)
public static final String APPEND_OTHER_ENHANCEMENT_GRAPHS = RefactorEnhancementEngineConf.APPEND_OTHER_ENHANCEMENT_GRAPHS;
@Property(value = "google_rich_snippet_rules")
public static final String RECIPE_ID = RefactorEnhancementEngineConf.RECIPE_ID;
@Property(value = "")
public static final String RECIPE_LOCATION = RefactorEnhancementEngineConf.RECIPE_LOCATION;
@Property(value = "seo")
public static final String SCOPE = RefactorEnhancementEngineConf.SCOPE;
@Property(cardinality = 1000, value = {"http://ontologydesignpatterns.org/ont/iks/kres/dbpedia_demo.owl"})
public static final String SCOPE_CORE_ONTOLOGY = RefactorEnhancementEngineConf.SCOPE_CORE_ONTOLOGY;
@Property(boolValue = true)
public static final String USE_ENTITY_HUB = RefactorEnhancementEngineConf.USE_ENTITY_HUB;
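/*
* Collaborating services, injected by the SCR runtime, and internal component state.
*/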
private ComponentContext context;
@Reference
Dereferencer dereferencer;
private RefactorEnhancementEngineConf engineConfiguration;
private final Object lock = new Object();
private final Logger log = LoggerFactory.getLogger(getClass());
@Reference
ScopeManager onManager;
@Reference
OntologyProvider<TcProvider> ontologyProvider;
private ComponentInstance refactorEngineComponentInstance;
@Reference
Refactorer refactorer;
@Reference
SiteManager referencedSiteManager;
@Reference
RuleStore ruleStore;
private Scope scope;
@Reference
SessionManager sessionManager;
/**
* Activates the component and initializes the engine from the supplied OSGi configuration.
*
* @param context
*            the OSGi component context
* @throws ConfigurationException
*             if the configuration is invalid
*/
@SuppressWarnings("unchecked")
@Activate
protected void activate(final ComponentContext context) throws ConfigurationException {
log.info("in " + RefactorEnhancementEngine.class + " activate with context " + context);
if (context == null) {
throw new IllegalStateException("No valid " + ComponentContext.class + " passed to activate()!");
}
super.activate(context);
this.context = context;
Map<String,Object> config = new HashMap<String,Object>();
Dictionary<String,Object> properties = (Dictionary<String,Object>) context.getProperties();
// copy the properties to a map
for (Enumeration<String> e = properties.keys(); e.hasMoreElements();) {
String key = e.nextElement();
config.put(key, properties.get(key));
log.debug("Configuration property: " + key + " :- " + properties.get(key));
}
// Initialize engine-specific features.
engineConfiguration = new DefaultRefactorEnhancementEngineConf(properties);
initEngine(engineConfiguration);
log.debug(RefactorEnhancementEngine.class + " activated.");
}
@Override
public int canEnhance(ContentItem ci) throws EngineException {
/*
* Being a post-processing engine, the Refactor can enhance only content items that are previously
* enhanced by other enhancement engines.
*/
return ci.getMetadata() == null ? CANNOT_ENHANCE : ENHANCE_SYNCHRONOUS;
}
@Override
public void computeEnhancements(ContentItem ci) throws EngineException {
// Prepare the OntoNet environment. First create the OntoNet session in which the whole refactoring
// job will run.
final Session session;
try {
session = sessionManager.createSession();
} catch (SessionLimitException e1) {
throw new EngineException(
"OntoNet session quota reached. The Refactor Engine requires its own new session to execute.");
}
if (session == null) throw new EngineException(
"Failed to create OntoNet session. The Refactor Engine requires its own new session to execute.");
log.debug("Refactor enhancement job will run in session '{}'.", session.getID());
// Retrieve and filter the metadata graph for entities recognized by the engines.
final MGraph metadataGraph = ci.getMetadata(), signaturesGraph = new IndexedMGraph();
// FIXME the Stanbol Enhancer vocabulary should be retrieved from somewhere in the enhancer API.
final UriRef ENHANCER_ENTITY_REFERENCE = new UriRef(
"http://fise.iks-project.eu/ontology/entity-reference");
Iterator<Triple> tripleIt = metadataGraph.filter(null, ENHANCER_ENTITY_REFERENCE, null);
while (tripleIt.hasNext()) {
// Get the entity URI
Resource obj = tripleIt.next().getObject();
if (!(obj instanceof UriRef)) {
log.warn("Invalid UriRef for entity reference {}. Skipping.", obj);
continue;
}
final String entityReference = ((UriRef) obj).getUnicodeString();
log.debug("Trying to resolve entity {}", entityReference);
// Populate the entity signatures graph, by querying either the Entity Hub or the dereferencer.
if (engineConfiguration.isEntityHubUsed()) {
MGraph result = populateWithEntity(entityReference, signaturesGraph);
if (result != signaturesGraph && result != null) {
log.warn("Entity Hub query added triples to a new graph instead of populating the supplied one!"
+ " New signatures will be discarded.");
}
} else try {
OntologyInputSource<TripleCollection> source = new GraphContentSourceWithPhysicalIRI(
dereferencer.resolve(entityReference), IRI.create(entityReference));
signaturesGraph.addAll(source.getRootOntology());
} catch (FileNotFoundException e) {
log.error("Failed to dereference entity " + entityReference + ". Skipping.", e);
continue;
}
}
try {
/*
* The dedicated session for this job will store the following: (1) all the (merged) signatures
* for all detected entities; (2) the original content metadata graph returned earlier in the
* chain.
*
* There is no chance that (2) could be null, as it was previously checked by the JobManager
* through the canEnhance() method, and computeEnhancements() is only called if that check
* passes.
*/
session.addOntology(new GraphSource(signaturesGraph));
session.addOntology(new GraphSource(metadataGraph));
} catch (UnmodifiableOntologyCollectorException e1) {
throw new EngineException("Cannot add enhancement graph to OntoNet session for refactoring", e1);
}
try {
/*
* Export the entire session (incl. entities and enhancement graph) as a single merged ontology.
*
* TODO the refactorer should have methods to accommodate an OntologyCollector directly instead.
*/
OWLOntology ontology = session.export(OWLOntology.class, true);
log.debug("Refactoring recipe IRI is : " + engineConfiguration.getRecipeId());
/*
* We pass the ontology and the recipe IRI to the Refactor that returns the refactored graph
* expressed by using the given vocabulary.
*
* To perform the refactoring of the ontology to a given vocabulary we use the Stanbol Refactor.
*/
Recipe recipe = ruleStore.getRecipe(new UriRef(engineConfiguration.getRecipeId()));
log.debug("Recipe {} contains {} rules.", recipe, recipe.getRuleList().size());
log.debug("The ontology to be refactor is {}", ontology);
TripleCollection tc = refactorer.graphRefactoring(
OWLAPIToClerezzaConverter.owlOntologyToClerezzaMGraph(ontology), recipe);
/*
* ontology = refactorer .ontologyRefactoring(ontology,
* IRI.create(engineConfiguration.getRecipeId()));
*/
/*
* The newly generated ontology is converted to Clerezza format and then either appended to or
* substituted for the old content metadata graph, depending on the configuration.
*/
if (engineConfiguration.isInGraphAppendMode()) {
log.debug("The refactored triples will be appended to the existing content metadata.");
} else {
metadataGraph.clear();
log.debug("The refactored triples will replace the existing content metadata.");
}
metadataGraph.addAll(tc);
} catch (RefactoringException e) {
String msg = "Refactor engine execution failed on content item " + ci + ".";
log.error(msg, e);
throw new EngineException(msg, e);
} catch (NoSuchRecipeException e) {
String msg = "Refactor engine could not find recipe " + engineConfiguration.getRecipeId()
+ " to refactor content item " + ci + ".";
log.error(msg, e);
throw new EngineException(msg, e);
} catch (Exception e) {
throw new EngineException("Refactor Engine has failed.", e);
} finally {
/*
* The session needs to be destroyed anyhow.
*
* Clear contents before destroying (FIXME only do this until this is implemented in the
* destroySession() method).
*/
for (OWLOntologyID id : session.listManagedOntologies()) {
try {
String key = ontologyProvider.getKey(id.getOntologyIRI());
ontologyProvider.getStore().deleteTripleCollection(new UriRef(key));
} catch (Exception ex) {
log.error("Failed to delete triple collection " + id, ex);
continue;
}
}
sessionManager.destroySession(session.getID());
}
}
@SuppressWarnings("unchecked")
protected void createRefactorEngineComponent(ComponentFactory factory) {
// Synchronize on the lock to avoid the creation of multiple component instances because of
// concurrent calls.
synchronized (this.lock) {
if (refactorEngineComponentInstance == null) {
this.refactorEngineComponentInstance = factory.newInstance(OsgiUtils.copyConfig(context
.getProperties()));
}
}
}
@Deactivate
protected void deactivate(ComponentContext context) {
// Deactivation clears all the rules and releases OntoNet resources.
UriRef recipeId = new UriRef(engineConfiguration.getRecipeId());
try {
// step 1: get all the rules
log.debug("Recipe {} and its associated rules will be removed from the rule store.", recipeId);
Recipe recipe = null;
try {
recipe = ruleStore.getRecipe(recipeId);
} catch (RecipeConstructionException e) {
log.error(e.getMessage(), e);
}
if (recipe != null) {
// step 2: remove the recipe
try {
if (ruleStore.removeRecipe(recipeId)) {
log.debug(
"Recipe {} has been removed correctly. Note that its rules will be removed separately.",
recipeId);
} else log.error("Recipe {} cannot be removed.", recipeId);
} catch (RecipeEliminationException e) {
log.error(e.getMessage(), e);
}
}
} catch (NoSuchRecipeException ex) {
log.error("The recipe " + engineConfiguration.getRecipeId() + " doesn't exist", ex);
}
// step 3: clear OntoNet resources
scope.getCoreSpace().tearDown();
scope.tearDown();
onManager.deregisterScope(scope);
log.debug("OntoNet resources released : scope {}", scope);
log.info("in " + RefactorEnhancementEngine.class + " deactivate with context " + context);
}
/**
* Fetches from Linked Data the RDF graph associated with an entity and adds it to the target graph. It
* uses the Entity Hub for accessing LOD and fetching entities.
*
* @param entityURI
*            the URI of the entity to resolve
* @param target
*            the graph to populate, or null to have a new {@link IndexedMGraph} created
* @return the populated {@link MGraph}
*/
private MGraph populateWithEntity(String entityURI, MGraph target) {
log.debug("Requesting signature of entity {}", entityURI);
MGraph graph = target != null ? target : new IndexedMGraph();
// Query the Entity Hub
Entity signature = referencedSiteManager.getEntity(entityURI);
if (signature != null) {
RdfRepresentation rdfSignature = RdfValueFactory.getInstance().toRdfRepresentation(
signature.getRepresentation());
graph.addAll(rdfSignature.getRdfGraph());
}
return graph;
}
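/**
* Advertises this engine as a post-processing engine, so that it is executed after the other engines in
* the enhancement chain.
*/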
@Override
public Map<String,Object> getServiceProperties() {
return Collections.unmodifiableMap(Collections.singletonMap(
ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
(Object) ServiceProperties.ORDERING_POST_PROCESSING));
}
/**
* Method for adding ontologies to the scope core ontology.
* <ol>
* <li>Get all the ontologies from the property.</li>
* <li>Create a base scope.</li>
* <li>Retrieve the ontology space from the scope.</li>
* <li>Add the ontologies to the scope via ontology space.</li>
* </ol>
*
* @param engineConfiguration
*            the configuration providing the scope identifier, the core ontologies and the recipe settings
*/
private void initEngine(RefactorEnhancementEngineConf engineConfiguration) {
// IRI dulcifierScopeIRI = IRI.create((String) context.getProperties().get(SCOPE));
String scopeId = engineConfiguration.getScope();
// Create or get the scope with the configured ID
try {
scope = onManager.createOntologyScope(scopeId);
// No need to deactivate a newly created scope.
} catch (DuplicateIDException e) {
scope = onManager.getScope(scopeId);
onManager.setScopeActive(scopeId, false);
}
// All resolvable ontologies stated in the configuration are loaded into the core space.
OntologySpace ontologySpace = scope.getCoreSpace();
ontologySpace.tearDown();
String[] coreScopeOntologySet = engineConfiguration.getScopeCoreOntologies();
List<String> success = new ArrayList<String>(), failed = new ArrayList<String>();
try {
log.info("Will now load requested ontology into the core space of scope '{}'.", scopeId);
OWLOntologyManager sharedManager = OWLManager.createOWLOntologyManager();
IRI physicalIRI = null;
for (int o = 0; o < coreScopeOntologySet.length; o++) {
String url = coreScopeOntologySet[o];
try {
physicalIRI = IRI.create(url);
} catch (Exception e) {
log.error("Malformed ontology location " + url + ". Skipping.", e);
failed.add(url);
continue; // Do not reuse a stale IRI from a previous iteration.
}
try {
// TODO replace with a Clerezza equivalent
ontologySpace.addOntology(new RootOntologyIRISource(physicalIRI, sharedManager));
success.add(url);
} catch (OWLOntologyCreationException e) {
log.error("Failed to load ontology from physical location " + physicalIRI
+ " Continuing with next...", e);
failed.add(url);
}
}
} catch (UnmodifiableOntologyCollectorException ex) {
log.error("Ontology space {} was found locked for modification. Cannot populate.", ontologySpace);
}
for (String s : success)
log.info(" >> {} : SUCCESS", s);
for (String s : failed)
log.info(" >> {} : FAILED", s);
ontologySpace.setUp();
// if (!onManager.containsScope(scopeId)) onManager.registerScope(scope);
onManager.setScopeActive(scopeId, true);
/*
* Next, create in the rule store a recipe that the engine can use to refactor the enhancement
* graphs.
*/
String recipeId = engineConfiguration.getRecipeId();
Recipe recipe = null;
try {
recipe = ruleStore.createRecipe(new UriRef(recipeId), null);
} catch (AlreadyExistingRecipeException e1) {
log.error("A recipe with ID {} already exists in the store.", recipeId);
}
if (recipe != null) {
log.debug("Initialised blank recipe with ID {}", recipeId);
/*
* The set of rules to put in the recipe can be provided by the user. A default set of rules is
* provided in /META-INF/default/seo_rules.sem. Use the engine.refactor property in the Felix
* console to supply your own set of rules to the engine.
*/
String recipeLocation = engineConfiguration.getRecipeLocation();
InputStream recipeStream = null;
String recipeString = null;
if (recipeLocation != null && !recipeLocation.isEmpty()) {
Dereferencer dereferencer = new DereferencerImpl();
try {
recipeStream = dereferencer.resolve(recipeLocation);
log.debug("Loaded recipe from external source {}", recipeLocation);
} catch (FileNotFoundException e) {
log.error("Recipe Stream is null.", e);
}
} else {
// TODO remove this part (or manage it better in the @Activate method).
String loc = "/META-INF/default/seo_rules.sem";
recipeStream = getClass().getResourceAsStream(loc);
log.debug("Loaded default recipe in {}.", loc);
}
if (recipeStream != null) {
BufferedReader reader = new BufferedReader(new InputStreamReader(recipeStream));
StringBuilder recipeBuilder = new StringBuilder();
String line = null;
try {
while ((line = reader.readLine()) != null)
recipeBuilder.append(line);
recipeString = recipeBuilder.toString();
} catch (IOException e) {
log.error("Failed to load Refactor Engine recipe from stream. Aborting read.", e);
recipeString = null;
} finally {
try {
reader.close();
} catch (IOException e) {
log.warn("Failed to close the recipe stream.", e);
}
}
}
log.debug("Recipe content follows :\n{}", recipeString);
if (recipeString != null) {
ruleStore.addRulesToRecipe(recipe, recipeString, null);
log.debug("Added rules to recipe {}", recipeId);
}
}
}
}