blob: 338dd2276c87c9bc4675a3b337ffa5caec40e0f5 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.entityhub.indexing.source.jenatdb;
import static org.apache.stanbol.entityhub.indexing.source.jenatdb.Utils.initTDBDataset;
import java.io.File;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.apache.commons.io.FilenameUtils;
import org.apache.marmotta.ldpath.api.backend.RDFBackend;
import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
import org.apache.stanbol.entityhub.indexing.core.IndexingComponent;
import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
import org.apache.stanbol.entityhub.servicesapi.model.Reference;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.hp.hpl.jena.datatypes.BaseDatatype;
import com.hp.hpl.jena.datatypes.DatatypeFormatException;
import com.hp.hpl.jena.datatypes.RDFDatatype;
import com.hp.hpl.jena.datatypes.xsd.XSDDateTime;
import com.hp.hpl.jena.datatypes.xsd.XSDDuration;
import com.hp.hpl.jena.graph.Node;
import com.hp.hpl.jena.graph.NodeFactory;
import com.hp.hpl.jena.graph.Triple;
import com.hp.hpl.jena.graph.impl.LiteralLabel;
import com.hp.hpl.jena.query.Query;
import com.hp.hpl.jena.query.QueryExecutionFactory;
import com.hp.hpl.jena.query.QueryFactory;
import com.hp.hpl.jena.query.QuerySolution;
import com.hp.hpl.jena.query.ResultSet;
import com.hp.hpl.jena.query.Syntax;
import com.hp.hpl.jena.rdf.model.AnonId;
import com.hp.hpl.jena.rdf.model.RDFNode;
import com.hp.hpl.jena.sparql.core.DatasetGraph;
import com.hp.hpl.jena.sparql.core.Var;
import com.hp.hpl.jena.sparql.engine.binding.Binding;
import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
import com.hp.hpl.jena.util.iterator.ExtendedIterator;
/**
* Implementation of an {@link IndexingComponent} for Entity data that provides
* the possibility to both:<ol>
* <li>randomly access entity data via the {@link EntityDataProvider} interface
* <li>iterate over all entities in this store via the {@link EntityDataIterator}
* interface.
* </ol>
*
* @author Rupert Westenthaler
*
*/
public class RdfIndexingSource extends AbstractTdbBackend implements EntityDataIterable,EntityDataProvider, RDFBackend<Node> {
/**
 * The Parameter used to configure the source file(s) or folder(s) relative
 * to the {@link IndexingConfig#getSourceFolder()}. The ',' (comma) is used as
 * separator to pass multiple sources. If the parameter is missing
 * {@link #DEFAULT_SOURCE_FOLDER_NAME} is used.
 */
public static final String PARAM_SOURCE_FILE_OR_FOLDER = "source";
/**
 * The Parameter used to configure the name of the directory (within the
 * {@link IndexingConfig#getSourceFolder()}) that is registered as "imported"
 * directory with the {@link ResourceLoader}. If missing or empty
 * {@link #DEFAULT_IMPORTED_FOLDER_NAME} is used.
 */
public static final String PARAM_IMPORTED_FOLDER = "imported";
/**
 * Allows to enable/disable the indexing of Bnodes (see
 * <a href="https://issues.apache.org/jira/browse/STANBOL-765">STANBOL-765</a>
 * for details). A key without a value is interpreted as <code>true</code>.
 */
private static final String PARAM_BNODE_STATE = "bnode";
/**
 * If present, this Parameter allows to convert RDF BlankNodes to dereferable
 * URIs by using {bnode-prefix}{bnode-id} (see
 * <a href="https://issues.apache.org/jira/browse/STANBOL-765">STANBOL-765</a>
 * for details). The value MUST BE a valid URI.
 */
public static final String PARAM_BNODE_PREFIX = "bnode-prefix";
/**
 * The Parameter that can be used to deactivate the importing of sources.
 * If this parameter is set to <code>false</code> the values configured for
 * {@link #PARAM_SOURCE_FILE_OR_FOLDER} are ignored. The default value is
 * <code>true</code>
 */
public static final String PARAM_IMPORT_SOURCE = "import";
/**
 * Allows to configure one or more {@link RdfImportFilter}s (full qualified
 * class names, separated by ','). Each configured filter gets the full
 * configuration set for this component parsed. This means that the import
 * filter can be configured by the same configuration as this component.
 */
public static final String PARAM_IMPORT_FILTER = "import-filter";
/**
 * The default directory name used to search for RDF files to be imported
 */
public static final String DEFAULT_SOURCE_FOLDER_NAME = "rdfdata";
/**
 * The default name of the "imported" directory (see
 * {@link #PARAM_IMPORTED_FOLDER})
 */
public static final String DEFAULT_IMPORTED_FOLDER_NAME = "imported";
//protected to allow internal classes direct access (without hidden getter/
//setter added by the compiler that decrease performance)
protected final static Logger log = LoggerFactory.getLogger(RdfIndexingSource.class);
/**
 * The RDF data
 */
private DatasetGraphTDB indexingDataset;
/**
 * The valueFactory used to create {@link Representation}s, {@link Reference}s
 * and {@link Text} instances.
 */
private ValueFactory vf;
/**
 * The loader used to import the registered RDF source files into the
 * {@link #indexingDataset}. Set to <code>null</code> by {@link #close()}.
 */
private ResourceLoader loader;
/**
 * The prefix used to convert BlankNodes to URIs or <code>null</code> if
 * BlankNodes are ignored (see STANBOL-765).
 */
protected String bnodePrefix; //protected to allow direct access in inner classes
/**
 * used for logging a single WARN level entry on the first ignored BlankNode
 */
private boolean bnodeIgnored = false;
/**
 * The optional filter applied to triples read from the RDF source files;
 * <code>null</code> if no filter is configured.
 */
private RdfImportFilter importFilter;
/**
 * Creates an unconfigured instance that uses the default
 * {@link ValueFactory}. Callers are expected to provide the actual
 * configuration afterwards via {@link #setConfiguration(Map)}.
 */
public RdfIndexingSource(){
    this((ValueFactory)null);
}
/**
 * Internal constructor that only initialises the {@link ValueFactory}.
 * @param valueFactory the factory to use or <code>null</code> to fall back
 * to the {@link InMemoryValueFactory}
 */
private RdfIndexingSource(ValueFactory valueFactory){
    this.vf = valueFactory != null ? valueFactory : InMemoryValueFactory.getInstance();
}
/**
 * Constructs an instance based on the provided parameter
 * @param modelLocation the directory for the RDF model. MUST NOT be NULL
 * however the parsed {@link File} needs not to exist.
 * @param sourceFileOrDirectory the source file or directory containing the
 * file(s) to import. Parse <code>null</code> if no RDF files need to be
 * imported
 * @param valueFactory The {@link ValueFactory} used to create instances
 * or <code>null</code> to use the default implementation.
 * @param importFilter Optionally an importFilter used for filtering some
 * triples read from the RDF source files.
 * @throws IllegalArgumentException if the parsed model location is
 * <code>null</code>
 */
public RdfIndexingSource(File modelLocation,
                         File sourceFileOrDirectory,
                         ValueFactory valueFactory,
                         RdfImportFilter importFilter){
    //initialise the ValueFactory (falls back to the default one for null).
    //Without this call the parsed valueFactory would be ignored and any
    //attempt to create a Representation would fail with a NPE
    this(valueFactory);
    if(modelLocation == null){
        throw new IllegalArgumentException("The parsed model location MUST NOT be NULL!");
    }
    //init the store
    this.indexingDataset = initTDBDataset(modelLocation);
    //keep the filter so that needsInitialisation(), initialise() and close()
    //treat it the same way as a filter configured via setConfiguration(..)
    this.importFilter = importFilter;
    //use a ResourceLoader that fails on the first invalid RDF file (STANBOL-328)
    this.loader = new ResourceLoader(new RdfResourceImporter(indexingDataset,importFilter), true,true);
    if(sourceFileOrDirectory != null){ //null means: nothing to import
        loader.addResource(sourceFileOrDirectory);
    }
}
/**
 * Configures this indexing source: opens the TDB dataset, instantiates the
 * configured {@link RdfImportFilter}(s), creates the {@link ResourceLoader},
 * registers the RDF sources to import and initialises the BlankNode support
 * (STANBOL-765).
 * @param config the configuration. MUST contain the {@link IndexingConfig}
 * under {@link IndexingConfig#KEY_INDEXING_CONFIG}
 * @throws IllegalArgumentException if a configured import filter can not be
 * loaded/instantiated or the configured bnode prefix is not a valid URI
 */
@Override
public void setConfiguration(Map<String,Object> config) {
    IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
    //first init the RDF Model
    this.indexingDataset = Utils.getTDBDataset(config);
    //second look if we need to use import filter(s)
    this.importFilter = loadImportFilters(config, indexingConfig.getClass().getClassLoader());
    boolean failOnError = indexingConfig.isFailOnError();
    //create the ResourceLoader
    this.loader = new ResourceLoader(new RdfResourceImporter(indexingDataset, importFilter), failOnError);
    Object value = config.get(PARAM_IMPORTED_FOLDER);
    String importedFolderName;
    if(value != null && !value.toString().isEmpty()){
        importedFolderName = value.toString();
    } else {
        importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
    }
    File importedFolder = new File(indexingConfig.getSourceFolder(),importedFolderName);
    log.info("Imported RDF File Folder: {}",importedFolder);
    this.loader.setImportedDir(importedFolder);
    //check if importing is deactivated (default is true)
    value = config.get(PARAM_IMPORT_SOURCE);
    boolean importSource = value == null || Boolean.parseBoolean(value.toString());
    if(importSource){ // if we need to import ... check the source config
        registerImportSources(config, indexingConfig);
    } else {
        //NOTE: parameter name and value are parsed as arguments (and not
        //appended to the format string) so that both placeholders are filled
        log.info("Importing RDF data deactivated by parameter {}={}",PARAM_IMPORT_SOURCE,value);
    }
    //STANBOL-765: parse the bnode configuration
    initBnodeSupport(config, indexingConfig);
}
/**
 * Instantiates and configures the {@link RdfImportFilter}s referenced by
 * {@link #PARAM_IMPORT_FILTER}.
 * @param config the configuration (also parsed to every created filter)
 * @param cl the class loader used to load the filter classes
 * @return <code>null</code> if no filter is configured, the filter itself
 * if exactly one is configured, otherwise a filter combining all of them
 */
private RdfImportFilter loadImportFilters(Map<String,Object> config, ClassLoader cl){
    Object value = config.get(PARAM_IMPORT_FILTER);
    if(value == null){
        log.info("No RDF Import Filter configured");
        return null;
    }
    List<RdfImportFilter> filters = new ArrayList<RdfImportFilter>();
    for(String filterName : value.toString().split(",")){
        filterName = filterName.trim();
        if(filterName.isEmpty()){
            continue; //ignore empty entries such as in "a,,b"
        }
        try {
            Class<? extends RdfImportFilter> importFilterClass = cl.loadClass(
                filterName).asSubclass(RdfImportFilter.class);
            RdfImportFilter filter = importFilterClass.newInstance();
            filter.setConfiguration(config);
            filters.add(filter);
            //log the created filter (not the - still unset - field)
            log.info("Use RDF ImportFilter {} (type: {})",filter,importFilterClass.getSimpleName());
        } catch (ClassNotFoundException e) {
            throw new IllegalArgumentException("Configured RdfImportFilter '"
                +filterName+"' not found", e);
        } catch (ClassCastException e) {
            throw new IllegalArgumentException("Configured RdfImportFilter '"
                +filterName+"' does not implement RdfImportFilter", e);
        } catch (InstantiationException e) {
            throw new IllegalArgumentException("Configured RdfImportFilter '"
                +filterName+"' can not be instantiated", e);
        } catch (IllegalAccessException e) {
            throw new IllegalArgumentException("Configured RdfImportFilter '"
                +filterName+"' can not be created", e);
        }
    }
    if(filters.isEmpty()){
        return null;
    } else if(filters.size() == 1){
        return filters.get(0);
    } else {
        return new UnionImportFilter(filters.toArray(
            new RdfImportFilter[filters.size()]));
    }
}
/**
 * Registers the RDF sources configured by {@link #PARAM_SOURCE_FILE_OR_FOLDER}
 * with the {@link #loader}. Missing directories are created so that users
 * can copy their RDF files into them.
 * @param config the configuration
 * @param indexingConfig the indexing configuration used to resolve sources
 */
private void registerImportSources(Map<String,Object> config, IndexingConfig indexingConfig){
    log.info("Importing RDF data from:");
    Object value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
    if(value == null){ //if not set use the default
        value = DEFAULT_SOURCE_FOLDER_NAME;
    }
    for(String source : value.toString().split(",")){
        source = source.trim(); //allow "a, b" style configurations
        File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
        if(sourceFileOrDirectory.exists()){
            //register the configured source with the ResourceLoader
            this.loader.addResource(sourceFileOrDirectory);
        } else if(FilenameUtils.getExtension(source).isEmpty()){
            //non existent directory -> create
            //This is typically the case if this method is called to
            //initialise the default configuration. So we will try
            //to create the directory users need to copy the source
            //RDF files.
            if(sourceFileOrDirectory.mkdirs()){
                //this would not be necessary because the directory will
                //be empty - however I like to be consistent and have
                //all configured and existent files & dirs added to the
                //resource loader
                this.loader.addResource(sourceFileOrDirectory);
            } else {
                log.warn("Unable to create directory {} configured to improt RDF data from. " +
                        "You will need to create this directory manually before copying the" +
                        "RDF files into it.",sourceFileOrDirectory);
            }
        } else {
            log.warn("Unable to find RDF source {} within the indexing Source folder {}",
                source,indexingConfig.getSourceFolder());
        }
    }
    if(log.isInfoEnabled()){
        for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
            log.info(" > {}",registeredSource);
        }
    }
}
/**
 * Initialises {@link #bnodePrefix} based on {@link #PARAM_BNODE_STATE} and
 * {@link #PARAM_BNODE_PREFIX} (STANBOL-765). Bnode indexing is enabled if a
 * prefix is configured (unless explicitly disabled) or if it is explicitly
 * enabled (in that case a default prefix is used).
 * @param config the configuration
 * @param indexingConfig used to build the default prefix
 * @throws IllegalArgumentException if the configured prefix is not a valid URI
 */
private void initBnodeSupport(Map<String,Object> config, IndexingConfig indexingConfig){
    Object value = config.get(PARAM_BNODE_STATE);
    final Boolean bnodeState;
    if(value != null){
        bnodeState = value instanceof Boolean ? (Boolean) value :
            Boolean.valueOf(Boolean.parseBoolean(value.toString()));
    } else if(config.containsKey(PARAM_BNODE_STATE)){ //support key without value
        bnodeState = Boolean.TRUE;
    } else {
        bnodeState = null; //undefined
    }
    String prefix = null; //reset, so that a re-configuration can disable bnodes
    if(bnodeState == null || bnodeState){ //null or enabled -> consider prefix
        value = config.get(PARAM_BNODE_PREFIX);
        if(value != null){
            try {
                new URI(value.toString());
            } catch (URISyntaxException e) {
                throw new IllegalArgumentException("The configured "+PARAM_BNODE_PREFIX+"='"
                    + value.toString() + "' MUST BE a valid URI!", e);
            }
            prefix = value.toString();
        } else if(bnodeState != null) { //use default prefix if bnodeState is true
            prefix = String.format("urn:bnode:%s:",indexingConfig.getName());
        } // else bnodeState == null and no custom prefix -> disable by default
    }
    this.bnodePrefix = prefix;
    if(bnodePrefix != null){
        log.info("Indexing of Bnodes enabled (prefix: {})",bnodePrefix);
    } else {
        log.info("Indexing of Bnodes disabled");
    }
}
/**
 * Initialisation is required if the import filter asks for it or if there
 * are still registered (not yet imported) RDF resources.
 */
@Override
public boolean needsInitialisation() {
    if(importFilter != null && importFilter.needsInitialisation()){
        return true;
    }
    final boolean pendingResources = !loader.getResources(ResourceState.REGISTERED).isEmpty();
    return pendingResources;
}
/**
 * Initialises the import filter (if it requires initialisation) and
 * afterwards loads all registered RDF resources.
 */
@Override
public void initialise(){
    final boolean initFilter = importFilter != null && importFilter.needsInitialisation();
    if(initFilter){
        importFilter.initialise();
    }
    //evaluated after the filter was initialised (same order as before)
    final boolean nothingToLoad = loader.getResources(ResourceState.REGISTERED).isEmpty();
    if(nothingToLoad){
        return;
    }
    loader.loadResources();
}
/**
 * Releases the {@link ResourceLoader}, closes the TDB dataset and the
 * import filter (if present). The import filter is closed even if closing
 * the dataset fails. Calling this method on an instance that was never
 * configured is a no-op.
 */
@Override
public void close() {
    loader = null;
    try {
        if(indexingDataset != null){ //null if never configured
            indexingDataset.close();
        }
    } finally {
        if(importFilter != null){
            importFilter.close();
        }
    }
}
/**
 * Logs all triples of the indexing dataset on DEBUG level. Because this
 * requires iterating over the whole dataset, the method returns
 * immediately if DEBUG logging is not enabled.
 */
public void debug(){
    if(!log.isDebugEnabled()){
        return; //nothing would be logged -> avoid the full scan
    }
    String entityVar = "s";
    String fieldVar = "p";
    String valueVar = "o";
    StringBuilder qb = new StringBuilder();
    qb.append(String.format("SELECT ?%s ?%s ?%s \n",
        entityVar,fieldVar,valueVar)); //for the select
    qb.append("{ \n");
    qb.append(String.format(" ?%s ?%s ?%s . \n",
        entityVar,fieldVar,valueVar)); //for the where
    qb.append("} \n");
    log.debug("EntityDataIterator Query: \n{}",qb);
    Query q = QueryFactory.create(qb.toString(), Syntax.syntaxARQ);
    ResultSet rs = QueryExecutionFactory.create(q, indexingDataset.toDataset()).execSelect();
    Var s = Var.alloc(entityVar);
    Var p = Var.alloc(fieldVar);
    Var o = Var.alloc(valueVar);
    while (rs.hasNext()){
        Binding b = rs.nextBinding();
        log.debug("{} {} {}",new Object[]{b.get(s),b.get(p),b.get(o)});
    }
}
/**
 * Creates an iterator over all entities of the indexing dataset by
 * selecting all triples of the dataset with a single SPARQL query.
 * @return the iterator over the entities and their data
 */
@Override
public EntityDataIterator entityDataIterator() {
    final String entityVar = "s";
    final String fieldVar = "p";
    final String valueVar = "o";
    //the SELECT clause followed by a WHERE clause with a single triple pattern
    final String selectClause = String.format("SELECT ?%s ?%s ?%s \n",
        entityVar,fieldVar,valueVar);
    final String triplePattern = String.format(" ?%s ?%s ?%s . \n",
        entityVar,fieldVar,valueVar);
    final String queryString = selectClause + "{ \n" + triplePattern + "} \n";
    log.debug("EntityDataIterator Query: \n"+queryString);
    Query query = QueryFactory.create(queryString, Syntax.syntaxARQ);
    ResultSet allTriples = QueryExecutionFactory.create(
        query, indexingDataset.toDataset()).execSelect();
    return new RdfEntityIterator(allTriples, entityVar,fieldVar,valueVar);
}
/**
 * Looks up all outgoing statements of the entity with the parsed id in the
 * default graph and converts them to a {@link Representation}.
 * @param id the id (URI) of the entity. Ids starting with the configured
 * bnode prefix are mapped back to the according BlankNode (STANBOL-765)
 * @return the Representation or <code>null</code> if no statements are
 * found for the parsed id or the data could not be read
 */
@Override
public Representation getEntityData(String id) {
    final Node resource;
    //STANBOL-765: check if the parsed id represents an bnode
    if(bnodePrefix != null && id.startsWith(bnodePrefix)){
        resource = NodeFactory.createAnon(AnonId.create(id.substring(bnodePrefix.length())));
    } else {
        resource = NodeFactory.createURI(id);
    }
    Representation source = vf.createRepresentation(id);
    boolean found;
    ExtendedIterator<Triple> outgoing = null;
    try { // There may still be exceptions while reading triples
        outgoing = indexingDataset.getDefaultGraph().find(resource, null, null);
        found = outgoing.hasNext();
        while(outgoing.hasNext()){ //iterate over the statements for that resource
            Triple statement = outgoing.next();
            Node predicate = statement.getPredicate();
            if(predicate == null || !predicate.isURI()){
                log.warn("Ignore field {} for resource {} because it is null or not an URI!",
                    predicate,resource);
            } else {
                String field = predicate.getURI();
                Node value = statement.getObject();
                processValue(value, source, field);
            } //end else predicate != null
        } //end iteration over resource triple
    } catch (Exception e) {
        log.warn("Unable to retrieve entity data for Entity '"+id+"'",e);
        found = false;
    } finally {
        //always release the iterator (not only in case of an exception)
        if(outgoing != null){
            try {
                outgoing.close();
            } catch (Exception ignored) {
                //best effort: a failure while closing must not hide the result
                log.debug("Unable to close triple iterator for Entity '"+id+"'",ignored);
            }
        }
    }
    if(found) {
        if(log.isTraceEnabled()){
            //log on the same level as checked by the guard
            log.trace("RDFTerm: \n{}", ModelUtils.getRepresentationInfo(source));
        }
        return source;
    } else {
        log.debug("No Statements found for id {} (Node: {})!",id,resource);
        return null;
    }
}
/**
 * Provides access to the Jena TDB {@link DatasetGraph} this indexing
 * source reads its data from.
 * @return the indexingDataset
 */
public final DatasetGraphTDB getIndexingDataset() {
    return this.indexingDataset;
}
/**
 * Processes a {@link Node} and adds the according value to the parsed
 * Representation: <ul>
 * <li> URIs are added as references
 * <li> typed literals are added as values. If the lexical form can not be
 * converted to the data type the lexical form itself is added.
 * <li> plain literals are added as natural language texts
 * <li> BlankNodes are added as references if a {@link #bnodePrefix} is
 * configured. Otherwise they are ignored.
 * </ul> Empty literals are ignored.
 * @param value The node to convert to an value for the Representation
 * @param source the representation (MUST NOT be <code>null</code>
 * @param field the field (MUST NOT be <code>null</code>)
 */
private void processValue(Node value, Representation source, String field) {
    if(value == null){
        log.warn("Encountered NULL value for field {} and entity {}",
            field,source.getId());
    } else if(value.isURI()){ //add a reference
        source.addReference(field, value.getURI());
    } else if(value.isLiteral()){ //add a value or a text depending on the dataType
        LiteralLabel ll = value.getLiteral();
        //if the dataType == null , than we can expect a plain literal
        RDFDatatype dataType = ll.getDatatype();
        if(dataType != null){ //add a value
            Object literalValue;
            try {
                literalValue = ll.getValue();
            } catch (DatatypeFormatException e) {
                log.warn(" Unable to convert {} to {} -> use lecicalForm",
                    ll.getLexicalForm(),ll.getDatatype());
                //fall back to the lexical form. It is added below like any
                //other value (previously it was silently dropped)
                literalValue = ll.getLexicalForm();
            }
            if(literalValue instanceof BaseDatatype.TypedValue){
                //used for unknown data types
                // -> in such cases just use the lexical type
                String lexicalValue = ((BaseDatatype.TypedValue)literalValue).lexicalValue;
                if(lexicalValue != null && !lexicalValue.isEmpty()){
                    source.add(field,lexicalValue);
                }
            } else if(literalValue instanceof XSDDateTime) {
                source.add(field, ((XSDDateTime)literalValue).asCalendar().getTime()); //Entityhub uses the time
            } else if(literalValue instanceof XSDDuration) {
                String duration = literalValue.toString();
                if(duration != null && !duration.isEmpty()) {
                    source.add(field, duration);
                }
            } else if(!ll.getLexicalForm().isEmpty()){
                source.add(field, literalValue);
            } //else ignore literals that are empty
        } else { //add a text
            String lexicalForm = ll.getLexicalForm();
            if(lexicalForm != null && !lexicalForm.isEmpty()){
                String language = ll.language();
                // "" is parsed if there is no language
                if(language!=null && language.length()<1){
                    language = null;
                }
                source.addNaturalText(field, lexicalForm, language);
            } //else ignore empty literals
        }
    } else if(value.isBlank()) {
        if(bnodePrefix != null) { //STANBOL-765: convert Bnodes to URIs
            StringBuilder sb = new StringBuilder(bnodePrefix);
            sb.append(value.getBlankNodeId().getLabelString());
            source.addReference(field, sb.toString());
        } else {
            logIgnoredBnode(log, source, field, value);
        }
    } else {
        log.warn("ignoreing value {} for field {} and RDFTerm {} because it is of an unsupported type!",
            new Object[]{value,field,source.getId()});
    } //end different value node type
}
/**
 * Reports an ignored BlankNode. A WARN level message explaining how to
 * enable the indexing of BlankNodes is written for the first ignored
 * BlankNode only; the ignored triple itself is always logged on DEBUG level.
 * @param log the logger to use
 * @param s subject
 * @param p predicate
 * @param o object
 */
protected void logIgnoredBnode(Logger log, Object s, Object p, Object o) {
    final boolean firstIgnoredBnode = !bnodeIgnored;
    if(firstIgnoredBnode){
        bnodeIgnored = true; //ensures the warning is logged only once
        final String hint = "The Indexed RDF Data do contain Blank Nodes. Those are "
            + "ignored unless the '{}' parameter is set to valid URI. "
            + "If this parameter is set Bnodes are converted to URIs by "
            + "using {bnode-prefix}{bnodeId} (see STANBOL-765)";
        log.warn(hint, PARAM_BNODE_PREFIX);
    }
    final Object[] ignoredTriple = new Object[]{s,p,o};
    log.debug("ignoreing blank node value(s) for Triple {},{},{}!", ignoredTriple);
}
/**
 * Implementation of the iterator over the entities stored in a
 * {@link RdfIndexingSource}. This Iterator is based on query
 * {@link ResultSet}. It uses the low level SPARQL API because this allows
 * to use the same code to create values for Representations.<p>
 * The iterator groups consecutive solutions with the same entity. To detect
 * the end of an entity it needs to read one solution ahead. This pre-fetched
 * solution is kept in {@link #nextBinding} / {@link #nextEntity} and is
 * also what {@link #hasNext()} is based on.
 * @author Rupert Westenthaler
 *
 */
public final class RdfEntityIterator implements EntityDataIterator {
    /**
     * Variable bound to the entity of a solution
     */
    final Var entityVar;
    /**
     * Variable bound to the field of a solution
     */
    final Var fieldVar;
    /**
     * Variable bound to the value of a solution
     */
    final Var valueVar;
    /**
     * The result set containing all triples in the form of <code>
     * "entity -&gt; field -&gt; value"</code>
     */
    private final ResultSet resultSet;
    /**
     * The {@link Node} representing the current entity or <code>null</code>
     * if {@link #next()} was not yet called or the iterator was closed.
     * This is either an URI or - if a bnode prefix is configured - a
     * BlankNode.
     */
    private Node currentEntity = null;
    /**
     * The {@link Node} for the next Entity in the iteration or <code>null</code>
     * in case there are no further entities. This is either an URI or - if a
     * bnode prefix is configured - a BlankNode.
     */
    private Node nextEntity = null;
    /**
     * The Representation of the current Element. Only available after a
     * call to {@link #getRepresentation()}
     */
    private Representation currentRepresentation = null;
    /**
     * Holds all <code>field,value"</code> pairs of the current Entity.
     * Elements at even positions represent<code>fields</code> and elements
     * at uneven positions represent <code>values</code>.
     */
    private final List<Node> data = new ArrayList<Node>();
    /**
     * The first solution of the {@link #nextEntity}. It was already
     * consumed from the {@link #resultSet} but not yet added to {@link #data}
     */
    private Binding nextBinding = null;

    /**
     * Creates the iterator and reads ahead to the first valid entity.
     * @param resultSet the solutions. MUST NOT be <code>null</code>
     * @param entityVar the name of the variable bound to the entity
     * @param fieldVar the name of the variable bound to the field
     * @param valueVar the name of the variable bound to the value
     * @throws IllegalArgumentException if the result set is <code>null</code>
     * or does not provide one of the parsed variables
     */
    protected RdfEntityIterator(ResultSet resultSet, String entityVar,String fieldVar, String valueVar){
        if(resultSet == null){
            throw new IllegalArgumentException("The parsed ResultSet MUST NOT be NULL!");
        }
        //check if the ResultSet provides the required variables to perform the query
        List<String> vars = resultSet.getResultVars();
        if(!vars.contains(entityVar)){
            throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
                    "Variable \""+entityVar+"\" representing the Entity!");
        }
        if(!vars.contains(fieldVar)){
            throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
                    "Variable \""+fieldVar+"\" representing the Field of an Entity!");
        }
        if(!vars.contains(valueVar)){
            throw new IllegalArgumentException("The parsed ResultSet is missing the required" +
                    "Variable \""+valueVar+"\" representing the Value of a Field of an Entity!");
        }
        this.entityVar = Var.alloc(entityVar);
        this.fieldVar = Var.alloc(fieldVar);
        this.valueVar = Var.alloc(valueVar);
        this.resultSet = resultSet;
        //this will read until the first binding of the first Entity is found
        initFirst();
    }
    /**
     * Checks if the parsed node can be used as entity: non empty URIs and -
     * if a bnode prefix is configured (STANBOL-765) - BlankNodes.
     * @param entityNode the node bound to the entity variable
     * @return <code>true</code> if solutions for this node are processed
     */
    private boolean isSupportedEntity(Node entityNode){
        if(entityNode == null){
            return false;
        }
        return (entityNode.isURI() && !entityNode.toString().isEmpty()) ||
                (entityNode.isBlank() && bnodePrefix != null);
    }
    /**
     * Getter for the id of an entity node.
     * @param entity an URI or BlankNode
     * @return the URI or - for BlankNodes - the bnode prefix followed by the
     * label of the BlankNode (STANBOL-765)
     */
    private String getEntityId(Node entity){
        if(entity.isBlank()){
            return new StringBuilder(bnodePrefix).append(
                entity.getBlankNodeId().getLabelString()).toString();
        }
        return entity.getURI();
    }
    /**
     * Consumes solutions until the first one with a supported entity is
     * found and stores it as {@link #nextEntity} / {@link #nextBinding}.
     * @throws IllegalStateException if called after initialisation
     */
    private void initFirst(){
        if(currentEntity != null || nextEntity != null){
            throw new IllegalStateException("This Mehtod MUST be only used for Initialisation!");
        }
        //consume binding until the first valid entity starts
        while(nextEntity == null && resultSet.hasNext()){
            Binding firstValid = resultSet.nextBinding();
            Node entityNode = firstValid.get(entityVar);
            if(isSupportedEntity(entityNode)){
                //store it temporarily in nextBinding
                nextBinding = firstValid;
                //store it as next (first) entity
                nextEntity = entityNode;
            } else {
                logIgnoredBnode(log,entityNode,firstValid.get(fieldVar),firstValid.get(valueVar));
            }
        }
    }
    /**
     * Releases the state held by this iterator. After closing
     * {@link #hasNext()} returns <code>false</code> and
     * {@link #getRepresentation()} returns <code>null</code>.
     */
    @Override
    public void close() {
        data.clear();
        currentEntity = null;
        currentRepresentation = null;
        nextEntity = null;
        nextBinding = null;
        //Looks like it is not possible to close a resultSet
    }
    @Override
    public Representation getRepresentation() {
        //current Entity will be null if
        // - next() was never called
        // - the iterator was closed
        if(currentEntity == null){
            return null;
        } else if(currentRepresentation == null){
            currentRepresentation = createRepresentation();
        }
        return currentRepresentation;
    }
    /**
     * NOTE: this can not be based on the state of the {@link #resultSet}
     * because the first solution of the next entity is always pre-fetched.
     * If the last entity has only a single solution the result set is
     * already exhausted while this entity was not yet returned.
     */
    @Override
    public boolean hasNext() {
        return nextEntity != null;
    }
    @Override
    public String next() {
        return getNext();
    }
    @Override
    public void remove() {
        throw new UnsupportedOperationException(
            "Removal of Entities is not supported by this Implementation!");
    }
    /**
     * Makes the pre-fetched {@link #nextEntity} the {@link #currentEntity}
     * and collects all consecutive solutions of the {@link #resultSet} that
     * are bound to this entity in {@link #data}.
     * NOTES: <ul>
     * <li>This method sets {@link #nextEntity} and {@link #nextBinding} to
     * the first solution of the following entity (or <code>null</code> if
     * there is none).
     * <li>Solutions with entities that are not supported (e.g. BlankNodes
     * if no bnode prefix is configured) are skipped.
     * </ul>
     * @return the id of the current entity
     * @throws NoSuchElementException if there are no more entities
     */
    private String getNext(){
        //check for more elements
        if(nextEntity == null){
            throw new NoSuchElementException("No more Entities available");
        }
        //clean up data of the previous entity
        this.data.clear(); //remove data of the previous entity
        this.currentRepresentation = null; //and the representation
        this.currentEntity = nextEntity; //set the nextEntity to the current
        //and process the first binding already consumed from the resultSet
        //by initFirst() or by calling this method for the previous Entity
        if(nextBinding != null){
            processSolution(nextBinding);
        }
        //now get all the other Solutions for the current entity
        boolean next = false;
        while(!next && resultSet.hasNext()){
            Binding binding = resultSet.nextBinding();
            Node entityNode = binding.get(entityVar);
            //NOTES:
            // * for URIs we need to check for empty URIs!
            // * STANBOL-765: added support for BlankNodes
            if(isSupportedEntity(entityNode)){
                if(!entityNode.equals(currentEntity)){
                    //start of next Entity
                    this.nextEntity = entityNode; //store the node for the next entity
                    this.nextBinding = binding; //store the first binding of the next entity
                    //we are done for this entity -> exit the loop
                    next = true;
                } else {
                    processSolution(binding);
                }
            } else {
                logIgnoredBnode(log,entityNode,binding.get(fieldVar),binding.get(valueVar));
            }
        }
        if(!next){ // exit the loop but still no new entity ... that means
            nextEntity = null; //there are no more entities
            nextBinding = null; // and there are also no more solutions
        }
        //STANBOL-765: if current is a Bnode add the bnode-prefix
        return getEntityId(currentEntity);
    }
    /**
     * Processes a {@link Binding} by storing the {@link Node}s for the
     * variables {@link #fieldVar} and {@link #valueVar} to {@link #data}.
     * This method ensures that both values are not <code>null</code> and
     * that the {@link Node} representing the field is an URI (
     * returns <code>true</code> for {@link Node#isURI()}).
     * @param binding the binding to process
     */
    private void processSolution(Binding binding) {
        Node field = binding.get(fieldVar);
        if(field != null && field.isURI()){ //property MUST BE an URI
            Node value = binding.get(valueVar);
            if(value != null){
                //add the pair
                data.add(field);
                data.add(value);
            }
        } else {
            //This may only happen if the Query used to create the ResultSet
            //containing this Solution does not link the field variable
            //to properties.
            log.error("Found Field {} for Entity {} that is not an URIResource",field,currentEntity);
        }
    }
    /**
     * Used to create the Representation the first time
     * {@link #getRepresentation()} is called for the current entity. The
     * information for the Representation are already stored in {@link #data}
     */
    private Representation createRepresentation() {
        //STANBOL-765: getEntityId(..) supports bNodes
        Representation representation = vf.createRepresentation(getEntityId(currentEntity));
        Iterator<Node> it = data.iterator();
        while(it.hasNext()){
            //data contains field,value pairs
            //because of that we call two times next for
            String field = it.next().getURI(); //the field
            Node value = it.next();//and the value
            processValue(value, representation, field);
        }
        return representation;
    }
}
/* ----------------------------------------------------------------------
* RDF Backend implementation
* ----------------------------------------------------------------------
*/
/**
 * Lists the objects of all triples in the default graph with the parsed
 * subject and property.
 * @param subject the subject. URIs starting with the configured bnode
 * prefix are mapped back to the according BlankNode (STANBOL-765)
 * @param property the property
 * @return the objects. BlankNodes are converted to URIs if a bnode prefix
 * is configured
 */
@Override
public Collection<Node> listObjects(Node subject, Node property) {
    Collection<Node> nodes = new ArrayList<Node>();
    if(bnodePrefix != null && subject != null && subject.isURI()
            && subject.getURI().startsWith(bnodePrefix)){
        subject = NodeFactory.createAnon(AnonId.create(subject.getURI().substring(bnodePrefix.length())));
    }
    ExtendedIterator<Triple> it = indexingDataset.getDefaultGraph().find(subject, property, null);
    try {
        while(it.hasNext()){
            //STANBOL-765: we need also to transform bnodes to URIs for the
            //RDFBackend implementation
            Node object = it.next().getObject();
            if(bnodePrefix != null && object.isBlank()){
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(object.getBlankNodeId().getLabelString());
                object = NodeFactory.createURI(sb.toString());
            }
            nodes.add(object);
        }
    } finally {
        it.close(); //release the iterator also if reading a triple fails
    }
    return nodes;
}
/**
 * Lists the subjects of all triples in the default graph with the parsed
 * property and object.
 * @param property the property
 * @param object the object. URIs starting with the configured bnode
 * prefix are mapped back to the according BlankNode (STANBOL-765)
 * @return the subjects. BlankNodes are converted to URIs if a bnode prefix
 * is configured
 */
@Override
public Collection<Node> listSubjects(Node property, Node object) {
    Collection<Node> nodes = new ArrayList<Node>();
    if(bnodePrefix != null && object != null && object.isURI()
            && object.getURI().startsWith(bnodePrefix)){
        object = NodeFactory.createAnon(AnonId.create(object.getURI().substring(bnodePrefix.length())));
    }
    ExtendedIterator<Triple> it = indexingDataset.getDefaultGraph().find(null, property, object);
    try {
        while(it.hasNext()){
            Node subject = it.next().getSubject();
            //STANBOL-765: we need also to transform bnodes to URIs for the
            //RDFBackend implementation
            if(bnodePrefix != null && subject.isBlank()){
                StringBuilder sb = new StringBuilder(bnodePrefix);
                sb.append(subject.getBlankNodeId().getLabelString());
                subject = NodeFactory.createURI(sb.toString());
            }
            nodes.add(subject);
        }
    } finally {
        it.close(); //release the iterator also if reading a triple fails
    }
    return nodes;
}
/**
 * Since STANBOL-765 BlankNodes are converted to URIs if a {@link #bnodePrefix}
 * is configured. Because of that users of the {@link RDFBackend} interface
 * may request nodes for such transformed URIs. <p>
 * This method maps URIs starting with the {@link #bnodePrefix} back to the
 * according BlankNode. All other URIs are handled by the super
 * implementation.
 * @param uri the uri
 * @return the BlankNode for transformed bnode URIs; otherwise the node
 * created by the super implementation
 */
@Override
public Node createURI(String uri) {
    final boolean transformedBnode = bnodePrefix != null && uri.startsWith(bnodePrefix);
    if(!transformedBnode){
        return super.createURI(uri);
    }
    //strip the prefix to get the original id of the BlankNode
    final String bnodeId = uri.substring(bnodePrefix.length());
    return NodeFactory.createAnon(AnonId.create(bnodeId));
}
/**
* used in case multiple {@link RdfImportFilter}s are configured.
* @author Rupert Westenthaler
*
*/
private class UnionImportFilter implements RdfImportFilter {
RdfImportFilter[] filters;
UnionImportFilter(RdfImportFilter[] filters){
this.filters = filters;
}
@Override
public void setConfiguration(Map<String,Object> config) {}
@Override
public boolean needsInitialisation() { return false;}
@Override
public void initialise() {}
@Override
public void close() {}
@Override
public boolean accept(Node s, Node p, Node o) {
boolean state = true;
for(int i=0;state && i < filters.length;i++){
state = filters[i].accept(s, p, o);
}
return state;
}
}
}