entityhub/indexing/source/jenatdb/src/main/java/org/apache/stanbol/entityhub/indexing/source/jenatdb/RdfIndexingSource.java - stanbol - Git at Google

 /*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
 package org.apache.stanbol.entityhub.indexing.source.jenatdb;

 import static org.apache.stanbol.entityhub.indexing.source.jenatdb.Utils.initTDBDataset;

 import java.io.File;
 import java.net.URI;
 import java.net.URISyntaxException;
 import java.util.ArrayList;
 import java.util.Collection;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;
 import java.util.NoSuchElementException;

 import org.apache.commons.io.FilenameUtils;
 import org.apache.marmotta.ldpath.api.backend.RDFBackend;
 import org.apache.stanbol.entityhub.core.model.InMemoryValueFactory;
 import org.apache.stanbol.entityhub.indexing.core.EntityDataIterable;
 import org.apache.stanbol.entityhub.indexing.core.EntityDataIterator;
 import org.apache.stanbol.entityhub.indexing.core.EntityDataProvider;
 import org.apache.stanbol.entityhub.indexing.core.IndexingComponent;
 import org.apache.stanbol.entityhub.indexing.core.config.IndexingConfig;
 import org.apache.stanbol.entityhub.indexing.core.source.ResourceLoader;
 import org.apache.stanbol.entityhub.indexing.core.source.ResourceState;
 import org.apache.stanbol.entityhub.servicesapi.model.Reference;
 import org.apache.stanbol.entityhub.servicesapi.model.Representation;
 import org.apache.stanbol.entityhub.servicesapi.model.Text;
 import org.apache.stanbol.entityhub.servicesapi.model.ValueFactory;
 import org.apache.stanbol.entityhub.servicesapi.util.ModelUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 import com.hp.hpl.jena.datatypes.BaseDatatype;
 import com.hp.hpl.jena.datatypes.DatatypeFormatException;
 import com.hp.hpl.jena.datatypes.RDFDatatype;
 import com.hp.hpl.jena.datatypes.xsd.XSDDateTime;
 import com.hp.hpl.jena.datatypes.xsd.XSDDuration;
 import com.hp.hpl.jena.graph.Node;
 import com.hp.hpl.jena.graph.NodeFactory;
 import com.hp.hpl.jena.graph.Triple;
 import com.hp.hpl.jena.graph.impl.LiteralLabel;
 import com.hp.hpl.jena.query.Query;
 import com.hp.hpl.jena.query.QueryExecution;
 import com.hp.hpl.jena.query.QueryExecutionFactory;
 import com.hp.hpl.jena.query.QueryFactory;
 import com.hp.hpl.jena.query.QuerySolution;
 import com.hp.hpl.jena.query.ResultSet;
 import com.hp.hpl.jena.query.Syntax;
 import com.hp.hpl.jena.rdf.model.AnonId;
 import com.hp.hpl.jena.rdf.model.RDFNode;
 import com.hp.hpl.jena.sparql.core.DatasetGraph;
 import com.hp.hpl.jena.sparql.core.Var;
 import com.hp.hpl.jena.sparql.engine.binding.Binding;
 import com.hp.hpl.jena.tdb.store.DatasetGraphTDB;
 import com.hp.hpl.jena.util.iterator.ExtendedIterator;
 /**
  * Implementation of an {@link IndexingComponent} for Entity data that provides
  * the possibility to both:<ol>
  * <li>randomly access entity data via the {@link EntityDataProvider} interface
  * <li>iterate over all entities in this store via the {@link EntityDataIterator}
  * interface.
  * </ol>
  *
  * @author Rupert Westenthaler
  *
  */
 public class RdfIndexingSource extends AbstractTdbBackend implements EntityDataIterable,EntityDataProvider, RDFBackend<Node> {
     /**
      * The Parameter used to configure the source file(s) or folder(s) relative
      * to the {@link IndexingConfig#getSourceFolder()}. The ',' (comma) is used
      * as separator to parse multiple sources. If this parameter is not
      * present {@link #DEFAULT_SOURCE_FOLDER_NAME} is used.
      */
     public static final String PARAM_SOURCE_FILE_OR_FOLDER = "source";
     /**
      * The Parameter used to configure the name of the directory (relative to
      * the {@link IndexingConfig#getSourceFolder()}) where successfully
      * imported files are copied to. If missing or empty
      * {@link #DEFAULT_IMPORTED_FOLDER_NAME} is used.
      */
     public static final String PARAM_IMPORTED_FOLDER = "imported";
     /**
      * Allows to enable/disable the indexing of Bnodes (see
      * <a href="https://issues.apache.org/jira/browse/STANBOL-765">STANBOL-765</a>
      * for details). A key that is present without a value is interpreted as
      * <code>true</code>.
      */
     private static final String PARAM_BNODE_STATE = "bnode";
     /**
      * If present, this Parameter allows to convert RDF BlankNodes to dereferable
      * URIs by using {bnode-prefix}{bnode-id} (see
      * <a href="https://issues.apache.org/jira/browse/STANBOL-765">STANBOL-765</a>
      * for details). The configured value needs to be a valid {@link URI}.
      */
     public static final String PARAM_BNODE_PREFIX = "bnode-prefix";
     /**
      * The Parameter that can be used to deactivate the importing of sources.
      * If this parameter is set to <code>false</code> the values configured for
      * {@link #PARAM_SOURCE_FILE_OR_FOLDER} are ignored. The default value is
      * <code>true</code>
      */
     public static final String PARAM_IMPORT_SOURCE = "import";
     /**
      * Allows to configure one or more (comma separated) {@link RdfImportFilter}s
      * by their full qualified class names.
      * If present each filter gets the full configuration set for this
      * component parsed.
      * This means that the import filter can be configured by the same
      * configuration as this component.
      */
     public static final String PARAM_IMPORT_FILTER = "import-filter";
     /**
      * The default directory name used to search for RDF files to be imported
      */
     public static final String DEFAULT_SOURCE_FOLDER_NAME = "rdfdata";

     /**
      * The default name of the directory used if {@link #PARAM_IMPORTED_FOLDER}
      * is not configured
      */
     public static final String DEFAULT_IMPORTED_FOLDER_NAME = "imported";
     //protected to allow internal classes direct access (without hidden getter/
     //setter added by the compiler that decrease performance)
     protected final static Logger log = LoggerFactory.getLogger(RdfIndexingSource.class);

     /**
      * The RDF data
      */
     private DatasetGraphTDB indexingDataset;
     /**
      * The valueFactory used to create {@link Representation}s, {@link Reference}s
      * and {@link Text} instances.
      */
     private ValueFactory vf;

     /**
      * The loader used to import the registered RDF source files into the
      * {@link #indexingDataset}. Set to <code>null</code> by {@link #close()}.
      */
     private ResourceLoader loader;

     /**
      * The prefix used to convert BlankNodes to URIs or <code>null</code> if
      * BlankNodes are ignored (see {@link #PARAM_BNODE_PREFIX}).
      */
     protected String bnodePrefix; //protected to allow direct access in inner classes
     /**
      * used for logging a single WARN level entry on the first ignored BlankNode
      */
     private boolean bnodeIgnored = false;
     /**
      * The (optional) filter applied to triples read from the RDF source files.
      * <code>null</code> if no filter is configured.
      */
     private RdfImportFilter importFilter;

     /**
      * Default Constructor relying on that {@link #setConfiguration(Map)} is
      * called afterwards to provide the configuration! Until then only the
      * default {@link ValueFactory} is initialised.
      */
     public RdfIndexingSource(){
         this(null);
     }
     /**
      * Internally used to initialise the {@link ValueFactory} of this instance.
      * @param valueFactory the factory to use or <code>null</code> to fall back
      * to the {@link InMemoryValueFactory}
      */
     private RdfIndexingSource(ValueFactory valueFactory){
         //fall back to the in-memory implementation if no factory was parsed
         this.vf = valueFactory != null
                 ? valueFactory
                 : InMemoryValueFactory.getInstance();
     }
     /**
      * Constructs an instance based on the provided parameter
      * @param modelLocation the directory for the RDF model. MUST NOT be NULL
      * however the parsed {@link File} needs not to exist.
      * @param sourceFileOrDirectory the source file or directory containing the
      * file(s) to import. Parse <code>null</code> if no RDF files need to be
      * imported
      * @param valueFactory The {@link ValueFactory} used to create instances
      * or <code>null</code> to use the default implementation.
      * @param importFilter Optionally an importFilter used for filtering some
      * triples read from the RDF source files.
      * @throws IllegalArgumentException if the parsed model location is
      * <code>null</code>
      */
     public RdfIndexingSource(File modelLocation,
                                File sourceFileOrDirectory,
                                ValueFactory valueFactory,
                                RdfImportFilter importFilter){
         //initialise the ValueFactory (uses the default if null is parsed).
         //Without this call the parsed factory is ignored and 'vf' stays null
         //causing getEntityData(..) to fail with a NullPointerException
         this(valueFactory);
         if(modelLocation == null){
             throw new IllegalArgumentException("The parsed model location MUST NOT be NULL!");
         }
         //init the store
         this.indexingDataset = initTDBDataset(modelLocation);
         //use a ResourceLoader that fails on the first invalid RDF file (STANBOL-328)
         //NOTE(review): the importFilter is only handed to the importer. It is
         //not stored in this instance, so initialise() and close() will not
         //touch it - confirm that callers manage the life cycle of the filter.
         this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset,importFilter), true,true);
         if(sourceFileOrDirectory != null){ //null means nothing to import
             loader.addResource(sourceFileOrDirectory);
         }
     }
     /**
      * Configures this indexing source based on the parsed configuration.
      * The following steps are performed:<ol>
      * <li>the Jena TDB dataset is obtained via {@link Utils#getTDBDataset(Map)}
      * <li>the {@link RdfImportFilter}(s) configured by
      * {@link #PARAM_IMPORT_FILTER} are instantiated and configured
      * <li>the {@link ResourceLoader} is created and the imported folder
      * ({@link #PARAM_IMPORTED_FOLDER}) is set
      * <li>unless deactivated by {@link #PARAM_IMPORT_SOURCE} the sources
      * configured by {@link #PARAM_SOURCE_FILE_OR_FOLDER} are registered
      * <li>the handling of BlankNodes is configured (STANBOL-765)
      * </ol>
      * @param config the configuration. Expected to contain the
      * {@link IndexingConfig} under {@link IndexingConfig#KEY_INDEXING_CONFIG}
      * @throws IllegalArgumentException if a configured import filter can not
      * be loaded/instantiated or if the configured bnode prefix is not a valid
      * URI
      */
     @Override
     public void setConfiguration(Map<String,Object> config) {
         IndexingConfig indexingConfig = (IndexingConfig)config.get(IndexingConfig.KEY_INDEXING_CONFIG);
         //first init the RDF Model
         this.indexingDataset = Utils.getTDBDataset(config);
         //second we need to check if we need to import RDF files to the RDF model
         //look if we want to use an import filter
         Object value = config.get(PARAM_IMPORT_FILTER);
         if(value == null){
             log.info("No RDF Import Filter configured");
             importFilter = null;
         } else {
             String[] filterNames = value.toString().split(",");
             List<RdfImportFilter> filters = new ArrayList<RdfImportFilter>();
             ClassLoader cl = indexingConfig.getClass().getClassLoader();
             for(String filterName : filterNames){
                 filterName = filterName.trim();
                 if(filterName.isEmpty()){
                     continue; //ignore empty entries (e.g. "a,,b" or an empty value)
                 }
                 try {
                     Class<? extends RdfImportFilter> importFilterClass = cl.loadClass(
                         filterName).asSubclass(RdfImportFilter.class);
                     RdfImportFilter filter = importFilterClass.newInstance();
                     filter.setConfiguration(config);
                     filters.add(filter);
                     //log the created filter (not the - still unset - field)
                     log.info("Use RDF ImportFilter {} (type: {})",filter,importFilterClass.getSimpleName());
                 } catch (ClassNotFoundException e) {
                     throw new IllegalArgumentException("Configured RdfImportFilter '"
                         +filterName+"' not found", e);
                 } catch (InstantiationException e) {
                     throw new IllegalArgumentException("Configured RdfImportFilter '"
                             +filterName+"' can not be instantiated", e);
                 } catch (IllegalAccessException e) {
                     throw new IllegalArgumentException("Configured RdfImportFilter '"
                             +filterName+"' can not be created", e);
                 }
             }
             if(filters.isEmpty()){
                 this.importFilter = null;
             } else if(filters.size() == 1){
                 this.importFilter = filters.get(0);
             } else {
                 this.importFilter = new UnionImportFilter(filters.toArray(
                     new RdfImportFilter[filters.size()]));
             }
         }

         boolean failOnError = indexingConfig.isFailOnError();
         //create the ResourceLoader
         this.loader =  new ResourceLoader(new RdfResourceImporter(indexingDataset, importFilter), failOnError);

         value = config.get(PARAM_IMPORTED_FOLDER);
         String importedFolderName;
         if(value != null && !value.toString().isEmpty()){
             importedFolderName = value.toString();
         } else {
             importedFolderName = DEFAULT_IMPORTED_FOLDER_NAME;
         }
         File importedFolder = new File(indexingConfig.getSourceFolder(),importedFolderName);
         log.info("Imported RDF File Folder: {}",importedFolder);
         this.loader.setImportedDir(importedFolder);
         //check if importing is deactivated
         boolean importSource = true; //default is true
         value = config.get(PARAM_IMPORT_SOURCE);
         if(value != null){
             importSource = Boolean.parseBoolean(value.toString());
         }
         if(importSource){ // if we need to import ... check the source config
             log.info("Importing RDF data from:");
             value = config.get(PARAM_SOURCE_FILE_OR_FOLDER);
             if(value == null){ //if not set use the default
                 value = DEFAULT_SOURCE_FOLDER_NAME;
             }
             for(String source : value.toString().split(",")){
                 //allow whitespace around the ',' separator ("a, b")
                 source = source.trim();
                 File sourceFileOrDirectory = indexingConfig.getSourceFile(source);
                 if(sourceFileOrDirectory.exists()){
                     //register the configured source with the ResourceLoader
                     this.loader.addResource(sourceFileOrDirectory);
                 } else {
                     if(FilenameUtils.getExtension(source).isEmpty()){
                         //non existent directory -> create
                         //This is typically the case if this method is called to
                         //initialise the default configuration. So we will try
                         //to create the directory users need to copy the source
                         //RDF files.
                         if(sourceFileOrDirectory.mkdirs()){
                             //this would not be necessary because the directory will
                             //be empty - however I like to be consistent and have
                             //all configured and existent files & dirs added to the
                             //resource loader
                             this.loader.addResource(sourceFileOrDirectory);
                         } else {
                             log.warn("Unable to create directory {} configured to import RDF data from. " +
                                     "You will need to create this directory manually before copying the " +
                                     "RDF files into it.",sourceFileOrDirectory);
                         }
                     } else {
                         log.warn("Unable to find RDF source {} within the indexing Source folder {}",
                             source,indexingConfig.getSourceFolder());
                     }
                 }
             }
             if(log.isInfoEnabled()){
                 for(String registeredSource : loader.getResources(ResourceState.REGISTERED)){
                     log.info(" > "+registeredSource);
                 }
             }
         } else {
             log.info("Importing RDF data deactivated by parameter {}={}",PARAM_IMPORT_SOURCE,value);
         }
         //STANBOL-765: read the bnode state and bnode-prefix from the parsed configuration.
         value = config.get(PARAM_BNODE_STATE);
         final Boolean bnodeState;
         if(value != null){
             bnodeState = value instanceof Boolean ? (Boolean) value :
                 Boolean.parseBoolean(value.toString());
         } else if(config.containsKey(PARAM_BNODE_STATE)){ //support key without value
             bnodeState = true;
         } else {
             bnodeState = null; //undefined
         }
         if(bnodeState == null || bnodeState){ //null or enabled -> consider prefix
             value = config.get(PARAM_BNODE_PREFIX);
             if(value != null){
                 try {
                     new URI(value.toString()); //only used to validate the prefix
                 } catch (URISyntaxException e) {
                     throw new IllegalArgumentException("The configured "+PARAM_BNODE_PREFIX+"='"
                         + value.toString() + "' MUST BE a valid URI!", e);
                 }
                 bnodePrefix = value.toString();
             } else if(bnodeState != null) { //use default prefix if bnodeState is true
                 bnodePrefix = String.format("urn:bnode:%s:",indexingConfig.getName());
             } // else bnodeState == null and no custom prefix -> disable by default
         }
         if(bnodePrefix != null){
             log.info("Indexing of Bnodes enabled (prefix: {})",bnodePrefix);
         } else {
             log.info("Indexing of Bnodes disabled");

         }
     }
     /**
      * Initialisation is needed if either the configured import filter needs
      * to be initialised or if there are registered resources that are not
      * yet loaded.
      */
     @Override
     public boolean needsInitialisation() {
         if(importFilter != null && importFilter.needsInitialisation()){
             return true; //the filter asks for initialisation
         }
         //otherwise it depends on whether registered resources are pending
         return !loader.getResources(ResourceState.REGISTERED).isEmpty();
     }
     /**
      * Initialises the import filter (if present and required) and afterwards
      * loads all resources still registered with the resource loader.
      */
     @Override
     public void initialise(){
         final boolean filterPending = importFilter != null
                 && importFilter.needsInitialisation();
         if(filterPending){
             importFilter.initialise();
         }
         final boolean resourcesPending =
                 !loader.getResources(ResourceState.REGISTERED).isEmpty();
         if(resourcesPending){
             loader.loadResources();
         }
     }
     /**
      * Releases the resource loader, closes the TDB dataset and - if present -
      * the import filter. Calling this method on an instance that was never
      * configured (no dataset available) is supported.
      */
     @Override
     public void close() {
         loader = null;
         try {
             //the dataset is null if setConfiguration(..) was never called
             if(indexingDataset != null){
                 indexingDataset.close();
             }
         } finally {
             //make sure the filter is closed even if closing the dataset fails
             if(importFilter != null){
                 importFilter.close();
             }
         }
     }
     /**
      * Logs all triples of the indexing dataset on DEBUG level by executing a
      * <code>SELECT ?s ?p ?o</code> query. Does nothing if DEBUG level logging
      * is not enabled.
      */
     public void debug(){
         if(!log.isDebugEnabled()){
             return; //nothing would be logged -> avoid iterating over all triples
         }
         String entityVar = "s";
         String fieldVar = "p";
         String valueVar = "o";
         StringBuilder qb = new StringBuilder();
         qb.append(String.format("SELECT ?%s ?%s ?%s \n",
             entityVar,fieldVar,valueVar)); //for the select
         qb.append("{ \n");
         qb.append(String.format("    ?%s ?%s ?%s . \n",
             entityVar,fieldVar,valueVar)); //for the where
         qb.append("} \n");
         log.debug("EntityDataIterator Query: \n"+qb.toString());
         Query q = QueryFactory.create(qb.toString(), Syntax.syntaxARQ);
         QueryExecution qe = QueryExecutionFactory.create(q, indexingDataset.toDataset());
         try {
             ResultSet rs = qe.execSelect();
             Var s = Var.alloc(entityVar);
             Var p = Var.alloc(fieldVar);
             Var o = Var.alloc(valueVar);
             while (rs.hasNext()){
                 Binding b = rs.nextBinding();
                 log.debug("{} {} {}",new Object[]{b.get(s),b.get(p),b.get(o)});
             }
         } finally {
             //release the resources held by the query execution
             qe.close();
         }
     }

     /**
      * Creates an iterator over all entities by selecting all triples
      * (<code>?s ?p ?o</code>) of the indexing dataset.
      */
     @Override
     public EntityDataIterator entityDataIterator() {
         final String entityVar = "s";
         final String fieldVar = "p";
         final String valueVar = "o";
         //the select and the where clause use the same three variables
         final String select = String.format("SELECT ?%s ?%s ?%s \n",
             entityVar,fieldVar,valueVar);
         final String where = String.format("    ?%s ?%s ?%s . \n",
             entityVar,fieldVar,valueVar);
         final String sparql = new StringBuilder(select)
             .append("{ \n")
             .append(where)
             .append("} \n")
             .toString();
         log.debug("EntityDataIterator Query: \n"+sparql);
         Query query = QueryFactory.create(sparql, Syntax.syntaxARQ);
         ResultSet solutions = QueryExecutionFactory.create(
             query, indexingDataset.toDataset()).execSelect();
         return new RdfEntityIterator(solutions,entityVar,fieldVar,valueVar);
     }

     /**
      * Looks up all triples with the parsed id as subject in the default graph
      * of the indexing dataset and converts them to a {@link Representation}.
      * @param id the id of the entity. If a bnode prefix is configured and the
      * id starts with it, the remainder is used as BlankNode id (STANBOL-765);
      * otherwise the id is used as URI.
      * @return the Representation or <code>null</code> if no triples are found
      * or reading the triples failed
      */
     @Override
     public Representation getEntityData(String id) {
         final Node resource;
         //STANBOL-765: check if the parsed id represents an bnode
         if(bnodePrefix != null && id.startsWith(bnodePrefix)){
             resource = NodeFactory.createAnon(AnonId.create(id.substring(bnodePrefix.length())));
         } else {
             resource = NodeFactory.createURI(id);
         }
         Representation source = vf.createRepresentation(id);
         boolean found;
         ExtendedIterator<Triple> outgoing = null;
         try { // There may still be exceptions while reading triples
             outgoing = indexingDataset.getDefaultGraph().find(resource, null, null);
             found = outgoing.hasNext();
             while(outgoing.hasNext()){ //iterate over the statements for that resource
                 Triple statement = outgoing.next();
                 Node predicate = statement.getPredicate();
                 if(predicate == null || !predicate.isURI()){
                     log.warn("Ignore field {} for resource {} because it is null or not an URI!",
                         predicate,resource);
                 } else {
                     String field = predicate.getURI();
                     Node value = statement.getObject();
                     processValue(value, source, field);
                 } //end else predicate != null
             } //end iteration over resource triple
         } catch (Exception e) {
             log.warn("Unable to retrieve entity data for Entity '"+id+"'",e);
             found = false;
         } finally {
             //always release the iterator (not only in case of an exception)
             if(outgoing != null){
                 try {
                     outgoing.close();
                 } catch (Exception e1) { /* ignore: nothing more to clean up */}
             }
         }
         if(found) {
             if(log.isTraceEnabled()){
                 //use the level that is checked by the guard
                 log.trace("RDFTerm: \n{}", ModelUtils.getRepresentationInfo(source));
             }
             return source;
         } else {
             log.debug("No Statements found for id {} (Node: {})!",id,resource);
             return null;
         }
     }
     /**
      * Getter for the Jena TDB {@link DatasetGraph} used as source
      * @return the indexingDataset. May be <code>null</code> if this instance
      * was created by the default constructor and is not yet configured.
      */
     public final DatasetGraphTDB getIndexingDataset() {
         return indexingDataset;
     }

     /**
      * Processes a {@link Node} and adds the according value to the parsed
      * Representation:<ul>
      * <li>URIs are added as references
      * <li>typed literals are added as values. Unknown data types, durations
      * and literals that can not be converted to their data type are added by
      * using their lexical/string form
      * <li>literals without data type are added as natural language texts
      * <li>BlankNodes are converted to references if a bnode prefix is
      * configured and ignored otherwise (STANBOL-765)
      * </ul>
      * Empty literals are ignored.
      * @param value The node to convert to an value for the Representation
      * @param source the representation (MUST NOT be <code>null</code>)
      * @param field the field (MUST NOT be <code>null</code>)
      */
     private void processValue(Node value, Representation source, String field) {
         if(value == null){
             log.warn("Encountered NULL value for field {} and entity {}",
                     field,source.getId());
         } else if(value.isURI()){ //add a reference
             source.addReference(field, value.getURI());
         } else if(value.isLiteral()){ //add a value or a text depending on the dataType
             LiteralLabel ll = value.getLiteral();
             //if the dataType == null , than we can expect a plain literal
             RDFDatatype dataType = ll.getDatatype();
             if(dataType != null){ //add a value
                 try {
                     Object literalValue = ll.getValue();
                     if(literalValue instanceof BaseDatatype.TypedValue){
                         //used for unknown data types
                         // -> in such cases just use the lexical type
                         String lexicalValue = ((BaseDatatype.TypedValue)literalValue).lexicalValue;
                         if(lexicalValue != null && !lexicalValue.isEmpty()){
                             source.add(field,lexicalValue);
                         }
                     } else if(literalValue instanceof XSDDateTime) {
                         source.add(field, ((XSDDateTime)literalValue).asCalendar().getTime()); //Entityhub uses the time
                     } else if(literalValue instanceof XSDDuration) {
                         String duration = literalValue.toString();
                         if(duration != null && !duration.isEmpty()) {
                             source.add(field, duration);
                         }
                     } else if(!ll.getLexicalForm().isEmpty()){
                         source.add(field, literalValue);
                     } //else ignore literals that are empty
                 } catch (DatatypeFormatException e) {
                     log.warn(" Unable to convert {} to {} -> use lecicalForm",
                         ll.getLexicalForm(),ll.getDatatype());
                     //actually add the lexical form as announced by the log
                     //message (otherwise the value would be silently lost)
                     String lexicalForm = ll.getLexicalForm();
                     if(lexicalForm != null && !lexicalForm.isEmpty()){
                         source.add(field, lexicalForm);
                     }
                 }
             } else { //add a text
                 String lexicalForm = ll.getLexicalForm();
                 if(lexicalForm != null && !lexicalForm.isEmpty()){
                     String language = ll.language();
                     // "" is parsed if there is no language
                     if(language!=null && language.length()<1){
                         language = null;
                     }
                     source.addNaturalText(field, lexicalForm, language);
                 } //else ignore empty literals
             }
         } else if(value.isBlank()) {
             if(bnodePrefix != null) { //STANBOL-765: convert Bnodes to URIs
                 StringBuilder sb = new StringBuilder(bnodePrefix);
                 sb.append(value.getBlankNodeId().getLabelString());
                 source.addReference(field, sb.toString());
             } else {
                 logIgnoredBnode(log, source, field, value);
             }
         }  else {
             log.warn("ignoreing value {} for field {} and RDFTerm {} because it is of an unsupported type!",
                     new Object[]{value,field,source.getId()});
         } //end different value node type
     }
     /**
      * Reports an ignored triple. A WARN level message explaining how to
      * enable the indexing of BlankNodes is written for the first call only;
      * the ignored triple itself is logged on DEBUG level for every call.
      * @param log the logger to use
      * @param s subject
      * @param p predicate
      * @param o object
      */
     protected void logIgnoredBnode(Logger log, Object s, Object p, Object o) {
         final boolean firstIgnored = !bnodeIgnored;
         if(firstIgnored){
             bnodeIgnored = true; //ensures the warning is only logged once
             final String warning = "The Indexed RDF Data do contain Blank Nodes. Those are "
                 + "ignored unless the '{}' parameter is set to valid URI. "
                 + "If this parameter is set Bnodes are converted to URIs by "
                 + "using {bnode-prefix}{bnodeId} (see STANBOL-765)";
             log.warn(warning, PARAM_BNODE_PREFIX);
         }
         final Object[] triple = new Object[]{s,p,o};
         log.debug("ignoreing blank node value(s) for Triple {},{},{}!", triple);
     }
     /**
      * Implementation of the iterator over the entities stored in a
      * {@link RdfIndexingSource}. This Iterator is based on query
      * {@link ResultSet}. It uses the low level SPARQL API because this allows
      * to use the same code to create values for Representations
      * @author Rupert Westenthaler
      *
      */
     public final class RdfEntityIterator implements EntityDataIterator {
         /**
          * The query variable representing the entity of a solution
          */
         final Var entityVar;
         /**
          * The query variable representing the field of an entity
          */
         final Var fieldVar;
         /**
          * The query variable representing the value of a field of an entity
          */
         final Var valueVar;
         /**
          * The result set containing all triples in the form of <code>
          * "entity -&gt; field -&gt; value"</code>
          */
         private final ResultSet resultSet;
         /**
          * The {@link Node} representing the current entity or <code>null</code>
          * if the iterator is newly created.<p>
          * {@link Node#isURI()} is guaranteed to return <code>true</code> and
          * {@link Node#getURI()} is guaranteed to return the id for the entity
          * <p>
          * NOTE(review): since STANBOL-765 BlankNodes are also accepted as
          * entities if a bnode prefix is configured - the URI guarantee above
          * seems to predate that change.
          */
         private Node currentEntity = null;
         /**
          * The {@link Node} for the next Entity in the iteration or <code>null</code>
          * in case there are no further or the iterator is newly created (in that
          * case {@link #currentEntity} will be also <code>null</code>)<p>
          * {@link Node#isURI()} is guaranteed to return <code>true</code> and
          * {@link Node#getURI()} is guaranteed to return the id for the entity
          * <p>
          * NOTE(review): since STANBOL-765 BlankNodes are also accepted as
          * entities if a bnode prefix is configured - the URI guarantee above
          * seems to predate that change.
          */
         private Node nextEntity = null;
         /**
          * The Representation of the current Element. Only available after a
          * call to {@link #getRepresentation()}
          */
         private Representation currentRepresentation = null;
         /**
          * Holds all <code>field,value</code> pairs of the current Entity.
          * Elements at even positions represent <code>fields</code> and elements
          * at uneven positions represent <code>values</code>.
          */
         private List<Node> data = new ArrayList<Node>();
         /**
          * The next (not consumed) solution of the query.
          */
         private Binding nextBinding = null;

         /**
          * Creates an iterator over the entities contained in the parsed result
          * set and consumes solutions until the first binding of the first
          * valid entity is found.
          * @param resultSet the result set. MUST NOT be <code>null</code>
          * @param entityVar the name of the variable representing the entity
          * @param fieldVar the name of the variable representing the field
          * @param valueVar the name of the variable representing the value
          * @throws IllegalArgumentException if the parsed result set is
          * <code>null</code> or does not provide one of the parsed variables
          */
         protected RdfEntityIterator(ResultSet resultSet, String entityVar,String fieldVar, String valueVar){
             if(resultSet == null){
                 throw new IllegalArgumentException("The parsed ResultSet MUST NOT be NULL!");
             }
             //check if the ResultSet provides the required variables to perform the query
             List<String> vars = resultSet.getResultVars();
             if(!vars.contains(entityVar)){
                 throw new IllegalArgumentException("The parsed ResultSet is missing the required " +
                         "Variable \""+entityVar+"\" representing the Entity!");
             }
             if(!vars.contains(fieldVar)){
                 throw new IllegalArgumentException("The parsed ResultSet is missing the required " +
                         "Variable \""+fieldVar+"\" representing the Field of an Entity!");
             }
             if(!vars.contains(valueVar)){
                 throw new IllegalArgumentException("The parsed ResultSet is missing the required " +
                         "Variable \""+valueVar+"\" representing the Value of a Field of an Entity!");
             }
             this.entityVar = Var.alloc(entityVar);
             this.fieldVar = Var.alloc(fieldVar);
             this.valueVar = Var.alloc(valueVar);
             this.resultSet = resultSet;
             //this will read until the first binding of the first Entity is found
             initFirst();
         }
         /**
          * Consumes solutions of the result set until the first one with a
          * valid entity (a non empty URI or - if a bnode prefix is configured -
          * a BlankNode) is found. This solution is stored as
          * {@link #nextBinding} and its entity as {@link #nextEntity}. Skipped
          * solutions are reported via
          * {@link RdfIndexingSource#logIgnoredBnode(Logger, Object, Object, Object)}.
          * @throws IllegalStateException if called after the initialisation
          */
         private void initFirst(){
             if(currentEntity != null || nextEntity != null){
                 throw new IllegalStateException("This Mehtod MUST be only used for Initialisation!");
             }
             //consume bindings until the first valid entity starts
             while(nextEntity == null && resultSet.hasNext()){
                 Binding candidate = resultSet.nextBinding();
                 Node subject = candidate.get(entityVar);
                 boolean accepted = (subject.isURI() && !subject.toString().isEmpty())
                         || (subject.isBlank() && bnodePrefix != null);
                 if(!accepted){
                     logIgnoredBnode(log,subject,candidate.get(fieldVar),candidate.get(valueVar));
                     continue;
                 }
                 //keep the already consumed binding for the first call to next
                 nextBinding = candidate;
                 //and remember its entity as the next (first) entity
                 nextEntity = subject;
             }
         }
         /**
          * Releases the data collected for the current entity. Calling this
          * method multiple times has no further effect.
          */
         @Override
         public void close() {
             if(data != null){ //guard against repeated calls to close
                 data.clear();
                 data = null;
             }
             currentEntity = null;
             currentRepresentation = null;
             //Looks like it is not possible to close a resultSet
         }

         /**
          * Getter for the Representation of the current entity. The
          * Representation is created on the first call and reused afterwards.
          * @return the Representation or <code>null</code> if there is no
          * current entity
          */
         @Override
         public Representation getRepresentation() {
             //there is no current entity if
             //  - next() was never called
             //  - the end of the iteration was reached
             if(currentEntity == null){
                 return null;
             }
             if(currentRepresentation == null){ //lazily created
                 currentRepresentation = createRepresentation();
             }
             return currentRepresentation;
         }

         /**
          * Reports whether the underlying {@link #resultSet} provides further
          * solutions.
          * <p>
          * NOTE(review): this only checks the result set. {@link #initFirst()}
          * and the read-ahead in {@link #getNext()} already consume the first
          * solution of the upcoming entity and keep it in {@link #nextBinding}.
          * If that was the last solution of the result set this method returns
          * <code>false</code> while {@link #nextEntity} is still pending - verify
          * whether a last entity with a single triple is skipped.
          */
         @Override
         public boolean hasNext() {
             return resultSet.hasNext();
         }

         /**
          * Moves to the next entity by delegating to {@link #getNext()}.
          * @return the value returned by {@link #getNext()}
          */
         @Override
         public String next() {
             return getNext();
         }

         /**
          * Not supported by this implementation.
          * @throws UnsupportedOperationException on every call
          */
         @Override
         public void remove() {
             throw new UnsupportedOperationException(
                 "Removal of Entities is not supported by this Implementation!");
         }
         /**
          * Iterates over all {@link QuerySolution} of the {@link #resultSet}
          * that do have {@link #currentEntity} as
          * {@link RdfIndexingSource#VARIABLE_NAME_ENTITY VARIABLE_NAME_ENTITY}.
          * NOTES: <ul>
          * <li>This method also initialises the {@link #data} and sets the
          * {@link #nextBinding} to the first solution of the next entity.<br>
          * <li>That means also, that it would iterate over additional
          * {@link RdfIndexingSource#VARIABLE_NAME_ENTITY VARIABLE_NAME_ENTITY}
          * values that are not URIResources ( in cases
          * {@link RDFNode#isURIResource()} returns <code>false</code>)
          * <li>This method is also used to initialise the first Entity
          * @return the URI of the current entity
          */
         private String getNext(){
             //check for more elements
             if(!resultSet.hasNext()){
                 throw new NoSuchElementException("No more Entities available");
             }
             //clean up data of the previous entity
             this.data.clear(); //remove data of the previous entity
             this.currentRepresentation = null; //and the representation
             this.currentEntity = nextEntity; //set the nextEntity to the current

             //and process the first binding already consumed from the resultSet
             //by calling this method for the previous Entity
             if(nextBinding != null){ //will be null for the first Entity
                 processSolution(nextBinding);
             }
             //now get all the other Solutions for the current entity
             boolean next = false;
             while(!next && resultSet.hasNext()){
                 Binding binding = resultSet.nextBinding();
                 Node entityNode = binding.get(entityVar);
                 //NOTES:
                 // * for URIs we need to check for empty URIs!
                 // * STANBOL-765: added support for BlankNodes
                 if((entityNode.isURI() && !entityNode.toString().isEmpty()) ||
                         entityNode.isBlank() && bnodePrefix != null){
                     if(!entityNode.equals(currentEntity)){
                         //start of next Entity
                         this.nextEntity = entityNode; //store the node for the next entity
                         this.nextBinding = binding; //store the first binding of the next entity
                         //we are done for this entity -> exit the loop
                         next = true;
                     } else {
                         processSolution(binding);
                     }
                 } else {
                     logIgnoredBnode(log,entityNode,binding.get(fieldVar),binding.get(valueVar));
                 }
             }
             if(!next){ // exit the loop but still no new entity ... that means
                 nextEntity = null; //there are no more entities
                 nextBinding = null; // and there are also no more solutions
             }
             //STANBOL-765: if current is a Bnode add the bnode-prefix
             return currentEntity.isBlank() ?
                 new StringBuilder(bnodePrefix).append(currentEntity.getBlankNodeId().getLabelString()).toString() :
                     currentEntity.getURI();
         }
         /**
          * Adds the {@link Node}s bound to {@link #fieldVar} and
          * {@link #valueVar} by the parsed {@link Binding} as a pair (field
          * first, value second) to {@link #data}.<p>
          * Bindings where the field is missing or not an URI
          * ({@link Node#isURI()} returns <code>false</code>) are logged as
          * error and ignored. Bindings without a value are silently ignored.
          * @param binding the binding to process
          */
         private void processSolution(Binding binding) {
             final Node property = binding.get(fieldVar);
             if(property == null || !property.isURI()){ //property MUST BE an URI
                 //This may only happen if the Query used to create the ResultSet
                 //containing this Solution does not link the variable
                 //VARIABLE_NAME_FIELD to properties.
                 log.error("Found Field {} for Entity {} that is not an URIResource",property,currentEntity);
                 return;
             }
             final Node object = binding.get(valueVar);
             if(object == null){
                 return; //no value bound -> nothing to store
             }
             //add the pair
             data.add(property);
             data.add(object);
         }
         /**
          * Builds the {@link Representation} for the {@link #currentEntity}
          * based on the field/value pairs stored in {@link #data}. Called by
          * {@link #getRepresentation()} when no representation is cached for
          * the current entity.
          * @return the representation for the current entity
          */
         private Representation createRepresentation() {
             //STANBOL-765: support bNodes by using the bnode-prefix + label as id
             final String id = currentEntity.isBlank() ?
                 new StringBuilder(bnodePrefix).append(currentEntity.getBlankNodeId().getLabelString()).toString() :
                     currentEntity.getURI();
             final Representation result = vf.createRepresentation(id);
             //data holds alternating field and value nodes, so every
             //iteration step consumes two elements
             for(Iterator<Node> pairs = data.iterator(); pairs.hasNext();){
                 final String field = pairs.next().getURI();
                 final Node value = pairs.next();
                 processValue(value, result, field);
             }
             return result;
         }
     }

     /* ----------------------------------------------------------------------
      *     RDF Backend implementation
      * ----------------------------------------------------------------------
      */
     /**
      * Lists the objects of all triples of the default graph of the
      * {@link #indexingDataset} matching the parsed subject and property.<p>
      * STANBOL-765: if a {@link #bnodePrefix} is configured, a subject URI
      * starting with this prefix is converted back to the blank node and
      * blank node objects are returned as URIs using the prefix.
      * @param subject the subject
      * @param property the property
      * @return the objects (a new collection for every call)
      */
     @Override
     public Collection<Node> listObjects(Node subject, Node property) {
         Collection<Node> nodes = new ArrayList<Node>();
         if(bnodePrefix != null && subject.isURI() && subject.getURI().startsWith(bnodePrefix)){
             subject = NodeFactory.createAnon(new AnonId(subject.getURI().substring(bnodePrefix.length())));
         }
         ExtendedIterator<Triple> it = indexingDataset.getDefaultGraph().find(subject, property, null);
         try {
             while(it.hasNext()){
                 //STANBOL-765: we need also to transform bnodes to URIs for the
                 //RDFBackend implementation
                 Node object = it.next().getObject();
                 if(bnodePrefix != null && object.isBlank()){
                     StringBuilder sb = new StringBuilder(bnodePrefix);
                     sb.append(object.getBlankNodeId().getLabelString());
                     object = NodeFactory.createURI(sb.toString());
                 }
                 nodes.add(object);
             }
         } finally {
             //close the iterator also if the iteration fails
             it.close();
         }
         return nodes;
     }
     /**
      * Lists the subjects of all triples of the default graph of the
      * {@link #indexingDataset} matching the parsed property and object.<p>
      * STANBOL-765: if a {@link #bnodePrefix} is configured, an object URI
      * starting with this prefix is converted back to the blank node and
      * blank node subjects are returned as URIs using the prefix.
      * @param property the property
      * @param object the object
      * @return the subjects (a new collection for every call)
      */
     @Override
     public Collection<Node> listSubjects(Node property, Node object) {
         Collection<Node> nodes = new ArrayList<Node>();
         if(bnodePrefix != null && object.isURI() && object.getURI().startsWith(bnodePrefix)){
             object = NodeFactory.createAnon(new AnonId(object.getURI().substring(bnodePrefix.length())));
         }
         ExtendedIterator<Triple> it = indexingDataset.getDefaultGraph().find(null, property, object);
         try {
             while(it.hasNext()){
                 Node subject = it.next().getSubject();
                 //STANBOL-765: we need also to transform bnodes to URIs for the
                 //RDFBackend implementation
                 if(bnodePrefix != null && subject.isBlank()){
                     StringBuilder sb = new StringBuilder(bnodePrefix);
                     sb.append(subject.getBlankNodeId().getLabelString());
                     subject = NodeFactory.createURI(sb.toString());
                 }
                 nodes.add(subject);
             }
         } finally {
             //close the iterator also if the iteration fails
             it.close();
         }
         return nodes;
     }
     /**
      * Since STANBOL-765 BlankNodes are converted to URIs if a
      * {@link #bnodePrefix} is configured. As a consequence users of the
      * {@link RDFBackend} interface may parse such transformed URIs. <p>
      * If the parsed URI starts with the configured {@link #bnodePrefix} this
      * method creates the blank node {@link Node} with the remainder of the
      * URI as id. All other URIs are delegated to the super implementation.
      * @param uri the URI
      * @return the blank node for URIs starting with the {@link #bnodePrefix};
      * otherwise the {@link Node} created by the super implementation
      */
     @Override
     public Node createURI(String uri) {
         final boolean transformedBnode = bnodePrefix != null && uri.startsWith(bnodePrefix);
         if(!transformedBnode){
             return super.createURI(uri);
         }
         //strip the prefix to get back the id of the blank node
         final String bnodeId = uri.substring(bnodePrefix.length());
         return NodeFactory.createAnon(AnonId.create(bnodeId));
     }
     /**
      * {@link RdfImportFilter} wrapping an array of filters. Used in case
      * multiple {@link RdfImportFilter}s are configured.<p>
      * A triple is accepted only if it is accepted by every wrapped filter.
      * Configuration, initialisation and closing are no-ops in this wrapper.
      * @author Rupert Westenthaler
      *
      */
     private class UnionImportFilter implements RdfImportFilter {

         /** the wrapped filters in the order they are asked */
         RdfImportFilter[] filters;

         UnionImportFilter(RdfImportFilter[] filters){
             this.filters = filters;
         }

         @Override
         public void setConfiguration(Map<String,Object> config) {
             //nothing to configure
         }

         @Override
         public boolean needsInitialisation() {
             return false;
         }

         @Override
         public void initialise() {
             //nothing to initialise
         }

         @Override
         public void close() {
             //nothing to close
         }

         /**
          * Asks the wrapped filters in order and stops at the first one that
          * rejects the triple.
          * @return <code>true</code> if all wrapped filters accept the triple
          * (also if there are no wrapped filters)
          */
         @Override
         public boolean accept(Node s, Node p, Node o) {
             for(RdfImportFilter filter : filters){
                 if(!filter.accept(s, p, o)){
                     return false;
                 }
             }
             return true;
         }

     }

 }