blob: df00dbdb84de797aad545e16d2c9fc5463ed1bc4 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.test.helper;
import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_ORGANISATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_PERSON;
import static org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses.DBPEDIA_PLACE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_RELATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_LABEL;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_ENTITY_REFERENCE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.DCTERMS_LINGUISTIC_SYSTEM;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENHANCEMENT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TOPICANNOTATION;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertNull;
import static org.junit.Assert.assertTrue;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.PlainLiteral;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.TypedLiteral;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.ontologies.DCTERMS;
import org.apache.clerezza.rdf.ontologies.XSD;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.CONFIDENCE_LEVEL_ENUM;
public class EnhancementStructureHelper {
/**
 * Runs {@link #validateTextAnnotation(TripleCollection, UriRef, String, Map)}
 * for every resource of the parsed graph that has the rdf:type
 * fise:TextAnnotation.
 * @param enhancements the enhancement graph
 * @param content the enhanced content
 * @param expectedValues the expected values of all validated TextAnnotations.
 * Properties are used as keys. Typical example would be fise:extracted-from
 * with the id of the ContentItem as value; dc-terms:creator with the
 * {@link Class#getName()} as value. <code>null</code> is treated as an
 * empty map.
 * @return the number of found (and validated) TextAnnotations. This is
 * <code>0</code> if the graph does not contain any TextAnnotation.
 */
@SuppressWarnings("unchecked")
public static int validateAllTextAnnotations(TripleCollection enhancements, String content, Map<UriRef,Resource> expectedValues) {
    if (expectedValues == null) {
        expectedValues = Collections.EMPTY_MAP;
    }
    // NOTE: an empty result is intentionally NOT treated as an error, as
    // callers might use the returned count to test that there are no
    // TextAnnotations at all
    int validated = 0;
    Iterator<Triple> typeTriples = enhancements.filter(null, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    while (typeTriples.hasNext()) {
        Triple typeTriple = typeTriples.next();
        UriRef annotation = (UriRef) typeTriple.getSubject();
        validateTextAnnotation(enhancements, annotation, content, expectedValues);
        validated++;
    }
    return validated;
}
/**
 * Validates a single fise:TextAnnotation. The following rules are checked:<ul>
 * <li> the rdf:type fise:TextAnnotation MUST BE present
 * <li> fise:selected-text (optional) MUST BE a PlainLiteral and its value
 * MUST BE contained in the parsed content
 * <li> fise:selection-context (optional) requires fise:selected-text, MUST BE
 * a PlainLiteral contained in the content and MUST contain the selected
 * text. Without fise:selection-context no fise:selected-text is allowed.
 * <li> fise:start (optional) MUST have a single xsd:int value and requires a
 * single xsd:int fise:end value. If a fise:selected-text is present, than
 * the substring [start,end] of the content MUST BE equals to it. Without
 * fise:start neither fise:selected-text, fise:selection-context nor fise:end
 * are allowed.
 * <li> values contained in the parsed expectedValues for fise:selected-text,
 * fise:selection-context, fise:start and fise:end MUST BE equals to the
 * actual values
 * </ul>
 * In addition this method validates the rules defined by fise:Enhancement by
 * calling {@link #validateEnhancement(TripleCollection, UriRef, Map)} as well
 * as the rules for language and named entity annotations.
 * @param enhancements the enhancements graph containing the text annotation
 * @param textAnnotation the TextAnnotation to validate
 * @param content the enhanced content
 * @param expectedValues expected values (properties for the values are used
 * as keys). <code>null</code> is treated as an empty map.
 */
public static void validateTextAnnotation(TripleCollection enhancements, UriRef textAnnotation, String content, Map<UriRef,Resource> expectedValues) {
    if(expectedValues == null){ //no expectations parsed by the caller
        expectedValues = Collections.<UriRef,Resource>emptyMap();
    }
    //validate the rdf:type
    Iterator<Triple> rdfTypeIterator = enhancements.filter(textAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION);
    assertTrue("Parsed Enhancement "+textAnnotation +" is missing the fise:TextAnnotation type ",
        rdfTypeIterator.hasNext());
    Iterator<Triple> selectedTextIterator = enhancements.filter(textAnnotation,
        ENHANCER_SELECTED_TEXT, null);
    // check if the selected text is added (or not)
    Resource selectedTextResource;
    if(selectedTextIterator.hasNext()){
        // test if the selected text is part of the content
        selectedTextResource = selectedTextIterator.next().getObject();
        assertTrue("fise:selected-text MUST BE of type PlainLiteral (uri: "+textAnnotation+")",
            selectedTextResource instanceof PlainLiteral);
        Literal selectedText = (Literal)selectedTextResource;
        assertTrue("The parsed content MUST contain the fise:selected-text value '"
            +selectedText.getLexicalForm()+"' (uri: "+textAnnotation+")!",content.contains(selectedText.getLexicalForm()));
    } else {
        selectedTextResource = null; //no selected text
    }
    //check against an expected value
    Resource expectedSelectedText = expectedValues.get(ENHANCER_SELECTED_TEXT);
    if(expectedSelectedText != null){
        assertEquals("The fise:selected-text is not the expected value "+expectedSelectedText+" (uri: "+textAnnotation+")!",
            expectedSelectedText, selectedTextResource);
    }
    Resource selectionContextResource;
    // test if context is added
    Iterator<Triple> selectionContextIterator = enhancements.filter(textAnnotation,
        ENHANCER_SELECTION_CONTEXT, null);
    if(selectionContextIterator.hasNext()) { //context is optional
        //selection context is not allowed without selected-text
        assertNotNull("If fise:selection-context is present also fise:selected-text MUST BE present (uri: "+textAnnotation+")",
            selectedTextResource);
        // test if the selection context is part of the content
        selectionContextResource = selectionContextIterator.next().getObject();
        assertTrue("The fise:selection-context MUST BE of type PlainLiteral (uri: "+textAnnotation+")",
            selectionContextResource instanceof PlainLiteral);
        //check that the content contains the context
        assertTrue("The fise:selection-context MUST BE contained in the Content | context= "+ selectionContextResource,
            content.contains(((Literal)selectionContextResource).getLexicalForm()));
        //check that the context contains the selected text
        assertTrue("The fise:selected-text value MUST BE containted within the fise:selection-context value",
            ((Literal)selectionContextResource).getLexicalForm().contains(
                ((Literal)selectedTextResource).getLexicalForm()));
    } else {
        assertNull("If no fise:selection-context is present also fise:selected-text MUST BE NOT present!", selectedTextResource);
        selectionContextResource = null;
    }
    Resource expectedSelectionContext = expectedValues.get(ENHANCER_SELECTION_CONTEXT);
    if(expectedSelectionContext != null){
        assertEquals("The value of fise:selection-context has not the expected value "+expectedSelectionContext,
            expectedSelectionContext, selectionContextResource);
    }
    //test start/end if present
    Iterator<Triple> startPosIterator = enhancements.filter(textAnnotation,
        ENHANCER_START, null);
    Iterator<Triple> endPosIterator = enhancements.filter(textAnnotation,
        ENHANCER_END, null);
    //start end is optional, but if start is present, that also end needs to be set
    TypedLiteral startPosLiteral;
    TypedLiteral endPosLiteral;
    if(startPosIterator.hasNext()){
        //NOTE: TextAnnotations might be use to select whole sections of a text
        //      (e.g. see STANBOL-617) in those cases adding the text of the
        //      whole section is not feasible. Because of that fise:start does
        //      NOT require a fise:selection-context to be present.
        Resource startResource = startPosIterator.next().getObject();
        //only a single start position is supported
        assertFalse("fise:start MUST HAVE only a single value (uri: "+textAnnotation+")!",startPosIterator.hasNext());
        assertTrue("fise:start MUST be a typed Literal (uri: "+textAnnotation+")!",startResource instanceof TypedLiteral);
        startPosLiteral = (TypedLiteral) startResource;
        assertEquals("fise:start MUST use xsd:int as data type (uri: "+textAnnotation+")",XSD.int_, startPosLiteral.getDataType());
        Integer start = LiteralFactory.getInstance().createObject(Integer.class, startPosLiteral);
        assertNotNull("Unable to parse Integer from TypedLiteral "+startPosLiteral,start);
        //now get the end
        //end must be defined if start is present
        assertTrue("If fise:start is present also fise:end MUST BE defined (uri: "+textAnnotation+")!",endPosIterator.hasNext());
        Resource endResource = endPosIterator.next().getObject();
        //only a single end position is supported
        assertFalse("fise:end MUST HAVE only a single value (uri: "+textAnnotation+")!",endPosIterator.hasNext());
        assertTrue("fise:end values MUST BE TypedLiterals (uri: "+textAnnotation+")",endResource instanceof TypedLiteral);
        endPosLiteral = (TypedLiteral) endResource;
        assertEquals("fise:end MUST use xsd:int as data type (uri: "+textAnnotation+")",XSD.int_, endPosLiteral.getDataType());
        Integer end = LiteralFactory.getInstance().createObject(Integer.class, endPosLiteral);
        assertNotNull("Unable to parse Integer from TypedLiteral "+endPosLiteral,end);
        //NOTE: endPosLiteral MUST NOT be reset here, as it is needed for the
        //      comparison with the expected fise:end value below
        //check for equality of the selected text and the text on the selected position in the content
        if(selectedTextResource != null){
            //report an invalid range as validation failure instead of
            //letting String#substring(int, int) throw an exception
            assertTrue("The range [fise:start,fise:end] MUST BE within the content (start: "
                + start + ", end: " + end + ", content length: " + content.length()
                + ", uri: " + textAnnotation + ")!",
                0 <= start && start <= end && end <= content.length());
            assertEquals("the substring [fise:start,fise:end] does not correspond to "
                + "the fise:selected-text value '"+((Literal)selectedTextResource).getLexicalForm()
                + "' of this TextAnnotation!",content.substring(start, end), ((Literal)selectedTextResource).getLexicalForm());
        } // else no selected-text present ... unable to test this
    } else {
        assertNull("if fise:selected-text is present also fise:start AND fise:end MUST BE present!",selectedTextResource);
        assertNull("If fise:selection-context is present also fise:start AND fise:end MUST BE present!",selectionContextResource);
        assertFalse("if fise:end is presnet also fise:start AND fise:selection-context MUST BE present!",endPosIterator.hasNext());
        startPosLiteral = null;
        endPosLiteral = null;
    }
    Resource expectedStartPos = expectedValues.get(ENHANCER_START);
    if(expectedStartPos != null){
        assertEquals("The fise:start value is not the expected "+expectedStartPos,
            expectedStartPos, startPosLiteral);
    }
    Resource expectedEndPos = expectedValues.get(ENHANCER_END);
    if(expectedEndPos != null){
        assertEquals("The fise:end value is not the expected "+expectedEndPos,
            expectedEndPos, endPosLiteral);
    }
    //validate fise:Enhancement specific rules
    validateEnhancement(enhancements, textAnnotation, expectedValues);
    //validate for special TextAnnotations
    validateLanguageAnnotations(enhancements,textAnnotation);
    validateNERAnnotations(enhancements,textAnnotation, selectedTextResource);
}
/**
 * Checks the rules for fise:TextAnnotations that annotate the language of
 * the text as defined by
 * <a href="https://issues.apache.org/jira/browse/STANBOL-613">STANBOL-613</a>:
 * a TextAnnotation with a dc:language value MUST have exactly one such value
 * (a PlainLiteral with at least two chars), exactly one dc:type with the
 * value dc:LinguisticSystem and MUST BE returned by
 * {@link EnhancementEngineHelper#getLanguageAnnotations}. TextAnnotations
 * without a dc:language value MUST NOT use dc:LinguisticSystem as dc:type.<p>
 * Called by {@link #validateTextAnnotation(TripleCollection, UriRef, String, Map)}
 * @param enhancements the enhancements graph
 * @param textAnnotation the TextAnnotation to check
 */
private static void validateLanguageAnnotations(TripleCollection enhancements, UriRef textAnnotation) {
    Iterator<Triple> languages = enhancements.filter(textAnnotation, DC_LANGUAGE, null);
    if (!languages.hasNext()) {
        //not a language annotation: dc:LinguisticSystem is not allowed as dc:type
        Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null);
        while (types.hasNext()) {
            Resource type = types.next().getObject();
            assertFalse("Only fise:TextAnnotations without a dc:language value MUST NOT use the "
                + "dc:type value dc:LinguisticSystem (uri "+textAnnotation+")",
                DCTERMS_LINGUISTIC_SYSTEM.equals(type));
        }
        return;
    }
    //a language annotation: validate the (single) dc:language value
    Resource language = languages.next().getObject();
    assertTrue("The dc:language value MUST BE a PlainLiteral", language instanceof PlainLiteral);
    assertTrue("The dc:language value '"+language+"'MUST BE at least two chars long",
        ((Literal) language).getLexicalForm().length() >= 2);
    assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:language value (uri "
        +textAnnotation+")", languages.hasNext());
    //... and the (single) dc:type value
    Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null);
    assertTrue("TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri "
        +textAnnotation+")", types.hasNext());
    Resource type = types.next().getObject();
    assertEquals("TextAnnotations with the dc:language property MUST use dc:type dc:LinguisticSystem (uri "
        +textAnnotation+")", DCTERMS_LINGUISTIC_SYSTEM, type);
    assertFalse("TextAnnotations with the dc:language property MUST only have a single dc:type value (uri "
        +textAnnotation+")", types.hasNext());
    //finally ensure that the helper method finds this language annotation
    List<NonLiteral> found = EnhancementEngineHelper.getLanguageAnnotations(enhancements);
    assertTrue("Language annotation "+textAnnotation+" was not returned by "
        +"EnhancementEngineHelper.getLanguageAnnotations(..)!", found.contains(textAnnotation));
}
/**
 * Checks that a fise:TextAnnotation using dbp-ont:Person,
 * dbp-ont:Organisation or dbp-ont:Place as dc:type does define a
 * fise:selected-text value.<p>
 * Called by {@link #validateTextAnnotation(TripleCollection, UriRef, String, Map)}
 * @param enhancements the enhancements graph
 * @param textAnnotation the TextAnnotation to check
 * @param selectedTextResource the fise:selected-text value of the
 * TextAnnotation or <code>null</code> if none is present
 */
private static void validateNERAnnotations(TripleCollection enhancements, UriRef textAnnotation, Resource selectedTextResource) {
    Iterator<Triple> types = enhancements.filter(textAnnotation, DC_TYPE, null);
    while (types.hasNext()) {
        Resource type = types.next().getObject();
        boolean namedEntityType = DBPEDIA_PERSON.equals(type)
            || DBPEDIA_ORGANISATION.equals(type)
            || DBPEDIA_PLACE.equals(type);
        if (namedEntityType) {
            //the first named entity type is sufficient to require the selected text
            assertNotNull("fise:TextAnnotations with a dc:type of c:type dbp-ont:Person, "
                +"dbp-ont:Organisation or dbp-ont:Place MUST have a fise:selected-text value (uri "
                +textAnnotation+")", selectedTextResource);
            return;
        }
    }
}
/**
 * Runs {@link #validateEntityAnnotation(TripleCollection, UriRef, Map)} for
 * every resource of the parsed graph that has the rdf:type
 * fise:EntityAnnotation.
 * @param enhancements the enhancement graph
 * @param expectedValues the expected values of all validated EntityAnnotations.
 * Properties are used as keys. Typical example would be fise:extracted-from
 * with the id of the ContentItem as value; dc-terms:creator with the
 * {@link Class#getName()} as value. <code>null</code> is treated as an
 * empty map.
 * @return the number of found and validated EntityAnnotations.
 */
@SuppressWarnings("unchecked")
public static int validateAllEntityAnnotations(TripleCollection enhancements,Map<UriRef,Resource> expectedValues) {
    if (expectedValues == null) {
        expectedValues = Collections.EMPTY_MAP;
    }
    int validated = 0;
    Iterator<Triple> typeTriples = enhancements.filter(null, RDF_TYPE, ENHANCER_ENTITYANNOTATION);
    while (typeTriples.hasNext()) {
        Triple typeTriple = typeTriples.next();
        UriRef annotation = (UriRef) typeTriple.getSubject();
        validateEntityAnnotation(enhancements, annotation, expectedValues);
        validated++;
    }
    return validated;
}
/**
 * Checks if a fise:EntityAnnotation is valid. The following rules are
 * checked:<ul>
 * <li> at least one dc:relation MUST BE present and every value MUST either
 * refer a fise:TextAnnotation or a fise:EntityAnnotation that is also
 * referred by dc:requires (STANBOL-766)
 * <li> at least one fise:entity-reference MUST BE present and all values
 * MUST BE URIs
 * <li> at least one fise:entity-label MUST BE present and all values MUST BE
 * PlainLiterals
 * <li> all values of the optional fise:entity-type MUST BE URIs
 * <li> values contained in the parsed expectedValues for
 * fise:entity-reference, fise:entity-label and fise:entity-type MUST BE
 * present as one of the actual values
 * </ul>
 * NOTE that this also validates all fise:Enhancement related requirements by
 * calling {@link #validateEnhancement(TripleCollection, UriRef, Map)}
 * @param enhancements the enhancements graph
 * @param entityAnnotation the entity annotation to validate
 * @param expectedValues expected values (properties for the values are used
 * as keys). <code>null</code> is treated as an empty map.
 */
public static void validateEntityAnnotation(TripleCollection enhancements, UriRef entityAnnotation,Map<UriRef,Resource> expectedValues) {
    if(expectedValues == null){ //no expectations parsed by the caller
        expectedValues = Collections.<UriRef,Resource>emptyMap();
    }
    Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(
        entityAnnotation, DC_RELATION, null);
    // check if the relation to the text annotation is set
    assertTrue("fise:EntityAnnotations MUST HAVE at least a single dc:relation value (EntityAnnotation: "
        +entityAnnotation+")!", relationToTextAnnotationIterator.hasNext());
    while (relationToTextAnnotationIterator.hasNext()) {
        // test if the referred annotations are text annotations or
        // the referenced annotations is a fise:EntityAnnotation AND also a
        // dc:requires link is defined (STANBOL-766)
        UriRef referredTextAnnotation = (UriRef) relationToTextAnnotationIterator.next().getObject();
        assertTrue("fise:EntityAnnotations MUST BE dc:related to a fise:TextAnnotation OR dc:requires and dc:related to the same fise:EntityAnnotation",
            enhancements.filter(referredTextAnnotation, RDF_TYPE,
                ENHANCER_TEXTANNOTATION).hasNext() || (
            enhancements.filter(referredTextAnnotation, RDF_TYPE,
                ENHANCER_ENTITYANNOTATION).hasNext() &&
            enhancements.filter(entityAnnotation, Properties.DC_REQUIRES, referredTextAnnotation).hasNext()));
    }
    // test if an entity is referred
    Iterator<Triple> entityReferenceIterator = enhancements.filter(entityAnnotation,
        ENHANCER_ENTITY_REFERENCE, null);
    assertTrue("fise:entity-reference MUST BE present! (EntityAnnotation: '"
        +entityAnnotation+"')'",entityReferenceIterator.hasNext());
    Resource expectedReferencedEntity = expectedValues.get(ENHANCER_ENTITY_REFERENCE);
    while(entityReferenceIterator.hasNext()){ //check possible multiple references
        Resource entityReferenceResource = entityReferenceIterator.next().getObject();
        // test if the reference is an URI
        assertTrue("fise:entity-reference value MUST BE of URIs",entityReferenceResource instanceof UriRef);
        if(expectedReferencedEntity != null && expectedReferencedEntity.equals(entityReferenceResource)){
            expectedReferencedEntity = null; //found
        }
    }
    assertNull("EntityAnnotation "+entityAnnotation+"fise:entity-reference has not the expected value "
        +expectedReferencedEntity+"!", expectedReferencedEntity);
    //test if the entity label is set
    Iterator<Triple> entityLabelIterator = enhancements.filter(entityAnnotation, ENHANCER_ENTITY_LABEL, null);
    assertTrue("fise:entity-label MUST BE present! (EntityAnnotation: "
        +entityAnnotation+")", entityLabelIterator.hasNext());
    Resource expectedEntityLabel = expectedValues.get(ENHANCER_ENTITY_LABEL);
    while(entityLabelIterator.hasNext()){
        Resource entityLabelResource = entityLabelIterator.next().getObject();
        assertTrue("fise:entity-label values MUST BE PlainLiterals (EntityAnnotation: "+entityAnnotation+")!",
            entityLabelResource instanceof PlainLiteral);
        if(expectedEntityLabel != null && expectedEntityLabel.equals(entityLabelResource)){
            expectedEntityLabel = null; //found
        }
    }
    assertNull("The expected EntityLabel "+expectedEntityLabel+" was not found",
        expectedEntityLabel);
    //test the optional entity types
    Iterator<Triple> entityTypeIterator = enhancements.filter(entityAnnotation, Properties.ENHANCER_ENTITY_TYPE, null);
    Resource expectedEntityType = expectedValues.get(Properties.ENHANCER_ENTITY_TYPE);
    //NOTE: all values need to be processed (not only the first one), because
    //      the order of the values is not defined and the expected type might
    //      be any of them
    while(entityTypeIterator.hasNext()){
        Resource entityTypeResource = entityTypeIterator.next().getObject();
        assertTrue("fise:entity-type values MUST BE URIs",entityTypeResource instanceof UriRef);
        if(expectedEntityType != null && expectedEntityType.equals(entityTypeResource)){
            expectedEntityType = null; //found
        }
    }
    assertNull("The expected fise:entity-type value "+expectedEntityType+" was not found!", expectedEntityType);
    //test all properties required by fise:Enhancement
    validateEnhancement(enhancements, entityAnnotation, expectedValues);
}
/**
 * Validates all fise:Enhancement related properties and values. The
 * following rules are checked:<ul>
 * <li> the rdf:type fise:Enhancement MUST BE present
 * <li> a single dc:creator with an xsd:string TypedLiteral value MUST BE present
 * <li> all values of the optional dc:contributor MUST BE URIs
 * <li> a single dc:created with an xsd:dateTime value that is not in the
 * future MUST BE present
 * <li> all values of the optional dc:modified MUST BE xsd:dateTime values
 * that are not in the future
 * <li> a single fise:extracted-from with an URI as value MUST BE present
 * <li> all dc:relation and dc:requires values MUST BE URIs of resources with
 * the rdf:type fise:Enhancement
 * <li> the optional fise:confidence MUST BE a single xsd:double value in
 * the range [0..1]
 * <li> the optional dc:type MUST BE a single URI
 * <li> the optional fise:confidence-level MUST BE a single URI known by
 * {@link CONFIDENCE_LEVEL_ENUM}
 * <li> values contained in the parsed expectedValues for those properties
 * MUST BE equals to (or in case of dc:contributor contained in) the actual
 * values
 * </ul>
 * NOTE that this method is called by
 * {@link #validateEntityAnnotation(TripleCollection, UriRef, Map)}
 * and {@link #validateTextAnnotation(TripleCollection, UriRef, String, Map)}.
 * @param enhancements the enhancements graph
 * @param enhancement the fise:Enhancement to validate
 * @param expectedValues expected values (properties for the values are used
 * as keys). <code>null</code> is treated as an empty map.
 */
public static void validateEnhancement(TripleCollection enhancements, UriRef enhancement, Map<UriRef,Resource> expectedValues){
    if(expectedValues == null){ //no expectations parsed by the caller
        expectedValues = Collections.<UriRef,Resource>emptyMap();
    }
    //validate the rdf:type
    Iterator<Triple> rdfTypeIterator = enhancements.filter(enhancement, RDF_TYPE, ENHANCER_ENHANCEMENT);
    assertTrue("Parsed Enhancement "+enhancement +" is missing the fise:Enhancement type ",
        rdfTypeIterator.hasNext());
    //validate the creator
    Iterator<Triple> creatorIterator = enhancements.filter(enhancement, Properties.DC_CREATOR, null);
    assertTrue("Enhancements MUST HAVE a creator",creatorIterator.hasNext());
    Resource creatorResource = creatorIterator.next().getObject();
    assertTrue("Creator MUST BE an TypedLiteral (found '"+creatorResource.getClass().getSimpleName()+"')!",
        creatorResource instanceof TypedLiteral);
    assertEquals("The dc:creator value MUST be of dataType xsd:string",
        XSD.string,((TypedLiteral)creatorResource).getDataType());
    Resource expectedCreator = expectedValues.get(Properties.DC_CREATOR);
    if(expectedCreator != null){
        assertEquals("Creator is not the expected value!",expectedCreator, creatorResource);
    }
    assertFalse("only a single creater MUST BE present for an Enhancement", creatorIterator.hasNext());
    //validate the optional contributor
    Resource expectedContributor = expectedValues.get(DCTERMS.contributor);
    Iterator<Triple> contributorIterator = enhancements.filter(enhancement, DCTERMS.contributor, null);
    while(contributorIterator.hasNext()){
        Resource contributorResource = contributorIterator.next().getObject();
        assertTrue("Contributor MUST BE an UriRef (found '"+contributorResource.getClass().getSimpleName()+"')!",
            contributorResource instanceof UriRef);
        //compare the expected with the actual value (and not with itself)
        if(expectedContributor != null && expectedContributor.equals(contributorResource)){
            expectedContributor = null; //found
        }
    }
    assertNull("The expected contributor '"+expectedContributor+"'was not present in the Enhancement",
        expectedContributor);
    //validate creation date
    Iterator<Triple> createdIterator = enhancements.filter(enhancement, Properties.DC_CREATED, null);
    assertTrue("The creation date MUST BE present for an Enhancement", createdIterator.hasNext());
    Resource createdResource = createdIterator.next().getObject();
    assertTrue("Creation date MUST be a typed Literal", createdResource instanceof TypedLiteral);
    assertTrue("Creation date MUST have the dataTyoe xsd:dateTime",
        XSD.dateTime.equals(((TypedLiteral)createdResource).getDataType()));
    Date creationDate = LiteralFactory.getInstance().createObject(Date.class, (TypedLiteral)createdResource);
    assertNotNull("Unable to convert "+createdResource+" to a Java Date object",creationDate);
    Date now = new Date();
    assertTrue("CreationDate MUST NOT be in the Future",now.after(creationDate) || now.equals(creationDate));
    assertFalse("Only a single createnDate MUST BE present", createdIterator.hasNext());
    //validate optional modification date if present
    Iterator<Triple> modDateIterator = enhancements.filter(enhancement, DCTERMS.modified, null);
    while(modDateIterator.hasNext()){
        Resource modDateResource = modDateIterator.next().getObject();
        assertTrue("Modification date MUST be a typed Literal", modDateResource instanceof TypedLiteral);
        assertTrue("Modification date MUST have the dataType xsd:dateTime",
            XSD.dateTime.equals(((TypedLiteral)modDateResource).getDataType()));
        Date modDate = LiteralFactory.getInstance().createObject(Date.class, (TypedLiteral)modDateResource);
        assertNotNull("Unable to convert "+modDateResource+" to a Java Date object",modDate);
        //same as for the creation date: a date equals to now is valid
        assertFalse("Modification date MUST NOT be in the Future",modDate.after(new Date()));
    }
    //validate the fise:extracted-from
    Iterator<Triple> extractedIterator = enhancements.filter(enhancement, Properties.ENHANCER_EXTRACTED_FROM, null);
    assertTrue("The fise:extracted-from property MUST BE present for an Enhancement", extractedIterator.hasNext());
    Resource extractedResource = extractedIterator.next().getObject();
    assertTrue("The fise:extracted-from value MUST BE an UriRef (found '"+extractedResource.getClass().getSimpleName()+"')!",
        extractedResource instanceof UriRef);
    Resource expectedExtractedFrom = expectedValues.get(Properties.ENHANCER_EXTRACTED_FROM);
    if(expectedExtractedFrom != null){
        assertEquals("The fise:extracted-from value is not the expected value!",
            expectedExtractedFrom, extractedResource);
    }
    assertFalse("only a single fise:extracted-from value MUST BE present for an Enhancement", extractedIterator.hasNext());
    //validate that all dc:requires and dc:relation link to resources of type fise:Enhancement
    Iterator<Triple> relatedIterator = enhancements.filter(enhancement, Properties.DC_RELATION, null);
    while(relatedIterator.hasNext()){
        Resource relatedResource = relatedIterator.next().getObject();
        assertTrue("dc:relation values MUST BE URIs", relatedResource instanceof UriRef);
        Iterator<Triple> relatedTypes = enhancements.filter((UriRef)relatedResource, RDF_TYPE, TechnicalClasses.ENHANCER_ENHANCEMENT);
        assertTrue("dc:relation Resources MUST BE of rdf:type fise:Enhancement",relatedTypes.hasNext());
    }
    Iterator<Triple> requiresIterator = enhancements.filter(enhancement, Properties.DC_REQUIRES, null);
    while(requiresIterator.hasNext()){
        Resource requiredResource = requiresIterator.next().getObject();
        assertTrue("dc:requires values MUST BE URIs", requiredResource instanceof UriRef);
        Iterator<Triple> relatedTypes = enhancements.filter((UriRef)requiredResource, RDF_TYPE, TechnicalClasses.ENHANCER_ENHANCEMENT);
        assertTrue("dc:requires Resources MUST BE of rdf:type fise:Enhancement",relatedTypes.hasNext());
    }
    //validate that fise:confidence has [0..1] values and are of type xsd:double
    Iterator<Triple> confidenceIterator = enhancements.filter(enhancement,Properties.ENHANCER_CONFIDENCE,null);
    boolean confidenceRequired = expectedValues.containsKey(Properties.ENHANCER_CONFIDENCE);
    if(confidenceIterator.hasNext()){ //confidence is optional
        Resource confidenceResource = confidenceIterator.next().getObject();
        assertTrue("fise:confidence value MUST BE a TypedLiteral", confidenceResource instanceof TypedLiteral);
        assertTrue("fise:confidence MUST BE xsd:double",
            XSD.double_.equals(((TypedLiteral)confidenceResource).getDataType()));
        Double confidence = LiteralFactory.getInstance().createObject(Double.class, (TypedLiteral)confidenceResource);
        assertNotNull("Unable to convert TypedLiteral '"+confidenceResource+"' to a Java Double value",confidence);
        assertFalse("fise:confidence MUST HAVE [0..1] values",confidenceIterator.hasNext());
        //STANBOL-630: confidence [0..1]
        assertTrue("fise:confidence MUST BE <= 1 (value= '"+confidence
            + "',enhancement " +enhancement+")",
            1.0 >= confidence.doubleValue());
        assertTrue("fise:confidence MUST BE >= 0 (value= '"+confidence
            +"',enhancement "+enhancement+")",
            0.0 <= confidence.doubleValue());
        Resource expectedConfidence = expectedValues.get(Properties.ENHANCER_CONFIDENCE);
        if(expectedConfidence != null){
            assertEquals("The fise:confidence for enhancement "
                +enhancement+" does not have the expected value", expectedConfidence,confidenceResource);
        }
    } else {
        assertFalse("The required fise:confidence value is missing for enhancement "
            +enhancement,confidenceRequired);
    }
    //validate that the (optional) dc:type is an URI and that there are not multiple values
    Iterator<Triple> dcTypeIterator = enhancements.filter(enhancement, Properties.DC_TYPE, null);
    Resource expectedDcType = expectedValues.get(Properties.DC_TYPE);
    if(dcTypeIterator.hasNext()){ //dc:type is optional
        Resource dcTypeResource = dcTypeIterator.next().getObject();
        assertTrue("dc:type values MUST BE URIs",dcTypeResource instanceof UriRef);
        if(expectedDcType != null) {
            assertEquals("The dc:type value is not the expected "+expectedDcType+"!",
                expectedDcType,dcTypeResource);
        }
        assertFalse("Only a single dc:type value is allowed!", dcTypeIterator.hasNext());
    }
    //validate the fise:confidence-level introduced by STANBOL-631
    Iterator<Triple> confidenceLevelIterator = enhancements.filter(
        enhancement, Properties.ENHANCER_CONFIDENCE_LEVEL, null);
    Resource expectedConfidenceValue = expectedValues.get(Properties.ENHANCER_CONFIDENCE_LEVEL);
    if(confidenceLevelIterator.hasNext()){
        Resource confidenceLevelResource = confidenceLevelIterator.next().getObject();
        assertTrue("fise:confidence-level values MUST BE URIs but found "+confidenceLevelResource,
            confidenceLevelResource instanceof UriRef);
        assertNotNull("The fise:confidence-level value MUST BE one of the four "
            + "values defined in the ontology! (found: "+ confidenceLevelResource
            + " | enhancement " + enhancement+")",
            CONFIDENCE_LEVEL_ENUM.getConfidenceLevel((UriRef)confidenceLevelResource));
        assertFalse("The fise:confidence-level property is functional and MUST "
            + "HAVE only a single value (enhancement " +
            enhancement+")!",confidenceLevelIterator.hasNext());
        //also compare a present value with the expected one
        if(expectedConfidenceValue != null){
            assertEquals("The fise:confidence-level for enhancement "
                +enhancement+" does not have the expected value",
                expectedConfidenceValue, confidenceLevelResource);
        }
    } else {
        assertNull("fise:confidence-level "+expectedConfidenceValue
            + "expected for Enhancement "+enhancement
            + "but no 'fise:confidence-level' value present!", expectedConfidenceValue);
    }
}
/**
 * Runs {@link #validateTopicAnnotation(TripleCollection, UriRef, Map)} for
 * every resource of the parsed graph that has the rdf:type
 * fise:TopicAnnotation.
 * @param enhancements the enhancement graph
 * @param expectedValues the expected values of all validated TopicAnnotations.
 * Properties are used as keys. Typical example would be fise:extracted-from
 * with the id of the ContentItem as value; dc-terms:creator with the
 * {@link Class#getName()} as value. <code>null</code> is treated as an
 * empty map.
 * @return the number of found and validated TopicAnnotations.
 */
@SuppressWarnings("unchecked")
public static int validateAllTopicAnnotations(TripleCollection enhancements,Map<UriRef,Resource> expectedValues) {
    if (expectedValues == null) {
        expectedValues = Collections.EMPTY_MAP;
    }
    int validated = 0;
    Iterator<Triple> typeTriples = enhancements.filter(null, RDF_TYPE, ENHANCER_TOPICANNOTATION);
    while (typeTriples.hasNext()) {
        Triple typeTriple = typeTriples.next();
        UriRef annotation = (UriRef) typeTriple.getSubject();
        validateTopicAnnotation(enhancements, annotation, expectedValues);
        validated++;
    }
    return validated;
}
/**
 * Checks if a fise:TopicAnnotation is valid as defined by
 * <a href="https://issues.apache.org/jira/browse/STANBOL-617">STANBOL-617</a>.
 * NOTE that this also validates all fise:Enhancement related requirements by
 * calling {@link #validateEnhancement(TripleCollection, UriRef, Map)}
 * <p>
 * The following checks are performed (in this order):<ul>
 * <li> the rdf:type fise:TopicAnnotation is present
 * <li> at least one dc:relation is present and all dc:relation values are
 *      URIs of resources typed as fise:TextAnnotation
 * <li> all fise:entity-reference values (optional) are URIs
 * <li> at least one fise:entity-label is present and all values are PlainLiterals
 * <li> all fise:entity-type values (optional) are URIs
 * </ul>
 * For fise:entity-reference, fise:entity-label and fise:entity-type an
 * expected value (if present in the parsed map) needs to be equals to at
 * least one of the values found in the graph.
 * @param enhancements the enhancements graph
 * @param topicAnnotation the topic annotation to validate
 * @param expectedValues expected values (properties for the values are used
 * as keys). <code>null</code> is treated like an empty map.
 */
public static void validateTopicAnnotation(TripleCollection enhancements, UriRef topicAnnotation, Map<UriRef,Resource> expectedValues){
    //accept null (as validateAllTopicAnnotations does) instead of failing
    //with a NullPointerException on the first lookup
    if(expectedValues == null){
        expectedValues = Collections.<UriRef,Resource>emptyMap();
    }
    //validate the rdf:type
    Iterator<Triple> rdfTypeIterator = enhancements.filter(topicAnnotation, RDF_TYPE, ENHANCER_TOPICANNOTATION);
    assertTrue("Parsed Enhancement "+topicAnnotation +" is missing the fise:TopicAnnotation type ",
        rdfTypeIterator.hasNext());

    //TopicAnnotations need to be linked to TextAnnotations describing the
    //section of the text that has a specific Topic.
    //If the topic is for the whole text the TextAnnotation will have no
    //selected-text value
    Iterator<Triple> relationToTextAnnotationIterator = enhancements.filter(
        topicAnnotation, DC_RELATION, null);
    // check if the relation to the text annotation is set
    assertTrue("TopicAnnotation "+topicAnnotation+" MUST BE linked by dc:relation "
        + "to at least one fise:TextAnnotation!",
        relationToTextAnnotationIterator.hasNext());
    while (relationToTextAnnotationIterator.hasNext()) {
        Resource relatedResource = relationToTextAnnotationIterator.next().getObject();
        // check the type before casting so that a literal or bNode value is
        // reported as assertion failure and not as ClassCastException
        assertTrue("dc:relation values of TopicAnnotation "+topicAnnotation
            + " MUST BE URIs but found "+relatedResource,
            relatedResource instanceof UriRef);
        // test if the referred annotations are text annotations
        assertTrue("The resource "+relatedResource+" referred by dc:relation of "
            + "TopicAnnotation "+topicAnnotation+" is not a fise:TextAnnotation!",
            enhancements.filter((UriRef)relatedResource, RDF_TYPE,
                ENHANCER_TEXTANNOTATION).hasNext());
    }

    // test if an entity (the topic) is referred (NOTE: in contrast to
    // fise:EntityAnnotations this property is NOT required - cardinality [0..*]
    Iterator<Triple> entityReferenceIterator = enhancements.filter(topicAnnotation,
        ENHANCER_ENTITY_REFERENCE, null);
    Resource expectedReferencedEntity = expectedValues.get(ENHANCER_ENTITY_REFERENCE);
    while(entityReferenceIterator.hasNext()){ //check possible multiple references
        Resource entityReferenceResource = entityReferenceIterator.next().getObject();
        // test if the reference is an URI
        assertTrue("fise:entity-reference value MUST BE of URIs",entityReferenceResource instanceof UriRef);
        if(expectedReferencedEntity != null && expectedReferencedEntity.equals(entityReferenceResource)){
            expectedReferencedEntity = null; //found
        }
    }
    //the expected value is set to null as soon as it is found
    assertNull("TopicAnnotation "+topicAnnotation+" fise:entity-reference has not the expected value "
        +expectedReferencedEntity+"!", expectedReferencedEntity);

    //test if the entity label is set (required)
    Iterator<Triple> entityLabelIterator = enhancements.filter(topicAnnotation, ENHANCER_ENTITY_LABEL, null);
    assertTrue("TopicAnnotation "+topicAnnotation+" is missing the required "
        + "fise:entity-label!", entityLabelIterator.hasNext());
    Resource expectedEntityLabel = expectedValues.get(ENHANCER_ENTITY_LABEL);
    while(entityLabelIterator.hasNext()){
        Resource entityLabelResource = entityLabelIterator.next().getObject();
        assertTrue("fise:entity-label values MUST BE PlainLiterals (TopicAnnotation: "+topicAnnotation+")!",
            entityLabelResource instanceof PlainLiteral);
        if(expectedEntityLabel != null && expectedEntityLabel.equals(entityLabelResource)){
            expectedEntityLabel = null; //found
        }
    }
    assertNull("The expected EntityLabel "+expectedEntityLabel+" was not found",
        expectedEntityLabel);

    // test fise:entity-type(s). NOTE: this is not required - cardinality [0..*]
    Iterator<Triple> entityTypeIterator = enhancements.filter(topicAnnotation, Properties.ENHANCER_ENTITY_TYPE, null);
    Resource expectedEntityType = expectedValues.get(Properties.ENHANCER_ENTITY_TYPE);
    //iterate over ALL values: the property is multi valued, so the expected
    //type is not necessarily the first one returned by the graph
    while(entityTypeIterator.hasNext()){
        Resource entityTypeResource = entityTypeIterator.next().getObject();
        assertTrue("fise:entity-type values MUST BE URIs",entityTypeResource instanceof UriRef);
        if(expectedEntityType != null && expectedEntityType.equals(entityTypeResource)){
            expectedEntityType = null; //found
        }
    }
    assertNull("The expected fise:entity-type value "+expectedEntityType+" was not found!", expectedEntityType);

    //test all properties required by fise:Enhancement
    validateEnhancement(enhancements, topicAnnotation, expectedValues);
}
}