blob: f84bbf8a0db221dd73d0d6dbcbc3eba524fa8b9a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.rdfentities.fise;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.*;
import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_TEXTANNOTATION;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.Arrays;
import java.util.Date;
import java.util.Iterator;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.Resource;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItem;
import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
import org.apache.stanbol.enhancer.rdfentities.RdfEntityFactory;
import org.apache.stanbol.enhancer.rdfentities.fise.EntityAnnotation;
import org.apache.stanbol.enhancer.rdfentities.fise.TextAnnotation;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentSource;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.junit.Test;
/**
* Tests that the RDF facades for the FISE enhancement structure write the expected triples.
*
* @author Rupert Westenthaler
*/
public class TestEnhancementInterfaces {

    /** Sentence used as the content of the {@link ContentItem} under test. */
    public static final String SINGLE_SENTENCE = "Dr. Patrick Marshall (1869 - November 1950) was a"
            + " geologist who lived in New Zealand and worked at the University of Otago.";

    /** {@link ContentSource} wrapping {@link #SINGLE_SENTENCE}. */
    protected static final ContentSource SINGLE_SENTENCE_SOURCE = new StringSource(SINGLE_SENTENCE);

    /** URI set as the creator of every enhancement created by this test. */
    public static final UriRef TEST_ENHANCEMENT_ENGINE_URI = new UriRef("urn:test:dummyEnhancementEngine");

    /**
     * dc:type value added to every {@link EntityAnnotation} created by this test.
     * The spelling of the URI is test data and is kept exactly as it was.
     */
    private static final String ENTITY_DC_TYPE = "http://www.example.org/cv/annotatation-types/entity#Entity";

    private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();

    /**
     * Creates a {@link ContentItem} for the given source using the in-memory factory.
     *
     * @param cs the content to wrap
     * @return the created content item
     * @throws IOException if the factory fails to read the content source
     */
    public static ContentItem createContentItem(ContentSource cs) throws IOException {
        return ciFactory.createContentItem(cs);
    }

    /**
     * Creates three {@link TextAnnotation}s and three {@link EntityAnnotation}s through the
     * {@link RdfEntityFactory} proxies and then verifies, directly on the metadata graph of the
     * content item, that the corresponding triples are present.
     */
    @Test
    public void testEnhancementInterfaces() throws Exception {
        ContentItem ci = createContentItem(SINGLE_SENTENCE_SOURCE);
        UriRef ciUri = new UriRef(ci.getUri().getUnicodeString());
        RdfEntityFactory factory = RdfEntityFactory.createInstance(ci.getMetadata());
        long start = System.currentTimeMillis();

        // Text annotations for the extracted person, location and organisation
        TextAnnotation personAnnotation = createTextAnnotation(factory, ciUri,
                "http://www.example.org/cv/annotatation-types/text#Person", 0.8, "Patrick Marshall");
        TextAnnotation locationAnnotation = createTextAnnotation(factory, ciUri,
                "http://www.example.org/cv/annotatation-types/text#Location", 0.78, "New Zealand");
        createTextAnnotation(factory, ciUri,
                "http://www.example.org/cv/annotatation-types/text#Organisation", 0.78, "University of Otago");

        // Entity annotation for the person text annotation
        createEntityAnnotation(factory, ciUri, 0.56, personAnnotation, "Patrick Marshall",
                "http://rdf.freebase.com/rdf/en/patrick_marshall",
                new UriRef("http://rdf.freebase.com/ns/people.person"),
                new UriRef("http://rdf.freebase.com/ns/common.topic"),
                new UriRef("http://rdf.freebase.com/ns/education.academic"));

        // Two alternative entity annotations for the location text annotation
        createEntityAnnotation(factory, ciUri, 0.98, locationAnnotation, "New Zealand",
                "http://rdf.freebase.com/rdf/en/new_zealand",
                new UriRef("http://rdf.freebase.com/ns/location.location"),
                new UriRef("http://rdf.freebase.com/ns/common.topic"),
                new UriRef("http://rdf.freebase.com/ns/location.country"));
        createEntityAnnotation(factory, ciUri, 0.36, locationAnnotation, "New Zealand",
                "http://rdf.freebase.com/rdf/en/air_new_zealand",
                new UriRef("http://rdf.freebase.com/ns/business.sponsor"),
                new UriRef("http://rdf.freebase.com/ns/common.topic"),
                new UriRef("http://rdf.freebase.com/ns/travel.transport_operator"),
                new UriRef("http://rdf.freebase.com/ns/aviation.airline"),
                new UriRef("http://rdf.freebase.com/ns/aviation.aircraft_owner"),
                new UriRef("http://rdf.freebase.com/ns/business.employer"),
                new UriRef("http://rdf.freebase.com/ns/freebase.apps.hosts.com.appspot.acre.juggle.juggle"),
                new UriRef("http://rdf.freebase.com/ns/business.company"));

        System.out.println("creation time "+(System.currentTimeMillis()-start)+"ms");

        // Now verify the triples that the facades wrote to the metadata graph
        int numberOfTextAnnotations = checkAllTextAnnotations(ci.getMetadata());
        assertEquals("Unexpected number of TextAnnotations", 3, numberOfTextAnnotations);
        int numberOfEntityAnnotations = checkAllEntityAnnotations(ci.getMetadata());
        assertEquals("Unexpected number of EntityAnnotations", 3, numberOfEntityAnnotations);
    }

    /**
     * Creates a {@link TextAnnotation} proxy with a fresh enhancement URI and fills it for a
     * text span of {@link #SINGLE_SENTENCE}.
     *
     * @param factory the factory used to create the proxy
     * @param ciUri the URI of the content item the annotation was extracted from
     * @param dcType the URI added as dc:type
     * @param confidence the confidence to set
     * @param selectedText the selected text; must occur in {@link #SINGLE_SENTENCE}
     * @return the populated proxy
     */
    private static TextAnnotation createTextAnnotation(RdfEntityFactory factory, UriRef ciUri,
            String dcType, double confidence, String selectedText) {
        TextAnnotation annotation = factory.getProxy(createEnhancementURI(), TextAnnotation.class);
        annotation.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
        annotation.setCreated(new Date());
        annotation.setExtractedFrom(ciUri);
        annotation.getDcType().add(new UriRef(dcType));
        annotation.setConfidence(confidence);
        annotation.setSelectedText(selectedText);
        // The offsets are deliberately computed from the values read back through the proxy,
        // so the getters of the facade are exercised as well.
        int startOffset = SINGLE_SENTENCE.indexOf(annotation.getSelectedText());
        assertTrue("Selected text '" + selectedText + "' is not part of the test sentence",
                startOffset >= 0);
        annotation.setStart(startOffset);
        annotation.setEnd(annotation.getStart() + annotation.getSelectedText().length());
        annotation.setSelectionContext(SINGLE_SENTENCE);
        return annotation;
    }

    /**
     * Creates an {@link EntityAnnotation} proxy with a fresh enhancement URI that suggests an
     * entity for the given text annotation.
     *
     * @param factory the factory used to create the proxy
     * @param ciUri the URI of the content item the annotation was extracted from
     * @param confidence the confidence to set
     * @param relatedTextAnnotation the text annotation added to the relations of the new annotation
     * @param entityLabel the label of the suggested entity
     * @param entityReference the URI of the suggested entity
     * @param entityTypes the types of the suggested entity
     * @return the populated proxy
     */
    private static EntityAnnotation createEntityAnnotation(RdfEntityFactory factory, UriRef ciUri,
            double confidence, TextAnnotation relatedTextAnnotation, String entityLabel,
            String entityReference, UriRef... entityTypes) {
        EntityAnnotation annotation = factory.getProxy(createEnhancementURI(), EntityAnnotation.class);
        annotation.setCreator(TEST_ENHANCEMENT_ENGINE_URI);
        annotation.setCreated(new Date());
        annotation.setExtractedFrom(ciUri);
        annotation.getDcType().add(new UriRef(ENTITY_DC_TYPE));
        annotation.setConfidence(confidence);
        annotation.getRelations().add(relatedTextAnnotation);
        annotation.setEntityLabel(entityLabel);
        annotation.setEntityReference(new UriRef(entityReference));
        annotation.getEntityTypes().addAll(Arrays.asList(entityTypes));
        return annotation;
    }

    /**
     * Creates a new URI for an enhancement based on a random UUID.
     */
    private static UriRef createEnhancementURI() {
        //TODO: add some Utility to create Instances to the RdfEntityFactory
        //      this should create a new URI by some default Algorithm
        return new UriRef("urn:enhancement-" + EnhancementEngineHelper.randomUUID());
    }

    /*
     * -----------------------------------------------------------------------
     * Helper Methods to check Text and EntityAnnotations
     * -----------------------------------------------------------------------
     */

    /**
     * Validates every resource typed as entity annotation in the graph.
     *
     * @param g the graph to inspect
     * @return the number of entity annotations found
     */
    private int checkAllEntityAnnotations(MGraph g) {
        Iterator<Triple> entityAnnotationIterator = g.filter(null,
                RDF_TYPE, TechnicalClasses.ENHANCER_ENTITYANNOTATION);
        // fail early with a clear message if nothing was written at all
        assertTrue("Expecting non-empty entityAnnotationIterator", entityAnnotationIterator.hasNext());
        int entityAnnotationCount = 0;
        while (entityAnnotationIterator.hasNext()) {
            UriRef entityAnnotation = requireUriRef("Subject of an EntityAnnotation",
                    entityAnnotationIterator.next().getSubject());
            checkEntityAnnotation(g, entityAnnotation);
            entityAnnotationCount++;
        }
        return entityAnnotationCount;
    }

    /**
     * Validates every resource typed as text annotation in the graph.
     *
     * @param g the graph to inspect
     * @return the number of text annotations found
     */
    private int checkAllTextAnnotations(MGraph g) {
        Iterator<Triple> textAnnotationIterator = g.filter(null,
                RDF_TYPE, ENHANCER_TEXTANNOTATION);
        // fail early with a clear message if nothing was written at all
        assertTrue("Expecting non-empty textAnnotationIterator", textAnnotationIterator.hasNext());
        int textAnnotationCount = 0;
        while (textAnnotationIterator.hasNext()) {
            UriRef textAnnotation = requireUriRef("Subject of a TextAnnotation",
                    textAnnotationIterator.next().getSubject());
            checkTextAnnotation(g, textAnnotation);
            textAnnotationCount++;
        }
        return textAnnotationCount;
    }

    /**
     * Checks that a text annotation has a selected text and a selection context, both being
     * literals whose lexical form is contained in {@link #SINGLE_SENTENCE}.
     */
    private void checkTextAnnotation(MGraph g, UriRef textAnnotation) {
        checkLiteralWithinSentence(g, textAnnotation, ENHANCER_SELECTED_TEXT, "selected text");
        checkLiteralWithinSentence(g, textAnnotation, ENHANCER_SELECTION_CONTEXT, "selection context");
    }

    /**
     * Checks that the subject has at least one value for the property and that the first value
     * returned by the graph is a literal whose lexical form is contained in
     * {@link #SINGLE_SENTENCE}.
     *
     * @param g the graph to inspect
     * @param subject the annotation to check
     * @param property the property whose value is checked
     * @param label human readable name of the property used in failure messages
     */
    private void checkLiteralWithinSentence(MGraph g, UriRef subject, UriRef property, String label) {
        Iterator<Triple> valueIterator = g.filter(subject, property, null);
        assertTrue("Missing " + label + " for " + subject, valueIterator.hasNext());
        Resource object = valueIterator.next().getObject();
        assertTrue("The " + label + " of " + subject + " must be a Literal but was " + object,
                object instanceof Literal);
        assertTrue("The " + label + " of " + subject + " is not part of the test sentence",
                SINGLE_SENTENCE.contains(((Literal) object).getLexicalForm()));
    }

    /**
     * Checks that an entity annotation is related only to text annotations, refers to exactly
     * one entity by URI and has an entity label.
     */
    private void checkEntityAnnotation(MGraph g, UriRef entityAnnotation) {
        Iterator<Triple> relationToTextAnnotationIterator = g.filter(
                entityAnnotation, DC_RELATION, null);
        // the relation to at least one text annotation must be set
        assertTrue("Missing dc:relation for " + entityAnnotation,
                relationToTextAnnotationIterator.hasNext());
        while (relationToTextAnnotationIterator.hasNext()) {
            // every referred annotation must be typed as text annotation
            UriRef referredTextAnnotation = requireUriRef("dc:relation value of " + entityAnnotation,
                    relationToTextAnnotationIterator.next().getObject());
            assertTrue(referredTextAnnotation + " referred by " + entityAnnotation
                    + " is not a TextAnnotation",
                    g.filter(referredTextAnnotation, RDF_TYPE, ENHANCER_TEXTANNOTATION).hasNext());
        }
        // exactly one entity must be referred, and by URI
        Iterator<Triple> entityReferenceIterator = g.filter(entityAnnotation,
                ENHANCER_ENTITY_REFERENCE, null);
        assertTrue("Missing entity reference for " + entityAnnotation,
                entityReferenceIterator.hasNext());
        assertTrue("The entity reference of " + entityAnnotation + " must be a UriRef",
                entityReferenceIterator.next().getObject() instanceof UriRef);
        assertFalse("More than one entity reference for " + entityAnnotation,
                entityReferenceIterator.hasNext());
        // finally the entity label must be set
        Iterator<Triple> entityLabelIterator = g.filter(entityAnnotation,
                ENHANCER_ENTITY_LABEL, null);
        assertTrue("Missing entity label for " + entityAnnotation, entityLabelIterator.hasNext());
    }

    /**
     * Asserts that the resource is a {@link UriRef} and returns it as such, so that an unexpected
     * node type is reported as an assertion failure instead of a {@link ClassCastException}.
     *
     * @param description what the resource represents, used in the failure message
     * @param resource the resource to check
     * @return the resource cast to {@link UriRef}
     */
    private static UriRef requireUriRef(String description, Resource resource) {
        assertTrue(description + " must be a UriRef but was " + resource, resource instanceof UriRef);
        return (UriRef) resource;
    }
}