enhancement-engines/entitylinking/engine/src/test/java/org/apache/stanbol/enhancer/engines/entitylinking/engine/EntityLinkingEngineTest.java - stanbol - Git at Google

 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
  * this work for additional information regarding copyright ownership.
  * The ASF licenses this file to You under the Apache License, Version 2.0
  * (the "License"); you may not use this file except in compliance with
  * the License.  You may obtain a copy of the License at
  *
  *     http://www.apache.org/licenses/LICENSE-2.0
  *
  * Unless required by applicable law or agreed to in writing, software
  * distributed under the License is distributed on an "AS IS" BASIS,
  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  * See the License for the specific language governing permissions and
  * limitations under the License.
  */
 package org.apache.stanbol.enhancer.engines.entitylinking.engine;

 import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.PHRASE_ANNOTATION;
 import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.POS_ANNOTATION;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_CREATOR;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_LANGUAGE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_EXTRACTED_FROM;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.RDF_TYPE;
 import static org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses.ENHANCER_ENTITYANNOTATION;
 import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateAllTextAnnotations;
 import static org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper.validateEntityAnnotation;
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
 import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;

 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.HashMap;
 import java.util.Iterator;
 import java.util.List;
 import java.util.Map;

 import org.apache.clerezza.commons.rdf.Literal;
 import org.apache.clerezza.rdf.core.LiteralFactory;
 import org.apache.clerezza.commons.rdf.Graph;
 import org.apache.clerezza.commons.rdf.RDFTerm;
 import org.apache.clerezza.commons.rdf.Triple;
 import org.apache.clerezza.commons.rdf.IRI;
 import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
 import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
 import org.apache.stanbol.commons.indexedgraph.IndexedGraph;
 import org.apache.stanbol.enhancer.contentitem.inmemory.InMemoryContentItemFactory;
 import org.apache.stanbol.enhancer.engines.entitylinking.Entity;
 import org.apache.stanbol.enhancer.engines.entitylinking.LabelTokenizer;
 import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig;
 import org.apache.stanbol.enhancer.engines.entitylinking.config.LanguageProcessingConfig;
 import org.apache.stanbol.enhancer.engines.entitylinking.config.TextProcessingConfig;
 import org.apache.stanbol.enhancer.engines.entitylinking.config.EntityLinkerConfig.RedirectProcessingMode;
 import org.apache.stanbol.enhancer.engines.entitylinking.impl.EntityLinker;
 import org.apache.stanbol.enhancer.engines.entitylinking.impl.LinkedEntity;
 import org.apache.stanbol.enhancer.engines.entitylinking.impl.Suggestion;
 import org.apache.stanbol.enhancer.engines.entitylinking.impl.TestSearcherImpl;
 import org.apache.stanbol.enhancer.engines.entitylinking.labeltokenizer.SimpleLabelTokenizer;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
 import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
 import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
 import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag;
 import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory;
 import org.apache.stanbol.enhancer.nlp.pos.Pos;
 import org.apache.stanbol.enhancer.nlp.pos.PosTag;
 import org.apache.stanbol.enhancer.servicesapi.ContentItem;
 import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
 import org.apache.stanbol.enhancer.servicesapi.EngineException;
 import org.apache.stanbol.enhancer.servicesapi.impl.StringSource;
 import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
 import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
 import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
 import org.apache.stanbol.enhancer.test.helper.EnhancementStructureHelper;
 import org.junit.After;
 import org.junit.AfterClass;
 import org.junit.Before;
 import org.junit.BeforeClass;
 import org.junit.Test;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

 /**
  * TODO: convert this to an integration test!
  * @author Rupert Westenthaler
  */
 public class EntityLinkingEngineTest {

     /** Logger used to report the linked entities while results are validated. */
     private static final Logger log = LoggerFactory.getLogger(EntityLinkingEngineTest.class);

     /**
      * The context for the tests (same as in TestOpenNLPEnhancementEngine)
      */
     public static final String TEST_TEXT = "Dr. Patrick Marshall (1869 - November 1950) was a"
         + " geologist who lived in New Zealand and worked at the University of Otago.";

     /**
      * Same text as {@link #TEST_TEXT}, but with the order of the given and the
      * family name changed ("Marshall Patrick"). Both texts have the same
      * length, so everything after the name keeps its character offsets.
      */
     public static final String TEST_TEXT_WO = "Dr. Marshall Patrick (1869 - November 1950) was a"
         + " geologist who lived in New Zealand and worked at the University of Otago.";

     /** Analysed version of {@link #TEST_TEXT}; assigned in {@link #setUpServices()}. */
     private static AnalysedText TEST_ANALYSED_TEXT;
     /** Analysed version of {@link #TEST_TEXT_WO}; assigned in {@link #setUpServices()}. */
     private static AnalysedText TEST_ANALYSED_TEXT_WO;

 //    public static final String TEST_TEXT2 = "A CBS televised debate between Australia's " +
 //            "candidates for Prime Minister in the upcoming US election has been rescheduled " +
 //            "and shortend, to avoid a clash with popular cookery sow MasterChef.";

     /** Factory used to create the blobs and content items needed by the tests. */
     private static final ContentItemFactory ciFactory = InMemoryContentItemFactory.getInstance();

     /** Site name handed to the test searcher and expected as entityhub:site value. */
     private static final String TEST_REFERENCED_SITE_NAME = "dummRefSiteName";

     /** Noun phrase annotation value shared by all chunks added to the test texts. */
     private static final Value<PhraseTag> NOUN_PHRASE = Value.value(new PhraseTag("NP",LexicalCategory.Noun),1d);

     /** Searcher holding the test entities; assigned in {@link #setUpServices()}. */
     static TestSearcherImpl searcher;

     /** Property used for the labels of the test entities (rdfs:label). */
     public static final IRI NAME = new IRI(NamespaceEnum.rdfs+"label");
     /** Property used for the types of the test entities (rdf:type). */
     public static final IRI TYPE = new IRI(NamespaceEnum.rdf+"type");
     /** Property used to link an entity with its redirect (rdfs:seeAlso). */
     public static final IRI REDIRECT = new IRI(NamespaceEnum.rdfs+"seeAlso");

     /**
      * Creates the fixtures shared by all tests: the searcher populated with the
      * test entities and the two manually annotated {@link AnalysedText}s.
      * @throws IOException if one of the fixtures can not be created
      */
     @BeforeClass
     public static void setUpServices() throws IOException {
         searcher = new TestSearcherImpl(TEST_REFERENCED_SITE_NAME,NAME,new SimpleLabelTokenizer());

         //all test entities are described by (and share) this single graph
         final Graph data = new IndexedGraph();
         final IRI skosConcept = new IRI(NamespaceEnum.skos+"Concept");

         final IRI patrickMarshall = new IRI("urn:test:PatrickMarshall");
         data.add(new TripleImpl(patrickMarshall, NAME, new PlainLiteralImpl("Patrick Marshall")));
         data.add(new TripleImpl(patrickMarshall, TYPE, OntologicalClasses.DBPEDIA_PERSON));
         searcher.addEntity(new Entity(patrickMarshall, data));

         //an entity that redirects to an other one ...
         final IRI geologistRedirect = new IRI("urn:test:redirect:Geologist");
         final IRI geologist = new IRI("urn:test:Geologist");
         data.add(new TripleImpl(geologist, NAME, new PlainLiteralImpl("Geologist")));
         data.add(new TripleImpl(geologist, TYPE, skosConcept));
         data.add(new TripleImpl(geologist, REDIRECT, geologistRedirect));
         searcher.addEntity(new Entity(geologist, data));
         //... and the target of that redirect
         data.add(new TripleImpl(geologistRedirect, NAME, new PlainLiteralImpl("Geologe (redirect)")));
         data.add(new TripleImpl(geologistRedirect, TYPE, skosConcept));
         searcher.addEntity(new Entity(geologistRedirect, data));

         final IRI newZealand = new IRI("urn:test:NewZealand");
         data.add(new TripleImpl(newZealand, NAME, new PlainLiteralImpl("New Zealand")));
         data.add(new TripleImpl(newZealand, TYPE, OntologicalClasses.DBPEDIA_PLACE));
         searcher.addEntity(new Entity(newZealand, data));

         final IRI universityOfOtago = new IRI("urn:test:UniversityOfOtago");
         data.add(new TripleImpl(universityOfOtago, NAME, new PlainLiteralImpl("University of Otago")));
         data.add(new TripleImpl(universityOfOtago, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
         searcher.addEntity(new Entity(universityOfOtago, data));

         final IRI university = new IRI("urn:test:University");
         data.add(new TripleImpl(university, NAME, new PlainLiteralImpl("University")));
         data.add(new TripleImpl(university, TYPE, skosConcept));
         searcher.addEntity(new Entity(university, data));

         final IRI otago = new IRI("urn:test:Otago");
         data.add(new TripleImpl(otago, NAME, new PlainLiteralImpl("Otago")));
         data.add(new TripleImpl(otago, TYPE, OntologicalClasses.DBPEDIA_PLACE));
         searcher.addEntity(new Entity(otago, data));

         //a second Otago (both as place and as university)
         final IRI otagoTexas = new IRI("urn:test:Otago_Texas");
         data.add(new TripleImpl(otagoTexas, NAME, new PlainLiteralImpl("Otago (Texas)")));
         data.add(new TripleImpl(otagoTexas, NAME, new PlainLiteralImpl("Otago")));
         data.add(new TripleImpl(otagoTexas, TYPE, OntologicalClasses.DBPEDIA_PLACE));
         searcher.addEntity(new Entity(otagoTexas, data));

         final IRI universityOfOtagoTexas = new IRI("urn:test:UniversityOfOtago_Texas");
         data.add(new TripleImpl(universityOfOtagoTexas, NAME,
             new PlainLiteralImpl("University of Otago (Texas)")));
         data.add(new TripleImpl(universityOfOtagoTexas, TYPE, OntologicalClasses.DBPEDIA_ORGANISATION));
         searcher.addEntity(new Entity(universityOfOtagoTexas, data));

         //both analysed texts need to exist before the first one is annotated
         final AnalysedTextFactory atFactory = AnalysedTextFactory.getDefaultInstance();
         TEST_ANALYSED_TEXT = atFactory.createAnalysedText(
             ciFactory.createBlob(new StringSource(TEST_TEXT)));
         TEST_ANALYSED_TEXT_WO = atFactory.createAnalysedText(
             ciFactory.createBlob(new StringSource(TEST_TEXT_WO)));

         //common annotations plus the name in the original order: "Patrick" [4,11), "Marshall" [12,20)
         initAnalyzedText(TEST_ANALYSED_TEXT);
         TEST_ANALYSED_TEXT.addChunk(0, "Dr. Patrick Marshall".length())
             .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
         TEST_ANALYSED_TEXT.addToken(4, 11)
             .addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
         TEST_ANALYSED_TEXT.addToken(12, 20)
             .addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));

         //common annotations plus the name in the changed order: "Marshall" [4,12), "Patrick" [13,20)
         initAnalyzedText(TEST_ANALYSED_TEXT_WO);
         TEST_ANALYSED_TEXT_WO.addChunk(0, "Dr. Marshall Patrick".length())
             .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
         TEST_ANALYSED_TEXT_WO.addToken(4, 12)
             .addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
         TEST_ANALYSED_TEXT_WO.addToken(13, 20)
             .addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
     }

     /**
      * Adds the sentence, the noun phrase chunks and the POS tagged tokens that
      * are common to both test texts. The annotations covering the name at the
      * start of the text ("Patrick Marshall" / "Marshall Patrick") are NOT added
      * by this method.
      * <p>
      * All offsets are calculated based on {@link #TEST_TEXT}. This is also
      * valid for {@link #TEST_TEXT_WO}, as both texts only differ in the order
      * of the two names and therefore share the offsets of all other words.
      * @param at the analysed text to add the annotations to
      */
     private static void initAnalyzedText(AnalysedText at) {
         //the sentence spans the text that is actually annotated (the passed
         //instance and not one of the static fixtures)
         at.addSentence(0, at.getEnd());

         final int newZealandStart = TEST_TEXT.indexOf("New Zealand");
         final int geologistStart = TEST_TEXT.indexOf("geologist");
         final int universityStart = TEST_TEXT.indexOf("the University of Otago");

         //the noun phrases
         at.addChunk(newZealandStart, newZealandStart+"New Zealand".length())
             .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
         at.addChunk(geologistStart, geologistStart+"geologist".length())
             .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);
         //ends before the '.' at the end of the text
         at.addChunk(universityStart, TEST_TEXT.length()-1)
             .addAnnotation(PHRASE_ANNOTATION, NOUN_PHRASE);

         //tokens for "Dr" and "."
         at.addToken(0, 2).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.Abbreviation),1d));
         at.addToken(2, 3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));

         //tokens for "(1869 - November 1950)"
         int start = TEST_TEXT.indexOf("(1869 - November 1950)");
         at.addToken(start,start+1).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("(",Pos.OpenBracket),1d));
         at.addToken(start+1,start+5).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NUM",Pos.Numeral),1d));
         at.addToken(start+6,start+7).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("-",Pos.Hyphen),1d));
         at.addToken(start+8,start+16).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
         at.addToken(start+17,start+21).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NUM",Pos.Numeral),1d));
         at.addToken(start+21,start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(")",Pos.CloseBracket),1d));

         //filler tokens for "was a"
         at.addToken(start+23, start+26).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O",LexicalCategory.Adjective)));
         at.addToken(start+27, start+28).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("A", LexicalCategory.Adposition)));

         //token for "geologist"
         start = geologistStart;
         at.addToken(start,start+9).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));

         //filler tokens for "who lived in"
         at.addToken(start+10, start+13).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O", LexicalCategory.Adjective)));
         at.addToken(start+14, start+19).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb)));
         at.addToken(start+20, start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("PP", LexicalCategory.PronounOrDeterminer)));

         //tokens for "New Zealand"
         start = newZealandStart;
         at.addToken(start,start+3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
         at.addToken(start+4,start+11).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));

         //add filler Tokens for "and worked at"
         at.addToken(start+12, start+15).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("O", LexicalCategory.Adjective)));
         at.addToken(start+16, start+22).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("V", LexicalCategory.Verb)));
         at.addToken(start+23, start+25).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("PP", LexicalCategory.PronounOrDeterminer)));

         //tokens for "the University of Otago."
         start = universityStart;
         at.addToken(start,start+3).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("ART",Pos.Article),1d));
         at.addToken(start+4,start+14).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NE",Pos.CommonNoun),1d));
         at.addToken(start+15,start+17).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("OF",Pos.Preposition),1d));
         at.addToken(start+18,start+23).addAnnotation(POS_ANNOTATION, Value.value(new PosTag("NP",Pos.ProperNoun),1d));
         at.addToken(start+23,start+24).addAnnotation(POS_ANNOTATION, Value.value(new PosTag(".",Pos.Point),1d));
     }

     /**
      * Tokenizer for entity labels that is passed to the {@link EntityLinker}
      * and the {@link EntityLinkingEngine} instances created by the tests.
      */
     private LabelTokenizer labelTokenizer = new SimpleLabelTokenizer();


     /**
      * Hook called before every test. Currently empty, as all fixtures are
      * created once by {@link #setUpServices()}.
      * @throws IOException declared but not thrown by the current (empty) body
      */
     @Before
     public void bindServices() throws IOException {
     }

     /**
      * Hook called after every test. Currently empty, so nothing is released
      * between tests.
      */
     @After
     public void unbindServices() {
     }

     /**
      * Hook called once after all tests of this class. Currently empty; the
      * static fixtures are not cleaned up explicitly.
      */
     @AfterClass
     public static void shutdownServices() {
     }

     /**
      * Creates a content item for the parsed text by using the content item
      * factory of this test.
      * @param id the id of the content item (used to create its {@link IRI})
      * @param text the text used as content of the content item
      * @return the created content item
      * @throws IOException if thrown by the factory while creating the content item
      */
     public static ContentItem getContentItem(final String id, final String text) throws IOException {
         final IRI itemId = new IRI(id);
         final StringSource content = new StringSource(text);
         return ciFactory.createContentItem(itemId, content);
     }
     /**
      * This tests the EntityLinker functionality (if the expected Entities
      * are linked). In this case with the default configurations for
      * {@link LexicalCategory#Noun}: the default linked lexical categories are
      * used while no specific {@link Pos} tags are linked.
      * @throws Exception on any unexpected error while linking
      */
     @Test
     public void testEntityLinkerWithNouns() throws Exception {
         LanguageProcessingConfig tpc = new LanguageProcessingConfig();
         tpc.setLinkedLexicalCategories(LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES);
         //typed empty set (avoids the unchecked conversion of the raw EMPTY_SET)
         tpc.setLinkedPos(Collections.<Pos>emptySet());
         EntityLinkerConfig config = new EntityLinkerConfig();
         config.setMinFoundTokens(2);//this is assumed by this test
         config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
         EntityLinker linker = new EntityLinker(TEST_ANALYSED_TEXT,"en",
             tpc, searcher, config, labelTokenizer);
         linker.process();
         //selected text -> ids of the expected suggestions (ordered by rank)
         Map<String,List<String>> expectedResults = new HashMap<String,List<String>>();
         expectedResults.put("Patrick Marshall",
             Arrays.asList("urn:test:PatrickMarshall"));
         expectedResults.put("geologist",
             Arrays.asList("urn:test:redirect:Geologist")); //the redirected entity
         expectedResults.put("New Zealand",
             Arrays.asList("urn:test:NewZealand"));
         expectedResults.put("University of Otago",
             Arrays.asList("urn:test:UniversityOfOtago","urn:test:UniversityOfOtago_Texas"));
         validateEntityLinkerResults(linker, expectedResults);
     }
     /**
      * This tests that the EntityLinker also links Entities if the words of
      * the label are mentioned in a different order in the text. The text
      * {@link #TEST_TEXT_WO} mentions "Marshall Patrick" while the label of
      * the expected Entity is "Patrick Marshall". Like the noun test, the
      * default linked lexical categories are used and no specific {@link Pos}
      * tags are linked. In addition the state of chunks is ignored to emulate
      * the behaviour before STANBOL-1211.
      * @throws Exception on any unexpected error while linking
      */
     @Test
     public void testEntityLinkerWithWrongOrder() throws Exception {
         LanguageProcessingConfig tpc = new LanguageProcessingConfig();
         tpc.setLinkedLexicalCategories(LanguageProcessingConfig.DEFAULT_LINKED_LEXICAL_CATEGORIES);
         //typed empty set (avoids the unchecked conversion of the raw EMPTY_SET)
         tpc.setLinkedPos(Collections.<Pos>emptySet());
         tpc.setIgnoreChunksState(true); //to emulate pre STANBOL-1211
         EntityLinkerConfig config = new EntityLinkerConfig();
         config.setMinFoundTokens(2);//this is assumed by this test
         config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
         EntityLinker linker = new EntityLinker(TEST_ANALYSED_TEXT_WO,"en",
             tpc, searcher, config, labelTokenizer);
         linker.process();
         //selected text -> ids of the expected suggestions (ordered by rank)
         Map<String,List<String>> expectedResults = new HashMap<String,List<String>>();
         //the selected text uses the order of the text, not the one of the label
         expectedResults.put("Marshall Patrick",
             Arrays.asList("urn:test:PatrickMarshall"));
         expectedResults.put("geologist",
             Arrays.asList("urn:test:redirect:Geologist")); //the redirected entity
         expectedResults.put("New Zealand",
             Arrays.asList("urn:test:NewZealand"));
         expectedResults.put("University of Otago",
             Arrays.asList("urn:test:UniversityOfOtago","urn:test:UniversityOfOtago_Texas"));
         validateEntityLinkerResults(linker, expectedResults);
     }
     /**
      * This tests the EntityLinker functionality (if the expected Entities
      * are linked). In this case with the default configurations for
      * {@link Pos#ProperNoun}: the default linked {@link Pos} tags are used
      * while no lexical categories are linked.
      * @throws Exception on any unexpected error while linking
      */
     @Test
     public void testEntityLinkerWithProperNouns() throws Exception {
         LanguageProcessingConfig tpc = new LanguageProcessingConfig();
         //typed empty set (avoids the unchecked conversion of the raw EMPTY_SET)
         tpc.setLinkedLexicalCategories(Collections.<LexicalCategory>emptySet());
         tpc.setLinkedPos(LanguageProcessingConfig.DEFAULT_LINKED_POS);
         EntityLinkerConfig config = new EntityLinkerConfig();
         config.setMinFoundTokens(2);//this is assumed by this test
         config.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
         EntityLinker linker = new EntityLinker(TEST_ANALYSED_TEXT,"en",
             tpc, searcher, config, labelTokenizer);
         linker.process();
         //selected text -> ids of the expected suggestions (ordered by rank)
         Map<String,List<String>> expectedResults = new HashMap<String,List<String>>();
         expectedResults.put("Patrick Marshall",
             Arrays.asList("urn:test:PatrickMarshall"));
         //"geologist" is a common noun and MUST NOT be found. Because of that
         //there is no expected result for "urn:test:redirect:Geologist"
         expectedResults.put("New Zealand",
             Arrays.asList("urn:test:NewZealand"));
         expectedResults.put("University of Otago",
             Arrays.asList("urn:test:UniversityOfOtago","urn:test:UniversityOfOtago_Texas"));
         validateEntityLinkerResults(linker, expectedResults);
     }
     /**
      * Validates the entities linked by the parsed linker against the expected
      * results. For every linked entity the ids of the suggestions MUST match
      * the expected ids (in the expected order) and the scores MUST NOT
      * increase from one suggestion to the next.
      * <p>
      * NOTE: entries are removed from the parsed map as soon as they are found.
      * The map is expected to be empty after all linked entities are processed.
      * @param linker the linker (already processed) providing the linked entities
      * @param expectedResults the expected results: selected text as key and
      * the ids of the expected suggestions (ordered by rank) as value. This map
      * is modified by this method.
      */
     private void validateEntityLinkerResults(EntityLinker linker, Map<String,List<String>> expectedResults) {
         log.info("---------------------");
         log.info("- Validating Results-");
         log.info("---------------------");
         for(LinkedEntity linkedEntity : linker.getLinkedEntities().values()){
             log.info("> LinkedEntity {}",linkedEntity);
             //remove the entry so that a second hit for the same text fails
             List<String> expectedSuggestions = expectedResults.remove(linkedEntity.getSelectedText());
             assertNotNull("LinkedEntity '"+linkedEntity.getSelectedText()+
                 "' is not an expected Result (or was found twice)", expectedSuggestions);
             //JUnit expects the arguments in the order: message, expected, actual
             assertEquals("Number of suggestions "+linkedEntity.getSuggestions().size()+
                 " != number of expected suggestions "+expectedSuggestions.size()+
                 " for selection "+linkedEntity.getSelectedText() + " (Expected: " +
                 expectedSuggestions +")", expectedSuggestions.size(),
                 linkedEntity.getSuggestions().size());
             //the score of the linked entity is the upper bound for the first suggestion
             double score = linkedEntity.getScore();
             for(int i=0;i<expectedSuggestions.size();i++){
                 Suggestion suggestion = linkedEntity.getSuggestions().get(i);
                 assertEquals("Expecced Suggestion at Rank "+i+" expected: "+
                     expectedSuggestions.get(i)+" suggestion: "+
                     suggestion.getEntity().getId(),
                     expectedSuggestions.get(i),
                     suggestion.getEntity().getId());
                 assertTrue("Score of suggestion "+i+" ("+suggestion.getScore()+
                     ") > as of the previous one ("+score+")",
                     score >= suggestion.getScore());
                 score = suggestion.getScore();
             }
         }
         //everything still present was expected but not linked
         assertTrue("The expected Result(s) "+expectedResults+" wehre not found",
             expectedResults.isEmpty());
     }
     /**
      * Runs the {@link EntityLinkingEngine} on {@link #TEST_TEXT} and checks
      * that the created fise:TextAnnotations and fise:EntityAnnotations confirm
      * to the rules defined for the Stanbol Enhancement Structure.
      * @throws IOException if the content item can not be created
      * @throws EngineException if the engine fails to compute the enhancements
      */
     @Test
     public void testEngine() throws IOException, EngineException {
         //(1) the content item: English text with the pre-built AnalysedText as part
         ContentItem ci = ciFactory.createContentItem(new StringSource(TEST_TEXT));
         ci.getMetadata().add(new TripleImpl(ci.getUri(), DC_LANGUAGE, new PlainLiteralImpl("en")));
         ci.addPart(AnalysedText.ANALYSED_TEXT_URI, TEST_ANALYSED_TEXT);

         //(2) the engine under test
         EntityLinkerConfig linkerConfig = new EntityLinkerConfig();
         linkerConfig.setMinFoundTokens(2);//this is assumed by this test
         linkerConfig.setRedirectProcessingMode(RedirectProcessingMode.FOLLOW);
         TextProcessingConfig textProcessingConfig = new TextProcessingConfig();
         EntityLinkingEngine engine = new EntityLinkingEngine("dummy",
             searcher, textProcessingConfig, linkerConfig, labelTokenizer);

         //(3) compute the enhancements
         engine.computeEnhancements(ci);

         //(4) the values expected for all created enhancements
         String creatorName = engine.getClass().getName();
         RDFTerm creator = LiteralFactory.getInstance().createTypedLiteral(creatorName);
         Map<IRI,RDFTerm> expectedValues = new HashMap<IRI,RDFTerm>();
         expectedValues.put(ENHANCER_EXTRACTED_FROM, ci.getUri());
         expectedValues.put(DC_CREATOR, creator);
         //a null value marks the confidence as a required property
         expectedValues.put(ENHANCER_CONFIDENCE, null);

         //(5) validate the fise:TextAnnotations and fise:EntityAnnotations
         int numTextAnnotations = validateAllTextAnnotations(ci.getMetadata(), TEST_TEXT, expectedValues);
         assertEquals("Four fise:TextAnnotations are expected by this Test", 4, numTextAnnotations);
         int numEntityAnnotations = validateAllEntityAnnotations(ci, expectedValues);
         assertEquals("Five fise:EntityAnnotations are expected by this Test", 5, numEntityAnnotations);
     }
     /**
      * Similar to {@link EnhancementStructureHelper#validateAllEntityAnnotations(org.apache.clerezza.commons.rdf.Graph, Map)}
      * but in addition checks that every fise:EntityAnnotation has exactly one
      * entityhub:site value and that this value is a literal with the name of
      * the site used by this test (STANBOL-625).
      * <p>
      * The fise:confidence value is not validated by this method, as this is
      * done by the {@link EnhancementStructureHelper} (STANBOL-630).
      * @param ci the content item with the metadata to validate
      * @param expectedValues the expected values parsed to the validation of
      * the single fise:EntityAnnotations
      * @return the number of validated fise:EntityAnnotations
      */
     private static int validateAllEntityAnnotations(ContentItem ci, Map<IRI,RDFTerm> expectedValues){
         //the entityhub:site property (created once instead of once per annotation)
         final IRI entityhubSite = new IRI(NamespaceEnum.entityhub+"site");
         Iterator<Triple> entityAnnotationIterator = ci.getMetadata().filter(null,
                 RDF_TYPE, ENHANCER_ENTITYANNOTATION);
         int entityAnnotationCount = 0;
         while (entityAnnotationIterator.hasNext()) {
             IRI entityAnnotation = (IRI) entityAnnotationIterator.next().getSubject();
             //validate the annotation by using the EnhancementStructureHelper
             validateEntityAnnotation(ci.getMetadata(), entityAnnotation, expectedValues);
             //Test the entityhub:site property (STANBOL-625)
             Iterator<Triple> entitySiteIterator = ci.getMetadata().filter(entityAnnotation,
                 entityhubSite, null);
             assertTrue("Expected entityhub:site value is missing (entityAnnotation "
                     +entityAnnotation+")",entitySiteIterator.hasNext());
             RDFTerm siteResource = entitySiteIterator.next().getObject();
             assertTrue("entityhub:site values MUST BE Literals", siteResource instanceof Literal);
             assertEquals("'"+TEST_REFERENCED_SITE_NAME+"' is expected as "
                 + "entityhub:site value", TEST_REFERENCED_SITE_NAME,
                 ((Literal)siteResource).getLexicalForm());
             assertFalse("entityhub:site MUST HAVE only a single value", entitySiteIterator.hasNext());
             entityAnnotationCount++;
         }
         return entityAnnotationCount;
     }
 }