| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.entitycoreference; |
| |
| import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage; |
| |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.Dictionary; |
| import java.util.EnumSet; |
| import java.util.HashMap; |
| import java.util.Iterator; |
| import java.util.List; |
| import java.util.Map; |
| |
| import org.apache.felix.scr.annotations.Activate; |
| import org.apache.felix.scr.annotations.Component; |
| import org.apache.felix.scr.annotations.Deactivate; |
| import org.apache.felix.scr.annotations.Properties; |
| import org.apache.felix.scr.annotations.Property; |
| import org.apache.felix.scr.annotations.Reference; |
| import org.apache.felix.scr.annotations.Service; |
| import org.apache.stanbol.enhancer.engines.entitycoreference.datamodel.NounPhrase; |
| import org.apache.stanbol.enhancer.engines.entitycoreference.impl.CoreferenceFinder; |
| import org.apache.stanbol.enhancer.engines.entitycoreference.impl.NounPhraseFilterer; |
| import org.apache.stanbol.enhancer.nlp.NlpAnnotations; |
| import org.apache.stanbol.enhancer.nlp.model.AnalysedText; |
| import org.apache.stanbol.enhancer.nlp.model.Section; |
| import org.apache.stanbol.enhancer.nlp.model.Span; |
| import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum; |
| import org.apache.stanbol.enhancer.nlp.model.annotation.Value; |
| import org.apache.stanbol.enhancer.nlp.ner.NerTag; |
| import org.apache.stanbol.enhancer.nlp.phrase.PhraseTag; |
| import org.apache.stanbol.enhancer.nlp.pos.LexicalCategory; |
| import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper; |
| import org.apache.stanbol.enhancer.servicesapi.ContentItem; |
| import org.apache.stanbol.enhancer.servicesapi.EngineException; |
| import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; |
| import org.apache.stanbol.enhancer.servicesapi.ServiceProperties; |
| import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; |
| import org.apache.stanbol.entityhub.servicesapi.Entityhub; |
| import org.apache.stanbol.entityhub.servicesapi.site.SiteManager; |
| import org.osgi.service.cm.ConfigurationException; |
| import org.osgi.service.component.ComponentContext; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| /** |
| * This engine extracts, from the given text, noun phrases which refer to NERs (named entities). The |
| * coreference is performed by matching several of the named entity's dbpedia/yago properties against |
| * the noun phrase tokens. |
| * |
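| * For example, in a text such as "Microsoft has posted record earnings. The software company is |
| * hiring." the noun phrase "The software company" would be annotated as a coreference of the |
| * "Microsoft" named entity. (A hypothetical illustration of the intended matching, not the output of |
| * an actual run.) |
| * |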
| * TODO - Be able to detect possessive coreferences such as "Germany's prime minister". |
| * TODO - Be able to detect products and their developer, such as "iPhone 7" and "Apple's new device". |
| * TODO - Provide the ability via config for the user to also allow coreferencing of 1-word noun |
| * phrases based solely on comparison with the entity class type. |
| * |
| * @author Cristian Petroaca |
| * |
| */ |
| @Component(immediate = true, metatype = true) |
| @Service(value = EnhancementEngine.class) |
| @Properties(value = { |
| @Property(name = EnhancementEngine.PROPERTY_NAME, value = "entity-coreference"), |
| @Property(name = EntityCoReferenceEngine.CONFIG_LANGUAGES, value = "en"), |
| @Property(name = EntityCoReferenceEngine.REFERENCED_SITE_ID, value = "dbpedia"), |
| @Property(name = EntityCoReferenceEngine.MAX_DISTANCE, intValue = EntityCoReferenceEngine.MAX_DISTANCE_DEFAULT_VALUE)}) |
| public class EntityCoReferenceEngine extends AbstractEnhancementEngine<RuntimeException,RuntimeException> |
| implements EnhancementEngine, ServiceProperties { |
| |
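| /** |
| * Ordering of this engine within an enhancement chain. The engine runs in the post-processing phase |
| * because it depends on the NER and noun phrase chunk annotations created by earlier NLP engines. |
| */ |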
| private static final Integer ENGINE_ORDERING = ServiceProperties.ORDERING_POST_PROCESSING + 91; |
| |
| /** |
| * Language configuration. Takes a comma-separated list of ISO language codes of supported languages. |
| * Currently only the languages given as the default value are supported. |
| */ |
| protected static final String CONFIG_LANGUAGES = "enhancer.engine.entitycoreference.languages"; |
| |
| /** |
| * Referenced site configuration. Defaults to dbpedia. |
| */ |
| protected static final String REFERENCED_SITE_ID = "enhancer.engine.entitycoreference.referencedSiteId"; |
| |
| /** |
| * Maximum sentence distance between the NER and the noun phrase which mentions it. A value of -1 |
| * means no distance constraint. |
| */ |
| protected static final String MAX_DISTANCE = "enhancer.engine.entitycoreference.maxDistance"; |
| |
| protected static final int MAX_DISTANCE_DEFAULT_VALUE = 1; |
| public static final int MAX_DISTANCE_NO_CONSTRAINT = -1; |
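| |
| /* |
| * Example configuration (a hypothetical sketch of values as they might be entered in the Felix Web |
| * Console; the property keys are the constants defined above and the values shown are the defaults |
| * declared in the @Property annotations): |
| * |
| * enhancer.engine.entitycoreference.languages = en |
| * enhancer.engine.entitycoreference.referencedSiteId = dbpedia |
| * enhancer.engine.entitycoreference.maxDistance = 1 (use -1 for no distance constraint) |
| */ |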
| |
| private final Logger log = LoggerFactory.getLogger(EntityCoReferenceEngine.class); |
| |
| /** |
| * Service of the Entityhub that manages all the active referenced Sites. This service is used to look |
| * up the configured referenced Site when we need to enhance a content item. |
| */ |
| @Reference |
| protected SiteManager siteManager; |
| |
| /** |
| * Used to lookup Entities if the {@link #REFERENCED_SITE_ID} property is set to "entityhub" or "local" |
| */ |
| @Reference |
| protected Entityhub entityhub; |
| |
| /** |
| * Specialized class which filters out bad noun phrases based on the language. |
| */ |
| private NounPhraseFilterer nounPhraseFilterer; |
| |
| /** |
| * Performs the logic needed to find corefs based on the NERs and noun phrases in the text. |
| */ |
| private CoreferenceFinder corefFinder; |
| |
| @SuppressWarnings("unchecked") |
| @Activate |
| protected void activate(ComponentContext ctx) throws ConfigurationException { |
| super.activate(ctx); |
| |
| Dictionary<String,Object> config = ctx.getProperties(); |
| |
| /* Step 1 - initialize the {@link NounPhraseFilterer} with the language config */ |
| String languages = (String) config.get(CONFIG_LANGUAGES); |
| |
| if (languages == null || languages.isEmpty()) { |
| throw new ConfigurationException(CONFIG_LANGUAGES, |
| "The Languages Config is a required Parameter and MUST NOT be NULL or an empty String!"); |
| } |
| |
| nounPhraseFilterer = new NounPhraseFilterer(languages.split(",")); |
| |
| /* Step 2 - initialize the {@link CoreferenceFinder} */ |
| String referencedSiteID = null; |
| Object referencedSiteIDfromConfig = config.get(REFERENCED_SITE_ID); |
| |
| if (referencedSiteIDfromConfig == null) { |
| throw new ConfigurationException(REFERENCED_SITE_ID, |
| "The ID of the Referenced Site is a required Parameter and MUST NOT be NULL!"); |
| } |
| |
| referencedSiteID = referencedSiteIDfromConfig.toString(); |
| if (referencedSiteID.isEmpty()) { |
| throw new ConfigurationException(REFERENCED_SITE_ID, |
| "The ID of the Referenced Site is a required Parameter and MUST NOT be an empty String!"); |
| } |
| |
| if (Entityhub.ENTITYHUB_IDS.contains(referencedSiteID.toLowerCase())) { |
| log.debug("Init NamedEntityTaggingEngine instance for the Entityhub"); |
| referencedSiteID = null; |
| } |
| |
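| /* Step 3 - read the max sentence distance config; both Number and String values are accepted */ |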
| int maxDistance; |
| Object maxDistanceFromConfig = config.get(MAX_DISTANCE); |
| |
| if (maxDistanceFromConfig == null) { |
| maxDistance = MAX_DISTANCE_DEFAULT_VALUE; |
| } else if (maxDistanceFromConfig instanceof Number) { |
| maxDistance = ((Number) maxDistanceFromConfig).intValue(); |
| } else { |
| try { |
| maxDistance = Integer.parseInt(maxDistanceFromConfig.toString()); |
| } catch (NumberFormatException nfe) { |
| throw new ConfigurationException(MAX_DISTANCE, "The Max Distance parameter must be a number"); |
| } |
| } |
| |
| if (maxDistance < -1) { |
| throw new ConfigurationException(MAX_DISTANCE, |
| "The Max Distance parameter must not be smaller than -1"); |
| } |
| |
| corefFinder = new CoreferenceFinder(languages.split(","), siteManager, entityhub, referencedSiteID, |
| maxDistance); |
| |
| log.info("activate {}[name:{}]", getClass().getSimpleName(), getName()); |
| } |
| |
| @Override |
| public Map<String,Object> getServiceProperties() { |
| return Collections.unmodifiableMap(Collections.singletonMap(ENHANCEMENT_ENGINE_ORDERING, |
| (Object) ENGINE_ORDERING)); |
| } |
| |
| @Override |
| public int canEnhance(ContentItem ci) throws EngineException { |
| String language = getLanguage(this, ci, false); |
| if (language == null) { |
| log.debug("Engine {} ignores ContentItem {} becuase language {} is not detected.", |
| new Object[] {getName(), ci.getUri(), language}); |
| return CANNOT_ENHANCE; |
| } |
| |
| if (!nounPhraseFilterer.supportsLanguage(language)) { |
| log.debug("Engine {} does not support language {}.", new Object[] {getName(), language}); |
| return CANNOT_ENHANCE; |
| } |
| |
| return ENHANCE_SYNCHRONOUS; |
| } |
| |
| @Override |
| public void computeEnhancements(ContentItem ci) throws EngineException { |
| /* |
| * Step 1 - Build the NER list and the noun phrase list. |
| * |
| * TODO - the noun phrases need to be lemmatized. |
| */ |
| Map<Integer,List<Span>> ners = new HashMap<Integer,List<Span>>(); |
| List<NounPhrase> nounPhrases = new ArrayList<NounPhrase>(); |
| extractNersAndNounPhrases(ci, ners, nounPhrases); |
| |
| /* |
| * If there are no NERs to reference there's nothing to do but exit. |
| */ |
| if (ners.isEmpty()) { |
| log.info("Did not find any NERs for which to do the coreferencing"); |
| return; |
| } |
| |
| /* |
| * Step 2 - Filter out bad noun phrases. |
| */ |
| String language = getLanguage(this, ci, false); |
| if (language == null) { |
| log.info("Could not detect the language of the text"); |
| return; |
| } |
| |
| nounPhraseFilterer.filter(nounPhrases, language); |
| |
| /* |
| * If there are no good noun phrases there's nothing to do but exit. |
| */ |
| if (nounPhrases.isEmpty()) { |
| log.info("Did not find any noun phrases with which to do the coreferencing"); |
| return; |
| } |
| |
| /* |
| * Step 3 - Extract corefs and write them as {@link NlpAnnotations.COREF_ANNOTATION}s in the {@link |
| * Span}s |
| */ |
| corefFinder.extractCorefs(ners, nounPhrases, language); |
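| |
| /* |
| * A downstream consumer could then read the corefs from the Spans, e.g. (a sketch, assuming the |
| * org.apache.stanbol.enhancer.nlp.coref.CorefFeature API): |
| * |
| * Value<CorefFeature> coref = span.getAnnotation(NlpAnnotations.COREF_ANNOTATION); |
| * if (coref != null) { |
| * Set<Span> mentions = coref.value().getMentions(); |
| * } |
| */ |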
| } |
| |
| @Deactivate |
| protected void deactivate(ComponentContext ctx) { |
| log.info("deactivate {}[name:{}]", getClass().getSimpleName(), getName()); |
| |
| nounPhraseFilterer = null; |
| corefFinder = null; |
| |
| super.deactivate(ctx); |
| } |
| |
| /** |
| * Extracts the NERs and the noun phrases from the given text and puts them in the given collections. |
| * |
| * @param ci |
| * the {@link ContentItem} whose text is analyzed |
| * @param ners |
| * output map from sentence number to the NER {@link Span}s found in that sentence |
| * @param nounPhrases |
| * output list collecting all detected {@link NounPhrase}s |
| */ |
| private void extractNersAndNounPhrases(ContentItem ci, |
| Map<Integer,List<Span>> ners, |
| List<NounPhrase> nounPhrases) { |
| AnalysedText at = NlpEngineHelper.getAnalysedText(this, ci, true); |
| Iterator<? extends Section> sections = at.getSentences(); |
| if (!sections.hasNext()) { // process as single sentence |
| sections = Collections.singleton(at).iterator(); |
| } |
| |
| int sentenceCnt = 0; |
| while (sections.hasNext()) { |
| sentenceCnt++; |
| Section section = sections.next(); |
| List<NounPhrase> sectionNounPhrases = new ArrayList<NounPhrase>(); |
| List<Span> sectionNers = new ArrayList<Span>(); |
| |
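| // First pass: collect the NER chunks and the noun phrase chunks of this sentence. |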
| Iterator<Span> chunks = section.getEnclosed(EnumSet.of(SpanTypeEnum.Chunk)); |
| while (chunks.hasNext()) { |
| Span chunk = chunks.next(); |
| |
| Value<NerTag> ner = chunk.getAnnotation(NlpAnnotations.NER_ANNOTATION); |
| if (ner != null) { |
| sectionNers.add(chunk); |
| } |
| |
| Value<PhraseTag> phrase = chunk.getAnnotation(NlpAnnotations.PHRASE_ANNOTATION); |
| if (phrase != null && phrase.value().getCategory() == LexicalCategory.Noun) { |
| sectionNounPhrases.add(new NounPhrase(chunk, sentenceCnt)); |
| } |
| } |
| |
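| // Second pass: attach to each noun phrase the tokens it spans and any NER chunks it encloses. |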
| for (NounPhrase nounPhrase : sectionNounPhrases) { |
| Iterator<Span> tokens = section.getEnclosed(EnumSet.of(SpanTypeEnum.Token)); |
| |
| while (tokens.hasNext()) { |
| Span token = tokens.next(); |
| |
| if (nounPhrase.containsSpan(token)) { |
| nounPhrase.addToken(token); |
| } |
| } |
| |
| for (Span sectionNer : sectionNers) { |
| if (nounPhrase.containsSpan(sectionNer)) { |
| nounPhrase.addNerChunk(sectionNer); |
| } |
| } |
| } |
| |
| nounPhrases.addAll(sectionNounPhrases); |
| |
| if (!sectionNers.isEmpty()) { |
| ners.put(sentenceCnt, sectionNers); |
| } |
| } |
| } |
| } |