| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| package org.apache.stanbol.enhancer.engines.restful.nlp.impl; |
| |
| import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.NER_ANNOTATION; |
| import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.SENTIMENT_ANNOTATION; |
| import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT; |
| import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START; |
| |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.net.URI; |
| import java.net.URISyntaxException; |
| import java.nio.charset.Charset; |
| import java.security.AccessController; |
| import java.security.PrivilegedActionException; |
| import java.security.PrivilegedExceptionAction; |
| import java.util.Collections; |
| import java.util.Dictionary; |
| import java.util.EnumSet; |
| import java.util.HashMap; |
| import java.util.HashSet; |
| import java.util.Iterator; |
| import java.util.Map; |
| import java.util.Set; |
| import java.util.StringTokenizer; |
| |
| import org.apache.clerezza.commons.rdf.Language; |
| import org.apache.clerezza.rdf.core.LiteralFactory; |
| import org.apache.clerezza.commons.rdf.Graph; |
| import org.apache.clerezza.commons.rdf.IRI; |
| import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl; |
| import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl; |
| import org.apache.commons.io.IOUtils; |
| import org.apache.felix.scr.annotations.Activate; |
| import org.apache.felix.scr.annotations.Component; |
| import org.apache.felix.scr.annotations.ConfigurationPolicy; |
| import org.apache.felix.scr.annotations.Deactivate; |
| import org.apache.felix.scr.annotations.Properties; |
| import org.apache.felix.scr.annotations.Property; |
| import org.apache.felix.scr.annotations.Reference; |
| import org.apache.felix.scr.annotations.Service; |
| import org.apache.http.HttpEntity; |
| import org.apache.http.HttpException; |
| import org.apache.http.HttpHeaders; |
| import org.apache.http.HttpHost; |
| import org.apache.http.HttpRequest; |
| import org.apache.http.HttpRequestInterceptor; |
| import org.apache.http.HttpResponse; |
| import org.apache.http.StatusLine; |
| import org.apache.http.auth.AuthScope; |
| import org.apache.http.auth.AuthState; |
| import org.apache.http.auth.Credentials; |
| import org.apache.http.auth.UsernamePasswordCredentials; |
| import org.apache.http.client.ClientProtocolException; |
| import org.apache.http.client.CredentialsProvider; |
| import org.apache.http.client.HttpResponseException; |
| import org.apache.http.client.ResponseHandler; |
| import org.apache.http.client.methods.HttpGet; |
| import org.apache.http.client.methods.HttpPost; |
| import org.apache.http.client.params.ClientPNames; |
| import org.apache.http.client.protocol.ClientContext; |
| import org.apache.http.entity.ContentType; |
| import org.apache.http.entity.InputStreamEntity; |
| import org.apache.http.impl.auth.BasicScheme; |
| import org.apache.http.impl.client.BasicResponseHandler; |
| import org.apache.http.impl.client.DefaultHttpClient; |
| import org.apache.http.impl.conn.PoolingClientConnectionManager; |
| import org.apache.http.params.BasicHttpParams; |
| import org.apache.http.params.CoreConnectionPNames; |
| import org.apache.http.params.CoreProtocolPNames; |
| import org.apache.http.protocol.ExecutionContext; |
| import org.apache.http.protocol.HttpContext; |
| import org.apache.http.util.EntityUtils; |
| import org.apache.stanbol.enhancer.nlp.json.AnalyzedTextParser; |
| import org.apache.stanbol.enhancer.nlp.model.AnalysedText; |
| import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory; |
| import org.apache.stanbol.enhancer.nlp.model.Sentence; |
| import org.apache.stanbol.enhancer.nlp.model.Span; |
| import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum; |
| import org.apache.stanbol.enhancer.nlp.model.annotation.Value; |
| import org.apache.stanbol.enhancer.nlp.ner.NerTag; |
| import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration; |
| import org.apache.stanbol.enhancer.nlp.utils.NIFHelper; |
| import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper; |
| import org.apache.stanbol.enhancer.servicesapi.Blob; |
| import org.apache.stanbol.enhancer.servicesapi.ContentItem; |
| import org.apache.stanbol.enhancer.servicesapi.EngineException; |
| import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine; |
| import org.apache.stanbol.enhancer.servicesapi.ServiceProperties; |
| import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper; |
| import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine; |
| import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum; |
| import org.osgi.framework.Constants; |
| import org.osgi.service.cm.ConfigurationException; |
| import org.osgi.service.component.ComponentContext; |
| import org.slf4j.Logger; |
| import org.slf4j.LoggerFactory; |
| |
| /** |
| * An enhancement engine that uses a RESTful service for NLP processing of |
 * the plain text content part of processed {@link ContentItem}s.<p>
| * The RESTful API of the remote service is standardised by |
| * <a href="https://issues.apache.org/jira/browse/STANBOL-892">STANBOL-892</a> <p> |
| * |
| * @author Rupert Westenthaler |
| */ |
| |
| @Component(immediate = true, metatype = true, |
| policy = ConfigurationPolicy.REQUIRE, configurationFactory=true) |
| @Service |
| @Properties(value={ |
| @Property(name= EnhancementEngine.PROPERTY_NAME,value="changeme"), |
| @Property(name=RestfulNlpAnalysisEngine.CONFIG_LANGUAGES, value = {"*"},cardinality=Integer.MAX_VALUE), |
| @Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_URL, value ="http://changeme"), |
| @Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_USER, value =""), |
| @Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_PWD, value =""), |
| @Property(name=RestfulNlpAnalysisEngine.WRITE_TEXT_ANNOTATIONS_STATE, |
| boolValue=RestfulNlpAnalysisEngine.DEFAULT_WRITE_TEXT_ANNOTATION_STATE), |
| @Property(name=Constants.SERVICE_RANKING,intValue=0) |
| }) |
public class RestfulNlpAnalysisEngine extends AbstractEnhancementEngine<IOException,RuntimeException> implements ServiceProperties {

    //fallback charset used when the analysis service response declares none
    private static final Charset UTF8 = Charset.forName("UTF-8");

    /**
     * The URI for the remote analyses service
     */
    public static final String ANALYSIS_SERVICE_URL = "enhancer.engine.restful.nlp.analysis.service";
    /**
     * The User for the remote analyses service
     */
    public static final String ANALYSIS_SERVICE_USER = "enhancer.engine.restful.nlp.analysis.service.user";
    /**
     * The Password for the remote analyses service
     */
    public static final String ANALYSIS_SERVICE_PWD = "enhancer.engine.restful.nlp.analysis.service.pwd";
    /**
     * Allows to enable/disable the addition of <code>fise:TextAnnotation</code>s
     * to the enhancement metadata of the ContentItem
     */
    public static final String WRITE_TEXT_ANNOTATIONS_STATE = "enhancer.engine.restful.nlp.analysis.write-textannotations";
    //default used when WRITE_TEXT_ANNOTATIONS_STATE is not configured
    public static final boolean DEFAULT_WRITE_TEXT_ANNOTATION_STATE = true;
    /**
     * Language configuration. Takes a list of ISO language codes to be processed
     * by this engine. This list will be joined with the list of languages supported
     * by the RESTful NLP analysis service.
     */
    public static final String CONFIG_LANGUAGES = "enhancer.engine.restful.nlp.languages";

    /**
     * The maximum size of the prefix/suffix for the selection context
     */
    private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;


    //TODO: move those sentiment specific constants to o.a.s.enhancer.servicesapi as soon as
    //      Sentiment Annotations are normalized.
    //NOTE: they are also defined in the Sentiment Summarization engine!
    /**
     * The property used to write the sum of all positive classified words
     */
    public static final IRI POSITIVE_SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"positive-sentiment");
    /**
     * The property used to write the sum of all negative classified words
     */
    public static final IRI NEGATIVE_SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"negative-sentiment");
    /**
     * The sentiment of the section (sum of positive and negative classifications)
     */
    public static final IRI SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"sentiment");
    /**
     * The dc:type value used for fise:TextAnnotations indicating a Sentiment
     */
    public static final IRI SENTIMENT_TYPE = new IRI(NamespaceEnum.fise+"Sentiment");
    /**
     * The dc:type value used for the sentiment annotation of the whole document
     */
    public static final IRI DOCUMENT_SENTIMENT_TYPE = new IRI(NamespaceEnum.fise+"DocumentSentiment");

    //read-only service properties returned by #getServiceProperties()
    private static final Map<String,Object> SERVICE_PROPERTIES;
    static {
        Map<String,Object> props = new HashMap<String,Object>();
        //by default register as Tokenizing engine
        props.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
            ServiceProperties.ORDERING_NLP_TOKENIZING);
//        props.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE,
//            NlpProcessingRole.Tokenizing);
        SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
    }


    private static Logger log = LoggerFactory.getLogger(RestfulNlpAnalysisEngine.class);

    //the configured URL of the remote analysis service (see ANALYSIS_SERVICE_URL)
    private URI analysisServiceUrl;

    //Language configuration
    private LanguageConfiguration languageConfig = new LanguageConfiguration(CONFIG_LANGUAGES,new String[]{"*"});

    //languages reported as supported by the remote service; filled by
    //initRESTfulNlpAnalysisService() and cleared when the service gets unavailable
    private final Set<String> supportedLanguages = new HashSet<String>();

    //HTTP client infrastructure; created in activate() and released in deactivate()
    protected DefaultHttpClient httpClient;
    private BasicHttpParams httpParams;
    private PoolingClientConnectionManager connectionManager;

    //factory used to create the AnalysedText content part (OSGi injected)
    @Reference
    private AnalysedTextFactory analysedTextFactory;

    /**
     * Used to parse {@link AnalysedText} instances from responses of the
     * RESTful analysis service.
     */
    @Reference
    private AnalyzedTextParser analyzedTextParser;

    //whether fise:TextAnnotations are written (see WRITE_TEXT_ANNOTATIONS_STATE)
    private boolean writeTextAnnotations;

    //null .. not yet checked; FALSE .. last check failed; TRUE .. service available
    private Boolean serviceInitialised;

    //the component configuration as parsed to activate(..)
    private Dictionary<String, Object> config;
| |
| /** |
| * Indicate if this engine can enhance supplied ContentItem, and if it |
| * suggests enhancing it synchronously or asynchronously. The |
| * {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager} can force sync/async mode if desired, it is |
| * just a suggestion from the engine. |
| * <p/> |
| * Returns ENHANCE_ASYNC in case there is a text/plain content part and a tagger for the language identified for |
| * the content item, CANNOT_ENHANCE otherwise. |
| * |
| * @throws org.apache.stanbol.enhancer.servicesapi.EngineException |
| * if the introspecting process of the content item |
| * fails |
| */ |
| @Override |
| public int canEnhance(ContentItem ci) throws EngineException { |
| // check if content is present |
| Map.Entry<IRI,Blob> entry = NlpEngineHelper.getPlainText(this, ci, false); |
| if(entry == null || entry.getValue() == null) { |
| return CANNOT_ENHANCE; |
| } |
| checkRESTfulNlpAnalysisService(); |
| String language = getLanguage(this,ci,false); |
| if(language == null) { |
| return CANNOT_ENHANCE; |
| } |
| if(!languageConfig.isLanguage(language)){ |
| log.trace(" > can NOT enhance ContentItem {} because language {} is " |
| + "not enabled by this engines configuration",ci,language); |
| return CANNOT_ENHANCE; |
| } |
| if(!supportedLanguages.contains(language)){ |
| log.trace(" > the RESTful Analysis service does not support '{}' (supported: {})", |
| language, supportedLanguages); |
| return CANNOT_ENHANCE; |
| } |
| log.trace(" > can enhance ContentItem {} with language {}",ci,language); |
| return ENHANCE_ASYNC; |
| } |
| |
| |
    /**
     * Compute enhancements for supplied ContentItem. The results of the process
     * are expected to be stored in the metadata of the content item.
     * <p/>
     * The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
     * persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
     * <p/>
     * This implementation POSTs the plain text content to the configured
     * RESTful NLP Analysis service and stores the parsed results in the
     * {@link AnalysedText} content part of the ContentItem. If
     * {@link #WRITE_TEXT_ANNOTATIONS_STATE} is enabled it additionally writes
     * <code>fise:TextAnnotation</code>s for Named Entity and Sentiment
     * annotations to the enhancement metadata.
     *
     * @throws org.apache.stanbol.enhancer.servicesapi.EngineException
     *          if the underlying process failed to work as
     *          expected
     */
    @Override
    public void computeEnhancements(final ContentItem ci) throws EngineException {
        checkRESTfulNlpAnalysisService(); //validate that the service is active
        //get/create the AnalysedText content part
        final AnalysedText at = NlpEngineHelper.initAnalysedText(this, analysedTextFactory, ci);
        final Blob blob = at.getBlob();
        //send the text to the server
        final String language = getLanguage(this, ci, true);
        final HttpPost request = new HttpPost(analysisServiceUrl);
        request.addHeader(HttpHeaders.CONTENT_LANGUAGE, language);
        //stream the blob content using its own mime type and charset
        request.setEntity(new InputStreamEntity(
            blob.getStream(), blob.getContentLength(),
            ContentType.create(blob.getMimeType(),
                blob.getParameter().get("charset"))));
        //execute the request (privileged, as the engine may run under a security manager)
        try {
            AccessController.doPrivileged(new PrivilegedExceptionAction<AnalysedText>() {
                public AnalysedText run() throws ClientProtocolException, IOException {
                    //the response handler parses the results directly into 'at'
                    return httpClient.execute(request, new AnalysisResponseHandler(at));
                }
            });
        } catch (PrivilegedActionException pae) {
            Exception e = pae.getException();
            if(e instanceof ClientProtocolException) {
                //force re-initialisation upon error
                setRESTfulNlpAnalysisServiceUnavailable();
                throw new EngineException(this, ci, "Exception while executing Request "
                        + "on RESTful NLP Analysis Service at "+analysisServiceUrl, e);
            } else if(e instanceof IOException) {
                //force re-initialisation upon error
                setRESTfulNlpAnalysisServiceUnavailable();
                throw new EngineException(this, ci, "Exception while executing Request "
                        + "on RESTful NLP Analysis Service at "+analysisServiceUrl, e);
            } else {
                //run() only declares the two checked exceptions handled above,
                //so anything else must be unchecked
                throw RuntimeException.class.cast(e);
            }
        }
        if(writeTextAnnotations){
            //if enabled fise:TextAnnotations are created for Named Entities and Sentiments

            //statistics used to calculate the overall sentiment of the document
            double positiveSent = 0.0;
            int positiveCount = 0;
            double negativeSent = 0.0;
            int negativeCount = 0;
            int sentimentCount = 0;

            Iterator<Span> spans = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,SpanTypeEnum.Chunk));
            Sentence context = null; //the sentence enclosing the current chunk
            Graph metadata = ci.getMetadata();
            Language lang = new Language(language);
            LiteralFactory lf = LiteralFactory.getInstance();
            //writes to the metadata graph require the ContentItem write lock
            ci.getLock().writeLock().lock();
            try { //write TextAnnotations for Named Entities
                while(spans.hasNext()){
                    Span span = spans.next();
                    switch (span.getType()) {
                        case Sentence:
                            //remember the sentence as selection context for
                            //the chunks it encloses
                            context = (Sentence)span;
                            //FALLThrough intended!!
                        default:
                            //write a fise:TextAnnotation for a NER annotation on this span
                            Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
                            if(nerAnno != null){
                                IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                                //add span related data
                                metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT,
                                    new PlainLiteralImpl(span.getSpan(), lang)));
                                metadata.add(new TripleImpl(ta, ENHANCER_START,
                                    lf.createTypedLiteral(span.getStart())));
                                metadata.add(new TripleImpl(ta, ENHANCER_END,
                                    lf.createTypedLiteral(span.getEnd())));
                                //use the enclosing sentence as selection context; if
                                //none was seen yet fall back to a window of the
                                //surrounding text
                                metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT,
                                    new PlainLiteralImpl(context == null ?
                                            getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
                                                context.getSpan(), lang)));
                                //add the NER type
                                if(nerAnno.value().getType() != null){
                                    metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
                                }
                                //negative probabilities mark "unknown" - do not write those
                                if(nerAnno.probability() >= 0) {
                                    metadata.add(new TripleImpl(ta, ENHANCER_CONFIDENCE,
                                        lf.createTypedLiteral(nerAnno.probability())));
                                }
                            }

                            Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION);
                            if (sentimentAnnotation != null) { //this span has a sentiment assigned

                                Double sentiment = sentimentAnnotation.value();

                                //Create a fise:TextAnnotation for the sentiment
                                IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                                metadata.add(new TripleImpl(ta, ENHANCER_START,
                                    lf.createTypedLiteral(span.getStart())));
                                metadata.add(new TripleImpl(ta, ENHANCER_END,
                                    lf.createTypedLiteral(span.getEnd())));
                                metadata.add(new TripleImpl(ta, SENTIMENT_PROPERTY,
                                    lf.createTypedLiteral(sentiment)));

                                //add the generic dc:type used for all Sentiment annotation
                                metadata.add(new TripleImpl(ta, DC_TYPE, SENTIMENT_TYPE));
                                //determine the specific dc:type for the sentiment annotation
                                IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(span.getType());
                                if(ssoType != null){
                                    metadata.add(new TripleImpl(ta, DC_TYPE, ssoType));
                                }

                                //keep statistics for the overall sentiment for the Document
                                sentimentCount++ ;
                                if(sentiment > 0){
                                    positiveSent += sentiment;
                                    positiveCount++;
                                }else if(sentiment < 0){
                                    negativeSent += sentiment;
                                    negativeCount++;
                                }

                            }
                            break;
                    }
                }


                //Add the annotation for the overall sentiment of the document
                if ( sentimentCount > 0 ) {
                    IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
                    //calculate the average sentiment for a document
                    //TODO: Think on a better way to calculate a general sentiment value for a document.
                    metadata.add(new TripleImpl(ta, SENTIMENT_PROPERTY,
                        lf.createTypedLiteral((positiveSent + negativeSent) / sentimentCount)));

                    if ( positiveCount > 0 ){
                        //average positive sentiment calculation for the document
                        metadata.add(new TripleImpl(ta, POSITIVE_SENTIMENT_PROPERTY,
                            lf.createTypedLiteral( positiveSent / positiveCount )));
                    }
                    if ( negativeCount > 0 ){
                        //average negative sentiment calculation for the document
                        metadata.add(new TripleImpl(ta, NEGATIVE_SENTIMENT_PROPERTY,
                            lf.createTypedLiteral( negativeSent / negativeCount )));
                    }
                    metadata.add(new TripleImpl(ta, DC_TYPE, SENTIMENT_TYPE));
                    metadata.add(new TripleImpl(ta, DC_TYPE, DOCUMENT_SENTIMENT_TYPE));
                } // no sentiment annotation present ... nothing to do

            } finally {
                ci.getLock().writeLock().unlock();
            }
        } //else do not write fise:TextAnnotations
    }
| |
| protected class AnalysisResponseHandler implements ResponseHandler<AnalysedText>{ |
| |
| protected final AnalysedText at; |
| |
| |
| protected AnalysisResponseHandler(AnalysedText at){ |
| this.at = at; |
| } |
| |
| @Override |
| public AnalysedText handleResponse(HttpResponse response) throws ClientProtocolException, IOException { |
| StatusLine statusLine = response.getStatusLine(); |
| HttpEntity entity = response.getEntity(); |
| if (statusLine.getStatusCode() >= 300) { |
| String reason; |
| if(entity != null) { |
| StringBuilder sb = new StringBuilder(statusLine.getReasonPhrase()); |
| String message = EntityUtils.toString(entity); |
| if(message != null && !message.isEmpty()){ |
| sb.append("\nMessage:\n").append(message); |
| } |
| reason = sb.toString(); |
| } else { |
| reason = statusLine.getReasonPhrase(); |
| } |
| EntityUtils.consume(entity); |
| throw new HttpResponseException(statusLine.getStatusCode(), reason); |
| } |
| //parse the results |
| InputStream in = null; |
| try { |
| in = entity.getContent(); |
| Charset charset = entity.getContentEncoding() != null ? |
| Charset.forName(entity.getContentEncoding().getValue()) : UTF8; |
| return analyzedTextParser.parse(in, charset, at); |
| } finally { |
| //ensure that the stream is closed |
| IOUtils.closeQuietly(in); |
| } |
| } |
| } |
| |
    /**
     * Getter for the engine's service properties. Registers this engine with
     * {@link ServiceProperties#ORDERING_NLP_TOKENIZING}.
     * @return the unmodifiable service properties
     */
    @Override
    public Map<String,Object> getServiceProperties() {
        return SERVICE_PROPERTIES;
    }
    /**
     * Activate and read the properties. Parses the service URL and the
     * (optional) credentials, sets up the HTTP client and reads the language
     * configuration for this engine.
     *
     * @param ce the {@link org.osgi.service.component.ComponentContext}
     * @throws ConfigurationException if the service URL is missing or invalid
     * @throws IOException declared by {@link AbstractEnhancementEngine#activate}
     */
    @Activate
    protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
        super.activate(ce);
        log.info("activate {} '{}'",getClass().getSimpleName(),getName());
        config = ce.getProperties();

        //(1) parse and validate the analysis service URL (required)
        Object value = config.get(ANALYSIS_SERVICE_URL);
        if(value == null){
            throw new ConfigurationException(ANALYSIS_SERVICE_URL,
                "The RESTful Analysis Service URL is missing in the provided configuration!");
        } else {
            try {
                analysisServiceUrl = new URI(value.toString());
                log.info(" ... service: {}",analysisServiceUrl);
            } catch (URISyntaxException e) {
                throw new ConfigurationException(ANALYSIS_SERVICE_URL,
                    "The parsed RESTful Analysis Service URL '"+ value
                    + "'is not a valid URL!",e);
            }
        }
        //(2) read the (optional) credentials; the password is only considered
        //    if a user is configured
        String usr;
        String pwd;
        value = config.get(ANALYSIS_SERVICE_USER);
        if(value != null && !value.toString().isEmpty()){
            usr = value.toString();
            value = config.get(ANALYSIS_SERVICE_PWD);
            pwd = value == null ? null : value.toString();
        } else { // no user set
            usr = null;
            pwd = null;
        }

        //init the http client
        httpParams = new BasicHttpParams();
        httpParams.setParameter(CoreProtocolPNames.USER_AGENT, "Apache Stanbol RESTful NLP Analysis Engine");
        httpParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, true);
        httpParams.setIntParameter(ClientPNames.MAX_REDIRECTS, 3);
        httpParams.setBooleanParameter(CoreConnectionPNames.SO_KEEPALIVE, true);

        connectionManager = new PoolingClientConnectionManager();
        connectionManager.setMaxTotal(20);
        connectionManager.setDefaultMaxPerRoute(20);

        //NOTE: The list of supported languages is the combination of the
        //      languages enabled by the configuration (#languageConfig) and the
        //      languages supported by the RESTful NLP Analysis Service
        //      (#supportedLanguages)
        //init the language configuration with the engine configuration
        languageConfig.setConfiguration(config);

        httpClient = new DefaultHttpClient(connectionManager,httpParams);
        if(usr != null){
            log.info(" ... setting user to {}",usr);
            httpClient.getCredentialsProvider().setCredentials(AuthScope.ANY,
                new UsernamePasswordCredentials(usr, pwd));
            // And add request interceptor to have preemptive authentication
            httpClient.addRequestInterceptor(new PreemptiveAuthInterceptor(), 0);
        }
        //STANBOL-1389: deactivated initialization during activation as this can create
        //issues in cases where Stanbol and the NLP service do run in the same
        //servlet container.
        //initRESTfulNlpAnalysisService();

        //(3) read if fise:TextAnnotations should be written
        value = config.get(WRITE_TEXT_ANNOTATIONS_STATE);
        if(value instanceof Boolean){
            this.writeTextAnnotations = ((Boolean)value).booleanValue();
        } else if(value != null){
            this.writeTextAnnotations = Boolean.parseBoolean(value.toString());
        } else {
            this.writeTextAnnotations = DEFAULT_WRITE_TEXT_ANNOTATION_STATE;
        }
    }
| /** |
| * @throws EngineException |
| */ |
| private void checkRESTfulNlpAnalysisService() throws EngineException { |
| if(!initRESTfulNlpAnalysisService()){ |
| throw new EngineException("The configured RESTful NLP Analysis Service is " |
| + "currently not available (url: '"+analysisServiceUrl+"')"); |
| } |
| } |
| /** |
| * to be called after handling an exception while calling the remote service |
| * that indicates that the service is no longer available. |
| */ |
| private void setRESTfulNlpAnalysisServiceUnavailable(){ |
| serviceInitialised = false; |
| supportedLanguages.clear(); |
| } |
| |
    /**
     * initialises the RESRfulNlpAnalysis if not yet done. Sends a GET request
     * to the configured service URL and extracts the supported languages from
     * the JSON response.
     * <p>
     * NOTE(review): this lazy initialisation is not synchronised - concurrent
     * callers may both run the check. As both would come to the same result
     * this looks harmless, but worth confirming.
     * @return <code>true</code> if the service is available
     */
    private boolean initRESTfulNlpAnalysisService() {
        if(serviceInitialised != null && serviceInitialised){
            return true; //already initialised
        }
        if(serviceInitialised == null){ //first call after activation
            log.info(" ... checking configured RESTful NLP Analysis service {}", analysisServiceUrl);
            serviceInitialised = false;
        } else { //a previous initialisation attempt failed
            log.info(" ... re-trying to initialise RESTful NLP Analysis service {}", analysisServiceUrl);
        }
        //get the supported languages
        String supported;
        try {
            supported = AccessController.doPrivileged(new PrivilegedExceptionAction<String>() {
                public String run() throws IOException {
                    HttpGet request = new HttpGet(analysisServiceUrl);
                    request.setHeader(HttpHeaders.ACCEPT, ContentType.APPLICATION_JSON.toString());
                    return httpClient.execute(request,new BasicResponseHandler());
                }
            });
            serviceInitialised = true;
        } catch (PrivilegedActionException pae) {
            Exception e = pae.getException();
            setRESTfulNlpAnalysisServiceUnavailable();
            if(e instanceof IOException){ //service not reachable ...
                log.warn("Unable to initialise RESTful NLP Analysis Service!", e);
                return false; //... will re-try on the next call
            } else { //run() only declares IOException; anything else is unchecked
                throw RuntimeException.class.cast(e);
            }
        }
        //NOTE: The list of supported languages is the combination of the
        //      languages enabled by the configuration (#languageConfig) and the
        //      languages supported by the RESTful NLP Analysis Service
        //      (#supportedLanguages)
        //parse the supported languages from the initialization response
        //NOTE(review): this tokenizes the raw JSON instead of parsing it and
        //assumes the response is a flat array of language codes - confirm
        //against the service API defined by STANBOL-892
        StringTokenizer st = new StringTokenizer(supported, "{[\",]}");
        while(st.hasMoreElements()){
            supportedLanguages.add(st.nextToken());
        }
        return true;
    }
| |
    /**
     * Deactivates this engine: resets the language configuration, clears the
     * cached supported languages and shuts down the HTTP client infrastructure.
     * @param context the {@link org.osgi.service.component.ComponentContext}
     */
    @Deactivate
    protected void deactivate(ComponentContext context) {
        languageConfig.setDefault(); //reset to the default language configuration
        supportedLanguages.clear();
        //shutdown the Http Client
        httpClient = null;
        httpParams = null;
        connectionManager.shutdown();
        connectionManager = null;
        serviceInitialised = null; //a re-activation will check the service again
        super.deactivate(context);
    }
| |
| /** |
| * Extracts the selection context based on the content, selection and |
| * the start char offset of the selection |
| * @param content the content |
| * @param selection the selected text |
| * @param selectionStartPos the start char position of the selection |
| * @return the context |
| */ |
| private String getDefaultSelectionContext(String content, String selection,int selectionStartPos){ |
| //extract the selection context |
| int beginPos; |
| if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){ |
| beginPos = 0; |
| } else { |
| int start = selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE; |
| beginPos = content.indexOf(' ',start); |
| if(beginPos < 0 || beginPos >= selectionStartPos){ //no words |
| beginPos = start; //begin within a word |
| } |
| } |
| int endPos; |
| if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= content.length()){ |
| endPos = content.length(); |
| } else { |
| int start = selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE; |
| endPos = content.lastIndexOf(' ', start); |
| if(endPos <= selectionStartPos+selection.length()){ |
| endPos = start; //end within a word; |
| } |
| } |
| return content.substring(beginPos, endPos); |
| } |
| |
| /** |
| * HttpRequestInterceptor for preemptive authentication, based on httpclient |
| * 4.0 example |
| */ |
| private static class PreemptiveAuthInterceptor implements HttpRequestInterceptor { |
| |
| public void process(HttpRequest request, HttpContext context) throws HttpException, IOException { |
| |
| AuthState authState = (AuthState) context.getAttribute(ClientContext.TARGET_AUTH_STATE); |
| CredentialsProvider credsProvider = (CredentialsProvider) context.getAttribute(ClientContext.CREDS_PROVIDER); |
| HttpHost targetHost = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST); |
| |
| // If not auth scheme has been initialized yet |
| if (authState.getAuthScheme() == null) { |
| AuthScope authScope = new AuthScope(targetHost.getHostName(), targetHost.getPort()); |
| |
| // Obtain credentials matching the target host |
| Credentials creds = credsProvider.getCredentials(authScope); |
| |
| // If found, generate BasicScheme preemptively |
| if (creds != null) { |
| authState.update(new BasicScheme(), creds); |
| } |
| } |
| } |
| } |
| } |