blob: 280db6901f308b4428e611eb7df577927978d498 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.restful.nlp.impl;
import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.NER_ANNOTATION;
import static org.apache.stanbol.enhancer.nlp.NlpAnnotations.SENTIMENT_ANNOTATION;
import static org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper.getLanguage;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.DC_TYPE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_CONFIDENCE;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_END;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTED_TEXT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_SELECTION_CONTEXT;
import static org.apache.stanbol.enhancer.servicesapi.rdf.Properties.ENHANCER_START;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.Charset;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.util.Collections;
import java.util.Dictionary;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import org.apache.clerezza.commons.rdf.Language;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.impl.utils.PlainLiteralImpl;
import org.apache.clerezza.commons.rdf.impl.utils.TripleImpl;
import org.apache.commons.io.IOUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.ConfigurationPolicy;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Reference;
import org.apache.felix.scr.annotations.Service;
import org.apache.http.HttpEntity;
import org.apache.http.HttpException;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpRequestInterceptor;
import org.apache.http.HttpResponse;
import org.apache.http.StatusLine;
import org.apache.http.auth.AuthScope;
import org.apache.http.auth.AuthState;
import org.apache.http.auth.Credentials;
import org.apache.http.auth.UsernamePasswordCredentials;
import org.apache.http.client.ClientProtocolException;
import org.apache.http.client.CredentialsProvider;
import org.apache.http.client.HttpResponseException;
import org.apache.http.client.ResponseHandler;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.methods.HttpPost;
import org.apache.http.client.params.ClientPNames;
import org.apache.http.client.protocol.ClientContext;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.InputStreamEntity;
import org.apache.http.impl.auth.BasicScheme;
import org.apache.http.impl.client.BasicResponseHandler;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.impl.conn.PoolingClientConnectionManager;
import org.apache.http.params.BasicHttpParams;
import org.apache.http.params.CoreConnectionPNames;
import org.apache.http.params.CoreProtocolPNames;
import org.apache.http.protocol.ExecutionContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.util.EntityUtils;
import org.apache.stanbol.enhancer.nlp.json.AnalyzedTextParser;
import org.apache.stanbol.enhancer.nlp.model.AnalysedText;
import org.apache.stanbol.enhancer.nlp.model.AnalysedTextFactory;
import org.apache.stanbol.enhancer.nlp.model.Sentence;
import org.apache.stanbol.enhancer.nlp.model.Span;
import org.apache.stanbol.enhancer.nlp.model.SpanTypeEnum;
import org.apache.stanbol.enhancer.nlp.model.annotation.Value;
import org.apache.stanbol.enhancer.nlp.ner.NerTag;
import org.apache.stanbol.enhancer.nlp.utils.LanguageConfiguration;
import org.apache.stanbol.enhancer.nlp.utils.NIFHelper;
import org.apache.stanbol.enhancer.nlp.utils.NlpEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* An enhancement engine that uses a RESTful service for NLP processing of
* the pain text content part of processed {@link ContentItem}s.<p>
* The RESTful API of the remote service is standardised by
* <a href="https://issues.apache.org/jira/browse/STANBOL-892">STANBOL-892</a> <p>
*
* @author Rupert Westenthaler
*/
@Component(immediate = true, metatype = true,
policy = ConfigurationPolicy.REQUIRE, configurationFactory=true)
@Service
@Properties(value={
@Property(name= EnhancementEngine.PROPERTY_NAME,value="changeme"),
@Property(name=RestfulNlpAnalysisEngine.CONFIG_LANGUAGES, value = {"*"},cardinality=Integer.MAX_VALUE),
@Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_URL, value ="http://changeme"),
@Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_USER, value =""),
@Property(name=RestfulNlpAnalysisEngine.ANALYSIS_SERVICE_PWD, value =""),
@Property(name=RestfulNlpAnalysisEngine.WRITE_TEXT_ANNOTATIONS_STATE,
boolValue=RestfulNlpAnalysisEngine.DEFAULT_WRITE_TEXT_ANNOTATION_STATE),
@Property(name=Constants.SERVICE_RANKING,intValue=0)
})
public class RestfulNlpAnalysisEngine extends AbstractEnhancementEngine<IOException,RuntimeException> implements ServiceProperties {
private static final Charset UTF8 = Charset.forName("UTF-8");
/**
* The URI for the remote analyses service
*/
public static final String ANALYSIS_SERVICE_URL = "enhancer.engine.restful.nlp.analysis.service";
/**
* The User for the remote analyses service
*/
public static final String ANALYSIS_SERVICE_USER = "enhancer.engine.restful.nlp.analysis.service.user";
/**
* The User for the remote analyses service
*/
public static final String ANALYSIS_SERVICE_PWD = "enhancer.engine.restful.nlp.analysis.service.pwd";
/**
* Allows to enable/disable the addition of <code>fise:TextAnnotation</code>s
* to the enhancement metadata of the ContentItem
*/
public static final String WRITE_TEXT_ANNOTATIONS_STATE = "enhancer.engine.restful.nlp.analysis.write-textannotations";
public static final boolean DEFAULT_WRITE_TEXT_ANNOTATION_STATE = true;
/**
* Language configuration. Takes a list of ISO language codes to be processed
* by this engine. This list will be joined with the list of languages supported
* by the RESTful NLP analysis service.
*/
public static final String CONFIG_LANGUAGES = "enhancer.engine.restful.nlp.languages";
/**
* The maximum size of the preix/suffix for the selection context
*/
private static final int DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
//TODO: move those sentiment specific constants to o.a.s.enhancer.servicesapi as soon as
// Sentiment Annotations are normalized.
// NOTE: they are also define in the Sentiment Summarization engine!
/**
* The property used to write the sum of all positive classified words
*/
public static final IRI POSITIVE_SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"positive-sentiment");
/**
* The property used to write the sum of all negative classified words
*/
public static final IRI NEGATIVE_SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"negative-sentiment");
/**
* The sentiment of the section (sum of positive and negative classifications)
*/
public static final IRI SENTIMENT_PROPERTY = new IRI(NamespaceEnum.fise+"sentiment");
/**
* The dc:type value used for fise:TextAnnotations indicating a Sentiment
*/
public static final IRI SENTIMENT_TYPE = new IRI(NamespaceEnum.fise+"Sentiment");
/**
* The dc:Type value sued for the sentiment annotation of the whole document
*/
public static final IRI DOCUMENT_SENTIMENT_TYPE = new IRI(NamespaceEnum.fise+"DocumentSentiment");
private static final Map<String,Object> SERVICE_PROPERTIES;
static {
Map<String,Object> props = new HashMap<String,Object>();
//by default register as Tokenizing engine
props.put(ServiceProperties.ENHANCEMENT_ENGINE_ORDERING,
ServiceProperties.ORDERING_NLP_TOKENIZING);
// props.put(NlpServiceProperties.ENHANCEMENT_ENGINE_NLP_ROLE,
// NlpProcessingRole.Tokenizing);
SERVICE_PROPERTIES = Collections.unmodifiableMap(props);
}
private static Logger log = LoggerFactory.getLogger(RestfulNlpAnalysisEngine.class);
private URI analysisServiceUrl;
//Langauge configuration
private LanguageConfiguration languageConfig = new LanguageConfiguration(CONFIG_LANGUAGES,new String[]{"*"});
private final Set<String> supportedLanguages = new HashSet<String>();
protected DefaultHttpClient httpClient;
private BasicHttpParams httpParams;
private PoolingClientConnectionManager connectionManager;
@Reference
private AnalysedTextFactory analysedTextFactory;
/**
* Used to parse {@link AnalysedText} instances from responses of the
* RESTful analysis service.
*/
@Reference
private AnalyzedTextParser analyzedTextParser;
private boolean writeTextAnnotations;
private Boolean serviceInitialised;
private Dictionary<String, Object> config;
/**
* Indicate if this engine can enhance supplied ContentItem, and if it
* suggests enhancing it synchronously or asynchronously. The
* {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager} can force sync/async mode if desired, it is
* just a suggestion from the engine.
* <p/>
* Returns ENHANCE_ASYNC in case there is a text/plain content part and a tagger for the language identified for
* the content item, CANNOT_ENHANCE otherwise.
*
* @throws org.apache.stanbol.enhancer.servicesapi.EngineException
* if the introspecting process of the content item
* fails
*/
@Override
public int canEnhance(ContentItem ci) throws EngineException {
// check if content is present
Map.Entry<IRI,Blob> entry = NlpEngineHelper.getPlainText(this, ci, false);
if(entry == null || entry.getValue() == null) {
return CANNOT_ENHANCE;
}
checkRESTfulNlpAnalysisService();
String language = getLanguage(this,ci,false);
if(language == null) {
return CANNOT_ENHANCE;
}
if(!languageConfig.isLanguage(language)){
log.trace(" > can NOT enhance ContentItem {} because language {} is "
+ "not enabled by this engines configuration",ci,language);
return CANNOT_ENHANCE;
}
if(!supportedLanguages.contains(language)){
log.trace(" > the RESTful Analysis service does not support '{}' (supported: {})",
language, supportedLanguages);
return CANNOT_ENHANCE;
}
log.trace(" > can enhance ContentItem {} with language {}",ci,language);
return ENHANCE_ASYNC;
}
/**
* Compute enhancements for supplied ContentItem. The results of the process
* are expected to be stored in the metadata of the content item.
* <p/>
* The client (usually an {@link org.apache.stanbol.enhancer.servicesapi.EnhancementJobManager}) should take care of
* persistent storage of the enhanced {@link org.apache.stanbol.enhancer.servicesapi.ContentItem}.
* <p/>
* This method creates a new POSContentPart using {@link org.apache.stanbol.enhancer.engines.pos.api.POSTaggerHelper#createContentPart} from a text/plain part and
* stores it as a new part in the content item. The metadata is not changed.
*
* @throws org.apache.stanbol.enhancer.servicesapi.EngineException
* if the underlying process failed to work as
* expected
*/
@Override
public void computeEnhancements(final ContentItem ci) throws EngineException {
checkRESTfulNlpAnalysisService(); //validate that the service is active
//get/create the AnalysedText
final AnalysedText at = NlpEngineHelper.initAnalysedText(this, analysedTextFactory, ci);
final Blob blob = at.getBlob();
//send the text to the server
final String language = getLanguage(this, ci, true);
final HttpPost request = new HttpPost(analysisServiceUrl);
request.addHeader(HttpHeaders.CONTENT_LANGUAGE, language);
request.setEntity(new InputStreamEntity(
blob.getStream(), blob.getContentLength(),
ContentType.create(blob.getMimeType(),
blob.getParameter().get("charset"))));
//execute the request
try {
AccessController.doPrivileged(new PrivilegedExceptionAction<AnalysedText>() {
public AnalysedText run() throws ClientProtocolException, IOException {
return httpClient.execute(request, new AnalysisResponseHandler(at));
}
});
} catch (PrivilegedActionException pae) {
Exception e = pae.getException();
if(e instanceof ClientProtocolException) {
//force re-initialisation upon error
setRESTfulNlpAnalysisServiceUnavailable();
throw new EngineException(this, ci, "Exception while executing Request "
+ "on RESTful NLP Analysis Service at "+analysisServiceUrl, e);
} else if(e instanceof IOException) {
//force re-initialisation upon error
setRESTfulNlpAnalysisServiceUnavailable();
throw new EngineException(this, ci, "Exception while executing Request "
+ "on RESTful NLP Analysis Service at "+analysisServiceUrl, e);
} else {
throw RuntimeException.class.cast(e);
}
}
if(writeTextAnnotations){
//if enabled fise:TextAnnotations are created for Named Entities and Sentiments
double positiveSent = 0.0;
int positiveCount = 0;
double negativeSent = 0.0;
int negativeCount = 0;
int sentimentCount = 0;
Iterator<Span> spans = at.getEnclosed(EnumSet.of(SpanTypeEnum.Sentence,SpanTypeEnum.Chunk));
Sentence context = null;
Graph metadata = ci.getMetadata();
Language lang = new Language(language);
LiteralFactory lf = LiteralFactory.getInstance();
ci.getLock().writeLock().lock();
try { //write TextAnnotations for Named Entities
while(spans.hasNext()){
Span span = spans.next();
switch (span.getType()) {
case Sentence:
context = (Sentence)span;
//FALLThrough intended!!
default:
Value<NerTag> nerAnno = span.getAnnotation(NER_ANNOTATION);
if(nerAnno != null){
IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
//add span related data
metadata.add(new TripleImpl(ta, ENHANCER_SELECTED_TEXT,
new PlainLiteralImpl(span.getSpan(), lang)));
metadata.add(new TripleImpl(ta, ENHANCER_START,
lf.createTypedLiteral(span.getStart())));
metadata.add(new TripleImpl(ta, ENHANCER_END,
lf.createTypedLiteral(span.getEnd())));
metadata.add(new TripleImpl(ta, ENHANCER_SELECTION_CONTEXT,
new PlainLiteralImpl(context == null ?
getDefaultSelectionContext(at.getSpan(), span.getSpan(), span.getStart()) :
context.getSpan(), lang)));
//add the NER type
if(nerAnno.value().getType() != null){
metadata.add(new TripleImpl(ta,DC_TYPE,nerAnno.value().getType()));
}
if(nerAnno.probability() >= 0) {
metadata.add(new TripleImpl(ta, ENHANCER_CONFIDENCE,
lf.createTypedLiteral(nerAnno.probability())));
}
}
Value<Double> sentimentAnnotation = span.getAnnotation(SENTIMENT_ANNOTATION);
if (sentimentAnnotation != null) { //this span has a sentiment assigned
Double sentiment = sentimentAnnotation.value();
//Create a fise:TextAnnotation for the sentiment
IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
metadata.add(new TripleImpl(ta, ENHANCER_START,
lf.createTypedLiteral(span.getStart())));
metadata.add(new TripleImpl(ta, ENHANCER_END,
lf.createTypedLiteral(span.getEnd())));
metadata.add(new TripleImpl(ta, SENTIMENT_PROPERTY,
lf.createTypedLiteral(sentiment)));
//add the generic dc:type used for all Sentiment annotation
metadata.add(new TripleImpl(ta, DC_TYPE, SENTIMENT_TYPE));
//determine the specific dc:type for the sentiment annotation
IRI ssoType = NIFHelper.SPAN_TYPE_TO_SSO_TYPE.get(span.getType());
if(ssoType != null){
metadata.add(new TripleImpl(ta, DC_TYPE, ssoType));
}
//keep statistics for the overall sentiment for the Document
sentimentCount++ ;
if(sentiment > 0){
positiveSent += sentiment;
positiveCount++;
}else if(sentiment < 0){
negativeSent += sentiment;
negativeCount++;
}
}
break;
}
}
//Add the annotation for the overall sentiment of the document
if ( sentimentCount > 0 ) {
IRI ta = EnhancementEngineHelper.createTextEnhancement(ci, this);
//calculate the average sentiment for a document
//TODO: Think on a better way to calculate a general sentiment value for a document.
metadata.add(new TripleImpl(ta, SENTIMENT_PROPERTY,
lf.createTypedLiteral((positiveSent + negativeSent) / sentimentCount)));
if ( positiveCount > 0 ){
//average positive sentiment calculation for the document
metadata.add(new TripleImpl(ta, POSITIVE_SENTIMENT_PROPERTY,
lf.createTypedLiteral( positiveSent / positiveCount )));
}
if ( negativeCount > 0 ){
//average negative sentiment calculation for the document
metadata.add(new TripleImpl(ta, NEGATIVE_SENTIMENT_PROPERTY,
lf.createTypedLiteral( negativeSent / negativeCount )));
}
metadata.add(new TripleImpl(ta, DC_TYPE, SENTIMENT_TYPE));
metadata.add(new TripleImpl(ta, DC_TYPE, DOCUMENT_SENTIMENT_TYPE));
} // no sentiment annotation present ... nothing to do
} finally {
ci.getLock().writeLock().unlock();
}
} //else do not write fise:TextAnnotations
}
protected class AnalysisResponseHandler implements ResponseHandler<AnalysedText>{
protected final AnalysedText at;
protected AnalysisResponseHandler(AnalysedText at){
this.at = at;
}
@Override
public AnalysedText handleResponse(HttpResponse response) throws ClientProtocolException, IOException {
StatusLine statusLine = response.getStatusLine();
HttpEntity entity = response.getEntity();
if (statusLine.getStatusCode() >= 300) {
String reason;
if(entity != null) {
StringBuilder sb = new StringBuilder(statusLine.getReasonPhrase());
String message = EntityUtils.toString(entity);
if(message != null && !message.isEmpty()){
sb.append("\nMessage:\n").append(message);
}
reason = sb.toString();
} else {
reason = statusLine.getReasonPhrase();
}
EntityUtils.consume(entity);
throw new HttpResponseException(statusLine.getStatusCode(), reason);
}
//parse the results
InputStream in = null;
try {
in = entity.getContent();
Charset charset = entity.getContentEncoding() != null ?
Charset.forName(entity.getContentEncoding().getValue()) : UTF8;
return analyzedTextParser.parse(in, charset, at);
} finally {
//ensure that the stream is closed
IOUtils.closeQuietly(in);
}
}
}
@Override
public Map<String,Object> getServiceProperties() {
return SERVICE_PROPERTIES;
}
/**
* Activate and read the properties. Configures and initialises a POSTagger for each language configured in
* CONFIG_LANGUAGES.
*
* @param ce the {@link org.osgi.service.component.ComponentContext}
*/
@Activate
protected void activate(ComponentContext ce) throws ConfigurationException, IOException {
super.activate(ce);
log.info("activate {} '{}'",getClass().getSimpleName(),getName());
config = ce.getProperties();
Object value = config.get(ANALYSIS_SERVICE_URL);
if(value == null){
throw new ConfigurationException(ANALYSIS_SERVICE_URL,
"The RESTful Analysis Service URL is missing in the provided configuration!");
} else {
try {
analysisServiceUrl = new URI(value.toString());
log.info(" ... service: {}",analysisServiceUrl);
} catch (URISyntaxException e) {
throw new ConfigurationException(ANALYSIS_SERVICE_URL,
"The parsed RESTful Analysis Service URL '"+ value
+ "'is not a valid URL!",e);
}
}
String usr;
String pwd;
value = config.get(ANALYSIS_SERVICE_USER);
if(value != null && !value.toString().isEmpty()){
usr = value.toString();
value = config.get(ANALYSIS_SERVICE_PWD);
pwd = value == null ? null : value.toString();
} else { // no user set
usr = null;
pwd = null;
}
//init the http client
httpParams = new BasicHttpParams();
httpParams.setParameter(CoreProtocolPNames.USER_AGENT, "Apache Stanbol RESTful NLP Analysis Engine");
httpParams.setBooleanParameter(ClientPNames.HANDLE_REDIRECTS, true);
httpParams.setIntParameter(ClientPNames.MAX_REDIRECTS, 3);
httpParams.setBooleanParameter(CoreConnectionPNames.SO_KEEPALIVE, true);
connectionManager = new PoolingClientConnectionManager();
connectionManager.setMaxTotal(20);
connectionManager.setDefaultMaxPerRoute(20);
//NOTE: The list of supported languages is the combination of the
// languages enabled by the configuration (#languageConfig) and the
// languages supported by the RESTful NLP Analysis Service
// (#supportedLanguages)
//init the language configuration with the engine configuration
languageConfig.setConfiguration(config);
httpClient = new DefaultHttpClient(connectionManager,httpParams);
if(usr != null){
log.info(" ... setting user to {}",usr);
httpClient.getCredentialsProvider().setCredentials(AuthScope.ANY,
new UsernamePasswordCredentials(usr, pwd));
// And add request interceptor to have preemptive authentication
httpClient.addRequestInterceptor(new PreemptiveAuthInterceptor(), 0);
}
//STANBOL-1389: deactivated initialization during activation as this can create
//issues in cases where Stanbol and the NLP service do run in the same
//servlet container.
//initRESTfulNlpAnalysisService();
value = config.get(WRITE_TEXT_ANNOTATIONS_STATE);
if(value instanceof Boolean){
this.writeTextAnnotations = ((Boolean)value).booleanValue();
} else if(value != null){
this.writeTextAnnotations = Boolean.parseBoolean(value.toString());
} else {
this.writeTextAnnotations = DEFAULT_WRITE_TEXT_ANNOTATION_STATE;
}
}
/**
* @throws EngineException
*/
private void checkRESTfulNlpAnalysisService() throws EngineException {
if(!initRESTfulNlpAnalysisService()){
throw new EngineException("The configured RESTful NLP Analysis Service is "
+ "currently not available (url: '"+analysisServiceUrl+"')");
}
}
/**
* to be called after handling an exception while calling the remote service
* that indicates that the service is no longer available.
*/
private void setRESTfulNlpAnalysisServiceUnavailable(){
serviceInitialised = false;
supportedLanguages.clear();
}
/**
* initialises the RESRfulNlpAnalysis if not yet done.
*/
private boolean initRESTfulNlpAnalysisService() {
if(serviceInitialised != null && serviceInitialised){
return true; //already initialised
}
if(serviceInitialised == null){
log.info(" ... checking configured RESTful NLP Analysis service {}", analysisServiceUrl);
serviceInitialised = false;
} else {
log.info(" ... re-trying to initialise RESTful NLP Analysis service {}", analysisServiceUrl);
}
//get the supported languages
String supported;
try {
supported = AccessController.doPrivileged(new PrivilegedExceptionAction<String>() {
public String run() throws IOException {
HttpGet request = new HttpGet(analysisServiceUrl);
request.setHeader(HttpHeaders.ACCEPT, ContentType.APPLICATION_JSON.toString());
return httpClient.execute(request,new BasicResponseHandler());
}
});
serviceInitialised = true;
} catch (PrivilegedActionException pae) {
Exception e = pae.getException();
setRESTfulNlpAnalysisServiceUnavailable();
if(e instanceof IOException){
log.warn("Unable to initialise RESTful NLP Analysis Service!", e);
return false;
} else {
throw RuntimeException.class.cast(e);
}
}
//NOTE: The list of supported languages is the combination of the
// languages enabled by the configuration (#languageConfig) and the
// languages supported by the RESTful NLP Analysis Service
// (#supportedLanguages)
//parse the supported languages from the initialization response
StringTokenizer st = new StringTokenizer(supported, "{[\",]}");
while(st.hasMoreElements()){
supportedLanguages.add(st.nextToken());
}
return true;
}
@Deactivate
protected void deactivate(ComponentContext context) {
languageConfig.setDefault();
supportedLanguages.clear();
//shutdown the Http Client
httpClient = null;
httpParams = null;
connectionManager.shutdown();
connectionManager = null;
serviceInitialised = null;
super.deactivate(context);
}
/**
* Extracts the selection context based on the content, selection and
* the start char offset of the selection
* @param content the content
* @param selection the selected text
* @param selectionStartPos the start char position of the selection
* @return the context
*/
private String getDefaultSelectionContext(String content, String selection,int selectionStartPos){
//extract the selection context
int beginPos;
if(selectionStartPos <= DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE){
beginPos = 0;
} else {
int start = selectionStartPos-DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
beginPos = content.indexOf(' ',start);
if(beginPos < 0 || beginPos >= selectionStartPos){ //no words
beginPos = start; //begin within a word
}
}
int endPos;
if(selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= content.length()){
endPos = content.length();
} else {
int start = selectionStartPos+selection.length()+DEFAULT_SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
endPos = content.lastIndexOf(' ', start);
if(endPos <= selectionStartPos+selection.length()){
endPos = start; //end within a word;
}
}
return content.substring(beginPos, endPos);
}
/**
* HttpRequestInterceptor for preemptive authentication, based on httpclient
* 4.0 example
*/
private static class PreemptiveAuthInterceptor implements HttpRequestInterceptor {
public void process(HttpRequest request, HttpContext context) throws HttpException, IOException {
AuthState authState = (AuthState) context.getAttribute(ClientContext.TARGET_AUTH_STATE);
CredentialsProvider credsProvider = (CredentialsProvider) context.getAttribute(ClientContext.CREDS_PROVIDER);
HttpHost targetHost = (HttpHost) context.getAttribute(ExecutionContext.HTTP_TARGET_HOST);
// If not auth scheme has been initialized yet
if (authState.getAuthScheme() == null) {
AuthScope authScope = new AuthScope(targetHost.getHostName(), targetHost.getPort());
// Obtain credentials matching the target host
Credentials creds = credsProvider.getCredentials(authScope);
// If found, generate BasicScheme preemptively
if (creds != null) {
authState.update(new BasicScheme(), creds);
}
}
}
}
}