blob: 2a62e1329d63782345170e435239fd319baf5ead [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.solrcas;
import java.io.IOException;
import java.io.InputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Map;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.impl.HttpSolrServer;
import org.apache.solr.common.SolrInputDocument;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.CasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.resource.ResourceAccessException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.xml.sax.SAXException;
/**
* CAS Consumer to write on a Solr instance
*/
public class SolrCASConsumer extends CasAnnotator_ImplBase {
private static final String CLASSPATH = "classpath:";
private static final String FILEPATH = "file://";
private static final String EMPTY_STRING = "";
protected SolrServer solrServer;
private SolrMappingConfiguration mappingConfig;
private boolean autoCommit;
@Override
public void initialize(UimaContext context) throws ResourceInitializationException {
super.initialize(context);
/* create the SolrServer*/
try {
this.solrServer = createServer();
} catch (Exception e) {
context.getLogger().log(Level.SEVERE, "Unable to initialize SolrServer properly");
throw new ResourceInitializationException(e);
}
/* create the mapping configuration */
try{
this.mappingConfig = createSolrMappingConfiguration();
} catch (Exception e) {
context.getLogger().log(Level.SEVERE, "Unable to initialize Solr mapping configuration properly");
throw new ResourceInitializationException(e);
}
/* set Solr autoCommit parameter */
try {
this.autoCommit = getAutoCommitValue();
} catch (Exception e) {
context.getLogger().log(Level.SEVERE, "Unable to initialize Solr autoCommit parameter properly");
throw new ResourceInitializationException(e);
}
}
@Override
public void typeSystemInit(TypeSystem typeSystem) throws AnalysisEngineProcessException {
super.typeSystemInit(typeSystem);
for (String key : mappingConfig.getFeatureStructuresMapping().keySet()) {
Type type = typeSystem.getType(key);
if (type==null) {
throw new AnalysisEngineProcessException("required_feature_structure_missing_from_cas",
new Object[]{key});
}
Map<String, String> stringStringMap = mappingConfig.getFeatureStructuresMapping().get(key);
for (String featureName : stringStringMap.keySet()) {
if (!"coveredText".equals(featureName) && type.getFeatureByBaseName(featureName)==null) {
throw new AnalysisEngineProcessException("required_attribute_missing",
new Object[]{featureName,type});
}
}
}
}
public void process(CAS cas) throws AnalysisEngineProcessException {
// create the SolrDocument from the CAS object basing on the mapping configuration
SolrInputDocument document = createDocument(cas);
// send the SolrDocument to SolrServer
try {
solrServer.add(document);
} catch (Exception e) {
getContext().getLogger().log(Level.SEVERE, new StringBuilder("Error while adding document").
append(document.toString()).toString());
throw new AnalysisEngineProcessException(e);
}
// if AutoCommit is enabled send the commit message to the SolrServer
if (!autoCommit) {
try {
solrServer.commit();
} catch (Exception e) {
getContext().getLogger().log(Level.SEVERE, new StringBuilder("Error while committing document").
append(document.toString()).toString());
throw new AnalysisEngineProcessException(e);
}
}
}
/* allows retrieving of a URI from a path specifying one of:
* file://absolute/path
* http://something.com/res.ext
* classpath:/path/to/something.xml
* data/path/relative/file.ext
*/
protected URI getURI(String path) throws ResourceAccessException, IOException, URISyntaxException {
URI uri;
if (path.startsWith(CLASSPATH)) {
uri = System.class.getResource(path.replaceFirst(CLASSPATH, EMPTY_STRING)).toURI();
} else {
uri = UriUtils.create(path); // this supports file://ABSOLUTE_PATH and http://URL
if (!uri.isAbsolute()) {
String dataPath = getContext().getDataPath().replace('\\', '/');
if (dataPath.matches("[a-zA-Z]\\:.*")) {
dataPath = "/" + dataPath;
}
uri = UriUtils.create(new StringBuilder(FILEPATH).append(dataPath).
append("/").append(path.replace(FILEPATH, EMPTY_STRING)).toString()); // this supports relative file paths
}
}
return uri;
}
private boolean getAutoCommitValue() {
boolean autoCommitValue = false;
Object autoCommitParam = getContext().getConfigParameterValue("autoCommit");
if (autoCommitParam != null && autoCommitParam.toString().length() > 0)
autoCommitValue = Boolean.valueOf(autoCommitParam.toString());
return autoCommitValue;
}
private SolrMappingConfiguration createSolrMappingConfiguration()
throws IOException, ResourceAccessException, ParserConfigurationException, SAXException, URISyntaxException {
FieldMappingReader fieldMappingReader = new FieldMappingReader();
String mappingFileParam = String.valueOf(getContext().getConfigParameterValue("mappingFile"));
InputStream input = getURI(mappingFileParam).toURL().openStream();
return fieldMappingReader.getConf(input);
}
protected SolrServer createServer() throws SolrServerException {
SolrServer solrServer = null;
try {
/* get Solr type*/
String solrInstanceTypeParam = String.valueOf(getContext().
getConfigParameterValue("solrInstanceType"));
/* get Solr Path */
String solrPathParam = String.valueOf(getContext().
getConfigParameterValue("solrPath"));
if (solrInstanceTypeParam.equalsIgnoreCase("http")) {
solrServer = new HttpSolrServer(solrPathParam);
}
} catch (Exception e) {
throw new SolrServerException("Error creating SolrServer", e);
}
return solrServer;
}
/* create a SolrDocument from the current CAS object and the mapping configuration */
private SolrInputDocument createDocument(CAS cas) {
SolrInputDocument document = new SolrInputDocument();
if (mappingConfig.getDocumentTextMapping() != null && mappingConfig.getDocumentTextMapping().length() > 0)
document.addField(mappingConfig.getDocumentTextMapping(), cas.getDocumentText());
if (mappingConfig.getDocumentLanguageMapping() != null && mappingConfig.getDocumentLanguageMapping().length() > 0)
document.addField(mappingConfig.getDocumentLanguageMapping(), cas.getDocumentLanguage());
for (String key : mappingConfig.getFeatureStructuresMapping().keySet()) {
Type type = cas.getTypeSystem().getType(key);
for (FSIterator<FeatureStructure> iterator = cas.getIndexRepository().getAllIndexedFS(type); iterator
.hasNext();) {
FeatureStructure fs = iterator.next();
Map<String, String> stringStringMap = mappingConfig.getFeatureStructuresMapping().get(key);
for (String featureName : stringStringMap.keySet()) {
String fieldName = stringStringMap.get(featureName);
String featureValue;
if (fs instanceof AnnotationFS && "coveredText".equals(featureName)) {
featureValue = ((AnnotationFS) fs).getCoveredText();
} else {
Feature feature = type.getFeatureByBaseName(featureName);
featureValue = fs.getFeatureValueAsString(feature);
}
document.addField(fieldName, featureValue);
}
}
}
return document;
}
}