blob: 4a1859a7483a1144e4a8ae50a5c22291140b7340 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.ruta.descriptor;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.commons.lang3.StringUtils;
import org.apache.uima.ResourceSpecifierFactory;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngineDescription;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.impl.ResourceManager_impl;
import org.apache.uima.resource.metadata.Capability;
import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
import org.apache.uima.resource.metadata.FsIndexDescription;
import org.apache.uima.resource.metadata.Import;
import org.apache.uima.resource.metadata.TypeDescription;
import org.apache.uima.resource.metadata.TypeSystemDescription;
import org.apache.uima.resource.metadata.impl.Import_impl;
import org.apache.uima.ruta.UIMAConstants;
import org.apache.uima.ruta.engine.RutaEngine;
import org.apache.uima.util.CasCreationUtils;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.XMLInputSource;
import org.apache.uima.util.XMLSerializer;
import org.apache.uima.util.XMLizable;
import org.springframework.core.io.Resource;
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;
public class RutaDescriptorBuilder {
/** Shared UIMA factory used to create empty descriptor objects such as type systems and capabilities. */
private static ResourceSpecifierFactory uimaFactory = UIMAFramework.getResourceSpecifierFactory();
/** Location of the type system descriptor that is parsed as the starting point of every generated type system. */
private final URL defaultTypeSystem;
/** Location of the analysis engine descriptor that is parsed as the template of every generated engine. */
private final URL defaultAnalysisEngine;
/**
 * Creates a builder that derives its descriptors from the two given template descriptors.
 * The URLs are only stored here; they are parsed when a descriptor is actually created.
 *
 * @param defaultTypeSystem
 *          location of the basic type system descriptor
 * @param defaultAnalysisEngine
 *          location of the basic analysis engine descriptor
 * @throws InvalidXMLException
 *           declared for callers; not raised by this constructor body
 * @throws IOException
 *           declared for callers; not raised by this constructor body
 */
public RutaDescriptorBuilder(URL defaultTypeSystem, URL defaultAnalysisEngine)
    throws InvalidXMLException, IOException {
  this.defaultAnalysisEngine = defaultAnalysisEngine;
  this.defaultTypeSystem = defaultTypeSystem;
}
/**
 * Creates the type system description for the script described by {@code desc}.
 * <p>
 * The result imports (or, where an import is not possible, inlines) the default type system,
 * every type system imported by the script and the type system of every imported script, and
 * then adds the types and features declared by the script itself.
 *
 * @param desc
 *          the information collected from the script (declared types, features, imports)
 * @param typeSystemOutput
 *          path of the file the type system is meant to be written to; used to compute relative
 *          import locations and as the source URL of the result
 * @param options
 *          build options (import style, class loader, type system suffix)
 * @param enginePaths
 *          directories used to locate imported descriptors, may be {@code null}
 * @return the assembled type system description (it is not written to disk here)
 * @throws FileNotFoundException
 *           if an imported type system or script type system cannot be found
 * @throws InvalidXMLException
 *           if a descriptor cannot be parsed or an import cannot be resolved
 * @throws ResourceInitializationException
 *           if the temporary CAS or the merged type system cannot be created
 * @throws IOException
 *           if a descriptor cannot be read
 * @throws URISyntaxException
 *           if the URL of a located descriptor cannot be converted to a URI
 */
public TypeSystemDescription createTypeSystemDescription(RutaDescriptorInformation desc,
    String typeSystemOutput, RutaBuildOptions options, String[] enginePaths)
    throws InvalidXMLException, ResourceInitializationException, IOException,
    URISyntaxException {
  ResourceManager rm = UIMAFramework.newDefaultResourceManager();
  if (options.getClassLoader() != null) {
    // The class-loader-aware manager has to replace the default one, otherwise imports that
    // are only reachable through the supplied class loader cannot be resolved below.
    rm = new ResourceManager_impl(options.getClassLoader());
  }
  if (enginePaths != null) {
    StringBuilder dataPath = new StringBuilder();
    for (String enginePath : enginePaths) {
      dataPath.append(enginePath).append(File.pathSeparator);
    }
    rm.setDataPath(dataPath.toString());
  }

  // Collect short name -> qualified name for all types of the default type system. A temporary
  // CAS is used so that the built-in types are part of the map as well.
  Map<String, String> typeNameMap = new HashMap<String, String>();
  TypeSystemDescription initialTypeSystem = UIMAFramework.getXMLParser()
      .parseTypeSystemDescription(new XMLInputSource(defaultTypeSystem));
  CAS cas = CasCreationUtils.createCas(initialTypeSystem, null, new FsIndexDescription[0]);
  fillTypeNameMap(typeNameMap, cas.getTypeSystem());
  cas.release();

  List<TypeSystemDescription> toInclude = new ArrayList<TypeSystemDescription>();
  List<Import> importList = new ArrayList<Import>();

  // --- default type system ---
  Import_impl import_impl = new Import_impl();
  if (options.isImportByName()) {
    import_impl.setName(initialTypeSystem.getName());
  } else if (options.isResolveImports()) {
    import_impl.setLocation(initialTypeSystem.getSourceUrlString());
  } else {
    URI uri = null;
    try {
      uri = defaultTypeSystem.toURI();
    } catch (URISyntaxException e) {
      // no usable URI: the default type system is included inline below
    }
    String relativeLocation = uri == null ? null : getRelativeLocation(uri, typeSystemOutput);
    if (relativeLocation != null) {
      import_impl.setLocation(relativeLocation);
    } else {
      toInclude.add(initialTypeSystem);
    }
  }
  if (import_impl.getLocation() != null || import_impl.getName() != null) {
    importList.add(import_impl);
  }

  // --- type systems imported by the script ---
  for (String eachName : desc.getImportedTypeSystems()) {
    String locate = RutaEngine.locate(eachName, enginePaths, ".xml");
    URL url = null;
    boolean include = false;
    if (locate != null) {
      url = new File(locate).toURI().toURL();
    }
    if (url == null) {
      url = checkImportExistence(eachName, ".xml", options.getClassLoader());
      include = true;
      if (url == null) {
        throw new FileNotFoundException("Build process can't find " + eachName + " in "
            + desc.getScriptName());
      }
    }
    TypeSystemDescription each = getTypeSystemDescriptor(url, options, rm);
    if (each == null) {
      throw new FileNotFoundException("Build process can't find " + eachName + " in "
          + desc.getScriptName());
    }
    fillTypeNameMap(typeNameMap, each);
    if (include) {
      // need to include the complete type system because an import is not possible
      each.resolveImports(rm);
      toInclude.add(each);
      continue;
    }
    import_impl = new Import_impl();
    if (options.isImportByName()) {
      import_impl.setName(eachName);
    } else if (options.isResolveImports()) {
      import_impl.setLocation(each.getSourceUrlString());
    } else {
      String relativeLocation = getRelativeLocation(url.toURI(), typeSystemOutput);
      File parentFile = new File(typeSystemOutput).getParentFile();
      // A null relative location means no relative path could be computed; treat it like a
      // relative path that does not lead to the file instead of failing on new File(..., null).
      boolean ableToFindFile = relativeLocation != null
          && new File(parentFile, relativeLocation).exists();
      if (!ableToFindFile) {
        // hotfix for different partitions making trouble for the relative path
        import_impl.setName(eachName);
      } else {
        import_impl.setLocation(relativeLocation);
      }
    }
    importList.add(import_impl);
  }

  // --- type systems of imported scripts ---
  for (String eachName : desc.getImportedScripts()) {
    String descriptorSuffix = options.getTypeSystemSuffix() + ".xml";
    String locate = RutaEngine.locate(eachName, enginePaths, descriptorSuffix);
    URL url = null;
    if (locate != null) {
      url = new File(locate).toURI().toURL();
    }
    if (url == null) {
      url = checkImportExistence(eachName, descriptorSuffix, options.getClassLoader());
      if (url == null) {
        throw new FileNotFoundException("Build process can't find " + eachName
            + options.getTypeSystemSuffix() + ".xml" + " in " + desc.getScriptName());
      }
    }
    TypeSystemDescription each = getTypeSystemDescriptor(url, options, rm);
    if (each == null) {
      throw new FileNotFoundException("Build process can't find " + eachName + " in "
          + desc.getScriptName());
    }
    fillTypeNameMap(typeNameMap, each);
    import_impl = new Import_impl();
    if (options.isImportByName()) {
      import_impl.setName(eachName + options.getTypeSystemSuffix());
    } else if (options.isResolveImports()) {
      import_impl.setLocation(each.getSourceUrlString());
    } else {
      String relativeLocation = getRelativeLocation(url.toURI(), typeSystemOutput);
      if (relativeLocation != null) {
        import_impl.setLocation(relativeLocation);
      } else {
        // No relative path available (e.g. the descriptor was found on the class path): fall
        // back to an import by name rather than emitting an import without any target.
        import_impl.setName(eachName + options.getTypeSystemSuffix());
      }
    }
    importList.add(import_impl);
  }

  TypeSystemDescription typeSystemDescription = CasCreationUtils.mergeTypeSystems(toInclude, rm);
  if (!importList.isEmpty()) {
    typeSystemDescription.setImports(importList.toArray(new Import[0]));
  }
  if (options.isResolveImports()) {
    typeSystemDescription.resolveImports(rm);
  }

  // TODO hotfixes: where do I get the final types??
  Set<String> finalTypes = new HashSet<String>(Arrays.asList("uima.cas.Boolean",
      "uima.cas.Byte", "uima.cas.Short", "uima.cas.Integer", "uima.cas.Long", "uima.cas.Float",
      "uima.cas.Double", "uima.cas.BooleanArray", "uima.cas.ByteArray", "uima.cas.ShortArray",
      "uima.cas.IntegerArray", "uima.cas.LongArray", "uima.cas.FloatArray",
      "uima.cas.DoubleArray", "uima.cas.StringArray", "uima.cas.FSArray"));

  // --- types declared by the script ---
  int typeIndex = 0;
  for (String eachType : desc.getTypeShortNames()) {
    // Short names and triples are walked in lockstep, so the index is advanced on every
    // iteration - including those that skip an already present type.
    StringTriple typeTriple = desc.getTypeTriples().get(typeIndex++);
    typeTriple = resolveType(typeTriple, typeNameMap, desc.getScriptName());
    if (typeSystemDescription.getType(typeTriple.getName()) != null) {
      continue;
    }
    if (finalTypes.contains(typeTriple.getParent())) {
      // types listed as final cannot be used as a parent
      continue;
    }
    TypeDescription newType = typeSystemDescription.addType(typeTriple.getName(),
        typeTriple.getDescription(), typeTriple.getParent());
    Collection<StringTriple> collection = desc.getFeatures().get(eachType);
    if (collection != null) {
      for (StringTriple eachFeature : collection) {
        eachFeature = resolveFeature(eachFeature, typeNameMap);
        newType.addFeature(eachFeature.getName(), eachFeature.getDescription(),
            eachFeature.getParent());
      }
    }
  }

  Set<String> names = new HashSet<String>();
  Collection<TypeDescription> types = new HashSet<TypeDescription>();
  for (TypeDescription each : typeSystemDescription.getTypes()) {
    String name = each.getName();
    if (!names.contains(name)) {
      names.add(name);
      types.add(each);
    }
  }
  File typeSystemFile = getFile(typeSystemOutput);
  // NOTE(review): every present type is added again here, so the set ends up holding all
  // distinct type descriptions regardless of the name filter above - confirm whether a
  // de-duplication by name was intended.
  TypeDescription[] presentTypes = typeSystemDescription.getTypes();
  types.addAll(Arrays.asList(presentTypes));
  typeSystemDescription.setTypes(types.toArray(new TypeDescription[0]));
  typeSystemDescription.setName(desc.getScriptName() + options.getTypeSystemSuffix());
  typeSystemDescription.setSourceUrl(typeSystemFile.toURI().toURL());
  return typeSystemDescription;
}
/**
 * Creates the analysis engine description for the script described by {@code desc}.
 * <p>
 * The engine references the script's type system through a single import: by name when the
 * options request imports by name, otherwise by a location relative to {@code engineOutput}.
 *
 * @param desc
 *          the information collected from the script
 * @param typeSystemDescription
 *          the script's type system, only consulted for its name; may be {@code null}
 * @param typeSystemOutput
 *          path of the type system descriptor file, may be {@code null}
 * @param engineOutput
 *          path of the file the engine descriptor is meant to be written to
 * @param options
 *          build options
 * @param scriptPaths
 *          value for the script paths parameter, may be {@code null}
 * @param enginePaths
 *          value for the descriptor paths parameter, may be {@code null}
 * @param resourcePaths
 *          additional resource paths, may be {@code null}
 * @return the configured analysis engine description (it is not written to disk here)
 * @throws InvalidXMLException
 *           if the default analysis engine descriptor cannot be parsed
 * @throws IOException
 *           if the default analysis engine descriptor cannot be read
 */
public AnalysisEngineDescription createAnalysisEngineDescription(RutaDescriptorInformation desc,
    TypeSystemDescription typeSystemDescription, String typeSystemOutput,
    String engineOutput, RutaBuildOptions options, String[] scriptPaths,
    String[] enginePaths, String[] resourcePaths) throws InvalidXMLException, IOException {
  Import_impl typeSystemImport = new Import_impl();
  boolean byName = options.isImportByName();
  if (byName && typeSystemDescription != null) {
    typeSystemImport.setName(typeSystemDescription.getName());
  } else if (!byName && typeSystemOutput != null) {
    URI typeSystemUri = new File(typeSystemOutput).toURI();
    typeSystemImport.setLocation(getRelativeLocation(typeSystemUri, engineOutput));
  }
  TypeSystemDescription engineTypeSystem = uimaFactory.createTypeSystemDescription();
  return configureEngine(desc, engineOutput, options, scriptPaths, enginePaths, resourcePaths,
      typeSystemImport, engineTypeSystem);
}
/**
 * Creates the type system and analysis engine descriptors for a script and writes both to
 * disk, the type system first.
 *
 * @param desc
 *          the information collected from the script
 * @param typeSystemOutput
 *          path of the type system descriptor file to write
 * @param engineOutput
 *          path of the analysis engine descriptor file to write
 * @param options
 *          build options
 * @param scriptPaths
 *          value for the engine's script paths parameter, may be {@code null}
 * @param enginePaths
 *          directories used to locate imported descriptors, may be {@code null}
 * @throws SAXException
 *           if serializing a descriptor fails
 * @throws InvalidXMLException
 *           if a descriptor cannot be parsed
 * @throws IOException
 *           if a descriptor cannot be read or written
 * @throws ResourceInitializationException
 *           if the type system cannot be created
 * @throws URISyntaxException
 *           if the URL of a located descriptor cannot be converted to a URI
 */
public void build(RutaDescriptorInformation desc, String typeSystemOutput, String engineOutput,
    RutaBuildOptions options, String[] scriptPaths, String[] enginePaths)
    throws SAXException, InvalidXMLException, IOException, ResourceInitializationException,
    URISyntaxException {
  TypeSystemDescription typeSystem = createTypeSystemDescription(desc, typeSystemOutput,
      options, enginePaths);
  AnalysisEngineDescription engine = createAnalysisEngineDescription(desc, typeSystem,
      typeSystemOutput, engineOutput, options, scriptPaths, enginePaths, null);
  toFile(typeSystem, getFile(typeSystemOutput));
  toFile(engine, getFile(engineOutput));
}
/**
 * Registers every type of the given type system in the map, keyed by its short name and
 * mapped to its qualified name. An existing entry with the same short name is overwritten.
 *
 * @param typeNameMap
 *          the map to fill (short name to qualified name)
 * @param typeSystem
 *          the type system whose types are registered
 */
private void fillTypeNameMap(Map<String, String> typeNameMap, TypeSystem typeSystem) {
  for (Iterator<Type> it = typeSystem.getTypeIterator(); it.hasNext();) {
    Type current = it.next();
    typeNameMap.put(current.getShortName(), current.getName());
  }
}
/**
 * Registers every type of the given type system description in the map, keyed by the part of
 * its name after the last dot and mapped to its full name. An existing entry with the same
 * short name is overwritten.
 *
 * @param typeNameMap
 *          the map to fill (short name to qualified name)
 * @param desc
 *          the description whose types are registered
 * @return always {@code false}
 */
private boolean fillTypeNameMap(Map<String, String> typeNameMap, TypeSystemDescription desc) {
  for (TypeDescription typeDescription : desc.getTypes()) {
    String qualifiedName = typeDescription.getName();
    String simpleName = qualifiedName.substring(qualifiedName.lastIndexOf('.') + 1);
    typeNameMap.put(simpleName, qualifiedName);
  }
  return false;
}
/**
 * Resolves the type name stored in the parent slot of a feature triple: Ruta keywords are
 * translated to UIMA type names and an unqualified name that is known in {@code types} is
 * replaced by its qualified name. Name and description are copied unchanged.
 *
 * @param eachFeature
 *          the feature triple to resolve
 * @param types
 *          known types (short name to qualified name)
 * @return a new triple carrying the resolved type name
 */
private StringTriple resolveFeature(StringTriple eachFeature, Map<String, String> types) {
  String resolvedType = translate(eachFeature.getParent());
  boolean unqualified = resolvedType.indexOf('.') < 0;
  if (unqualified && types.containsKey(resolvedType)) {
    resolvedType = types.get(resolvedType);
  }
  return new StringTriple(eachFeature.getName(), eachFeature.getDescription(), resolvedType);
}
/**
 * Resolves the name and the parent of a declared type to qualified names and registers the
 * resolved type in {@code types}.
 * <p>
 * A missing parent defaults to {@code uima.tcas.Annotation}. Unqualified names are looked up
 * in {@code types}; an unknown unqualified type name is prefixed with {@code packageName}.
 *
 * @param typeTriple
 *          the declared type (name, description, parent)
 * @param types
 *          known types (short name to qualified name); updated with the resolved type
 * @param packageName
 *          prefix for type names that are neither qualified nor already known
 * @return a new triple with the resolved name and parent
 */
private StringTriple resolveType(StringTriple typeTriple, Map<String, String> types,
    String packageName) {
  String declaredParent = typeTriple.getParent();
  String parent = translate(declaredParent == null ? "uima.tcas.Annotation" : declaredParent);
  if (parent.indexOf('.') < 0 && types.containsKey(parent)) {
    parent = types.get(parent);
  }

  String name = translate(typeTriple.getName());
  if (name.indexOf('.') < 0) {
    name = types.containsKey(name) ? types.get(name) : packageName + "." + name;
  }

  // make the type available to declarations that follow
  types.put(name.substring(name.lastIndexOf('.') + 1), name);
  return new StringTriple(name, typeTriple.getDescription(), parent);
}
/**
 * Maps the keywords {@code Annotation}, {@code STRING}, {@code INT}, {@code DOUBLE},
 * {@code FLOAT}, {@code BOOLEAN} and {@code TYPE} to the corresponding UIMA type names.
 * {@code TYPE} is mapped to the string type.
 *
 * @param name
 *          the name to translate, may be {@code null}
 * @return the translated name, the unchanged input if it is not one of the keywords, or
 *         {@code null} if the input is {@code null}
 */
private String translate(String name) {
  if (name == null) {
    return null;
  }
  switch (name) {
    case "Annotation":
      return "uima.tcas.Annotation";
    case "STRING":
    case "TYPE":
      return UIMAConstants.TYPE_STRING;
    case "INT":
      return UIMAConstants.TYPE_INTEGER;
    case "DOUBLE":
      return UIMAConstants.TYPE_DOUBLE;
    case "FLOAT":
      return UIMAConstants.TYPE_FLOAT;
    case "BOOLEAN":
      return UIMAConstants.TYPE_BOOLEAN;
    default:
      return name;
  }
}
/**
 * Parses the default analysis engine descriptor and configures it for the given script: type
 * system import, source URL, capabilities for the declared types and the Ruta configuration
 * parameters (main script, paths, additional scripts and engines, extensions).
 *
 * @param desc
 *          the information collected from the script
 * @param engineOutput
 *          path of the engine descriptor file; becomes the source URL of the result
 * @param option
 *          build options, used for the language extensions and engine loaders
 * @param scriptPaths
 *          value for the script paths parameter, skipped if {@code null}
 * @param descriptorPaths
 *          value for the descriptor paths parameter, skipped if {@code null}; a sibling
 *          {@code resources} directory of each entry is added to the resource paths
 * @param resourcePaths
 *          additional resource paths, may be {@code null}
 * @param import_impl
 *          the import of the script's type system
 * @param aets
 *          the type system description that receives the import and is set on the engine
 * @return the configured analysis engine description
 * @throws InvalidXMLException
 *           if the default analysis engine descriptor cannot be parsed
 * @throws IOException
 *           if the default analysis engine descriptor cannot be read
 */
private AnalysisEngineDescription configureEngine(RutaDescriptorInformation desc,
    String engineOutput, RutaBuildOptions option, String[] scriptPaths,
    String[] descriptorPaths, String[] resourcePaths, Import_impl import_impl,
    TypeSystemDescription aets) throws InvalidXMLException, IOException {
  AnalysisEngineDescription engine = UIMAFramework.getXMLParser()
      .parseAnalysisEngineDescription(new XMLInputSource(defaultAnalysisEngine));

  aets.setImports(new Import[] { import_impl });
  engine.getAnalysisEngineMetaData().setTypeSystem(aets);
  engine.setSourceUrl(getFile(engineOutput).toURI().toURL());

  // one extra capability listing every declared type as input and output
  if (!desc.getTypeShortNames().isEmpty()) {
    Capability declared = uimaFactory.createCapability();
    for (StringTriple each : desc.getTypeTriples()) {
      String typeName = each.getName();
      declared.addInputType(typeName, false);
      declared.addOutputType(typeName, false);
    }
    Capability[] existing = engine.getAnalysisEngineMetaData().getCapabilities();
    Capability[] extended = new Capability[existing.length + 1];
    System.arraycopy(existing, 0, extended, 0, existing.length);
    extended[existing.length] = declared;
    engine.getAnalysisEngineMetaData().setCapabilities(extended);
  }

  // the main script is addressed by its package-qualified name if a package is given
  String mainScript = desc.getScriptName();
  String packageName = desc.getPackageString();
  if (!StringUtils.isBlank(packageName)) {
    mainScript = packageName.concat(".").concat(mainScript);
  }

  ConfigurationParameterSettings settings = engine.getAnalysisEngineMetaData()
      .getConfigurationParameterSettings();
  settings.setParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, mainScript);
  if (scriptPaths != null) {
    settings.setParameterValue(RutaEngine.PARAM_SCRIPT_PATHS, scriptPaths);
  }
  if (descriptorPaths != null) {
    settings.setParameterValue(RutaEngine.PARAM_DESCRIPTOR_PATHS, descriptorPaths);
  }

  // resource paths: values of the template, the given paths and the "resources" sibling of
  // each descriptor path
  Set<String> resourceLocations = new HashSet<String>();
  String[] presetResourcePaths = (String[]) settings
      .getParameterValue(RutaEngine.PARAM_RESOURCE_PATHS);
  if (presetResourcePaths != null) {
    resourceLocations.addAll(Arrays.asList(presetResourcePaths));
  }
  if (resourcePaths != null) {
    resourceLocations.addAll(Arrays.asList(resourcePaths));
  }
  if (descriptorPaths != null) {
    for (String descriptorPath : descriptorPaths) {
      String parentDir = new File(descriptorPath).getParent();
      resourceLocations.add(new File(parentDir, "resources").getAbsolutePath());
    }
  }
  settings.setParameterValue(RutaEngine.PARAM_RESOURCE_PATHS,
      resourceLocations.toArray(new String[0]));

  settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_SCRIPTS,
      desc.getImportedScripts().toArray(new String[0]));
  settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINES,
      desc.getImportedEngines().toArray(new String[0]));
  settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_UIMAFIT_ENGINES,
      desc.getImportedUimafitEngines().toArray(new String[0]));

  configureExtensions(engine, option);
  return engine;
}
/**
 * Computes the path of {@code target} relative to the directory of {@code base}.
 * <p>
 * If {@code base} is not an existing directory, its parent directory is used. {@code base} is
 * made absolute first, because the target obtained from a URI is absolute and a relative path
 * can only be computed between two paths of the same kind.
 *
 * @param target
 *          the URI of the file to point to
 * @param base
 *          path of the file or directory the result should be relative to
 * @return the relative path, or {@code null} if none can be computed, e.g. because the URI
 *         does not denote a file system path or both locations have different roots
 */
private String getRelativeLocation(URI target, String base) {
  Path basePath = Paths.get(base).toAbsolutePath();
  if (!basePath.toFile().isDirectory()) {
    basePath = basePath.getParent();
  }
  if (basePath == null) {
    return null;
  }
  Path targetPath;
  try {
    targetPath = Paths.get(target);
  } catch (RuntimeException e) {
    // the URI cannot be mapped to a path of an available file system
    return null;
  }
  try {
    return basePath.relativize(targetPath).toString();
  } catch (IllegalArgumentException e) {
    // no relative path exists between the two locations, e.g. different roots
    return null;
  }
}
/**
 * Appends the language extensions and engine loaders of the build options to the values that
 * are already configured in the engine description and stores the merged lists.
 *
 * @param analysisEngineDescription
 *          the engine description whose parameter settings are updated
 * @param options
 *          the build options providing the additional extensions and loaders
 */
private void configureExtensions(AnalysisEngineDescription analysisEngineDescription,
    RutaBuildOptions options) {
  ConfigurationParameterSettings settings = analysisEngineDescription
      .getAnalysisEngineMetaData().getConfigurationParameterSettings();
  String[] presetExtensions = (String[]) settings
      .getParameterValue(RutaEngine.PARAM_ADDITIONAL_EXTENSIONS);
  String[] presetLoaders = (String[]) settings
      .getParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINE_LOADERS);

  List<String> mergedExtensions = new ArrayList<String>();
  if (presetExtensions != null) {
    mergedExtensions.addAll(Arrays.asList(presetExtensions));
  }
  mergedExtensions.addAll(options.getLanguageExtensions());

  List<String> mergedLoaders = new ArrayList<String>();
  if (presetLoaders != null) {
    mergedLoaders.addAll(Arrays.asList(presetLoaders));
  }
  mergedLoaders.addAll(options.getEngineLoaders());

  settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_EXTENSIONS,
      mergedExtensions.toArray(new String[0]));
  settings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINE_LOADERS,
      mergedLoaders.toArray(new String[0]));
}
/**
 * Parses the type system descriptor at the given URL and, if the options request it, resolves
 * its imports with the given resource manager.
 *
 * @param url
 *          location of the descriptor
 * @param option
 *          build options deciding whether imports are resolved
 * @param rm
 *          resource manager used to resolve imports
 * @return the parsed type system description
 * @throws InvalidXMLException
 *           if the descriptor cannot be parsed or an import cannot be resolved
 * @throws IOException
 *           if the descriptor cannot be read
 */
private TypeSystemDescription getTypeSystemDescriptor(URL url, RutaBuildOptions option,
    ResourceManager rm) throws InvalidXMLException, IOException {
  TypeSystemDescription parsed = UIMAFramework.getXMLParser().parseTypeSystemDescription(
      new XMLInputSource(url));
  if (!option.isResolveImports()) {
    return parsed;
  }
  parsed.resolveImports(rm);
  return parsed;
}
/**
 * Wraps the given path in a {@link File} object.
 *
 * @param location
 *          the path of the file
 * @return the file object for the path
 */
private File getFile(String location) {
  File file = new File(location);
  return file;
}
/**
 * Serializes the given descriptor as XML into the destination file. Missing parent
 * directories are created and the output stream is closed when writing ends, whether it
 * succeeds or fails.
 *
 * @param desc
 *          the descriptor to serialize
 * @param destination
 *          the file to write
 * @throws SAXException
 *           if the serialization fails
 * @throws IOException
 *           if the file cannot be opened for writing or the stream cannot be closed
 */
private void toFile(XMLizable desc, File destination) throws SAXException, IOException {
  File parent = destination.getParentFile();
  if (parent != null) {
    // a destination without a directory part has no parent to create
    parent.mkdirs();
  }
  try (OutputStream out = new FileOutputStream(destination)) {
    XMLSerializer sax = new XMLSerializer(out);
    ContentHandler ch = sax.getContentHandler();
    ch.startDocument();
    desc.toXML(ch);
    ch.endDocument();
  }
}
/**
 * Searches the class path for a resource that corresponds to a dotted name.
 * <p>
 * Dots in {@code candidate} are replaced by slashes, {@code extension} is appended and the
 * result is looked up with the {@code classpath*:} prefix.
 *
 * @param candidate
 *          the dotted name, e.g. a package-qualified descriptor name
 * @param extension
 *          the suffix to append, e.g. {@code ".xml"}
 * @param classloader
 *          the class loader handed to the resource resolver
 * @return the URL of the first matching resource, or {@code null} if there is none
 * @throws IOException
 *           if the lookup or the URL resolution fails
 */
public static URL checkImportExistence(String candidate, String extension, ClassLoader classloader)
    throws IOException {
  String resourcePath = candidate.replace('.', '/') + extension;
  PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver(
      classloader);
  Resource[] matches = resolver.getResources("classpath*:" + resourcePath);
  if (matches != null && matches.length > 0) {
    return matches[0].getURL();
  }
  return null;
}
}