/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.uima.ruta.descriptor; | |
import java.io.File; | |
import java.io.FileNotFoundException; | |
import java.io.FileOutputStream; | |
import java.io.IOException; | |
import java.io.OutputStream; | |
import java.net.URI; | |
import java.net.URISyntaxException; | |
import java.net.URL; | |
import java.nio.file.Path; | |
import java.nio.file.Paths; | |
import java.util.ArrayList; | |
import java.util.Arrays; | |
import java.util.Collection; | |
import java.util.HashMap; | |
import java.util.HashSet; | |
import java.util.Iterator; | |
import java.util.List; | |
import java.util.Map; | |
import java.util.Set; | |
import org.apache.commons.lang3.StringUtils; | |
import org.apache.uima.ResourceSpecifierFactory; | |
import org.apache.uima.UIMAFramework; | |
import org.apache.uima.analysis_engine.AnalysisEngineDescription; | |
import org.apache.uima.cas.CAS; | |
import org.apache.uima.cas.Type; | |
import org.apache.uima.cas.TypeSystem; | |
import org.apache.uima.resource.ResourceInitializationException; | |
import org.apache.uima.resource.ResourceManager; | |
import org.apache.uima.resource.impl.ResourceManager_impl; | |
import org.apache.uima.resource.metadata.Capability; | |
import org.apache.uima.resource.metadata.ConfigurationParameterSettings; | |
import org.apache.uima.resource.metadata.FsIndexDescription; | |
import org.apache.uima.resource.metadata.Import; | |
import org.apache.uima.resource.metadata.TypeDescription; | |
import org.apache.uima.resource.metadata.TypeSystemDescription; | |
import org.apache.uima.resource.metadata.impl.Import_impl; | |
import org.apache.uima.ruta.UIMAConstants; | |
import org.apache.uima.ruta.engine.RutaEngine; | |
import org.apache.uima.util.CasCreationUtils; | |
import org.apache.uima.util.InvalidXMLException; | |
import org.apache.uima.util.XMLInputSource; | |
import org.apache.uima.util.XMLSerializer; | |
import org.apache.uima.util.XMLizable; | |
import org.springframework.core.io.Resource; | |
import org.springframework.core.io.support.PathMatchingResourcePatternResolver; | |
import org.xml.sax.ContentHandler; | |
import org.xml.sax.SAXException; | |
public class RutaDescriptorBuilder { | |
private static ResourceSpecifierFactory uimaFactory = UIMAFramework.getResourceSpecifierFactory(); | |
private final URL defaultTypeSystem; | |
private final URL defaultAnalysisEngine; | |
public RutaDescriptorBuilder(URL defaultTypeSystem, URL defaultAnalysisEngine) | |
throws InvalidXMLException, IOException { | |
super(); | |
this.defaultTypeSystem = defaultTypeSystem; | |
this.defaultAnalysisEngine = defaultAnalysisEngine; | |
} | |
public TypeSystemDescription createTypeSystemDescription(RutaDescriptorInformation desc, | |
String typeSystemOutput, RutaBuildOptions options, String[] enginePaths) | |
throws InvalidXMLException, ResourceInitializationException, IOException, | |
URISyntaxException { | |
TypeSystemDescription typeSystemDescription = uimaFactory.createTypeSystemDescription(); | |
ResourceManager rm = UIMAFramework.newDefaultResourceManager(); | |
if (options.getClassLoader() != null) { | |
new ResourceManager_impl(options.getClassLoader()); | |
} | |
if (enginePaths != null) { | |
String dataPath = ""; | |
for (String string : enginePaths) { | |
dataPath += string + File.pathSeparator; | |
} | |
rm.setDataPath(dataPath); | |
} | |
Map<String, String> typeNameMap = new HashMap<String, String>(); | |
TypeSystemDescription initialTypeSystem = UIMAFramework.getXMLParser() | |
.parseTypeSystemDescription(new XMLInputSource(defaultTypeSystem)); | |
CAS cas = CasCreationUtils.createCas(initialTypeSystem, null, new FsIndexDescription[0]); | |
fillTypeNameMap(typeNameMap, cas.getTypeSystem()); | |
cas.release(); | |
List<TypeSystemDescription> toInclude = new ArrayList<TypeSystemDescription>(); | |
List<Import> importList = new ArrayList<Import>(); | |
Import_impl import_impl = new Import_impl(); | |
if (options.isImportByName()) { | |
String name = initialTypeSystem.getName(); | |
import_impl.setName(name); | |
} else if (options.isResolveImports()) { | |
String absoluteLocation = initialTypeSystem.getSourceUrlString(); | |
import_impl.setLocation(absoluteLocation); | |
} else { | |
URI uri = null; | |
try { | |
uri = defaultTypeSystem.toURI(); | |
} catch (URISyntaxException e) { | |
// do nothing | |
} | |
if (uri != null) { | |
String relativeLocation = getRelativeLocation(uri, typeSystemOutput); | |
if (relativeLocation != null) { | |
import_impl.setLocation(relativeLocation); | |
} else { | |
toInclude.add(initialTypeSystem); | |
} | |
} else { | |
toInclude.add(initialTypeSystem); | |
} | |
} | |
if (import_impl.getLocation() != null || import_impl.getName() != null) { | |
importList.add(import_impl); | |
} | |
for (String eachName : desc.getImportedTypeSystems()) { | |
String locate = RutaEngine.locate(eachName, enginePaths, ".xml"); | |
URL url = null; | |
boolean include = false; | |
if (locate != null) { | |
File file = new File(locate); | |
url = file.toURI().toURL(); | |
} | |
if (url == null) { | |
url = checkImportExistence(eachName, ".xml", options.getClassLoader()); | |
include = true; | |
if (url == null) { | |
throw new FileNotFoundException("Build process can't find " + eachName + " in " | |
+ desc.getScriptName()); | |
} | |
} | |
TypeSystemDescription each = getTypeSystemDescriptor(url, options, rm); | |
if (each != null) { | |
fillTypeNameMap(typeNameMap, each); | |
if (include) { | |
// need to include the complete type system because an import is not possible | |
each.resolveImports(rm); | |
toInclude.add(each); | |
} else { | |
import_impl = new Import_impl(); | |
if (options.isImportByName()) { | |
import_impl.setName(eachName); | |
} else if (options.isResolveImports()) { | |
String absoluteLocation = each.getSourceUrlString(); | |
import_impl.setLocation(absoluteLocation); | |
} else { | |
String relativeLocation = getRelativeLocation(url.toURI(), typeSystemOutput); | |
File parentFile = new File(typeSystemOutput).getParentFile(); | |
File targetFile = new File(parentFile, relativeLocation); | |
boolean ableToFindFile = targetFile.exists(); | |
if (!ableToFindFile) { | |
// hotfix for different partitions making trouble for the relative path | |
import_impl.setName(eachName); | |
} else { | |
import_impl.setLocation(relativeLocation); | |
} | |
} | |
importList.add(import_impl); | |
} | |
} else { | |
throw new FileNotFoundException("Build process can't find " + eachName + " in " | |
+ desc.getScriptName()); | |
} | |
} | |
for (String eachName : desc.getImportedScripts()) { | |
String locate = RutaEngine.locate(eachName, enginePaths, options.getTypeSystemSuffix() | |
+ ".xml"); | |
URL url = null; | |
if (locate != null) { | |
File file = new File(locate); | |
url = file.toURI().toURL(); | |
} | |
if (url == null) { | |
url = checkImportExistence(eachName, options.getTypeSystemSuffix() + ".xml", | |
options.getClassLoader()); | |
if (url == null) { | |
throw new FileNotFoundException("Build process can't find " + eachName + " in " | |
+ desc.getScriptName()); | |
} | |
} | |
TypeSystemDescription each = getTypeSystemDescriptor(url, options, rm); | |
if (each != null) { | |
fillTypeNameMap(typeNameMap, each); | |
import_impl = new Import_impl(); | |
if (options.isImportByName()) { | |
import_impl.setName(eachName + options.getTypeSystemSuffix()); | |
} else if (options.isResolveImports()) { | |
String absoluteLocation = each.getSourceUrlString(); | |
import_impl.setLocation(absoluteLocation); | |
} else { | |
String relativeLocation = getRelativeLocation(url.toURI(), typeSystemOutput); | |
import_impl.setLocation(relativeLocation); | |
} | |
importList.add(import_impl); | |
} else { | |
throw new FileNotFoundException("Build process can't find " + eachName + " in " | |
+ desc.getScriptName()); | |
} | |
} | |
typeSystemDescription = CasCreationUtils.mergeTypeSystems(toInclude, rm); | |
if (!importList.isEmpty()) { | |
Import[] newImports = importList.toArray(new Import[0]); | |
typeSystemDescription.setImports(newImports); | |
} | |
if (options.isResolveImports()) { | |
typeSystemDescription.resolveImports(rm); | |
} | |
// TODO hotfixes: where do I get the final types?? | |
Set<String> finalTypes = new HashSet<String>(); | |
finalTypes.addAll(Arrays.asList(new String[] { "uima.cas.Boolean", "uima.cas.Byte", | |
"uima.cas.Short", "uima.cas.Integer", "uima.cas.Long", "uima.cas.Float", "uima.cas.Double", | |
"uima.cas.BooleanArray", "uima.cas.ByteArray", "uima.cas.ShortArray", | |
"uima.cas.IntegerArray", "uima.cas.LongArray", "uima.cas.FloatArray", | |
"uima.cas.DoubleArray", "uima.cas.StringArray", "uima.cas.FSArray" })); | |
int typeIndex = 0; | |
for (String eachType : desc.getTypeShortNames()) { | |
StringTriple typeTriple = desc.getTypeTriples().get(typeIndex); | |
typeTriple = resolveType(typeTriple, typeNameMap, desc.getScriptName()); | |
if (typeSystemDescription.getType(typeTriple.getName()) != null) { | |
continue; | |
} | |
if (!finalTypes.contains(typeTriple.getParent())) { | |
TypeDescription newType = typeSystemDescription.addType(typeTriple.getName(), | |
typeTriple.getDescription(), typeTriple.getParent()); | |
Collection<StringTriple> collection = desc.getFeatures().get(eachType); | |
if (collection != null) { | |
for (StringTriple eachFeature : collection) { | |
eachFeature = resolveFeature(eachFeature, typeNameMap); | |
newType.addFeature(eachFeature.getName(), eachFeature.getDescription(), | |
eachFeature.getParent()); | |
// capability.addInputFeature(eachFeature.getName()); | |
// capability.addOutputFeature(eachFeature.getName()); | |
} | |
} | |
} | |
typeIndex++; | |
} | |
Set<String> names = new HashSet<String>(); | |
Collection<TypeDescription> types = new HashSet<TypeDescription>(); | |
for (TypeDescription each : typeSystemDescription.getTypes()) { | |
String name = each.getName(); | |
if (!names.contains(name)) { | |
names.add(name); | |
types.add(each); | |
} | |
} | |
File typeSystemFile = getFile(typeSystemOutput); | |
TypeDescription[] presentTypes = typeSystemDescription.getTypes(); | |
types.addAll(Arrays.asList(presentTypes)); | |
typeSystemDescription.setTypes(types.toArray(new TypeDescription[0])); | |
typeSystemDescription.setName(desc.getScriptName() + options.getTypeSystemSuffix()); | |
typeSystemDescription.setSourceUrl(typeSystemFile.toURI().toURL()); | |
return typeSystemDescription; | |
} | |
public AnalysisEngineDescription createAnalysisEngineDescription(RutaDescriptorInformation desc, | |
TypeSystemDescription typeSystemDescription, String typeSystemOutput, | |
String engineOutput, RutaBuildOptions options, String[] scriptPaths, | |
String[] enginePaths, String[] resourcePaths) throws InvalidXMLException, IOException { | |
TypeSystemDescription aets = uimaFactory.createTypeSystemDescription(); | |
Import_impl import_impl = new Import_impl(); | |
if (options.isImportByName()) { | |
if (typeSystemDescription != null) { | |
import_impl.setName(typeSystemDescription.getName()); | |
} | |
} else { | |
if (typeSystemOutput != null) { | |
String relativeLocation = getRelativeLocation(new File(typeSystemOutput).toURI(), | |
engineOutput); | |
import_impl.setLocation(relativeLocation); | |
} | |
} | |
return configureEngine(desc, engineOutput, options, scriptPaths, enginePaths, resourcePaths, | |
import_impl, aets); | |
} | |
public void build(RutaDescriptorInformation desc, String typeSystemOutput, String engineOutput, | |
RutaBuildOptions options, String[] scriptPaths, String[] enginePaths) | |
throws SAXException, InvalidXMLException, IOException, ResourceInitializationException, | |
URISyntaxException { | |
TypeSystemDescription typeSystemDescription = createTypeSystemDescription(desc, | |
typeSystemOutput, options, enginePaths); | |
AnalysisEngineDescription analysisEngineDescription = createAnalysisEngineDescription(desc, | |
typeSystemDescription, typeSystemOutput, engineOutput, options, scriptPaths, | |
enginePaths, null); | |
File analysisEngineFile = getFile(engineOutput); | |
File typeSystemFile = getFile(typeSystemOutput); | |
toFile(typeSystemDescription, typeSystemFile); | |
toFile(analysisEngineDescription, analysisEngineFile); | |
} | |
private void fillTypeNameMap(Map<String, String> typeNameMap, TypeSystem typeSystem) { | |
Iterator<Type> typeIterator = typeSystem.getTypeIterator(); | |
while (typeIterator.hasNext()) { | |
Type type = (Type) typeIterator.next(); | |
String shortName = type.getShortName(); | |
String name = type.getName(); | |
typeNameMap.put(shortName, name); | |
} | |
} | |
private boolean fillTypeNameMap(Map<String, String> typeNameMap, TypeSystemDescription desc) { | |
boolean contained = false; | |
for (TypeDescription each : desc.getTypes()) { | |
String name = each.getName(); | |
int lastIndexOf = name.lastIndexOf("."); | |
String shortName = name.substring(lastIndexOf + 1, name.length()); | |
typeNameMap.put(shortName, name); | |
} | |
return contained; | |
} | |
private StringTriple resolveFeature(StringTriple eachFeature, Map<String, String> types) { | |
String parent = eachFeature.getParent(); | |
String name = eachFeature.getName(); | |
parent = translate(parent); | |
if (parent.indexOf(".") == -1 && types.containsKey(parent)) { | |
parent = types.get(parent); | |
} | |
return new StringTriple(name, eachFeature.getDescription(), parent); | |
} | |
private StringTriple resolveType(StringTriple typeTriple, Map<String, String> types, | |
String packageName) { | |
String parent = typeTriple.getParent(); | |
String name = typeTriple.getName(); | |
if (parent == null) { | |
parent = "uima.tcas.Annotation"; | |
} | |
parent = translate(parent); | |
name = translate(name); | |
if (parent.indexOf(".") == -1 && types.containsKey(parent)) { | |
parent = types.get(parent); | |
} | |
if (name.indexOf(".") == -1) { | |
if (types.containsKey(name)) { | |
name = types.get(name); | |
} else { | |
name = packageName + "." + name; | |
} | |
} | |
int lastIndexOf = name.lastIndexOf("."); | |
String shortName = name.substring(lastIndexOf + 1, name.length()); | |
types.put(shortName, name); | |
return new StringTriple(name, typeTriple.getDescription(), parent); | |
} | |
private String translate(String name) { | |
if (name == null) { | |
return null; | |
} | |
if (name.equals("Annotation")) { | |
return "uima.tcas.Annotation"; | |
} else if (name.equals("STRING")) { | |
return UIMAConstants.TYPE_STRING; | |
} else if (name.equals("INT")) { | |
return UIMAConstants.TYPE_INTEGER; | |
} else if (name.equals("DOUBLE")) { | |
return UIMAConstants.TYPE_DOUBLE; | |
} else if (name.equals("FLOAT")) { | |
return UIMAConstants.TYPE_FLOAT; | |
} else if (name.equals("BOOLEAN")) { | |
return UIMAConstants.TYPE_BOOLEAN; | |
} else if (name.equals("TYPE")) { | |
return UIMAConstants.TYPE_STRING; | |
} | |
return name; | |
} | |
private AnalysisEngineDescription configureEngine(RutaDescriptorInformation desc, | |
String engineOutput, RutaBuildOptions option, String[] scriptPaths, | |
String[] descriptorPaths, String[] resourcePaths, Import_impl import_impl, | |
TypeSystemDescription aets) throws InvalidXMLException, IOException { | |
AnalysisEngineDescription analysisEngineDescription = UIMAFramework.getXMLParser() | |
.parseAnalysisEngineDescription(new XMLInputSource(defaultAnalysisEngine)); | |
aets.setImports(new Import[] { import_impl }); | |
analysisEngineDescription.getAnalysisEngineMetaData().setTypeSystem(aets); | |
File file = getFile(engineOutput); | |
analysisEngineDescription.setSourceUrl(file.toURI().toURL()); | |
if (!desc.getTypeShortNames().isEmpty()) { | |
Capability capability = uimaFactory.createCapability(); | |
for (StringTriple typeTriple : desc.getTypeTriples()) { | |
capability.addInputType(typeTriple.getName(), false); | |
capability.addOutputType(typeTriple.getName(), false); | |
} | |
Capability[] capabilities = analysisEngineDescription.getAnalysisEngineMetaData() | |
.getCapabilities(); | |
Capability[] newArray = new Capability[capabilities.length + 1]; | |
System.arraycopy(capabilities, 0, newArray, 0, capabilities.length); | |
newArray[capabilities.length] = capability; | |
analysisEngineDescription.getAnalysisEngineMetaData().setCapabilities(newArray); | |
} | |
String mainScript = desc.getScriptName(); | |
if (!StringUtils.isBlank(desc.getPackageString())) { | |
mainScript = desc.getPackageString().concat(".").concat(mainScript); | |
} | |
analysisEngineDescription.getAnalysisEngineMetaData().getConfigurationParameterSettings() | |
.setParameterValue(RutaEngine.PARAM_MAIN_SCRIPT, mainScript); | |
if (scriptPaths != null) { | |
analysisEngineDescription.getAnalysisEngineMetaData().getConfigurationParameterSettings() | |
.setParameterValue(RutaEngine.PARAM_SCRIPT_PATHS, scriptPaths); | |
} | |
if (descriptorPaths != null) { | |
analysisEngineDescription.getAnalysisEngineMetaData().getConfigurationParameterSettings() | |
.setParameterValue(RutaEngine.PARAM_DESCRIPTOR_PATHS, descriptorPaths); | |
} | |
String[] parameterValue = (String[]) analysisEngineDescription.getAnalysisEngineMetaData() | |
.getConfigurationParameterSettings().getParameterValue(RutaEngine.PARAM_RESOURCE_PATHS); | |
Set<String> resourceLocations = new HashSet<String>(); | |
if (parameterValue != null && parameterValue.length != 0) { | |
resourceLocations.addAll(Arrays.asList(parameterValue)); | |
} | |
if (resourcePaths != null) { | |
resourceLocations.addAll(Arrays.asList(resourcePaths)); | |
} | |
if (descriptorPaths != null) { | |
for (String string : descriptorPaths) { | |
File descDir = new File(string); | |
File defaultResourceDir = new File(descDir.getParent(), "resources"); | |
resourceLocations.add(defaultResourceDir.getAbsolutePath()); | |
} | |
} | |
analysisEngineDescription | |
.getAnalysisEngineMetaData() | |
.getConfigurationParameterSettings() | |
.setParameterValue(RutaEngine.PARAM_RESOURCE_PATHS, | |
resourceLocations.toArray(new String[0])); | |
String[] additionalScriptsArray = desc.getImportedScripts().toArray(new String[] {}); | |
analysisEngineDescription.getAnalysisEngineMetaData().getConfigurationParameterSettings() | |
.setParameterValue(RutaEngine.PARAM_ADDITIONAL_SCRIPTS, additionalScriptsArray); | |
String[] additionalEnginesArray = desc.getImportedEngines().toArray(new String[] {}); | |
analysisEngineDescription.getAnalysisEngineMetaData().getConfigurationParameterSettings() | |
.setParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINES, additionalEnginesArray); | |
String[] additionalUimafitEnginesArray = desc.getImportedUimafitEngines().toArray( | |
new String[] {}); | |
analysisEngineDescription | |
.getAnalysisEngineMetaData() | |
.getConfigurationParameterSettings() | |
.setParameterValue(RutaEngine.PARAM_ADDITIONAL_UIMAFIT_ENGINES, | |
additionalUimafitEnginesArray); | |
analysisEngineDescription.getAnalysisEngineMetaData().setTypeSystem(aets); | |
configureExtensions(analysisEngineDescription, option); | |
return analysisEngineDescription; | |
} | |
private String getRelativeLocation(URI target, String base) { | |
Path basePath = Paths.get(base); | |
if (!basePath.toFile().isDirectory()) { | |
basePath = basePath.getParent(); | |
} | |
Path targetPath = null; | |
try { | |
targetPath = Paths.get(target); | |
} catch (Exception e) { | |
return null; | |
} | |
Path relativePath = basePath.relativize(targetPath); | |
return relativePath.toString(); | |
} | |
private void configureExtensions(AnalysisEngineDescription analysisEngineDescription, | |
RutaBuildOptions options) { | |
ConfigurationParameterSettings configurationParameterSettings = analysisEngineDescription | |
.getAnalysisEngineMetaData().getConfigurationParameterSettings(); | |
List<String> languageExtensions = options.getLanguageExtensions(); | |
String[] extensions = (String[]) configurationParameterSettings | |
.getParameterValue(RutaEngine.PARAM_ADDITIONAL_EXTENSIONS); | |
String[] loaders = (String[]) configurationParameterSettings | |
.getParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINE_LOADERS); | |
List<String> es = new ArrayList<String>(); | |
List<String> ls = new ArrayList<String>(); | |
if (extensions != null) { | |
es.addAll(Arrays.asList(extensions)); | |
} | |
es.addAll(languageExtensions); | |
if (loaders != null) { | |
ls.addAll(Arrays.asList(loaders)); | |
} | |
ls.addAll(options.getEngineLoaders()); | |
configurationParameterSettings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_EXTENSIONS, | |
es.toArray(new String[0])); | |
configurationParameterSettings.setParameterValue(RutaEngine.PARAM_ADDITIONAL_ENGINE_LOADERS, | |
ls.toArray(new String[0])); | |
} | |
private TypeSystemDescription getTypeSystemDescriptor(URL url, RutaBuildOptions option, | |
ResourceManager rm) throws InvalidXMLException, IOException { | |
TypeSystemDescription tsdesc = UIMAFramework.getXMLParser().parseTypeSystemDescription( | |
new XMLInputSource(url)); | |
if (option.isResolveImports()) { | |
tsdesc.resolveImports(rm); | |
} | |
return tsdesc; | |
} | |
private File getFile(String location) { | |
return new File(location); | |
} | |
private void toFile(XMLizable desc, File destination) throws SAXException, FileNotFoundException { | |
destination.getParentFile().mkdirs(); | |
OutputStream out = new FileOutputStream(destination); | |
XMLSerializer sax = new XMLSerializer(out); | |
ContentHandler ch = sax.getContentHandler(); | |
ch.startDocument(); | |
desc.toXML(ch); | |
ch.endDocument(); | |
} | |
public static URL checkImportExistence(String candidate, String extension, ClassLoader classloader) | |
throws IOException { | |
String p = candidate.replaceAll("[.]", "/"); | |
p += extension; | |
PathMatchingResourcePatternResolver resolver = new PathMatchingResourcePatternResolver( | |
classloader); | |
String prefix = "classpath*:"; | |
String pattern = prefix + p; | |
Resource[] resources = resolver.getResources(pattern); | |
if (resources == null || resources.length == 0) { | |
return null; | |
} else { | |
Resource resource = resources[0]; | |
URL url = resource.getURL(); | |
return url; | |
} | |
} | |
} |