blob: 897bc15e92a7e954b3e980d036531ef94f1d55e9 [file] [log] [blame]
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
package org.apache.taverna.databundle;
import static java.nio.file.Files.createDirectories;
import static java.nio.file.Files.delete;
import static java.nio.file.Files.isDirectory;
import static java.nio.file.Files.isRegularFile;
import static java.nio.file.Files.newDirectoryStream;
import static java.nio.file.Files.newInputStream;
import static java.nio.file.Files.newOutputStream;
import static java.nio.file.Files.readAllLines;
import static java.nio.file.Files.write;
import static java.nio.file.StandardOpenOption.CREATE;
import static java.nio.file.StandardOpenOption.TRUNCATE_EXISTING;
import java.lang.reflect.Array;
import java.nio.charset.Charset;
import java.nio.file.DirectoryIteratorException;
import java.nio.file.DirectoryStream;
import java.nio.file.DirectoryStream.Filter;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.List;
import java.util.NavigableMap;
import java.util.TreeMap;
import java.util.UUID;
import java.util.logging.Level;
import java.util.logging.Logger;
import org.apache.taverna.databundle.DataBundles.ResolveOptions;
import org.apache.taverna.robundle.Bundle;
import org.apache.taverna.robundle.Bundles;
import org.apache.taverna.scufl2.api.container.WorkflowBundle;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
* Utility functions for dealing with data bundles.
* <p>
* The style of using this class is similar to that of {@link Files}. In fact, a
* data bundle is implemented as a set of {@link Path}s.
public class DataBundles extends Bundles {
private static final class OBJECT_MAPPER {
// Lazy initialization of singleton
private static final ObjectMapper instance = new ObjectMapper();
protected static final class ExtensionIgnoringFilter implements
Filter<Path> {
private final String fname;
private ExtensionIgnoringFilter(Path file) {
this.fname = filenameWithoutExtension(file);
public boolean accept(Path entry) throws IOException {
return fname.equals(filenameWithoutExtension(entry));
// Shared workflow bundle reader/writer; created on demand by getWfBundleIO()
// and replaceable through setWfBundleIO(WorkflowBundleIO)
private static WorkflowBundleIO wfBundleIO;
private static final Logger logger = Logger.getLogger(DataBundles.class.getCanonicalName());

// Media types handed to WorkflowBundleIO when writing the workflow
private static final String WFBUNDLE_CONTENT_TYPE = "application/vnd.taverna.scufl2.workflow-bundle";
private static final String WFDESC_TURTLE = "text/vnd.wf4ever.wfdesc+turtle";

// Base name and extensions of the workflow resources
private static final String WORKFLOW = "workflow";
private static final String DOT_WFDESC_TTL = ".wfdesc.ttl";
private static final String DOT_WFBUNDLE = ".wfbundle";

// Workflow run resources, resolved against the bundle root
private static final String WORKFLOWRUN_PROV_TTL = "workflowrun.prov.ttl";
private static final String WORKFLOWRUN_JSON = "workflowrun.json";

// Extension of error documents
private static final String DOT_ERR = ".err";

// Folder names resolved against the bundle root
private static final String INPUTS = "inputs";
private static final String INTERMEDIATES = "intermediates";
private static final String OUTPUTS = "outputs";

// Charset used for reading and writing error documents; taken from the
// guaranteed-present constant rather than a by-name lookup
private static final Charset UTF8 = java.nio.charset.StandardCharsets.UTF_8;
private static Path anyExtension(Path path) throws IOException {
return anyExtension(path.getParent(), path.getFileName().toString());
private static Path anyExtension(Path directory, String fileName)
throws IOException {
Path path = directory.resolve(fileName);
// Prefer the fileName as it is
if (Files.exists(path))
return path;
// Strip any existing extension
String fileNameNoExt = filenameWithoutExtension(path);
Path withoutExt = path.resolveSibling(fileNameNoExt);
if (Files.exists(withoutExt))
return withoutExt;
// Check directory for path.*
for (Path p : newDirectoryStream(directory, fileNameNoExt + ".*"))
* We'll just return the first one
* TODO: Should we fail if there's more than one?
return p;
* Nothing? Then let's give the existing one; perhaps it is to be
* created.
return path;
private static void checkExistingAnyExtension(Path path)
throws IOException, FileAlreadyExistsException {
Path existing = anyExtension(path);
if (!path.equals(existing))
throw new FileAlreadyExistsException(existing.toString());
// Presumably creates a list (see isList(Path), which treats directories as
// lists) at the given path.
// NOTE(review): the body of this method is not visible in this view - confirm
// against the full source before relying on this description.
public static void createList(Path path) throws IOException {
// Visits every entry in the parent directory of the given file whose name,
// ignoring extension, equals that of the file.
// NOTE(review): the statement run for each matching entry is not visible in
// this view; judging by the method name it presumably deletes the entry -
// confirm against the full source.
public static void deleteAllExtensions(final Path file) throws IOException {
// Accepts siblings sharing the file's extension-less name
Filter<Path> filter = new ExtensionIgnoringFilter(file);
// try-with-resources closes the directory stream when done
try (DirectoryStream<Path> ds = newDirectoryStream(file.getParent(),
filter)) {
for (Path p : ds)
protected static String filenameWithoutExtension(Path entry) {
String fileName = entry.getFileName().toString();
int lastDot = fileName.lastIndexOf(".");
if (lastDot < 0)
return fileName.replace("/", "");
return fileName.substring(0, lastDot);
// Reads the error document stored next to the given path (same name, ".err"
// extension) and returns it as an ErrorDocument; returns null for a null path.
// The file is read as UTF-8 lines and split at the first empty line: the lines
// before it are the causes, the line after it is the message, and the
// remaining lines are the trace.
// NOTE(review): several statements (the bodies of both loops and whatever
// stores the trace on errorDoc) are not visible in this view - confirm
// against the full source.
public static ErrorDocument getError(Path path) throws IOException {
if (path == null)
return null;
Path errorPath = withExtension(path, DOT_ERR);
List<String> errorList = readAllLines(errorPath, UTF8);
// Index of the empty separator line between the causes and the message
int split = errorList.indexOf("");
// NOTE(review): indexOf() is always < size(), so the second condition can
// never be true; a document ending at the separator would fail in
// get(split + 1) below instead of here - consider size() <= split + 1.
if (split == -1 || errorList.size() <= split)
throw new IOException("Invalid error document: " + errorPath);
ErrorDocument errorDoc = new ErrorDocument();
// Lines before the separator: one cause per line
for (String cause : errorList.subList(0, split))
// First line after the separator is the message
errorDoc.setMessage(errorList.get(split + 1));
StringBuilder errorTrace = new StringBuilder();
// Everything after the message line belongs to the trace
for (String line : errorList.subList(split + 2, errorList.size())) {
if (errorTrace.length() > 0)
// Delete last \n
errorTrace.deleteCharAt(errorTrace.length() - 1);
return errorDoc;
public static Path getInputs(Bundle dataBundle) throws IOException {
Path inputs = dataBundle.getRoot().resolve(INPUTS);
return inputs;
private static long getEntryNumber(Path entry) throws NumberFormatException {
String name = filenameWithoutExtension(entry);
return Long.parseLong(name);
// Lists the entries of a list directory, placing each entry at the index given
// by its numeric file name (see getEntryNumber). Returns null for a null list.
// NOTE(review): the body of the gap-filling while loop and of the
// NumberFormatException handler are not visible in this view; presumably gaps
// are padded with placeholders and non-numeric entries are skipped - confirm
// against the full source.
public static List<Path> getList(Path list) throws IOException {
if (list == null)
return null;
List<Path> paths = new ArrayList<>();
// try-with-resources closes the directory stream when done
try (DirectoryStream<Path> ds = newDirectoryStream(list)) {
for (Path entry : ds)
try {
long entryNum = getEntryNumber(entry);
// Grow the list until index entryNum exists
while (paths.size() <= entryNum)
// Fill any gaps
// NOTE: Don't use add() as these could come in any order!
// NOTE(review): entryNum is narrowed to int here; values above
// Integer.MAX_VALUE would not index correctly.
paths.set((int) entryNum, entry);
} catch (NumberFormatException ex) {
} catch (DirectoryIteratorException ex) {
// Rethrow the IOException that interrupted the iteration
throw ex.getCause();
return paths;
public static Path getListItem(Path list, long position) throws IOException {
if (position < 0)
throw new IllegalArgumentException(
"Position must be 0 or more, not: " + position);
return anyExtension(list, Long.toString(position));
public static Path getOutputs(Bundle dataBundle) throws IOException {
Path inputs = dataBundle.getRoot().resolve(OUTPUTS);
return inputs;
public static Path getPort(Path map, String portName) throws IOException {
return anyExtension(map, portName);
public static NavigableMap<String, Path> getPorts(Path path)
throws IOException {
NavigableMap<String, Path> ports = new TreeMap<>();
try (DirectoryStream<Path> ds = newDirectoryStream(path)) {
for (Path p : ds)
ports.put(filenameWithoutExtension(p), p);
return ports;
public static boolean hasInputs(Bundle dataBundle) {
Path inputs = dataBundle.getRoot().resolve(INPUTS);
return isDirectory(inputs);
public static boolean hasOutputs(Bundle dataBundle) {
Path outputs = dataBundle.getRoot().resolve(OUTPUTS);
return isDirectory(outputs);
public static boolean isError(Path path) {
return isRegularFile(withExtension(path, DOT_ERR));
public static boolean isList(Path path) {
return isDirectory(path);
public static boolean isMissing(Path item) {
return Bundles.isMissing(item) && !isError(item);
public static boolean isValue(Path item) {
return !isError(item) && Bundles.isValue(item);
public static Path newListItem(Path list) throws IOException {
return list.resolve(Long.toString(getListSize(list)));
public static Path setError(Path path, ErrorDocument error)
throws IOException {
return setError(path, error.getMessage(), error.getTrace(), error
.getCausedBy().toArray(new Path[error.getCausedBy().size()]));
// Writes an error document for the given path: the path is given the ".err"
// extension and the document is written as UTF-8 lines, creating the file or
// truncating an existing one. Returns the path of the written ".err" file.
// NOTE(review): the statements that add the relative causes, the message and
// the trace to errorDoc are not visible in this view - confirm the exact line
// layout against the full source (getError expects causes, an empty line, the
// message, then the trace).
public static Path setError(Path errorPath, String message, String trace,
Path... causedBy) throws IOException {
errorPath = withExtension(errorPath, DOT_ERR);
// Silly \n-based format
List<String> errorDoc = new ArrayList<>();
for (Path cause : causedBy) {
// Causes are expressed relative to the folder containing the error file
Path relCause = errorPath.getParent().relativize(cause);
errorDoc.add(""); // Our magic separator
write(errorPath, errorDoc, UTF8, TRUNCATE_EXISTING, CREATE);
return errorPath;
public static Path setReference(Path path, URI reference)
throws IOException {
path = withExtension(path, DOT_URL);
return Bundles.setReference(path, reference);
public static void setStringValue(Path path, String string)
throws IOException {
Bundles.setStringValue(path, string);
protected static Path withExtension(Path path, String extension) {
String filename = path.getFileName().toString();
return path.resolveSibling(withExtensionFilename(filename, extension));
protected static String withExtensionFilename(String filename,
String extension) {
if (!extension.isEmpty() && !extension.startsWith("."))
throw new IllegalArgumentException(
"Extension must be empty or start with .");
if (!extension.isEmpty()
&& filename.toLowerCase().endsWith(extension.toLowerCase()))
return filename;
// Everything after the last . - or just the end
return filename.replaceFirst("(\\.[^.]*)?$", extension);
public static Path getWorkflowRunProvenance(Bundle dataBundle) {
return dataBundle.getRoot().resolve(WORKFLOWRUN_PROV_TTL);
public static Path getWorkflowRunReport(Bundle dataBundle) {
return dataBundle.getRoot().resolve(WORKFLOWRUN_JSON);
public static JsonNode getWorkflowRunReportAsJson(Bundle dataBundle)
throws IOException {
Path path = getWorkflowRunReport(dataBundle);
try (InputStream jsonIn = newInputStream(path)) {
return OBJECT_MAPPER.instance.readTree(jsonIn);
public static void setWorkflowRunReport(Bundle dataBundle,
JsonNode workflowRunReport) throws IOException {
Path path = getWorkflowRunReport(dataBundle);
try (OutputStream out = newOutputStream(path)) {
OBJECT_MAPPER.instance.writeValue(out, workflowRunReport);
public static Path getWorkflow(Bundle dataBundle) throws IOException {
return anyExtension(dataBundle.getRoot(), WORKFLOW);
public static Path getWorkflowDescription(Bundle dataBundle)
throws IOException {
Path annotations = getAnnotations(dataBundle);
return annotations.resolve(WORKFLOW + DOT_WFDESC_TTL);
public static void setWorkflowBundle(Bundle dataBundle,
WorkflowBundle wfBundle) throws IOException {
Path bundlePath = withExtension(getWorkflow(dataBundle), DOT_WFBUNDLE);
// TODO: Save as nested folder?
try (OutputStream outputStream = newOutputStream(bundlePath)) {
getWfBundleIO().writeBundle(wfBundle, outputStream,
} catch (WriterException e) {
throw new IOException("Can't write workflow bundle to: "
+ bundlePath, e);
// wfdesc
Path wfdescPath = getWorkflowDescription(dataBundle);
try (OutputStream outputStream = newOutputStream(wfdescPath)) {
getWfBundleIO().writeBundle(wfBundle, outputStream, WFDESC_TURTLE);
} catch (IllegalArgumentException | WriterException e) {
logger.log(Level.WARNING, "Can't write wfdesc to: " + bundlePath, e);
// throw new IOException("Can't write wfdesc to: " + bundlePath, e);
public static WorkflowBundle getWorkflowBundle(Bundle dataBundle)
throws ReaderException, IOException {
Path wf = getWorkflow(dataBundle);
// String type = Files.probeContentType(wf);
return getWfBundleIO().readBundle(newInputStream(wf), null);
public static Path getIntermediates(Bundle dataBundle) throws IOException {
Path intermediates = dataBundle.getRoot().resolve(INTERMEDIATES);
return intermediates;
public static Path getIntermediate(Bundle dataBundle, UUID uuid)
throws IOException {
String fileName = uuid.toString();
Path intermediates = getIntermediates(dataBundle);
// Folder is named after first 2 characters of UUID
Path folder = intermediates.resolve(fileName.substring(0, 2));
return anyExtension(folder, fileName);
public static long getListSize(Path list) throws IOException {
// Should fail if list is not a directory
try (DirectoryStream<Path> ds = newDirectoryStream(list)) {
long max = -1L;
for (Path entry : ds)
try {
long entryNum = getEntryNumber(entry);
if (entryNum > max)
max = entryNum;
} catch (NumberFormatException ex) {
return max + 1;
} catch (DirectoryIteratorException ex) {
throw ex.getCause();
// Options controlling how resolve(Path, ResolveOptions...) represents the
// leaf values it returns.
// NOTE(review): the enum constants that the descriptions below belong to are
// not visible in this view. resolve() references DEFAULT, STRING, BYTES, PATH,
// URI, REPLACE_ERRORS and REPLACE_NULL - confirm the constants and their order
// against the full source.
public enum ResolveOptions {
* Leaf values are represented as bundle {@link Path}s, except errors as
* {@link ErrorDocument} and references as {@link URL}. Note that specifying this
* option does not negate any of the other options like {@link #BYTES}.
* Leaf values should be represented as a {@link String} (NOTE: This won't work well if the path is a binary)
* Leaf values should be represented as a <code>byte[]</code>
* Leaf values should always be represented as {@link URI}s (except errors)
* Leaf values should be represented as bundle {@link Path}s (even if they are errors)
* Replace errors with <code>null</code>, or the empty string if {@link #REPLACE_NULL} is also specified.
* Instead of returning <code>null</code>, return the empty
* {@link String} "", or empty byte[] if {@link #BYTES} is specified, or
* the missing path if {@link #PATH} is specified.
* Deeply resolve a {@link Path} to JVM objects.
* <p>
* This method is intended mainly for presentational uses
* with a particular input/output port from
* {@link #getPorts(Path)} or {@link #getPort(Path, String)}.
* <p>
* Note that as all lists are resolved deeply (including lists of lists)
* and when using options {@link ResolveOptions#STRING} or {@link ResolveOptions#BYTES}
* the full content of the values are read into memory, this
* method can be time-consuming.
* <p>
* If the path is <code>null</code> or {@link #isMissing(Path)},
* <code>null</code> is returned, unless the option
* {@link ResolveOptions#REPLACE_NULL} is specified, which would return the
* empty String "".
* <p>
* If the path {@link #isValue(Path)} and the option
* {@link ResolveOptions#STRING} is specified, its
* {@link #getStringValue(Path)} is returned (assuming an UTF-8 encoding).
* NOTE: Binary formats (e.g. PNG) will NOT be represented correctly read as
* UTF-8 String and should instead be read directly with
* {@link Files#newInputStream(Path, java.nio.file.OpenOption...)}. Note
* that this could consume a large amount of memory as no size checks are
* performed.
* <p>
* If the option {@link ResolveOptions#URI} is specified, all non-missing
* non-error leaf values are resolved as a {@link URI}. If the path is a
* {@link #isReference(Path)} the URI will be the reference from
* {@link #getReference(Path)}, otherwise the URI will
* identify a {@link Path} within the current {@link Bundle}.
* <p>
* If the path {@link #isValue(Path)} and the option
* {@link ResolveOptions#BYTES} is specified, the complete content is returned as
* a <code>byte[]</code>. Note that this could consume a large amount of memory
* as no size checks are performed.
* <p>
* If the path {@link #isError(Path)}, the corresponding
* {@link ErrorDocument} is returned, except when the option
* {@link ResolveOptions#REPLACE_ERRORS} is specified, which means errors are
* returned as <code>null</code> (or <code>""</code> if {@link ResolveOptions#REPLACE_NULL} is also specified).
* <p>
* If the path {@link #isReference(Path)} and the option
* {@link ResolveOptions#URI} is <strong>not</strong> set,
* either a {@link File} or a {@link URL} is returned,
* depending on its protocol. If the reference protocol has no
* corresponding {@link URLStreamHandler}, a {@link URI} is returned
* instead.
* <p>
* If the path {@link #isList(Path)}, a {@link List} is returned
* corresponding to resolving the paths from {@link #getList(Path)}. using
* this method with the same options.
* <p>
* If none of the above, the {@link Path} itself is returned. This is
* thus the default for non-reference non-error leaf values if neither
* {@link ResolveOptions#STRING}, {@link ResolveOptions#BYTES} or
* {@link ResolveOptions#URI} are specified.
* To force returning of {@link Path}s for all non-missing leaf values, specify
* {@link ResolveOptions#PATH};
* @param path
* Data bundle path to resolve
* @param options
* Resolve options
* @return <code>null</code>, a {@link String}, {@link ErrorDocument},
* {@link URL}, {@link File}, {@link Path} or {@link List}
* (containing any of these) depending on the path type and the options.
* @throws IOException
* If the path (or any of the path in a contained list) can't be
* accessed
@SuppressWarnings({ "unchecked", "rawtypes" })
public static Object resolve(Path path, ResolveOptions... options) throws IOException {
EnumSet<ResolveOptions> opt;
if (options.length == 0) {
opt = EnumSet.of(ResolveOptions.DEFAULT); // no-op
} else {
opt = EnumSet.of(ResolveOptions.DEFAULT, options);
if (opt.contains(ResolveOptions.BYTES) && opt.contains(ResolveOptions.STRING)) {
throw new IllegalArgumentException("Incompatible options: BYTES and STRING");
if (opt.contains(ResolveOptions.BYTES) && opt.contains(ResolveOptions.PATH)) {
throw new IllegalArgumentException("Incompatible options: BYTES and PATH");
if (opt.contains(ResolveOptions.BYTES) && opt.contains(ResolveOptions.URI)) {
throw new IllegalArgumentException("Incompatible options: BYTES and URI");
if (opt.contains(ResolveOptions.STRING) && opt.contains(ResolveOptions.PATH)) {
throw new IllegalArgumentException("Incompatible options: STRING and PATH");
if (opt.contains(ResolveOptions.STRING) && opt.contains(ResolveOptions.URI)) {
throw new IllegalArgumentException("Incompatible options: STRING and URI");
if (opt.contains(ResolveOptions.PATH) && opt.contains(ResolveOptions.URI)) {
throw new IllegalArgumentException("Incompatible options: PATH and URI");
if (path == null || isMissing(path)) {
if (! opt.contains(ResolveOptions.REPLACE_NULL)) {
return null;
if (opt.contains(ResolveOptions.BYTES)) {
return new byte[0];
if (opt.contains(ResolveOptions.PATH)) {
return path;
if (opt.contains(ResolveOptions.URI)) {
return path.toUri();
return "";
if (isList(path)) {
List<Path> list = getList(path);
List<Object> objectList = new ArrayList<Object>(list.size());
for (Path pathElement : list) {
objectList.add(resolve(pathElement, options));
return objectList;
if (opt.contains(ResolveOptions.PATH)) {
return path;
if (isError(path)) {
if (opt.contains(ResolveOptions.REPLACE_ERRORS)) {
return opt.contains(ResolveOptions.REPLACE_NULL) ? "" : null;
return getError(path);
if (opt.contains(ResolveOptions.URI)) {
if (isReference(path)) {
return getReference(path);
} else {
return path.toUri();
if (isReference(path)) {
URI reference = getReference(path);
String scheme = reference.getScheme();
if ("file".equals(scheme)) {
return new File(reference);
} else {
try {
return reference.toURL();
} catch (IllegalArgumentException|MalformedURLException e) {
return reference;
if (isValue(path)) {
if (opt.contains(ResolveOptions.BYTES)) {
return Files.readAllBytes(path);
if (opt.contains(ResolveOptions.STRING)) {
return getStringValue(path);
// Fall-back - return Path as-is
return path;
* Deeply resolve path as a {@link Stream} that only contain leaf elements of
* the specified class.
* <p>
* This method is somewhat equivalent to {@link #resolve(Path, ResolveOptions...)}, but
* the returned stream is not in any particular order, and will contain the leaf
* items from all deep lists. Empty lists and error documents are ignored.
* <p>
* Any {@link IOException}s occurring during resolution are
* wrapped as {@link UncheckedIOException}.
* <p>
* Supported types include:
* <ul>
* <li>{@link String}.class</li>
* <li><code>byte[].class</code></li>
* <li>{@link Path}.class</li>
* <li>{@link URI}.class</li>
* <li>{@link URL}.class</li>
* <li>{@link File}.class</li>
* <li>{@link ErrorDocument}.class</li>
* <li>{@link Object}.class</li>
* </ul>
* @param path Data bundle path to resolve
* @param type Type of objects to return, e.g. <code>String.class</code>
* @return A {@link Stream} of resolved objects, or an empty stream if no such objects were resolved.
* @throws UncheckedIOException If the path could not be accessed.
public static <T> Stream<T> resolveAsStream(Path path, Class<T> type) throws UncheckedIOException {
ResolveOptions options;
if (type == String.class) {
options = ResolveOptions.STRING;
} else if (type == byte[].class) {
options = ResolveOptions.BYTES;
} else if (type == Path.class) {
options = ResolveOptions.PATH;
} else if (type == URI.class) {
options = ResolveOptions.URI;
} else {
// Dummy-option, we'll filter on the returned type anyway
options = ResolveOptions.DEFAULT;
if (isList(path)) {
// return Stream of unordered list of resolved list items,
// recursing to find the leaf nodes
try {
return Files.walk(path)
// avoid re-recursion
.filter(p -> !Files.isDirectory(p))
.flatMap(p -> resolveItemAsStream(p, type, options));
} catch (IOException e) {
throw new UncheckedIOException(e);
} else {
return resolveItemAsStream(path, type, options);
private static <T> Stream<T> resolveItemAsStream(Path path, Class<T> type, ResolveOptions options) throws UncheckedIOException {
try {
Object value = resolve(path, options);
if (type.isInstance(value)) {
return Stream.of(type.cast(value));
return Stream.empty();
} catch (IOException e) {
throw new UncheckedIOException(e);
public static WorkflowBundleIO getWfBundleIO() {
if (wfBundleIO == null)
wfBundleIO = new WorkflowBundleIO();
return wfBundleIO;
public static void setWfBundleIO(WorkflowBundleIO wfBundleIO) {
if (wfBundleIO == null)
throw new NullPointerException();
DataBundles.wfBundleIO = wfBundleIO;