| package org.purl.wf4ever.provtaverna.export; |
| |
| import java.io.File; |
| import java.io.FileNotFoundException; |
| import java.io.IOException; |
| import java.io.InputStream; |
| import java.io.OutputStream; |
| import java.net.ProtocolException; |
| import java.net.URI; |
| import java.net.URISyntaxException; |
| import java.net.URL; |
| import java.nio.file.Files; |
| import java.nio.file.Path; |
| import java.security.DigestOutputStream; |
| import java.security.MessageDigest; |
| import java.security.NoSuchAlgorithmException; |
| import java.util.ArrayList; |
| import java.util.Collections; |
| import java.util.Comparator; |
| import java.util.HashMap; |
| import java.util.List; |
| import java.util.Map; |
| |
| import net.sf.taverna.t2.invocation.InvocationContext; |
| import net.sf.taverna.t2.provenance.api.ProvenanceAccess; |
| import net.sf.taverna.t2.reference.ExternalReferenceSPI; |
| import net.sf.taverna.t2.reference.ReferenceService; |
| import net.sf.taverna.t2.reference.ReferenceSet; |
| import net.sf.taverna.t2.reference.ReferenceSetService; |
| import net.sf.taverna.t2.reference.T2Reference; |
| import net.sf.taverna.t2.reference.ValueCarryingExternalReference; |
| import net.sf.taverna.t2.spi.SPIRegistry; |
| import net.sf.taverna.t2.workbench.reference.config.DataManagementConfiguration; |
| |
| import org.apache.commons.beanutils.PropertyUtils; |
| import org.apache.commons.codec.binary.Hex; |
| import org.apache.commons.io.IOUtils; |
| import org.apache.log4j.Logger; |
| import org.apache.tika.Tika; |
| import org.apache.tika.mime.MimeTypeException; |
| import org.apache.tika.mime.MimeTypes; |
| import org.openrdf.rio.RDFParserFactory; |
| import org.openrdf.rio.RDFParserRegistry; |
| import org.openrdf.rio.RDFWriterFactory; |
| import org.openrdf.rio.RDFWriterRegistry; |
| import org.purl.wf4ever.robundle.Bundle; |
| |
| import uk.org.taverna.databundle.DataBundles; |
| |
| public class Saver { |
| |
| private static final String APPLICATION_OCTET_STREAM = "application/octet-stream"; |
| |
| private static Logger logger = Logger.getLogger(Saver.class); |
| |
| /** |
| * @param saveProvAction |
| */ |
| public Saver(ReferenceService referenceService, InvocationContext context, |
| String runId, Map<String, T2Reference> chosenReferences) { |
| this.setReferenceService(referenceService); |
| this.setContext(context); |
| this.setRunId(runId); |
| this.setChosenReferences(chosenReferences); |
| prepareSesame(); |
| } |
| |
| /** |
| * Load |
| */ |
| protected void prepareSesame() { |
| RDFParserRegistry parserReg = RDFParserRegistry.getInstance(); |
| SPIRegistry<RDFParserFactory> parserSPI = new SPIRegistry<>( |
| RDFParserFactory.class); |
| for (RDFParserFactory service : parserSPI.getInstances()) { |
| parserReg.add(service); |
| } |
| |
| RDFWriterRegistry writerReg = RDFWriterRegistry.getInstance(); |
| SPIRegistry<RDFWriterFactory> writerSPI = new SPIRegistry<>( |
| RDFWriterFactory.class); |
| for (RDFWriterFactory service : writerSPI.getInstances()) { |
| writerReg.add(service); |
| } |
| |
| } |
| |
| private Map<Path, T2Reference> fileToId = new HashMap<>(); |
| |
| private Map<Path, String> sha1sums = new HashMap<>(); |
| private Map<Path, String> sha512sums = new HashMap<>(); |
| |
| private ReferenceService referenceService; |
| |
| private InvocationContext context; |
| |
| private String runId; |
| |
| private Map<String, T2Reference> chosenReferences; |
| |
| private Bundle bundle; |
| |
| private Map<T2Reference, String> mediaTypes = new HashMap<>(); |
| |
| /** |
| * @return the bundle |
| */ |
| public Bundle getBundle() { |
| return bundle; |
| } |
| |
| public void saveData(Path bundlePath) throws FileNotFoundException, |
| IOException { |
| Bundle bundle = DataBundles.createBundle(); |
| // String folderName = bundlePath.getFileName().toString(); |
| // if (folderName.endsWith(".")) { |
| // bundlePath = bundlePath.resolveSibling(folderName.substring(0, |
| // folderName.length() - 1)); |
| // } |
| setBundle(bundle); |
| saveToFolder(bundle.getRoot(), getChosenReferences(), |
| getReferenceService()); |
| DataBundles.closeAndSaveBundle(bundle, bundlePath); |
| } |
| |
| private void setBundle(Bundle bundle) { |
| this.bundle = bundle; |
| } |
| |
| protected static Tika tika = new Tika(); |
| |
| public Path saveReference(T2Reference t2Ref, Path file) throws IOException { |
| ReferenceSetService refSet = getReferenceService() |
| .getReferenceSetService(); |
| ReferenceSet referenceSet = refSet.getReferenceSet(t2Ref); |
| List<ExternalReferenceSPI> externalReferences = new ArrayList<ExternalReferenceSPI>( |
| referenceSet.getExternalReferences()); |
| Collections.sort(externalReferences, |
| new Comparator<ExternalReferenceSPI>() { |
| public int compare(ExternalReferenceSPI o1, |
| ExternalReferenceSPI o2) { |
| return (int) (o1.getResolutionCost() - o2 |
| .getResolutionCost()); |
| } |
| }); |
| String mimeType = findMimeType(externalReferences); |
| getMediaTypes().put(t2Ref, mimeType); |
| |
| Path targetFile = writeIfLocal(externalReferences, file, mimeType); |
| if (targetFile == null) { |
| URI uri = referenceAsURI(externalReferences); |
| if (uri != null) { |
| targetFile = DataBundles.setReference(file, uri); |
| } |
| } |
| |
| if (targetFile != null) { |
| getFileToId().put(targetFile, t2Ref); |
| } else { |
| logger.warn("Could not write out reference " + t2Ref); |
| } |
| |
| return targetFile; |
| |
| } |
| |
| private Path writeIfLocal(List<ExternalReferenceSPI> externalReferences, |
| Path file, String mimeType) throws IOException { |
| |
| ValueCarryingExternalReference<?> valRef = null; |
| for (ExternalReferenceSPI ref : externalReferences) { |
| if (ref instanceof ValueCarryingExternalReference) { |
| valRef = (ValueCarryingExternalReference<?>) ref; |
| break; |
| } |
| } |
| |
| if (valRef == null) { |
| return null; |
| } |
| |
| String fileExtension; |
| try { |
| fileExtension = MimeTypes.getDefaultMimeTypes().forName(mimeType) |
| .getExtension(); |
| } catch (MimeTypeException e1) { |
| fileExtension = ""; |
| } |
| Path targetFile = file.resolveSibling(file.getFileName() |
| + fileExtension); |
| |
| MessageDigest sha = null; |
| MessageDigest sha512 = null; |
| OutputStream output = Files.newOutputStream(targetFile); |
| try { |
| try { |
| sha = MessageDigest.getInstance("SHA"); |
| output = new DigestOutputStream(output, sha); |
| |
| sha512 = MessageDigest.getInstance("SHA-512"); |
| output = new DigestOutputStream(output, sha512); |
| } catch (NoSuchAlgorithmException e) { |
| logger.info("Could not find digest", e); |
| } |
| |
| IOUtils.copyLarge(valRef.openStream(getContext()), output); |
| } finally { |
| output.close(); |
| } |
| |
| if (sha != null) { |
| getSha1sums().put(targetFile.toRealPath(), hexOfDigest(sha)); |
| } |
| if (sha512 != null) { |
| sha512.digest(); |
| getSha512sums().put(targetFile.toRealPath(), hexOfDigest(sha512)); |
| } |
| |
| return targetFile; |
| } |
| |
| private URI referenceAsURI(List<ExternalReferenceSPI> externalReferences) { |
| for (ExternalReferenceSPI ref : externalReferences) { |
| String className = ref.getClass().getName(); |
| if (className |
| .equals("net.sf.taverna.t2.reference.impl.external.http.HttpReference")) { |
| URL url = (URL) getProperty(ref, "httpUrl"); |
| try { |
| return url.toURI(); |
| } catch (URISyntaxException e) { |
| logger.warn("Can't convert HttpReference to URI: " + url, e); |
| continue; |
| } |
| } else if (className |
| .equals("net.sf.taverna.t2.reference.impl.external.file.FileReference")) { |
| File file = (File) getProperty(ref, "file"); |
| return file.toURI(); |
| } else if (className |
| .equals("de.uni_luebeck.inb.knowarc.usecases.invocation.ssh.SshReference")) { |
| String host = (String) getProperty(ref, "host"); |
| int port = (int) getProperty(ref, "port"); |
| String path = (String) getProperty(ref, "fullPath"); |
| try { |
| return new URI("sftp", null, host, port, path, null, null); |
| } catch (URISyntaxException e) { |
| logger.warn("Can't convert SshReference to URI: sftp://" |
| + host + ":" + port + path, e); |
| continue; |
| } |
| } |
| } |
| return null; |
| } |
| |
| protected Object getProperty(ExternalReferenceSPI ref, String propertyName) { |
| try { |
| return PropertyUtils.getSimpleProperty(ref, propertyName); |
| } catch (Exception ex) { |
| throw new IllegalArgumentException("Can't look up " + propertyName |
| + " in bean " + ref, ex); |
| } |
| } |
| |
| private String findMimeType(List<ExternalReferenceSPI> externalReferences) |
| throws IOException, ProtocolException { |
| String mimeType = null; |
| for (ExternalReferenceSPI externalReference : externalReferences) { |
| String className = externalReference.getClass().getName(); |
| if (className |
| .equals("net.sf.taverna.t2.reference.impl.external.http.HttpReference")) { |
| URL url = (URL) getProperty(externalReference, "httpUrl"); |
| mimeType = tika.detect(url); |
| } else if (className |
| .equals("net.sf.taverna.t2.reference.impl.external.file.FileReference")) { |
| File file = (File) getProperty(externalReference, "file"); |
| mimeType = tika.detect(file); |
| } else if (className |
| .equals("de.uni_luebeck.inb.knowarc.usecases.invocation.ssh.SshReference")) { |
| String filename = (String) getProperty(externalReference, |
| "fileName"); |
| try (InputStream instream = externalReference |
| .openStream(context)) { |
| mimeType = tika.detect(instream, filename); |
| } |
| } else if (className |
| .equals("net.sf.taverna.t2.reference.impl.external.object.VMObjectReference")) { |
| mimeType = "application/x-java-serialized-object"; |
| } else { |
| try (InputStream instream = externalReference |
| .openStream(context)) { |
| mimeType = tika.detect(instream); |
| } |
| } |
| if (mimeType != null && !mimeType.equals(APPLICATION_OCTET_STREAM)) { |
| break; |
| } |
| } |
| if (mimeType == null || mimeType.isEmpty()) { |
| return APPLICATION_OCTET_STREAM; |
| } |
| return mimeType; |
| } |
| |
| protected void saveToFolder(Path folder, |
| Map<String, T2Reference> chosenReferences, |
| ReferenceService referenceService) throws IOException, |
| FileNotFoundException { |
| logger.info("Saving provenance and outputs to " + folder.toRealPath()); |
| Files.createDirectories(folder); |
| String connectorType = DataManagementConfiguration.getInstance() |
| .getConnectorType(); |
| ProvenanceAccess provenanceAccess = new ProvenanceAccess(connectorType, |
| getContext()); |
| W3ProvenanceExport export = new W3ProvenanceExport(provenanceAccess, |
| getRunId(), this); |
| export.setFileToT2Reference(getFileToId()); |
| export.setBundle(bundle); |
| |
| try { |
| logger.debug("Saving provenance"); |
| export.exportAsW3Prov(); |
| logger.info("Saved provenance"); |
| } catch (Exception e) { |
| logger.error("Failed to save the provenance graph", e); |
| } |
| } |
| |
| private String hexOfDigest(MessageDigest sha) { |
| return new String(Hex.encodeHex(sha.digest())); |
| } |
| |
| public ReferenceService getReferenceService() { |
| return referenceService; |
| } |
| |
| public void setReferenceService(ReferenceService referenceService) { |
| this.referenceService = referenceService; |
| } |
| |
| public InvocationContext getContext() { |
| return context; |
| } |
| |
| public void setContext(InvocationContext context) { |
| this.context = context; |
| } |
| |
| public String getRunId() { |
| return runId; |
| } |
| |
| public void setRunId(String runId) { |
| this.runId = runId; |
| } |
| |
| public Map<String, T2Reference> getChosenReferences() { |
| return chosenReferences; |
| } |
| |
| public void setChosenReferences(Map<String, T2Reference> chosenReferences) { |
| this.chosenReferences = chosenReferences; |
| } |
| |
| public Map<Path, T2Reference> getFileToId() { |
| return fileToId; |
| } |
| |
| public void setFileToId(Map<Path, T2Reference> fileToId) { |
| this.fileToId = fileToId; |
| } |
| |
| public Map<Path, String> getSha1sums() { |
| return sha1sums; |
| } |
| |
| public Map<T2Reference, String> getMediaTypes() { |
| return mediaTypes; |
| } |
| |
| public Map<Path, String> getSha512sums() { |
| return sha512sums; |
| } |
| |
| public void setMediaTypes(Map<T2Reference, String> mediaTypes) { |
| this.mediaTypes = mediaTypes; |
| } |
| |
| } |