blob: 7d5d2cb016b6d2a17b847e9598d8afd5c91f7ebd [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.contentitem.file;
import static org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.DEFAULT_CONTENT_ITEM_PREFIX;
import static org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper.SHA1;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.commons.io.IOUtils;
import org.apache.felix.scr.annotations.Activate;
import org.apache.felix.scr.annotations.Component;
import org.apache.felix.scr.annotations.Deactivate;
import org.apache.felix.scr.annotations.Properties;
import org.apache.felix.scr.annotations.Property;
import org.apache.felix.scr.annotations.Service;
import org.apache.stanbol.commons.indexedgraph.IndexedMGraph;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentSink;
import org.apache.stanbol.enhancer.servicesapi.ContentSource;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.impl.ContentItemImpl;
import org.osgi.framework.Constants;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* ContentItemFactory that stores the passed content in temporary files. This
* factory should be preferred to the InMemoryContentItemFactory in cases where
* the content passed to the Enhancer can not be kept in memory.
* @author Rupert Westenthaler
*
*/
@Component(inherit=true)
@Service(value=ContentItemFactory.class)
@Properties(value={
@Property(name=FileContentItemFactory.PARAM_BASE_DIR,value=""),
@Property(name=Constants.SERVICE_RANKING, intValue=50)
})
public class FileContentItemFactory extends AbstractContentItemFactory implements ContentItemFactory {
private final Logger log = LoggerFactory.getLogger(FileContentItemFactory.class);
public static final String DEFAULT_BINARY_MIMETYPE = "application/octet-stream";
public static final String PARAM_BASE_DIR = "stanbol.enhancer.contentitem.file.baseDir";
private static FileContentItemFactory instance;
/**
* Base directory used to create temp files
*/
private File baseDir;
/**
 * Returns the shared instance of this factory, creating it on first use
 * with the no-argument constructor. As no base directory is set for this
 * instance, temp files are created in the system default temporary-file
 * directory.
 * <p>
 * Within an OSGI environment this getter should not be used, because this
 * factory is also registered as OSGI service.
 * <p>
 * NOTE: the lazy initialisation is not synchronized.
 * @return the singleton instance
 */
public static FileContentItemFactory getInstance(){
    if(instance != null){
        return instance;
    }
    instance = new FileContentItemFactory();
    return instance;
}
/**
* Creates a factory without a base directory. {@link #baseDir} stays
* <code>null</code> unless it is set later by
* {@link #activate(ComponentContext)}.
*/
public FileContentItemFactory() {
super(false); //dereference all data on construction
}
/**
 * Creates a factory that uses the given directory for its temp files.
 * Checking and creating the directory is delegated to
 * {@link #initBaseDir()}.
 * @param baseDir the directory for temp files. If <code>null</code> no
 * base directory is set and nothing is initialised.
 * @throws IOException if thrown by {@link #initBaseDir()}
 */
public FileContentItemFactory(File baseDir) throws IOException {
    this();
    if(baseDir == null){
        return; // nothing to initialise
    }
    this.baseDir = baseDir;
    initBaseDir();
}
/**
 * Activates this component and - if configured - initialises the base
 * directory used for temp files.
 * <p>
 * The value of {@link #PARAM_BASE_DIR} is read from the component
 * properties. A missing or empty value leaves {@link #baseDir} untouched.
 * Otherwise the value is resolved relative to the <code>sling.home</code>
 * framework property if that property is set, and used as is if not.
 * @param ctx the component context
 * @throws ConfigurationException if the configured base directory can not
 * be initialised
 */
@Activate
protected void activate(ComponentContext ctx) throws ConfigurationException {
    Object value = ctx.getProperties().get(PARAM_BASE_DIR);
    if(value == null || value.toString().isEmpty()){
        return; // no base directory configured
    }
    String home = ctx.getBundleContext().getProperty("sling.home");
    // keep a local reference: initBaseDir() may reset the field to null
    final File dir = home != null ?
            new File(home,value.toString()) : new File(value.toString());
    baseDir = dir;
    try {
        initBaseDir();
    } catch (Exception e) {
        // do not keep a directory that could not be initialised
        baseDir = null;
        // the exception MUST be thrown, otherwise the broken
        // configuration is silently ignored
        throw new ConfigurationException(PARAM_BASE_DIR, "Unable to initialise "
            + "configured base Directory '"+value+"' (absolute path: '"
            + dir.getAbsolutePath()+"')!",e);
    }
}
/**
 * Internally used to initialise the {@link #baseDir}: checks that the
 * configured path is not an existing non-directory file and creates the
 * directory (including missing parent directories) if it does not exist.
 * @throws IllegalArgumentException if a file with the configured path
 * already exists but is not a directory. In this case {@link #baseDir} is
 * reset to <code>null</code>.
 * @throws IOException if the configured directory does not exist and
 * could not be created
 */
private void initBaseDir() throws IOException {
    // local copy, because the field is reset before the exception is built
    final File dir = baseDir;
    if(dir.exists() && !dir.isDirectory()){
        baseDir = null;
        throw new IllegalArgumentException("A File with the configured Directory '"
            + dir.getAbsolutePath()+ "' already exists, but is not a Directory!");
    }
    log.info("activate {} with temp directory {}",getClass().getSimpleName(),
        dir.getAbsolutePath());
    // mkdirs() also returns false if the directory was created by someone
    // else in the meantime, so check again before reporting a failure
    if(!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()){
        throw new IOException("Unable to create "
            + "temp-directory '"+dir.getAbsolutePath()+"'!");
    }
}
/**
* Deactivates this component by clearing the configured base directory.
* @param ctx the component context (not used)
*/
@Deactivate
protected void deactivate(ComponentContext ctx){
// back to "no base directory configured"
baseDir = null;
}
/**
* Creates a {@link FileContentItem} for the given id, blob and metadata.
* A <code>null</code> id or <code>null</code> metadata is handled by the
* {@link FileContentItem} constructor.
* @param id the id of the content item
* @param blob the blob with the content
* @param metadata the graph used for the metadata
* @return the created content item
*/
@Override
protected ContentItem createContentItem(UriRef id, Blob blob, MGraph metadata) {
return new FileContentItem(id, blob, metadata);
}
/**
* Creates a {@link FileContentItem} whose id is built from the given prefix
* by {@link #getDefaultUri(Blob, String)}.
* @param prefix the prefix used for the id
* @param blob the blob with the content
* @param metadata the graph used for the metadata
* @return the created content item
*/
@Override
protected ContentItem createContentItem(String prefix, Blob blob, MGraph metadata) {
return new FileContentItem(prefix, blob, metadata);
}
/**
* Creates a {@link FileBlob} for the given source. The {@link FileBlob}
* constructor copies the content of the source to a temp file.
* @param source the content source
* @return the created blob
* @throws IOException if thrown by the {@link FileBlob} constructor
* @throws IllegalArgumentException if the source is <code>null</code>
*/
@Override
public Blob createBlob(ContentSource source) throws IOException {
return new FileBlob(source);
}
/**
* Creates a {@link FileContentSink} that writes to a temp file.
* @param mediaType the media type of the content written to the sink. If
* <code>null</code> the blob uses {@link #DEFAULT_BINARY_MIMETYPE}.
* @return the created sink
*/
@Override
public ContentSink createContentSink(String mediaType) throws IOException {
return new FileContentSink(mediaType);
}
/**
 * Creates a new, empty temp file. The file is created in the
 * {@link #baseDir} or - if no base directory is set - in the system
 * default temporary-file directory. If the file can not be created in the
 * base directory, the system default temporary-file directory is tried as
 * fallback. The created file is registered for deletion on JVM exit.
 * @param prefix the prefix for the file name as passed to
 * {@link File#createTempFile(String, String, File)}
 * @return the created temp file
 * @throws IllegalStateException if the temp file could not be created
 */
protected File createTempFile(String prefix){
    // read the field once, as it may be reset by deactivate(..)
    final File dir = baseDir;
    File tmpFile;
    try {
        tmpFile = File.createTempFile(prefix, null, dir);
    } catch (IOException e) {
        if(dir == null){
            // the system temp directory was already used: no fallback left
            throw new IllegalStateException("Unable to create temp-file",e);
        }
        // log the cause, so that the reason for the fallback is not lost
        log.warn("Unable to create temp-file in directory "+dir
            + " (try to create in system temp)", e);
        try {
            tmpFile = File.createTempFile(prefix, null, null);
        } catch (IOException e1) {
            throw new IllegalStateException("Unable to create temp-file "
                + "in '"+dir+"' and system temp directory",e1);
        }
    }
    tmpFile.deleteOnExit();
    return tmpFile;
}
/**
* {@link ContentSink} implementation backed by a {@link WriteableFileBlob}:
* data written to the {@link #getOutputStream() OutputStream} end up in the
* temp file of the {@link #getBlob() Blob}.
*/
public class FileContentSink implements ContentSink {
// the file backed blob receiving the data written to this sink
private final WriteableFileBlob blob;
/**
* Creates the sink together with its {@link WriteableFileBlob}
* @param mediaType the media type passed on to the blob
* (<code>null</code> is handled by the blob)
*/
protected FileContentSink(String mediaType){
blob = new WriteableFileBlob(mediaType);
}
/**
* @return the stream writing to the temp file of the blob
*/
@Override
public OutputStream getOutputStream() {
return blob.getOutputStream();
}
/**
* @return the blob providing access to the written data
*/
@Override
public Blob getBlob() {
return blob;
}
}
/**
 * {@link Blob} implementation backed by a temp file that is filled via the
 * {@link OutputStream} returned by {@link #getOutputStream()}. The mime
 * type and its parameters are kept in memory. The stream is closed and the
 * temp file is deleted when the instance is finalized.
 */
public class WriteableFileBlob implements Blob {
    private final File file;
    private final OutputStream out;
    private final String mimeType;
    private final Map<String,String> parameters;
    /**
     * Creates the blob and opens the stream to its temp file.
     * @param mediaType the media type (including parameters) or
     * <code>null</code> to use {@link #DEFAULT_BINARY_MIMETYPE}
     * @throws IllegalStateException if the temp file can not be created
     * or opened for writing
     */
    protected WriteableFileBlob(String mediaType){
        // Process the media type BEFORE the temp file is created and
        // opened. If this step fails, no file and no open stream are left
        // behind waiting for the finalizer.
        Map<String,String> parsed;
        if(mediaType == null){
            this.mimeType = DEFAULT_BINARY_MIMETYPE;
            parsed = new HashMap<String,String>();
        } else {
            parsed = ContentItemHelper.parseMimeType(mediaType);
            // the mime type itself is stored with the null key
            this.mimeType = parsed.remove(null);
        }
        this.parameters = Collections.unmodifiableMap(parsed);
        this.file = createTempFile("blob");
        try {
            this.out = new FileOutputStream(file);
        } catch (FileNotFoundException e) {
            throw new IllegalStateException("temporary file '"
                + file.getAbsolutePath()+"' was not created as expected!",e);
        }
    }
    /**
     * Used by the {@link FileContentSink} implementation
     * @return the stream writing to the temp file of this blob
     */
    protected final OutputStream getOutputStream(){
        return out;
    }
    @Override
    public String getMimeType() {
        return mimeType;
    }
    /**
     * @return a new stream reading the temp file from its start
     * @throws IllegalStateException if the temp file can not be opened
     */
    @Override
    public InputStream getStream() {
        try {
            return new FileInputStream(file);
        } catch (FileNotFoundException e) {
            throw new IllegalStateException("temporary file '"
                + file.getAbsolutePath()+"' no longer present!",e);
        }
    }
    /**
     * @return the unmodifiable parameters of the media type
     */
    @Override
    public Map<String,String> getParameter() {
        return parameters;
    }
    /**
     * @return the current length of the temp file
     */
    @Override
    public long getContentLength() {
        return file.length();
    }
    /**
     * Closes the stream and deletes the temp file.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            IOUtils.closeQuietly(out);
            // file is null if the constructor failed before creating it
            if(file != null){
                file.delete();
            }
        } finally {
            super.finalize();
        }
    }
}
/**
 * Blob implementation that stores the data in a temp file. NOTE that
 * all the other information such as {@link #getMimeType()},
 * {@link #getParameter()} are kept in memory. So this can NOT be used
 * to persist a ContentItem!
 * @author Rupert Westenthaler
 *
 */
public class FileBlob implements Blob {
    private final File file;
    /**
     * This implementation generates the sha1 while copying the data
     * in the constructor to the file to avoid reading the data twice if a
     * {@link ContentItem} is created based on a Blob.
     */
    private final String sha1;
    private final String mimeType;
    private final Map<String,String> parameters;
    /**
     * Copies the content of the given source to a new temp file and
     * calculates the sha1 of the content while copying. If the
     * initialisation fails the temp file is deleted right away.
     * @param source the source of the content
     * @throws IOException on any error while copying the content
     * @throws IllegalArgumentException if the source is <code>null</code>
     * @throws IllegalStateException if the temp file can not be created
     */
    protected FileBlob(ContentSource source) throws IOException {
        if(source == null){
            throw new IllegalArgumentException("The parsed ContentSource MUST NOT be NULL!");
        }
        file = createTempFile("blob");
        boolean initialised = false;
        try {
            OutputStream out = null;
            InputStream in = null;
            try {
                out = new FileOutputStream(file);
                in = source.getStream();
                sha1 = ContentItemHelper.streamDigest(in, out, SHA1);
            } finally {
                IOUtils.closeQuietly(in);
                IOUtils.closeQuietly(out);
            }
            Map<String,String> parsed;
            String mediaType = source.getMediaType();
            if(mediaType == null){
                this.mimeType = DEFAULT_BINARY_MIMETYPE;
                parsed = new HashMap<String,String>();
            } else {
                parsed = ContentItemHelper.parseMimeType(mediaType);
                // the mime type itself is stored with the null key
                this.mimeType = parsed.remove(null);
            }
            this.parameters = Collections.unmodifiableMap(parsed);
            initialised = true;
        } finally {
            if(!initialised){
                // do not wait for the finalizer (or the JVM exit) to get
                // rid of the file of a blob that was never created
                file.delete();
            }
        }
    }
    /**
     * The tmp file representing this Blob
     * @return the file
     */
    protected final File getFile() {
        return file;
    }
    /**
     * The sha1 of this Blob - typically used to generate the default IDs
     * of a ContentItem
     * @return the sha1
     */
    protected final String getSha1() {
        return sha1;
    }
    @Override
    public String getMimeType() {
        return mimeType;
    }
    /**
     * @return a new stream reading the temp file from its start
     * @throws IllegalStateException if the temp file can not be opened
     */
    @Override
    public InputStream getStream() {
        try {
            return new FileInputStream(file);
        } catch (FileNotFoundException e) {
            throw new IllegalStateException("temporary file '"
                + file.getAbsolutePath()+"' no longer present!",e);
        }
    }
    /**
     * @return the unmodifiable parameters of the media type
     */
    @Override
    public Map<String,String> getParameter() {
        return parameters;
    }
    /**
     * @return the length of the temp file
     */
    @Override
    public long getContentLength() {
        return file.length();
    }
    /**
     * Deletes the temp file.
     */
    @Override
    protected void finalize() throws Throwable {
        try {
            // file is null if the constructor failed before creating it
            if(file != null){
                file.delete();
            }
        } finally {
            super.finalize();
        }
    }
}
/**
 * Utility that returns the ID for a FileContentItem based on
 * {@link FileBlob#getSha1()}. The ID is the prefix followed by the lower
 * case digest name, a '-' and the sha1 of the blob.<p>
 * This method is part of the {@link FileContentItemFactory} because it
 * is used in the super(..) call of the {@link FileContentItem}. Normally
 * it would be a static method of the inner class (what is a similar scope
 * as a non static method in the outer class).
 * @param blob the blob
 * @param prefix the prefix of the created id
 * @return the id
 * @throws IllegalArgumentException if the parsed {@link Blob} or the
 * prefix is <code>null</code>
 * @throws IllegalStateException if the parsed blob is not an {@link FileBlob}
 */
protected UriRef getDefaultUri(Blob blob, String prefix) {
    if(blob == null){
        throw new IllegalArgumentException("The parsed Blob MUST NOT be NULL!");
    }
    if(prefix == null){
        throw new IllegalArgumentException("The parsed prefix MUST NOT be NULL!");
    }
    if(!(blob instanceof FileBlob)) {
        // only FileBlobs provide the pre-calculated sha1
        throw new IllegalStateException("FileContentItem expects FileBlobs to be used "
            + "as Blob implementation (found: "+blob.getClass()+")!");
    }
    return new UriRef(prefix+SHA1.toLowerCase()+ '-' + ((FileBlob)blob).getSha1());
}
/**
* {@link ContentItem} implementation created by this factory. Default ids
* are built by {@link FileContentItemFactory#getDefaultUri(Blob, String)},
* which only accepts {@link FileBlob}s.
*/
protected class FileContentItem extends ContentItemImpl implements ContentItem {
/**
* @param id the id. If <code>null</code> the id is built from the blob
* using the DEFAULT_CONTENT_ITEM_PREFIX
* @param blob the blob with the content
* @param metadata the metadata. If <code>null</code> a new
* {@link IndexedMGraph} is used
*/
public FileContentItem(UriRef id, Blob blob,MGraph metadata) {
super(id == null ? getDefaultUri(blob, DEFAULT_CONTENT_ITEM_PREFIX) : id, blob,
metadata == null ? new IndexedMGraph() : metadata);
}
/**
* @param prefix the prefix used to build the id from the blob
* @param blob the blob with the content
* @param metadata the metadata. If <code>null</code> a new
* {@link IndexedMGraph} is used
*/
public FileContentItem(String prefix, Blob blob,MGraph metadata) {
super(getDefaultUri(blob, prefix), blob,
metadata == null ? new IndexedMGraph() : metadata);
}
}
}