blob: d2b94a61c78599d8a38a784f670cc712c60f0307 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.sling.cms.core.internal.listeners;
import java.io.IOException;
import java.io.InputStream;
import java.util.Calendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.jackrabbit.JcrConstants;
import org.apache.jackrabbit.util.Text;
import org.apache.sling.api.resource.LoginException;
import org.apache.sling.api.resource.ModifiableValueMap;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceResolverFactory;
import org.apache.sling.api.resource.observation.ExternalResourceChangeListener;
import org.apache.sling.api.resource.observation.ResourceChange;
import org.apache.sling.api.resource.observation.ResourceChangeListener;
import org.apache.sling.cms.File;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.osgi.service.component.annotations.Component;
import org.osgi.service.component.annotations.Reference;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * A Resource Change Listener which extracts the metadata from sling:Files when
 * they are uploaded.
 * <p>
 * The listener is registered for {@code ADDED} changes below {@code /content}
 * and {@code /static}. For every change it parses the binary of the added
 * resource with Apache Tika and stores the resulting metadata in a
 * {@code metadata} node below the resource's {@code jcr:content} child.
 */
@Component(service = { FileMetadataExtractor.class, ResourceChangeListener.class,
        ExternalResourceChangeListener.class }, property = { ResourceChangeListener.CHANGES + "=ADDED",
                ResourceChangeListener.PATHS + "=/content",
                ResourceChangeListener.PATHS + "=/static" }, immediate = true)
public class FileMetadataExtractor implements ResourceChangeListener, ExternalResourceChangeListener {

    /** Name of the node, below {@code jcr:content}, that holds the extracted metadata. */
    public static final String NN_METADATA = "metadata";

    /**
     * Property name {@code X-Parsed-By}. Not referenced inside this class;
     * presumably used by consumers of the extracted metadata (verify against callers).
     */
    public static final String PN_X_PARSED_BY = "X-Parsed-By";

    private static final Logger log = LoggerFactory.getLogger(FileMetadataExtractor.class);

    @Reference
    private ResourceResolverFactory factory;

    /**
     * Extracts the metadata of the supplied file. Convenience overload which
     * delegates to {@link #extractMetadata(Resource)} with the file's resource.
     *
     * @param file the file to extract the metadata from
     * @throws IOException   if reading the binary or persisting the metadata fails
     * @throws SAXException  if the parser reports a SAX error
     * @throws TikaException if Tika fails to parse the binary
     */
    public void extractMetadata(File file) throws IOException, SAXException, TikaException {
        extractMetadata(file.getResource());
    }

    /**
     * Parses the binary of the supplied resource and writes every metadata
     * entry reported by the parser to the {@code jcr:content/metadata} node,
     * creating that node when it does not exist yet. The changes are committed
     * through the resource's own resolver.
     * <p>
     * Nothing is written (only a warning is logged) when the resource has no
     * {@code jcr:content} child, when an existing metadata node cannot be
     * adapted to a {@link ModifiableValueMap}, or when the resource cannot be
     * adapted to an {@link InputStream}.
     *
     * @param resource the resource to extract the metadata from
     * @throws IOException   if reading the binary or persisting the metadata fails
     * @throws SAXException  if the parser reports a SAX error
     * @throws TikaException if Tika fails to parse the binary
     */
    public void extractMetadata(Resource resource) throws IOException, SAXException, TikaException {
        log.info("Extracting metadata from {}", resource.getPath());

        Resource content = resource.getChild(JcrConstants.JCR_CONTENT);
        if (content == null) {
            log.warn("Content resource is null");
            return;
        }

        // Decide where the metadata goes before touching the binary, so that no
        // stream is opened when there is nothing we could write to.
        Resource metadata = content.getChild(NN_METADATA);
        Map<String, Object> properties;
        if (metadata != null) {
            properties = metadata.adaptTo(ModifiableValueMap.class);
        } else {
            properties = new HashMap<>();
            properties.put(JcrConstants.JCR_PRIMARYTYPE, JcrConstants.NT_UNSTRUCTURED);
        }
        if (properties == null) {
            log.warn("Failed to update metadata for {}", resource.getPath());
            return;
        }

        Metadata md = new Metadata();
        // try-with-resources guarantees the binary stream is released even when
        // parsing fails; a null resource is tolerated by the construct.
        try (InputStream is = resource.adaptTo(InputStream.class)) {
            if (is == null) {
                log.warn("Unable to read binary content of {}", resource.getPath());
                return;
            }
            Parser parser = new AutoDetectParser();
            // Only the metadata is of interest. Forwarding the body events to a
            // no-op handler avoids buffering the extracted text and the write
            // limit of the default BodyContentHandler, which aborts the parse of
            // large documents with a SAXException.
            BodyContentHandler handler = new BodyContentHandler(new DefaultHandler());
            ParseContext context = new ParseContext();
            parser.parse(is, handler, md, context);
        }

        for (String name : md.names()) {
            updateProperty(properties, name, md);
        }
        if (metadata == null) {
            ResourceResolver resolver = resource.getResourceResolver();
            resolver.create(content, NN_METADATA, properties);
        }
        resource.getResourceResolver().commit();
        log.info("Metadata extracted from {}", resource.getPath());
    }

    /**
     * Copies a single metadata entry into the property map. The key is the
     * metadata name with illegal JCR characters escaped. For names that resolve
     * to a registered Tika {@link Property} the value is stored as a string
     * array (multi-valued), a {@link Calendar} (date), an {@link Integer}, or a
     * plain string, in that order of preference; unregistered names are stored
     * as plain strings.
     *
     * @param properties the map to write to
     * @param name       the metadata name as reported by Tika
     * @param metadata   the metadata to read the value from
     */
    private void updateProperty(Map<String, Object> properties, String name, Metadata metadata) {
        log.trace("Updating property: {}", name);
        String filtered = Text.escapeIllegalJcrChars(name);
        Property property = Property.get(name);
        if (property == null) {
            properties.put(filtered, metadata.get(name));
            return;
        }

        if (metadata.isMultiValued(property)) {
            properties.put(filtered, metadata.getValues(property));
            return;
        }

        java.util.Date date = metadata.getDate(property);
        if (date != null) {
            // Stored as Calendar, the date type the repository persists natively.
            Calendar cal = Calendar.getInstance();
            cal.setTime(date);
            properties.put(filtered, cal);
            return;
        }

        Integer number = metadata.getInt(property);
        if (number != null) {
            properties.put(filtered, number);
        } else {
            properties.put(filtered, metadata.get(property));
        }
    }

    /**
     * Extracts the metadata for every reported change using the
     * {@code sling-cms-metadata} service user. A failure on one resource is
     * logged and does not prevent the remaining changes from being processed.
     *
     * @param changes the resource changes to process
     */
    @Override
    public void onChange(List<ResourceChange> changes) {
        Map<String, Object> serviceParams = new HashMap<>();
        serviceParams.put(ResourceResolverFactory.SUBSERVICE, "sling-cms-metadata");
        ResourceResolver serviceResolver = null;
        try {
            serviceResolver = factory.getServiceResourceResolver(serviceParams);
            for (ResourceChange rc : changes) {
                Resource changed = serviceResolver.getResource(rc.getPath());
                if (changed == null) {
                    // Already removed again, or not readable by the service user.
                    log.debug("Skipping {}, resource not found", rc.getPath());
                    continue;
                }
                try {
                    extractMetadata(changed);
                } catch (Throwable t) {
                    // Deliberately broad: one unparseable file must not abort
                    // the rest of the batch.
                    log.warn("Failed to extract metadata due to exception", t);
                    // Drop whatever the failed extraction left uncommitted so it
                    // is not persisted (or does not fail again) together with
                    // the next resource handled by this shared resolver.
                    if (serviceResolver.hasChanges()) {
                        serviceResolver.revert();
                    }
                }
            }
        } catch (LoginException e) {
            log.error("Exception getting service user", e);
        } finally {
            if (serviceResolver != null) {
                serviceResolver.close();
            }
        }
    }
}