blob: 549f0613613ec7a966f34d5af43b871698c35780 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.stanbol.enhancer.engines.metaxa.core.html;
import java.io.IOException;
import java.io.StringReader;
import java.io.StringWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import javax.xml.transform.stream.StreamSource;
import org.ontoware.rdf2go.exception.ModelRuntimeException;
import org.ontoware.rdf2go.model.Syntax;
import org.semanticdesktop.aperture.extractor.ExtractorException;
import org.semanticdesktop.aperture.rdf.RDFContainer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
/**
* XsltExtractor.java
*
* @author <a href="mailto:kasper@dfki.de">Walter Kasper</a>
*/
public class XsltExtractor implements HtmlExtractionComponent {
/**
* This contains the logger.
*/
private static final Logger LOG =
LoggerFactory.getLogger(XsltExtractor.class);
public static Syntax N3 =
new Syntax("N3", "application/rdf+n3", ".n3", true);
private String uriParameter = "uri";
private Transformer transformer;
private String id;
private URI source;
private Syntax syntax = Syntax.RdfXml;
public XsltExtractor() {
}
public XsltExtractor(String id, String fileName, TransformerFactory factory)
throws InitializationException {
this.id = id;
try {
URI location =
getClass().getClassLoader().getResource(fileName).toURI();
source = location;
} catch (URISyntaxException e) {
throw new InitializationException(e.getMessage(), e);
}
initialize(factory);
}
public String getUriParameter() {
return uriParameter;
}
public void setUriParameter(String uriParameter) {
this.uriParameter = uriParameter;
}
public Syntax getSyntax() {
return syntax;
}
public void setSyntax(Syntax syntax) {
this.syntax = syntax;
}
public Transformer getTransformer() {
return transformer;
}
public void setTransformer(Transformer transformer) {
this.transformer = transformer;
}
public String getId() {
return id;
}
public void setId(String id) {
this.id = id;
}
public URI getSource() {
return source;
}
public void setSource(URI source) {
this.source = source;
}
public synchronized void extract(String id, Document doc, Map<String, Object> params,
RDFContainer result)
throws ExtractorException {
if (params == null) {
params = new HashMap<String, Object>();
}
params.put(this.uriParameter, id);
initTransformerParameters(params);
Source source = new DOMSource(doc);
StringWriter writer = new StringWriter();
StreamResult output = new StreamResult(writer);
try {
this.transformer.transform(source, output);
String rdf = writer.toString();
LOG.debug(rdf);
StringReader reader = new StringReader(rdf);
result.getModel().readFrom(reader, this.syntax);
reader.close();
} catch (TransformerException e) {
throw new ExtractorException(e.getMessage(), e);
} catch (ModelRuntimeException e) {
throw new ExtractorException(e.getMessage(), e);
} catch (IOException e) {
throw new ExtractorException(e.getMessage(), e);
}
}
public void initialize(TransformerFactory factory)
throws InitializationException {
if (source == null || id == null) {
throw new InitializationException("Missing source or id");
}
if (factory == null) {
factory = TransformerFactory.newInstance();
factory.setURIResolver(new BundleURIResolver());
}
StreamSource xsltSource = new StreamSource(source.toString());
xsltSource.setSystemId(source.toString());
try {
transformer = factory.newTransformer(xsltSource);
} catch (TransformerConfigurationException e) {
throw new InitializationException(e.getMessage(), e);
}
}
public void initTransformerParameters(Map<String, Object> params) {
transformer.clearParameters();
if (params != null) {
Set<String> parms = params.keySet();
for (String piter : parms) {
transformer.setParameter(piter, params.get(piter));
}
}
}
}