blob: 7bd9e69a4c10a0256afe793f41e237c2c9e72698 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.any23.writer;
import org.apache.any23.extractor.ExtractionContext;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Value;
import java.io.PrintWriter;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import org.apache.any23.util.StringUtils;
/**
* Triple handler decorator useful for logging purposes.
*/
public class LoggingTripleHandler implements TripleHandler {
/**
* Decorated.
*/
private final TripleHandler underlyingHandler;
private final Map<String, Integer> contextTripleMap = new HashMap<String, Integer>();
private long startTime = 0;
private long contentLength = 0;
private final PrintWriter destination;
public LoggingTripleHandler(TripleHandler tripleHandler, PrintWriter destination) {
if(tripleHandler == null) {
throw new NullPointerException("tripleHandler cannot be null.");
}
if(destination == null) {
throw new NullPointerException("destination cannot be null.");
}
underlyingHandler = tripleHandler;
this.destination = destination;
printHeader(destination);
}
public void startDocument(IRI documentIRI) throws TripleHandlerException {
underlyingHandler.startDocument(documentIRI);
startTime = System.currentTimeMillis();
}
public void close() throws TripleHandlerException {
underlyingHandler.close();
destination.flush();
destination.close();
}
public void closeContext(ExtractionContext context) throws TripleHandlerException {
underlyingHandler.closeContext(context);
}
public void openContext(ExtractionContext context) throws TripleHandlerException {
underlyingHandler.openContext(context);
}
public void receiveTriple(Resource s, IRI p, Value o, IRI g, ExtractionContext context)
throws TripleHandlerException {
underlyingHandler.receiveTriple(s, p, o, g, context);
Integer i = contextTripleMap.get(context.getExtractorName());
if (i == null) i = 0;
contextTripleMap.put(context.getExtractorName(), (i + 1));
}
public void receiveNamespace(String prefix, String uri, ExtractionContext context)
throws TripleHandlerException {
underlyingHandler.receiveNamespace(prefix, uri, context);
}
@Override
public void endDocument(IRI documentIRI) throws TripleHandlerException {
underlyingHandler.endDocument(documentIRI);
long elapsedTime = System.currentTimeMillis() - startTime;
final AtomicBoolean success = new AtomicBoolean(true);
StringBuilder sb = new StringBuilder("[ ");
String[] parsers = contextTripleMap.entrySet().stream().map(e -> {
if (e.getValue() > 0) {
success.set(true);
}
return String.format(Locale.ROOT, "%s:%d", e.getKey(), e.getValue()); }
).collect(Collectors.toList()).toArray(new String[] {});
sb.append(StringUtils.join(", ", parsers));
sb.append(" ]");
destination.println(
documentIRI + "\t" + contentLength + "\t" + elapsedTime + "\t" + success.get() + "\t" + sb.toString()
);
contextTripleMap.clear();
}
public void setContentLength(long contentLength) {
underlyingHandler.setContentLength(contentLength);
this.contentLength = contentLength;
}
private void printHeader(PrintWriter writer) {
writer.println("# Document-IRI\tContent-Length\tElapsed-Time\tSuccess\tExtractors:Triples");
}
}