| /* |
| * Licensed to the Apache Software Foundation (ASF) under one or more |
| * contributor license agreements. See the NOTICE file distributed with |
| * this work for additional information regarding copyright ownership. |
| * The ASF licenses this file to You under the Apache License, Version 2.0 |
| * (the "License"); you may not use this file except in compliance with |
| * the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, software |
| * distributed under the License is distributed on an "AS IS" BASIS, |
| * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| * See the License for the specific language governing permissions and |
| * limitations under the License. |
| */ |
| |
| package org.apache.any23.writer; |
| |
import org.apache.any23.extractor.ExtractionContext;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Objects;
import java.util.concurrent.atomic.AtomicInteger;
| |
| /** |
| * A {@link TripleHandler} that collects |
| * various information about the extraction process, such as |
| * the extractors used and the total number of triples. |
| * |
| * @author Richard Cyganiak (richard@cyganiak.de) |
| */ |
| public class ReportingTripleHandler implements TripleHandler { |
| |
| private final TripleHandler wrapped; |
| |
| private final Collection<String> extractorNames = new HashSet<>(); |
| private AtomicInteger totalTriples = new AtomicInteger(0); |
| private AtomicInteger totalDocuments = new AtomicInteger(0); |
| |
| public ReportingTripleHandler(TripleHandler wrapped) { |
| if(wrapped == null) { |
| throw new NullPointerException("wrapped cannot be null."); |
| } |
| this.wrapped = wrapped; |
| } |
| |
| public Collection<String> getExtractorNames() { |
| return extractorNames; |
| } |
| |
| public int getTotalTriples() { |
| return totalTriples.get(); |
| } |
| |
| public int getTotalDocuments() { |
| return totalDocuments.get(); |
| } |
| |
| /** |
| * @return a human readable report. |
| */ |
| public String printReport() { |
| return String.format(Locale.ROOT, "Total Documents: %d, Total Triples: %d", getTotalDocuments(), getTotalTriples()); |
| } |
| |
| public void startDocument(IRI documentIRI) throws TripleHandlerException { |
| totalDocuments.incrementAndGet(); |
| wrapped.startDocument(documentIRI); |
| } |
| |
| public void openContext(ExtractionContext context) throws TripleHandlerException { |
| wrapped.openContext(context); |
| } |
| |
| public void receiveNamespace( |
| String prefix, |
| String uri, |
| ExtractionContext context |
| ) throws TripleHandlerException { |
| wrapped.receiveNamespace(prefix, uri, context); |
| } |
| |
| public void receiveTriple( |
| Resource s, |
| IRI p, |
| Value o, |
| IRI g, |
| ExtractionContext context |
| ) throws TripleHandlerException { |
| extractorNames.add(context.getExtractorName()); |
| totalTriples.incrementAndGet(); |
| wrapped.receiveTriple(s, p, o, g, context); |
| } |
| |
| public void setContentLength(long contentLength) { |
| wrapped.setContentLength(contentLength); |
| } |
| |
| public void closeContext(ExtractionContext context) throws TripleHandlerException { |
| wrapped.closeContext(context); |
| } |
| |
| public void endDocument(IRI documentIRI) throws TripleHandlerException { |
| wrapped.endDocument(documentIRI); |
| } |
| |
| public void close() throws TripleHandlerException { |
| wrapped.close(); |
| } |
| |
| } |