// blob: 8cd08a67fb814e267962f103b62ada60b91a003a [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.pdfbox.benchmark;
import java.io.File;
import java.io.IOException;
import java.util.concurrent.TimeUnit;
import org.apache.logging.log4j.Level;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.core.config.Configurator;
import org.apache.pdfbox.Loader;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.text.PDFTextStripper;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.infra.Blackhole;
/**
 * JMH benchmarks measuring text extraction from a PDF document with
 * {@link PDFTextStripper}, once with position sorting disabled and once with
 * it enabled.
 *
 * <p>Every benchmark invocation loads the document from disk, extracts its
 * text and closes the document again, so the reported time covers loading
 * and parsing as well as the extraction itself.
 */
public class TextExtraction {

    /**
     * Relative path of the PDF file used as benchmark input. It is resolved
     * against the working directory of the benchmark process.
     * NOTE(review): the file is presumably placed there by the build before
     * the benchmarks run - confirm against the build configuration.
     */
    static final String PDF32000_2008 = "target/pdfs/PDF32000_2008.pdf";

    static {
        // Switch off log4j output (root logger) and java.util.logging output
        // for "org.apache" when the class is loaded; presumably so that log
        // I/O does not distort the measurements.
        Configurator.setAllLevels(LogManager.getRootLogger().getName(), Level.OFF);
        java.util.logging.Logger.getLogger("org.apache").setLevel(java.util.logging.Level.OFF);
    }

    /**
     * Measures the average time, reported in milliseconds, to load the input
     * PDF and extract its text with position sorting disabled.
     *
     * @param blackhole JMH sink that receives the extracted text
     * @throws IOException if loading the document or extracting text fails
     */
    @Benchmark
    @OutputTimeUnit(TimeUnit.MILLISECONDS)
    @BenchmarkMode(Mode.AverageTime)
    public void extractPDFSpecUnsorted(Blackhole blackhole) throws IOException {
        extractText(false, blackhole);
    }

    /**
     * Measures the average time, reported in milliseconds, to load the input
     * PDF and extract its text with position sorting enabled.
     *
     * @param blackhole JMH sink that receives the extracted text
     * @throws IOException if loading the document or extracting text fails
     */
    @Benchmark
    @OutputTimeUnit(TimeUnit.MILLISECONDS)
    @BenchmarkMode(Mode.AverageTime)
    public void extractPDFSpecSorted(Blackhole blackhole) throws IOException {
        extractText(true, blackhole);
    }

    /**
     * Loads {@link #PDF32000_2008}, extracts its text with a fresh
     * {@link PDFTextStripper} and hands the result to the blackhole.
     *
     * <p>Shared by both benchmarks so that they perform the same sequence of
     * steps and differ only in the sort-by-position setting.
     *
     * @param sortByPosition value passed to
     *                       {@link PDFTextStripper#setSortByPosition(boolean)}
     * @param blackhole      JMH sink that receives the extracted text
     * @throws IOException if loading the document or extracting text fails
     */
    private static void extractText(boolean sortByPosition, Blackhole blackhole)
            throws IOException {
        // try-with-resources closes the document even when extraction throws.
        try (PDDocument pdf = Loader.loadPDF(new File(PDF32000_2008))) {
            PDFTextStripper pdfStripper = new PDFTextStripper();
            pdfStripper.setSortByPosition(sortByPosition);
            // Passing the text to the blackhole keeps it observably used.
            blackhole.consume(pdfStripper.getText(pdf));
        }
    }
}