blob: 5f156a4a3418a0f62c9f42aa046f5e5becf29f47 [file] [log] [blame]
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.uima.test.junit_extension;
import java.io.File;
import java.io.FileFilter;
import java.util.HashMap;
import java.util.logging.LogManager;
import junit.framework.Assert;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.internal.util.Timer;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.XMLInputSource;
/**
* AnnotatorPerformanceTester is a helper class to execute annotator performance tests. The
* performance test results are returned as a {@link PerformanceTestResultImpl} object.
*/
public class AnnotatorPerformanceTester {
private static class FileFileFilter implements FileFilter {
private FileFileFilter() {
super();
}
public boolean accept(File arg0) {
return arg0.isFile();
}
}
private static HashMap logLevels = new HashMap(9);
static {
logLevels.put("OFF", Level.OFF);
logLevels.put("SEVERE", Level.SEVERE);
logLevels.put("WARNING", Level.WARNING);
logLevels.put("INFO", Level.INFO);
logLevels.put("CONFIG", Level.CONFIG);
logLevels.put("FINE", Level.FINE);
logLevels.put("FINER", Level.FINER);
logLevels.put("FINEST", Level.FINEST);
logLevels.put("ALL", Level.ALL);
}
/**
* runs an annotator performance test
*
* @param repeatSingle
* if true, every document is process "numsToRun" times before the next document is
* processed. If false, all documents are processed and this is repeated "numsToRun"
* times.
*
* @param numsToRun
* repeat count for the input documents
* @param taeDescFilePath
* ae descriptor - absolute file path
* @param testFileDir
* test file directory
* @param dataPath
* ae datapath
* @param doWarmup
* do warum for analysis engine - runs an short english sample document
* @return PerformanceTestResult - returns the performance test results
*
* @throws Exception
*/
public static PerformanceTestResult runPerformanceTest(boolean repeatSingle, int numsToRun,
File taeDescFilePath, File testFileDir, String dataPath, boolean doWarmup)
throws Exception {
// create performance result object
PerformanceTestResultImpl result = new PerformanceTestResultImpl();
// check mandetory settings
Assert.assertNotNull(taeDescFilePath);
Assert.assertNotNull(testFileDir);
// save settings
result.setRepeatSingleMode(repeatSingle);
result.setDoWarmup(doWarmup);
result.setNumsToRun(numsToRun);
result.setAeDescFilePath(taeDescFilePath);
result.setTestFileDir(testFileDir);
result.setDatapath(dataPath);
// set and check test file directory
if (testFileDir == null || !testFileDir.isDirectory() || !testFileDir.canRead()) {
throw new Exception("test file directory not valid");
}
// get current log level setting
Level defaultLogLevel = (Level) logLevels.get(LogManager.getLogManager()
.getProperty(".level"));
if (defaultLogLevel == null) {
// no log level was specified, use default log level settings "INFO" that is also
// used by the Java logging framework.
defaultLogLevel = Level.INFO;
}
// turn of logging for the performance test
Logger logger = UIMAFramework.getLogger();
logger.setLevel(Level.OFF);
//create timer
Timer globalTimer = new Timer();
Timer initTimer = new Timer();
Timer warmupTimer = new Timer();
Timer ioTimer = new Timer();
Timer processResetTimer = new Timer();
Timer cleanupTimer = new Timer();
Timer documentPreparationTimer = new Timer();
//start timer for global time
globalTimer.start();
// init analysis engine
try {
// start initialization timer
initTimer.start();
// set datapath
ResourceManager resMgr = UIMAFramework.newDefaultResourceManager();
if (dataPath != null) {
resMgr.setDataPath(dataPath);
}
AnalysisEngine ae = null;
CAS cas = null;
// get resource specifier from XML file
XMLInputSource in = new XMLInputSource(taeDescFilePath);
ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);
// create analysis engine with resource manager
ae = UIMAFramework.produceAnalysisEngine(specifier, resMgr, null);
// check ae
Assert.assertNotNull(ae);
// create new cas
cas = ae.newCAS();
// check cas
Assert.assertNotNull(cas);
// access cas type system
cas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_LANGUAGE);
// stop initalization timer
initTimer.stop();
result.setInitTime(initTimer.getTimeSpan());
if (doWarmup) {
// start warmup timer
warmupTimer.start();
// process dummy document
cas.setDocumentLanguage("en");
cas.setDocumentText("This is a test sentence.");
ae.process(cas);
cas.reset();
// stop warmup timer
warmupTimer.stop();
result.setWarmupTime(warmupTimer.getTimeSpan());
}
// start io timer
ioTimer.start();
// read all files in the test file directory
File[] inputFiles = testFileDir.listFiles(new FileFileFilter());
// create string array for the file content and language
String[] fileTexts = new String[inputFiles.length];
String[] languages = new String[inputFiles.length];
int numChars = 0;
long fileSize = 0;
// iterate of all input files and extract content and language
for (int i = 0; i < inputFiles.length; i++) {
// get file language
languages[i] = inputFiles[i].getName().substring(0, 2);
// get file content
fileTexts[i] = FileUtils.file2String(inputFiles[i], "UTF-8");
fileSize += inputFiles[i].length();
// count characters
numChars += fileTexts[i].length();
}
// stop io timer
ioTimer.stop();
// save results
result.setNumberOfFiles(inputFiles.length);
result.setNumberOfCharacters(numChars);
result.setTotalFileSize(fileSize);
result.setIoTime(ioTimer.getTimeSpan());
// start real processing
int numAnnot = 0;
// check repeat single mode setting
// repeatSingle=true: iterates of all files and repeat each file "numsToRun" times
// repeatSingle=false: iterates of all files and repeat the collection "numsToRun" times
if (repeatSingle) {
// iterate over all text files (over the cached content)
for (int i = 0; i < fileTexts.length; i++) {
// file repeat mode
// iterate over the current document "numsToRun" times
for (int j = 0; j < numsToRun; j++) {
documentPreparationTimer.start();
// set cas data
cas.setDocumentLanguage(languages[i]);
cas.setDocumentText(fileTexts[i]);
documentPreparationTimer.stop();
processResetTimer.start();
ae.process(cas);
processResetTimer.stop();
documentPreparationTimer.start();
numAnnot += cas.getAnnotationIndex().size();
cas.reset();
documentPreparationTimer.stop();
}
}
}
// use collection repeat mode
else {
// process the file collection "numsToRun" times
for (int j = 0; j < numsToRun; j++) {
// iterate over all text files (over the cached content)
for (int i = 0; i < fileTexts.length; i++) {
documentPreparationTimer.start();
// set cas data
cas.setDocumentLanguage(languages[i]);
cas.setDocumentText(fileTexts[i]);
documentPreparationTimer.stop();
processResetTimer.start();
ae.process(cas);
processResetTimer.stop();
documentPreparationTimer.start();
numAnnot += cas.getAnnotationIndex().size();
cas.reset();
documentPreparationTimer.stop();
}
}
}
// cleanup ae and stop global timer
cleanupTimer.start();
ae.destroy();
ae = null;
cleanupTimer.stop();
globalTimer.stop();
// save results
result.setNumberOfCreatedAnnotations(numAnnot);
result.setOverallTime(globalTimer.getTimeSpan());
result.setProcessingTime(processResetTimer.getTimeSpan());
result.setCleanupTime(cleanupTimer.getTimeSpan());
result.setDocumentPreparationTime(documentPreparationTimer.getTimeSpan());
// turn on logging as it was before
logger.setLevel(defaultLogLevel);
// return result object
return result;
} catch (Exception e) { // Bail out.
throw e;
}
}
}