| /* |
| * Licensed to the Apache Software Foundation (ASF) under one |
| * or more contributor license agreements. See the NOTICE file |
| * distributed with this work for additional information |
| * regarding copyright ownership. The ASF licenses this file |
| * to you under the Apache License, Version 2.0 (the |
| * "License"); you may not use this file except in compliance |
| * with the License. You may obtain a copy of the License at |
| * |
| * http://www.apache.org/licenses/LICENSE-2.0 |
| * |
| * Unless required by applicable law or agreed to in writing, |
| * software distributed under the License is distributed on an |
| * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY |
| * KIND, either express or implied. See the License for the |
| * specific language governing permissions and limitations |
| * under the License. |
| */ |
| |
| package org.apache.uima.test.junit_extension; |
| |
| import java.io.File; |
| import java.io.FileFilter; |
| import java.util.HashMap; |
| import java.util.logging.LogManager; |
| |
| import junit.framework.Assert; |
| |
| import org.apache.uima.UIMAFramework; |
| import org.apache.uima.analysis_engine.AnalysisEngine; |
| import org.apache.uima.cas.CAS; |
| import org.apache.uima.internal.util.Timer; |
| import org.apache.uima.resource.ResourceManager; |
| import org.apache.uima.resource.ResourceSpecifier; |
| import org.apache.uima.util.FileUtils; |
| import org.apache.uima.util.Level; |
| import org.apache.uima.util.Logger; |
| import org.apache.uima.util.XMLInputSource; |
| |
| /** |
| * AnnotatorPerfTester is a helper class to execute annotator performance tests. The performance |
| * test results are returned as {@link PerformanceTestResultImpl} object. |
| * |
| */ |
| public class AnnotatorPerformanceTester { |
| |
| private static class FileFileFilter implements FileFilter { |
| |
| private FileFileFilter() { |
| super(); |
| } |
| |
| public boolean accept(File arg0) { |
| return arg0.isFile(); |
| } |
| |
| } |
| |
| private static HashMap logLevels = new HashMap(9); |
| static { |
| logLevels.put("OFF", Level.OFF); |
| logLevels.put("SEVERE", Level.SEVERE); |
| logLevels.put("WARNING", Level.WARNING); |
| logLevels.put("INFO", Level.INFO); |
| logLevels.put("CONFIG", Level.CONFIG); |
| logLevels.put("FINE", Level.FINE); |
| logLevels.put("FINER", Level.FINER); |
| logLevels.put("FINEST", Level.FINEST); |
| logLevels.put("ALL", Level.ALL); |
| } |
| |
| /** |
| * runs an annotator performance test |
| * |
| * @param repeatSingle |
| * if true, every document is process "numsToRun" times before the next document is |
| * processed. If false, all documents are processed and this is repeated "numsToRun" |
| * times. |
| * |
| * @param numsToRun |
| * repeat count for the input documents |
| * @param taeDescFilePath |
| * ae descriptor - absolute file path |
| * @param testFileDir |
| * test file directory |
| * @param dataPath |
| * ae datapath |
| * @param doWarmup |
| * do warum for analysis engine - runs an short english sample document |
| * @return PerformanceTestResult - returns the performance test results |
| * |
| * @throws Exception |
| */ |
| public static PerformanceTestResult runPerformanceTest(boolean repeatSingle, int numsToRun, |
| File taeDescFilePath, File testFileDir, String dataPath, boolean doWarmup) |
| throws Exception { |
| |
| // create performance result object |
| PerformanceTestResultImpl result = new PerformanceTestResultImpl(); |
| |
| // check mandetory settings |
| Assert.assertNotNull(taeDescFilePath); |
| Assert.assertNotNull(testFileDir); |
| |
| // save settings |
| result.setRepeatSingleMode(repeatSingle); |
| result.setDoWarmup(doWarmup); |
| result.setNumsToRun(numsToRun); |
| result.setAeDescFilePath(taeDescFilePath); |
| result.setTestFileDir(testFileDir); |
| result.setDatapath(dataPath); |
| |
| // set and check test file directory |
| if (testFileDir == null || !testFileDir.isDirectory() || !testFileDir.canRead()) { |
| throw new Exception("test file directory not valid"); |
| } |
| |
| // get current log level setting |
| Level defaultLogLevel = (Level) logLevels.get(LogManager.getLogManager() |
| .getProperty(".level")); |
| |
| if (defaultLogLevel == null) { |
| // no log level was specified, use default log level settings "INFO" that is also |
| // used by the Java logging framework. |
| defaultLogLevel = Level.INFO; |
| } |
| // turn of logging for the performance test |
| Logger logger = UIMAFramework.getLogger(); |
| logger.setLevel(Level.OFF); |
| |
| //create timer |
| Timer globalTimer = new Timer(); |
| Timer initTimer = new Timer(); |
| Timer warmupTimer = new Timer(); |
| Timer ioTimer = new Timer(); |
| Timer processResetTimer = new Timer(); |
| Timer cleanupTimer = new Timer(); |
| Timer documentPreparationTimer = new Timer(); |
| |
| //start timer for global time |
| globalTimer.start(); |
| |
| // init analysis engine |
| try { |
| |
| // start initialization timer |
| initTimer.start(); |
| |
| // set datapath |
| ResourceManager resMgr = UIMAFramework.newDefaultResourceManager(); |
| if (dataPath != null) { |
| resMgr.setDataPath(dataPath); |
| } |
| |
| AnalysisEngine ae = null; |
| CAS cas = null; |
| // get resource specifier from XML file |
| XMLInputSource in = new XMLInputSource(taeDescFilePath); |
| ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in); |
| |
| // create analysis engine with resource manager |
| ae = UIMAFramework.produceAnalysisEngine(specifier, resMgr, null); |
| // check ae |
| Assert.assertNotNull(ae); |
| |
| // create new cas |
| cas = ae.newCAS(); |
| // check cas |
| Assert.assertNotNull(cas); |
| |
| // access cas type system |
| cas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_LANGUAGE); |
| |
| // stop initalization timer |
| initTimer.stop(); |
| result.setInitTime(initTimer.getTimeSpan()); |
| |
| if (doWarmup) { |
| // start warmup timer |
| warmupTimer.start(); |
| |
| // process dummy document |
| cas.setDocumentLanguage("en"); |
| cas.setDocumentText("This is a test sentence."); |
| ae.process(cas); |
| cas.reset(); |
| |
| // stop warmup timer |
| warmupTimer.stop(); |
| result.setWarmupTime(warmupTimer.getTimeSpan()); |
| } |
| |
| // start io timer |
| ioTimer.start(); |
| |
| // read all files in the test file directory |
| File[] inputFiles = testFileDir.listFiles(new FileFileFilter()); |
| // create string array for the file content and language |
| String[] fileTexts = new String[inputFiles.length]; |
| String[] languages = new String[inputFiles.length]; |
| int numChars = 0; |
| long fileSize = 0; |
| // iterate of all input files and extract content and language |
| for (int i = 0; i < inputFiles.length; i++) { |
| // get file language |
| languages[i] = inputFiles[i].getName().substring(0, 2); |
| // get file content |
| fileTexts[i] = FileUtils.file2String(inputFiles[i], "UTF-8"); |
| fileSize += inputFiles[i].length(); |
| // count characters |
| numChars += fileTexts[i].length(); |
| } |
| |
| // stop io timer |
| ioTimer.stop(); |
| |
| // save results |
| result.setNumberOfFiles(inputFiles.length); |
| result.setNumberOfCharacters(numChars); |
| result.setTotalFileSize(fileSize); |
| result.setIoTime(ioTimer.getTimeSpan()); |
| |
| // start real processing |
| int numAnnot = 0; |
| |
| // check repeat single mode setting |
| // repeatSingle=true: iterates of all files and repeat each file "numsToRun" times |
| // repeatSingle=false: iterates of all files and repeat the collection "numsToRun" times |
| if (repeatSingle) { |
| // iterate over all text files (over the cached content) |
| for (int i = 0; i < fileTexts.length; i++) { |
| // file repeat mode |
| // iterate over the current document "numsToRun" times |
| for (int j = 0; j < numsToRun; j++) { |
| documentPreparationTimer.start(); |
| // set cas data |
| cas.setDocumentLanguage(languages[i]); |
| cas.setDocumentText(fileTexts[i]); |
| documentPreparationTimer.stop(); |
| processResetTimer.start(); |
| ae.process(cas); |
| processResetTimer.stop(); |
| documentPreparationTimer.start(); |
| numAnnot += cas.getAnnotationIndex().size(); |
| cas.reset(); |
| documentPreparationTimer.stop(); |
| } |
| } |
| } |
| // use collection repeat mode |
| else { |
| // process the file collection "numsToRun" times |
| for (int j = 0; j < numsToRun; j++) { |
| // iterate over all text files (over the cached content) |
| for (int i = 0; i < fileTexts.length; i++) { |
| documentPreparationTimer.start(); |
| // set cas data |
| cas.setDocumentLanguage(languages[i]); |
| cas.setDocumentText(fileTexts[i]); |
| documentPreparationTimer.stop(); |
| processResetTimer.start(); |
| ae.process(cas); |
| processResetTimer.stop(); |
| documentPreparationTimer.start(); |
| numAnnot += cas.getAnnotationIndex().size(); |
| cas.reset(); |
| documentPreparationTimer.stop(); |
| } |
| } |
| } |
| |
| // cleanup ae and stop global timer |
| cleanupTimer.start(); |
| ae.destroy(); |
| ae = null; |
| cleanupTimer.stop(); |
| globalTimer.stop(); |
| |
| // save results |
| result.setNumberOfCreatedAnnotations(numAnnot); |
| result.setOverallTime(globalTimer.getTimeSpan()); |
| result.setProcessingTime(processResetTimer.getTimeSpan()); |
| result.setCleanupTime(cleanupTimer.getTimeSpan()); |
| result.setDocumentPreparationTime(documentPreparationTimer.getTimeSpan()); |
| |
| // turn on logging as it was before |
| logger.setLevel(defaultLogLevel); |
| |
| // return result object |
| return result; |
| |
| } catch (Exception e) { // Bail out. |
| throw e; |
| } |
| |
| } |
| |
| } |