// Source listing: AnnotatorPerformanceTester.java (package: uimaj)

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.test.junit_extension;

import java.io.File;
import java.io.FileFilter;
import java.util.HashMap;
import java.util.logging.LogManager;

import junit.framework.Assert;

import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.internal.util.Timer;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.XMLInputSource;

/**
 * AnnotatorPerfTester is a helper class to execute annotator performance tests. The performance
 * test results are returned as {@link PerformanceTestResultImpl} object.
 * 
 */
00045 public class AnnotatorPerformanceTester {

00047   private static class FileFileFilter implements FileFilter {

    private FileFileFilter() {
      super();
    }

    public boolean accept(File arg0) {
      return arg0.isFile();
    }

  }

  private static HashMap logLevels = new HashMap(9);
  static {
    logLevels.put("OFF", Level.OFF);
    logLevels.put("SEVERE", Level.SEVERE);
    logLevels.put("WARNING", Level.WARNING);
    logLevels.put("INFO", Level.INFO);
    logLevels.put("CONFIG", Level.CONFIG);
    logLevels.put("FINE", Level.FINE);
    logLevels.put("FINER", Level.FINER);
    logLevels.put("FINEST", Level.FINEST);
    logLevels.put("ALL", Level.ALL);
  }

  /**
   * runs an annotator performance test
   * 
   * @param repeatSingle
   *          if true, every document is process "numsToRun" times before the next document is
   *          processed. If false, all documents are processed and this is repeated "numsToRun"
   *          times.
   * 
   * @param numsToRun
   *          repeat count for the input documents
   * @param taeDescFilePath
   *          ae descriptor - absolute file path
   * @param testFileDir
   *          test file directory
   * @param dataPath
   *          ae datapath
   * @param doWarmup
   *          do warum for analysis engine - runs an short english sample document
   * @return PerformanceTestResult - returns the performance test results
   * 
   * @throws Exception
   */
00094   public static PerformanceTestResult runPerformanceTest(boolean repeatSingle, int numsToRun,
          File taeDescFilePath, File testFileDir, String dataPath, boolean doWarmup)
          throws Exception {

    // create performance result object
    PerformanceTestResultImpl result = new PerformanceTestResultImpl();

    // check mandetory settings
    Assert.assertNotNull(taeDescFilePath);
    Assert.assertNotNull(testFileDir);

    // save settings
    result.setRepeatSingleMode(repeatSingle);
    result.setDoWarmup(doWarmup);
    result.setNumsToRun(numsToRun);
    result.setAeDescFilePath(taeDescFilePath);
    result.setTestFileDir(testFileDir);
    result.setDatapath(dataPath);

    // set and check test file directory
    if (testFileDir == null || !testFileDir.isDirectory() || !testFileDir.canRead()) {
      throw new Exception("test file directory not valid");
    }

    // get current log level setting
    Level defaultLogLevel = (Level) logLevels.get(LogManager.getLogManager()
            .getProperty(".level"));

    if (defaultLogLevel == null) {
      // no log level was specified, use default log level settings "INFO" that is also
      // used by the Java logging framework.
      defaultLogLevel = Level.INFO;
    }
    // turn of logging for the performance test
    Logger logger = UIMAFramework.getLogger();
    logger.setLevel(Level.OFF);

    //create timer 
    Timer globalTimer = new Timer();
    Timer initTimer = new Timer();
    Timer warmupTimer = new Timer();
    Timer ioTimer = new Timer();
    Timer processResetTimer = new Timer();
    Timer cleanupTimer = new Timer();
    Timer documentPreparationTimer = new Timer();
    
    //start timer for global time
    globalTimer.start();

    // init analysis engine
    try {

      // start initialization timer   
      initTimer.start();
      
      // set datapath
      ResourceManager resMgr = UIMAFramework.newDefaultResourceManager();
      if (dataPath != null) {
        resMgr.setDataPath(dataPath);
      }

      AnalysisEngine ae = null;
      CAS cas = null;
      // get resource specifier from XML file
      XMLInputSource in = new XMLInputSource(taeDescFilePath);
      ResourceSpecifier specifier = UIMAFramework.getXMLParser().parseResourceSpecifier(in);

      // create analysis engine with resource manager
      ae = UIMAFramework.produceAnalysisEngine(specifier, resMgr, null);
      // check ae
      Assert.assertNotNull(ae);

      // create new cas
      cas = ae.newCAS();
      // check cas
      Assert.assertNotNull(cas);

      // access cas type system
      cas.getTypeSystem().getFeatureByFullName(CAS.FEATURE_FULL_NAME_LANGUAGE);

      // stop initalization timer
      initTimer.stop();
      result.setInitTime(initTimer.getTimeSpan());

      if (doWarmup) {
        // start warmup timer     
        warmupTimer.start();

        // process dummy document
        cas.setDocumentLanguage("en");
        cas.setDocumentText("This is a test sentence.");
        ae.process(cas);
        cas.reset();

        // stop warmup timer
        warmupTimer.stop();
        result.setWarmupTime(warmupTimer.getTimeSpan());
      }

      // start io timer
      ioTimer.start();

      // read all files in the test file directory
      File[] inputFiles = testFileDir.listFiles(new FileFileFilter());
      // create string array for the file content and language
      String[] fileTexts = new String[inputFiles.length];
      String[] languages = new String[inputFiles.length];
      int numChars = 0;
      long fileSize = 0;
      // iterate of all input files and extract content and language
      for (int i = 0; i < inputFiles.length; i++) {
        // get file language
        languages[i] = inputFiles[i].getName().substring(0, 2);
        // get file content
        fileTexts[i] = FileUtils.file2String(inputFiles[i], "UTF-8");
        fileSize += inputFiles[i].length();
        // count characters
        numChars += fileTexts[i].length();
      }

      // stop io timer
      ioTimer.stop();

      // save results
      result.setNumberOfFiles(inputFiles.length);
      result.setNumberOfCharacters(numChars);
      result.setTotalFileSize(fileSize);
      result.setIoTime(ioTimer.getTimeSpan());

      // start real processing
      int numAnnot = 0;

      // check repeat single mode setting
      // repeatSingle=true: iterates of all files and repeat each file "numsToRun" times
      // repeatSingle=false: iterates of all files and repeat the collection "numsToRun" times
      if (repeatSingle) {
        // iterate over all text files (over the cached content)
        for (int i = 0; i < fileTexts.length; i++) {
          // file repeat mode
          // iterate over the current document "numsToRun" times
          for (int j = 0; j < numsToRun; j++) {
            documentPreparationTimer.start();
            // set cas data
            cas.setDocumentLanguage(languages[i]);
            cas.setDocumentText(fileTexts[i]);
            documentPreparationTimer.stop();
            processResetTimer.start();
            ae.process(cas);
            processResetTimer.stop();
            documentPreparationTimer.start();
            numAnnot += cas.getAnnotationIndex().size();
            cas.reset();
            documentPreparationTimer.stop();
          }
        }
      }
      // use collection repeat mode
      else {
        // process the file collection "numsToRun" times
        for (int j = 0; j < numsToRun; j++) {
          // iterate over all text files (over the cached content)
          for (int i = 0; i < fileTexts.length; i++) {
            documentPreparationTimer.start();
            // set cas data
            cas.setDocumentLanguage(languages[i]);
            cas.setDocumentText(fileTexts[i]);
            documentPreparationTimer.stop();
            processResetTimer.start();
            ae.process(cas);
            processResetTimer.stop();
            documentPreparationTimer.start();
            numAnnot += cas.getAnnotationIndex().size();
            cas.reset();
            documentPreparationTimer.stop();
          }
        }
      }

      // cleanup ae and stop global timer
      cleanupTimer.start();
      ae.destroy();
      ae = null;
      cleanupTimer.stop();
      globalTimer.stop();

      // save results
      result.setNumberOfCreatedAnnotations(numAnnot);
      result.setOverallTime(globalTimer.getTimeSpan());
      result.setProcessingTime(processResetTimer.getTimeSpan());
      result.setCleanupTime(cleanupTimer.getTimeSpan());
      result.setDocumentPreparationTime(documentPreparationTimer.getTimeSpan());

      // turn on logging as it was before
      logger.setLevel(defaultLogLevel);

      // return result object
      return result;

    } catch (Exception e) { // Bail out.
      throw e;
    }

  }

}

// Generated by Doxygen 1.6.0