Logo Search packages:      
Sourcecode: uimaj version File versions  Download package

SimpleRunCPM.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.examples.cpe;

import java.io.IOException;
import java.util.List;

import org.apache.uima.UIMAException;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.collection.CasConsumer;
import org.apache.uima.collection.CollectionProcessingManager;
import org.apache.uima.collection.CollectionReader;
import org.apache.uima.collection.EntityProcessStatus;
import org.apache.uima.collection.StatusCallbackListener;
import org.apache.uima.resource.ResourceSpecifier;
import org.apache.uima.util.XMLInputSource;

/**
 * Main class that runs the Collection Processing Manager (CPM). This class reads descriptor files
 * and initializes the following components:
 * <ol>
 * <li> CollectionReader </li>
 * <li> Analysis Engine </li>
 * <li> CAS Consumer </li>
 * </ol>
 * <br>
 * It also registers a callback listener with the CPM, which will print progress and statistics to
 * System.out. <br>
 * Command line arguments for the run are:
 * <ol>
 * <li> args[0] : CollectionReader descriptor file </li>
 * <li> args[1] : Analysis Engine descriptor file </li>
 * <li> args[2] : CAS Consumer descriptor file </li>
 * </ol>
 * <br>
 * Example : <br>
 * java -cp &lt; all jar files needed &gt; org.apache.uima.examples.cpe.SimpleRunCPM
 * descriptors/collection_reader/FileSystemCollectionReader.xml
 * descriptors/analysis_engine/PersonTitleAnnotator.xml
 * descriptors/cas_consumer/XmiWriterCasConsumer.xml
 * 
 */
00062 public class SimpleRunCPM extends Thread {
  /**
   * The Collection Processing Manager instance that coordinates the processing.
   */
00066   private CollectionProcessingManager mCPM;

  /**
   * Start time of the processing - used to compute elapsed time.
   */
00071   private long mStartTime;

  /**
   * Constructor for the class.
   * 
   * @param args
   *          command line arguments into the program - see class description
   */
00079   public SimpleRunCPM(String args[]) throws UIMAException, IOException {
    mStartTime = System.currentTimeMillis();

    // check command line args
    if (args.length < 3) {
      printUsageMessage();
      System.exit(1);
    }

    // create components from their descriptors

    // Collection Reader
    System.out.println("Initializing Collection Reader");
    ResourceSpecifier colReaderSpecifier = UIMAFramework.getXMLParser()
            .parseCollectionReaderDescription(new XMLInputSource(args[0]));
    CollectionReader collectionReader = UIMAFramework.produceCollectionReader(colReaderSpecifier);

    // AnalysisEngine
    System.out.println("Initializing AnalysisEngine");
    ResourceSpecifier aeSpecifier = UIMAFramework.getXMLParser().parseResourceSpecifier(
            new XMLInputSource(args[1]));
    AnalysisEngine ae = UIMAFramework.produceAnalysisEngine(aeSpecifier);

    // CAS Consumer
    System.out.println("Initializing CAS Consumer");
    ResourceSpecifier consumerSpecifier = UIMAFramework.getXMLParser().parseCasConsumerDescription(
            new XMLInputSource(args[2]));
    CasConsumer casConsumer = UIMAFramework.produceCasConsumer(consumerSpecifier);

    // create a new Collection Processing Manager
    mCPM = UIMAFramework.newCollectionProcessingManager();

    // Register AE and CAS Consumer with the CPM
    mCPM.setAnalysisEngine(ae);
    mCPM.addCasConsumer(casConsumer);

    // Create and register a Status Callback Listener
    mCPM.addStatusCallbackListener(new StatusCallbackListenerImpl());

    // Finish setup
    mCPM.setPauseOnException(false);

    // Start Processing (in batches of 10, just for testing purposes)
    mCPM.process(collectionReader, 10);
  }

  /**
   * 
   */
  private static void printUsageMessage() {
    System.out.println(" Arguments to the program are as follows : \n"
            + "args[0] : Collection Reader descriptor file \n "
            + "args[1] : Analysis Engine descriptor file. \n"
            + "args[2] : CAS Consumer descriptor file");
  }

  /**
   * main class.
   * 
   * @param args
   *          Command line arguments - see class description
   */
00141   public static void main(String[] args) throws UIMAException, IOException {
    new SimpleRunCPM(args);
  }

  /**
   * Callback Listener. Receives event notifications from CPM.
   * 
   * 
   */
00150   class StatusCallbackListenerImpl implements StatusCallbackListener {
    int entityCount = 0;

    long size = 0;

    /**
     * Called when the initialization is completed.
     * 
     * @see org.apache.uima.collection.processing.StatusCallbackListener#initializationComplete()
     */
00160     public void initializationComplete() {
      System.out.println("CPM Initialization Complete");
    }

    /**
     * Called when the batchProcessing is completed.
     * 
     * @see org.apache.uima.collection.processing.StatusCallbackListener#batchProcessComplete()
     * 
     */
00170     public void batchProcessComplete() {
      System.out.print("Completed " + entityCount + " documents");
      if (size > 0) {
        System.out.print("; " + size + " characters");
      }
      System.out.println();
      long elapsedTime = System.currentTimeMillis() - mStartTime;
      System.out.println("Time Elapsed : " + elapsedTime + " ms ");
    }

    /**
     * Called when the collection processing is completed.
     * 
     * @see org.apache.uima.collection.processing.StatusCallbackListener#collectionProcessComplete()
     */
00185     public void collectionProcessComplete() {
      System.out.print("Completed " + entityCount + " documents");
      if (size > 0) {
        System.out.print("; " + size + " characters");
      }
      System.out.println();
      long elapsedTime = System.currentTimeMillis() - mStartTime;
      System.out.println("Time Elapsed : " + elapsedTime + " ms ");
      System.out.println("\n\n ------------------ PERFORMANCE REPORT ------------------\n");
      System.out.println(mCPM.getPerformanceReport().toString());
    }

    /**
     * Called when the CPM is paused.
     * 
     * @see org.apache.uima.collection.processing.StatusCallbackListener#paused()
     */
00202     public void paused() {
      System.out.println("Paused");
    }

    /**
     * Called when the CPM is resumed after a pause.
     * 
     * @see org.apache.uima.collection.processing.StatusCallbackListener#resumed()
     */
00211     public void resumed() {
      System.out.println("Resumed");
    }

    /**
     * Called when the CPM is stopped abruptly due to errors.
     * 
     * @see org.apache.uima.collection.processing.StatusCallbackListener#aborted()
     */
00220     public void aborted() {
      System.out.println("Aborted");
    }

    /**
     * Called when the processing of a Document is completed. <br>
     * The process status can be looked at and corresponding actions taken.
     * 
     * @param aCas
     *          CAS corresponding to the completed processing
     * @param aStatus
     *          EntityProcessStatus that holds the status of all the events for aEntity
     */
00233     public void entityProcessComplete(CAS aCas, EntityProcessStatus aStatus) {
      if (aStatus.isException()) {
        List exceptions = aStatus.getExceptions();
        for (int i = 0; i < exceptions.size(); i++) {
          ((Throwable) exceptions.get(i)).printStackTrace();
        }
        return;
      }
      entityCount++;
      String docText = aCas.getDocumentText();
      if (docText != null) {
        size += docText.length();
      }
    }
  }

}

Generated by  Doxygen 1.6.0   Back to index