// Logo Search packages:
// Sourcecode: uimaj version File versions  Download package
//
// CollectionProcessingManager.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.collection;

import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.ProcessTrace;
import org.apache.uima.util.Progress;

/**
 * A <code>CollectionProcessingManager</code> (CPM) manages the application of an
 * {@link AnalysisEngine} to a collection of artifacts. For text analysis applications, this will be
 * a collection of documents. The analysis results will then be delivered to one ore more
 * {@link CasConsumer}s.
 * <p>
 * The CPM is configured with an Analysis Engine and CAS Consumers by calling its
 * {@link #setAnalysisEngine(AnalysisEngine)} and {@link #addCasConsumer(CasConsumer)} methods.
 * Collection processing is then initiated by calling the {@link #process(CollectionReader)} or
 * {@link #process(CollectionReader,int)} methods.
 * <p>
 * The <code>process</code> methods take a {@link CollectionReader} object as an argument. The
 * Collection Reader retreivies each artifact from the collection as a
 * {@link org.apache.uima.cas.CAS} object.
 * <p>
 * Listeners can register with the CPM by calling the
 * {@link #addStatusCallbackListener(StatusCallbackListener)} method. These listeners receive status
 * callbacks during the processing. At any time, performance and progress reports are available from
 * the {@link #getPerformanceReport()} and {@link #getProgress()} methods.
 * <p>
 * A CPM implementation may choose to implement parallelization of the processing, but this is not a
 * requirement of the architecture.
 * <p>
 * Note that a CPM only supports processing one collection at a time. Attempting to reconfigure a
 * CPM or start a new processing job while a previous processing job is occurring will result in a
 * {@link org.apache.uima.UIMA_IllegalStateException}. Processing multiple collections
 * simultaneously is done by instantiating and configuring multiple instances of the CPM.
 * <p>
 * A <code>CollectionProcessingManager</code> instance can be obtained by calling
 * {@link org.apache.uima.UIMAFramework#newCollectionProcessingManager()}.
 * 
 * 
 */
00061 public interface CollectionProcessingManager {
  /**
   * Gets the <code>AnalysisEngine</code> that is assigned to this CPM.
   * 
   * @return the <code>AnalysisEngine</code> that this CPM will use to analyze each CAS in the
   *         collection.
   */
  public AnalysisEngine getAnalysisEngine();

  /**
   * Sets the <code>AnalysisEngine</code> that is assigned to this CPM.
   * 
   * @param aAnalysisEngine
   *          the <code>AnalysisEngine</code> that this CPM will use to analyze each CAS in the
   *          collection.
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if this CPM is currently processing
   */
  public void setAnalysisEngine(AnalysisEngine aAnalysisEngine)
          throws ResourceConfigurationException;

  /**
   * Gets the <code>CasConsumers</code>s assigned to this CPM.
   * 
   * @return an array of <code>CasConsumer</code>s
   */
  public CasConsumer[] getCasConsumers();

  /**
   * Adds a <code>CasConsumer</code> to this CPM.
   * 
   * @param aCasConsumer
   *          a <code>CasConsumer</code> to add
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if this CPM is currently processing
   */
  public void addCasConsumer(CasConsumer aCasConsumer) throws ResourceConfigurationException;

  /**
   * Removes a <code>CasConsumer</code> from this CPM.
   * 
   * @param aCasConsumer
   *          the <code>CasConsumer</code> to remove
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if this CPM is currently processing
   */
  public void removeCasConsumer(CasConsumer aCasConsumer);

  /**
   * Gets whether this CPM is required to process the collection's elements serially (as opposed to
   * perfoming parallelization). Note that a value of <code>false</code> does not guarantee that
   * parallelization is performed; this is left up to the CPM implementation.
   * 
   * @return true if and only if serial processing is required
   */
  public boolean isSerialProcessingRequired();

  /**
   * Sets whether this CPM is required to process the collection's elements serially* (as opposed to
   * perfoming parallelization). If this method is not called,* the default is <code>false</code>.
   * Note that a value of <code>false</code> does not guarantee that parallelization is performed;
   * this is left up to the CPM implementation.
   * 
   * @param aRequired
   *          true if and only if serial processing is required
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if this CPM is currently processing
   */
  public void setSerialProcessingRequired(boolean aRequired);

  /**
   * Gets whether this CPM will automatically pause processing if an exception occurs. If processing
   * is paused it can be resumed by calling the {@link #resume(boolean)} method.
   * 
   * @return true if and only if this CPM will pause on exception
   */
  public boolean isPauseOnException();

  /**
   * Sets whether this CPM will automatically pause processing if an exception occurs. If processing
   * is paused it can be resumed by calling the {@link #resume(boolean)} method.
   * 
   * @param aPause
   *          true if and only if this CPM should pause on exception
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if this CPM is currently processing
   */
  public void setPauseOnException(boolean aPause);

  /**
   * Registers a listsner to receive status callbacks.
   * 
   * @param aListener
   *          the listener to add
   */
  public void addStatusCallbackListener(StatusCallbackListener aListener);

  /**
   * Unregisters a status callback listener.
   * 
   * @param aListener
   *          the listener to remove
   */
  public void removeStatusCallbackListener(StatusCallbackListener aListener);

  /**
   * Initiates processing of a collection. CollectionReader initializes the CAS with Documents from
   * the Colection. This method starts the processing in another thread and returns immediately.
   * Status of the processing can be obtained by registering a listener with the
   * {@link #addStatusCallbackListener(StatusCallbackListener)} method.
   * <p>
   * A CPM can only process one collection at a time. If this method is called while a previous
   * processing request has not yet completed, a <code>UIMA_IllegalStateException</code> will
   * result. To find out whether a CPM is free to begin another processing request, call the
   * {@link #isProcessing()} method.
   * 
   * @param aCollectionReader
   *          the <code>CollectionReader</code> from which to obtain the Entities to be processed
   * 
   * @throws ResourceInitializationException
   *           if an error occurs during initialization
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if this CPM is currently processing
   */
  public void process(CollectionReader aCollectionReader) throws ResourceInitializationException;

  /**
   * Initiates processing of a collection. This method works in the same way as
   * {@link #process(CollectionReader)}, but it breaks the processing up into batches of a size
   * determined by the <code>aBatchSize</code> parameter. Each {@link CasConsumer} will be
   * notified at the end of each batch.
   * 
   * @param aCollectionReader
   *          the <code>CollectionReader</code> from which to obtain the Entities to be processed
   * @param aBatchSize
   *          the size of the batch.
   * 
   * @throws ResourceInitializationException
   *           if an error occurs during initialization
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if this CPM is currently processing
   */
  public void process(CollectionReader aCollectionReader, int aBatchSize)
          throws ResourceInitializationException;

  /**
   * Determines whether this CPM is currently processing. This means that a processing request has
   * been submitted and has not yet completed or been {@link #stop()}ped. If processing is paused,
   * this method will still return <code>true<code>.
   * 
   * @return true if and only if this CPM is currently processing.
   */
  public boolean isProcessing();

  /**
   * Pauses processing. Processing can later be resumed by calling the {@link #resume(boolean)}
   * method.
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if no processing is currently occuring
   */
  public void pause();

  /**
   * Determines whether this CPM's processing is currently paused.
   * 
   * @return true if and only if this CPM's processing is currently paused.
   */
  public boolean isPaused();

  /**
   * Resumes processing that has been paused.
   * 
   * @param aRetryFailed
   *          if processing was paused because an exception occurred (see
   *          {@link #setPauseOnException(boolean)}), setting a value of <code>true</code> for
   *          this parameter will cause the failed entity to be retried. A value of
   *          <code>false</code> (the default) will cause processing to continue with the next
   *          entity after the failure.
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if processing is not currently paused
   */
  public void resume(boolean aRetryFailed);

  /**
   * Resumes processing that has been paused.
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if processing is not currently paused
   */
  public void resume();

  /**
   * Stops processing.
   * 
   * @throws org.apache.uima.UIMA_IllegalStateException
   *           if no processing is currently occuring
   */
  public void stop();

  /**
   * Gets a performance report for the processing that is currently occurring or has just completed.
   * 
   * @return an object containing performance statistics
   */
  public ProcessTrace getPerformanceReport();

  /**
   * Gets a progress report for the processing that is currently occurring or has just completed.
   * 
   * @return an array of <code>Progress</code> objects, each of which represents the progress in a
   *         different set of units (for example number of entities or bytes)
   */
  public Progress[] getProgress();
}

// Generated by Doxygen 1.6.0