// Source listing of the uimaj package (generated documentation page).
//
// TcasTransAnnotator.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 *   http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.uima.examples.cpm.sofa;

import java.util.Arrays;
import java.util.Locale;
import java.util.StringTokenizer;

import org.apache.uima.analysis_engine.ResultSpecification;
import org.apache.uima.analysis_engine.annotator.AnnotatorProcessException;
import org.apache.uima.analysis_engine.annotator.Annotator_ImplBase;
import org.apache.uima.analysis_engine.annotator.TextAnnotator;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.Feature;
import org.apache.uima.cas.SofaID;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;

/**
 * Simple English to German translator
 * 
 */
00039 public class TcasTransAnnotator extends Annotator_ImplBase implements TextAnnotator {

00041   public void process(CAS aCas, ResultSpecification aResultSpec) throws AnnotatorProcessException {
    CAS engTcas, germTcas;

    // get English text Sofa and open CAS view
    // SofaID realSofaName = getContext().mapToSofaID("EnglishDocument");
    // System.out.println("TRANSANNOTATOR: real sofa name for English document " +
    // realSofaName.getSofaID());

    // engTcas = aCas.getView(aCas.getSofa(realSofaName));
    engTcas = aCas;
    // if (engTcas == null )
    // System.out.println(realSofaName + " sofa not found in CAS");

    SofaID realSofaName = getContext().mapToSofaID("GermanDocument");
    // System.out.println("TRANSANNOTATOR: real sofa name of GermanDocument " +
    // realSofaName.getSofaID());
    // Create the output German text Sofa and open CAS view
    germTcas = aCas.getView(aCas.createSofa(realSofaName, "text"));

    // Get some necessary Type System constants
    Type annot = engTcas.getAnnotationType();
    Type cross = engTcas.getTypeSystem().getType("sofa.test.CrossAnnotation");
    Feature other = cross.getFeatureByBaseName("otherAnnotation");

    // Get the English text
    String engText = engTcas.getDocumentText();

    // Setup for translated text
    int engEnd = 0;
    int germBegin = 0;
    int germEnd = 0;
    StringBuffer translation = new StringBuffer();

    // Parse the English text
    StringTokenizer st = new StringTokenizer(engText);
    while (st.hasMoreTokens()) {
      String thisTok = st.nextToken();
      int engBegin = engText.indexOf(thisTok, engEnd);
      engEnd = engBegin + thisTok.length();

      // Create token annotations on English text
      AnnotationFS engAnnot = engTcas.createAnnotation(annot, engBegin, engEnd);
      engTcas.getIndexRepository().addFS(engAnnot);

      // Simple word-by-word translation
      String germWord = Translate(thisTok);

      // Accumulate the translated text
      if (germBegin > 0) {
        translation.append(' ');
        germBegin += 1;
      }
      translation.append(germWord.toCharArray(), 0, germWord.length());

      // Create token annotations on German text
      germEnd = germBegin + germWord.length();
      AnnotationFS germAnnot = germTcas.createAnnotation(cross, germBegin, germEnd);
      germTcas.getIndexRepository().addFS(germAnnot);

      // add link to English text
      germAnnot.setFeatureValue(other, engAnnot);
      germBegin = germEnd;
    }

    // Finally, set the output tranlation Sofa data
    germTcas.setDocumentText(translation.toString());

  }

  static char wThis[] = { 't', 'h', 'i', 's' };

  static char wBeer[] = { 'b', 'e', 'e', 'r' };

  static char wIs[] = { 'i', 's' };

  private String Translate(String word) {
    String lword = word.toLowerCase();
    if (Arrays.equals(wThis, lword.toCharArray()))
      return "das";
    if (Arrays.equals(wBeer, lword.toCharArray()))
      return "bier";
    if (Arrays.equals(wIs, lword.toCharArray()))
      return "ist";
    return "gut";
  }

}

// Generated by Doxygen 1.6.0