CuratorConnectionExample.java

#

You can download this file here. This example shows how to connect to the Curator using Edison. While the Curator stores annotations of text, Edison provides a library to manipulate the annotations and generate features using them.

package edu.illinois.cs.cogcomp.edison.examples;

import java.util.List;

import org.apache.thrift.TException;

import edu.illinois.cs.cogcomp.core.datastructures.trees.Tree;
import edu.illinois.cs.cogcomp.edison.data.curator.CuratorClient;
import edu.illinois.cs.cogcomp.edison.sentences.Constituent;
import edu.illinois.cs.cogcomp.edison.sentences.Sentence;
import edu.illinois.cs.cogcomp.edison.sentences.SpanLabelView;
import edu.illinois.cs.cogcomp.edison.sentences.TextAnnotation;
import edu.illinois.cs.cogcomp.edison.sentences.TokenLabelView;
import edu.illinois.cs.cogcomp.edison.sentences.TreeView;
import edu.illinois.cs.cogcomp.edison.sentences.ViewNames;
import edu.illinois.cs.cogcomp.thrift.base.AnnotationFailedException;
import edu.illinois.cs.cogcomp.thrift.base.ServiceUnavailableException;

public class CuratorConnectionExample {
	public static void main(String[] args) throws ServiceUnavailableException,
			AnnotationFailedException, TException {
#

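To annotate raw text, connect to the Curator with a CuratorClient and request a TextAnnotation for the text, identified by a corpus name and a text id.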

		String text = "Good afternoon, gentlemen. I am a HAL-9000 computer.";

		String corpus = "2001_ODYSSEY";
		String textId = "001";

		String curatorHost = "trollope.cs.illinois.edu";
		int curatorPort = 9010;

		CuratorClient client = new CuratorClient(curatorHost, curatorPort);

		boolean forceUpdate = false;
		TextAnnotation ta = client.getTextAnnotation(corpus, textId, text,
				forceUpdate);
#

Print the text. This prints the raw text that was used to create the TextAnnotation object.

		System.out.println(ta.getText());
#

Print the tokenized text. The tokenized text is obtained from the Curator, which defines a tokenization scheme. All the views in this TextAnnotation will be defined using this tokenization scheme.

		System.out.println(ta.getTokenizedText());
#

Print the de-tokenized text. In addition to the tokenized text, the de-tokenized text can also be used as the key for maps.

		System.out.println(ta.getDetokenizedText());
#

Print the tokens

		for (int i = 0; i < ta.size(); i++) {
			System.out.print(i + ":" + ta.getToken(i) + "\t");
		}
		System.out.println();
#

Print the list of views that this text annotation has

		System.out.println(ta.getAvailableViews());
#

Print the sentences.

		List<Sentence> sentences = ta.sentences();
		System.out.println(sentences.size() + " sentences found.");
		for (int i = 0; i < sentences.size(); i++) {
			Sentence sentence = sentences.get(i);
			System.out.println(sentence);
		}
#

Add parse trees generated by the Stanford parser.

		client.addStanfordParse(ta, forceUpdate);
#

Get the parse trees as a TreeView.

		TreeView parseView = (TreeView) ta.getView(ViewNames.PARSE_STANFORD);
#

Each sentence in the text corresponds to a parse tree.

		for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
			Tree<String> parseTree = parseView.getTree(sentenceId);
			System.out.println(parseTree);
		}
#

Add parse trees generated by the Charniak parser and get them as a TreeView.

		client.addCharniakParse(ta, forceUpdate);

		parseView = (TreeView) ta.getView(ViewNames.PARSE_CHARNIAK);
#

Each sentence in the text corresponds to a parse tree.

		for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) {
			Tree<String> parseTree = parseView.getTree(sentenceId);
			System.out.println(parseTree);
		}
#

Add the shallow parse view.

		client.addChunkView(ta, forceUpdate);
#

Print all the views that have been added so far.

		System.out.println(ta.getAvailableViews());
#

Get the shallow parse view that was added from the curator as a SpanLabelView.

		SpanLabelView shallowParseView = (SpanLabelView) ta
				.getView(ViewNames.SHALLOW_PARSE);
#

Get all the shallow parse constituents. In the shallow parse view, each chunk will have one constituent. There are no relations between the chunks.

		List<Constituent> shallowParseConstituents = shallowParseView
				.getConstituents();
		for (Constituent c : shallowParseConstituents) {
			System.out.println(c.getStartSpan() + "-" + c.getEndSpan() + ":"
					+ c.getLabel() + " " + c.getSurfaceString());
		}
#

Add the part of speech view.

		client.addPOSView(ta, forceUpdate);
#

Print the views that have been added so far.

		System.out.println(ta.getAvailableViews());
#

Get the POS view as a TokenLabelView

		TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);

		for (int i = 0; i < ta.size(); i++) {
			System.out.println(i + ":" + posView.getLabel(i));
		}

	}
}