CuratorConnectionExample.java

CuratorConnectionExample.java
# You can download this file here. This example shows how to connect to the Curator using Edison. While the Curator is stores annotations of text, Edison provides a library to manipulate the annotations and generate features using them.	package edu.illinois.cs.cogcomp.edison.examples; import java.util.List; import org.apache.thrift.TException; import edu.illinois.cs.cogcomp.core.datastructures.trees.Tree; import edu.illinois.cs.cogcomp.edison.data.curator.CuratorClient; import edu.illinois.cs.cogcomp.edison.sentences.Constituent; import edu.illinois.cs.cogcomp.edison.sentences.Sentence; import edu.illinois.cs.cogcomp.edison.sentences.SpanLabelView; import edu.illinois.cs.cogcomp.edison.sentences.TextAnnotation; import edu.illinois.cs.cogcomp.edison.sentences.TokenLabelView; import edu.illinois.cs.cogcomp.edison.sentences.TreeView; import edu.illinois.cs.cogcomp.edison.sentences.ViewNames; import edu.illinois.cs.cogcomp.thrift.base.AnnotationFailedException; import edu.illinois.cs.cogcomp.thrift.base.ServiceUnavailableException; public class CuratorConnectionExample { public static void main(String[] args) throws ServiceUnavailableException, AnnotationFailedException, TException {
# When we want to annotate raw text	String text = "Good afternoon, gentlemen. I am a HAL-9000 computer."; String corpus = "2001_ODYSSEY"; String textId = "001"; String curatorHost = "trollope.cs.illinois.edu"; int curatorPort = 9010; CuratorClient client = new CuratorClient(curatorHost, curatorPort); boolean forceUpdate = false; TextAnnotation ta = client.getTextAnnotation(corpus, textId, text, forceUpdate);
# Print the text. This prints the raw text that was used to create the TextAnnotation object.	System.out.println(ta.getText());
# Print the tokenized text. The tokenized text is obtained from the Curator, which defines a tokenization scheme. All the views in this TextAnnotation will be defined using this tokenization scheme.	System.out.println(ta.getTokenizedText());
# Print the de-tokenized text. In addition to the tokenized text, the de-tokenized text can also be used as the key for maps.	System.out.println(ta.getDetokenizedText());
# Print the tokens	for (int i = 0; i < ta.size(); i++) { System.out.print(i + ":" + ta.getToken(i) + "\t"); } System.out.println();
# Print the list of views that this text annotation has	System.out.println(ta.getAvailableViews());
# Print the sentences.	List<Sentence> sentences = ta.sentences(); System.out.println(sentences.size() + " sentences found."); for (int i = 0; i < sentences.size(); i++) { Sentence sentence = sentences.get(i); System.out.println(sentence); }
# Add parse trees generated by the stanford parser	client.addStanfordParse(ta, forceUpdate);
# Get the parse trees as a TreeView.	TreeView parseView = (TreeView) ta.getView(ViewNames.PARSE_STANFORD);
# Each sentence in the text corresponds to a parse tree.	for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) { Tree<String> parseTree = parseView.getTree(sentenceId); System.out.println(parseTree); }
# Add parse trees generated by the stanford parser	client.addCharniakParse(ta, forceUpdate); parseView = (TreeView) ta.getView(ViewNames.PARSE_CHARNIAK);
# Each sentence in the text corresponds to a parse tree.	for (int sentenceId = 0; sentenceId < sentences.size(); sentenceId++) { Tree<String> parseTree = parseView.getTree(sentenceId); System.out.println(parseTree); }
# Add the shallow parse view.	client.addChunkView(ta, forceUpdate);
# Print all the views that have been added so far.	System.out.println(ta.getAvailableViews());
# Get the shallow parse view that was added from the curator as a SpanLabelView.	SpanLabelView shallowParseView = (SpanLabelView) ta .getView(ViewNames.SHALLOW_PARSE);
# Get all the shallow parse constituents. In the shallow parse constituent, each chunk will have one constituent. There are no relations between the chunks.	List<Constituent> shallowParseConstituents = shallowParseView .getConstituents(); for (Constituent c : shallowParseConstituents) { System.out.println(c.getStartSpan() + "-" + c.getEndSpan() + ":" + c.getLabel() + " " + c.getSurfaceString()); }
# Add the part of speech view.	client.addPOSView(ta, forceUpdate);
# Print the views that have been added so far.	System.out.println(ta.getAvailableViews());
# Get the POS view as a TokenLabelView	TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS); for (int i = 0; i < ta.size(); i++) { System.out.println(i + ":" + posView.getLabel(i)); } } }