WikifierExample.java

#

You can download the Java file for this example here.

package edu.illinois.cs.cogcomp.edison.examples;

import org.apache.thrift.TException;

import edu.illinois.cs.cogcomp.edison.data.curator.CuratorClient;
import edu.illinois.cs.cogcomp.edison.sentences.Constituent;
import edu.illinois.cs.cogcomp.edison.sentences.SpanLabelView;
import edu.illinois.cs.cogcomp.edison.sentences.TextAnnotation;
import edu.illinois.cs.cogcomp.edison.sentences.ViewNames;
import edu.illinois.cs.cogcomp.thrift.base.AnnotationFailedException;
import edu.illinois.cs.cogcomp.thrift.base.ServiceUnavailableException;
#

Getting the Wikifier annotation from the Curator

public class WikifierExample {
#

The Wikifier is one of the annotations provided by the Illinois Curator. It annotates phrases in text with their Wikipedia pages. This example shows how to get the Wikifier view into Edison's data structures.

The CuratorClient converts the Wikifier's output to a SpanLabelView and adds it to the TextAnnotation as ViewNames.WIKIFIER. After this, the view can be used like any other SpanLabelView.

This example obtains the Wikifier annotation for some text and prints all the Wikipedia links from the Wikifier view.

    public static void main(String[] args) throws ServiceUnavailableException,
	    AnnotationFailedException, TException {
#

We have some text from a corpus.

	String corpusId = "2001_ODYSSEY";
	String textId = "002";

	String text = "2001: A Space Odyssey is a science-fiction narrative,"
		+ " produced in 1968 as both a film, directed by Stanley "
		+ "Kubrick, and a novel, written by Arthur C. Clarke. ";
#

Create a curator client.

	String curatorHost = "smeagol.cs.uiuc.edu";
	int curatorPort = 9010;

	CuratorClient client = new CuratorClient(curatorHost, curatorPort,
		false);
#

Let us first create a TextAnnotation object using the Curator's sentence segmenter and tokenizer.

	TextAnnotation ta = client.getTextAnnotation(corpusId, textId, text,
		false);
#

Add the Wikifier to the text.

	client.addWikifierView(ta, false);
#

The TextAnnotation should now have a view called ViewNames.WIKIFIER.

	assert ta.hasView(ViewNames.WIKIFIER);
#

Get the Wikifier view.

	SpanLabelView wikifier = (SpanLabelView) ta.getView(ViewNames.WIKIFIER);
#

Print all the constituents in this view.

	for (Constituent w : wikifier.getConstituents()) {
	    System.out.println(w.getSurfaceString() + ": " + w.getLabel());
	}
    }
}