
The eventual goal of a language model is to accurately predict the value of a missing word given its context. We present an approach to word prediction that is based on learning a representation for each word as a function of words and linguistic predicates in its context. This approach raises a few new questions that we address. First, in order to learn good word representations it is necessary to use an expressive representation of the context. We present a way that uses external knowledge to generate expressive context representations, along with a learning method capable of handling the large number of features generated this way that can, potentially, contribute to each prediction. Second, since the number of words ``competing'' for each prediction is large, there is a need to ``focus the attention'' on a smaller subset of these. We exhibit the contribution of a ``focus of attention'' mechanism to the performance of the word predictor. Finally, we describe a large-scale experimental study in which the approach presented is shown to yield significant improvements in word prediction tasks.
@conference{Even-ZoharRo00,
author = {Y. Even-Zohar and D. Roth},
title = {A classification approach to word prediction},
booktitle = {NAACL},
pages = {124--131},
year = {2000},
acceptance = {43/166 (26\%)},
url = {http://cogcomp.cs.illinois.edu/papers/naacl00.pdf},
funding = {NSF98,KDI},
projects = {LT,KR},
comment = {Learning with expressive features; dependency parse based features; verb prediction},
}