% ACL 2017 long paper (Andrews, Dredze, Van Durme, Eisner).
% Notes for maintainers: month uses the predefined macro (unquoted) so the
% bibliography style controls its rendering; the word Bayesian is braced in
% the title so sentence-casing styles keep its capital letter.
@InProceedings{andrews-EtAl:2017:Long,
  author    = {Andrews, Nicholas and Dredze, Mark and Van Durme, Benjamin and Eisner, Jason},
  title     = {{Bayesian} Modeling of Lexical Resources for Low-Resource Settings},
  booktitle = {Proceedings of the 55th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
  month     = jul,
  year      = {2017},
  address   = {Vancouver, Canada},
  publisher = {Association for Computational Linguistics},
  pages     = {1029--1039},
  abstract  = {Lexical resources such as dictionaries and gazetteers are often used
as auxiliary data for tasks such as part-of-speech induction and named-entity
recognition. However, discriminative training with lexical features requires
annotated data to reliably estimate the lexical feature weights and may result
in overfitting the lexical features at the expense of features which generalize
better.
In this paper, we investigate a more robust approach: we stipulate
that the lexicon is the result of an assumed generative
process. Practically, this means that we may treat the lexical
resources as observations under the proposed generative model.
The lexical resources provide training data for the generative model
without requiring separate data to estimate lexical feature
weights. We evaluate the proposed approach in two settings:
part-of-speech induction and low-resource named-entity recognition.},
  url       = {http://aclweb.org/anthology/P17-1095}
}