% Guzman and Vogel, COLING 2012 conference paper on analysing statistical MT
% performance with regularized linear regression (see the abstract field).
@InProceedings{guzman-vogel:2012:PAPERS,
  author    = {Guzm{\'a}n, Francisco and Vogel, Stephan},
  title     = {Understanding the Performance of Statistical {MT} Systems: A Linear Regression Framework},
  booktitle = {Proceedings of the 24th International Conference on Computational Linguistics {(COLING 2012)}},
  month     = dec,
  year      = {2012},
  address   = {Mumbai, India},
  pages     = {1029--1044},
  url       = {http://www.aclweb.org/anthology/C12-1063},
  abstract  = {We present a framework for the analysis of Machine Translation performance. We use multivariate linear models to determine the impact of a wide range of features on translation performance. Our assumption is that variables that most contribute to predict translation performance are the key to understand the differences between good and bad translations. During training, we learn the regression parameters that better predict translation quality using a wide range of input features based on the translation model and the first-best translation hypotheses. We use a linear regression with regularization. Our results indicate that with regularized linear regression, we can achieve higher levels of correlation between our predicted values and the actual values of the quality metrics. Our analysis shows that the performance for in-domain data is largely dependent on the characteristics of the translation model. On the other hand, out-of domain data can benefit from better reordering strategies.},
}