From b3b024133dcd61793659f5cf01cfa202ca5e5e28 Mon Sep 17 00:00:00 2001
From: Courtney Napoles <393791+cnap@users.noreply.github.com>
Date: Thu, 24 Oct 2019 11:00:21 -0700
Subject: [PATCH] Create README.md

---
 README.md | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)
 create mode 100644 README.md

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..a21127d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,18 @@
+# Enabling Robust Grammatical Error Correction in New Domains: Data Sets, Metrics, and Analyses
+
+
+```
+@article{doi:10.1162/tacl\_a\_00282,
+author = {Napoles, Courtney and Nădejde, Maria and Tetreault, Joel},
+title = {Enabling Robust Grammatical Error Correction in New Domains: Data Sets, Metrics, and Analyses},
+journal = {Transactions of the Association for Computational Linguistics},
+volume = {7},
+number = {},
+pages = {551-566},
+year = {2019},
+doi = {10.1162/tacl\_a\_00282},
+URL = {https://doi.org/10.1162/tacl_a_00282},
+eprint = {https://doi.org/10.1162/tacl_a_00282},
+abstract = {Until now, grammatical error correction (GEC) has been primarily evaluated on text written by non-native English speakers, with a focus on student essays. This paper enables GEC development on text written by native speakers by providing a new data set and metric. We present a multiple-reference test corpus for GEC that includes 4,000 sentences in two new domains (formal and informal writing by native English speakers) and 2,000 sentences from a diverse set of non-native student writing. We also collect human judgments of several GEC systems on this new test set and perform a meta-evaluation, assessing how reliable automatic metrics are across these domains. We find that commonly used GEC metrics have inconsistent performance across domains, and therefore we propose a new ensemble metric that is robust on all three domains of text.}
+}
+```