%% This BibTeX bibliography file was created using BibDesk.
%% https://bibdesk.sourceforge.io/

%% Created for hankcs at 2022-12-07 15:02:16 -0500

%% Saved with string encoding Unicode (UTF-8)

@inproceedings{bai-etal-2022-graph,
address = {Dublin, Ireland},
author = {Bai, Xuefeng and Chen, Yulong and Zhang, Yue},
booktitle = {Proceedings of the 60th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)},
date-added = {2022-12-07 15:02:15 -0500},
date-modified = {2022-12-07 15:02:15 -0500},
month = may,
pages = {6001--6015},
publisher = {Association for Computational Linguistics},
title = {Graph Pre-training for {AMR} Parsing and Generation},
url = {https://aclanthology.org/2022.acl-long.415},
year = {2022},
bdsk-url-1 = {https://aclanthology.org/2022.acl-long.415}}
@inproceedings{wang-etal-2021-minilmv2,
address = {Online},
author = {Wang, Wenhui and Bao, Hangbo and Huang, Shaohan and Dong, Li and Wei, Furu},
booktitle = {Findings of the Association for Computational Linguistics: ACL-IJCNLP 2021},
date-added = {2022-06-14 20:10:18 -0400},
date-modified = {2022-06-14 20:10:18 -0400},
doi = {10.18653/v1/2021.findings-acl.188},
month = aug,
pages = {2140--2151},
publisher = {Association for Computational Linguistics},
title = {{M}ini{LM}v2: Multi-Head Self-Attention Relation Distillation for Compressing Pretrained Transformers},
url = {https://aclanthology.org/2021.findings-acl.188},
year = {2021},
bdsk-url-1 = {https://aclanthology.org/2021.findings-acl.188},
bdsk-url-2 = {https://doi.org/10.18653/v1/2021.findings-acl.188}}
@article{zhang2021mengzi,
author = {Zhang, Zhuosheng and Zhang, Hanqing and Chen, Keming and Guo, Yuhang and Hua, Jingyun and Wang, Yulong and Zhou, Ming},
date-added = {2022-04-15 10:32:14 -0400},
date-modified = {2022-04-15 10:32:14 -0400},
journal = {arXiv preprint arXiv:2110.06696},
title = {Mengzi: Towards Lightweight yet Ingenious Pre-trained Models for Chinese},
year = {2021}}
@inproceedings{samuel-straka-2020-ufal,
abstract = {We present PERIN, a novel permutation-invariant approach to sentence-to-graph semantic parsing. PERIN is a versatile, cross-framework and language independent architecture for universal modeling of semantic structures. Our system participated in the CoNLL 2020 shared task, Cross-Framework Meaning Representation Parsing (MRP 2020), where it was evaluated on five different frameworks (AMR, DRG, EDS, PTG and UCCA) across four languages. PERIN was one of the winners of the shared task. The source code and pretrained models are available at http://www.github.com/ufal/perin.},
address = {Online},
author = {Samuel, David and Straka, Milan},
booktitle = {Proceedings of the CoNLL 2020 Shared Task: Cross-Framework Meaning Representation Parsing},
date-added = {2022-04-12 22:36:23 -0400},
date-modified = {2022-04-12 22:36:23 -0400},
doi = {10.18653/v1/2020.conll-shared.5},
month = nov,
pages = {53--64},
publisher = {Association for Computational Linguistics},
title = {{{\'U}FAL} at {MRP} 2020: Permutation-invariant Semantic Parsing in {PERIN}},
url = {https://aclanthology.org/2020.conll-shared.5},
year = {2020},
bdsk-url-1 = {https://aclanthology.org/2020.conll-shared.5},
bdsk-url-2 = {https://doi.org/10.18653/v1/2020.conll-shared.5}}
@inproceedings{qiu-etal-2014-multi,
address = {Dublin, Ireland},
author = {Qiu, Likun and Zhang, Yue and Jin, Peng and Wang, Houfeng},
booktitle = {Proceedings of {COLING} 2014, the 25th International Conference on Computational Linguistics: Technical Papers},
date-added = {2022-02-15 04:42:58 -0500},
date-modified = {2022-02-15 04:42:58 -0500},
month = aug,
pages = {257--268},
publisher = {Dublin City University and Association for Computational Linguistics},
title = {Multi-view {C}hinese Treebanking},
url = {https://aclanthology.org/C14-1026},
year = {2014},
bdsk-url-1 = {https://aclanthology.org/C14-1026}}
@inproceedings{li-etal-2018-analogical,
abstract = {Analogical reasoning is effective in capturing linguistic regularities. This paper proposes an analogical reasoning task on Chinese. After delving into Chinese lexical knowledge, we sketch 68 implicit morphological relations and 28 explicit semantic relations. A big and balanced dataset CA8 is then built for this task, including 17813 questions. Furthermore, we systematically explore the influences of vector representations, context features, and corpora on analogical reasoning. With the experiments, CA8 is proved to be a reliable benchmark for evaluating Chinese word embeddings.},
address = {Melbourne, Australia},
author = {Li, Shen and Zhao, Zhe and Hu, Renfen and Li, Wensi and Liu, Tao and Du, Xiaoyong},
booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
date-added = {2022-01-30 22:52:52 -0500},
date-modified = {2022-01-30 22:52:52 -0500},
doi = {10.18653/v1/P18-2023},
month = jul,
pages = {138--143},
publisher = {Association for Computational Linguistics},
title = {Analogical Reasoning on {C}hinese Morphological and Semantic Relations},
url = {https://aclanthology.org/P18-2023},
year = {2018},
bdsk-url-1 = {https://aclanthology.org/P18-2023},
bdsk-url-2 = {https://doi.org/10.18653/v1/P18-2023}}
@inproceedings{NIPS2013_9aa42b31,
author = {Mikolov, Tomas and Sutskever, Ilya and Chen, Kai and Corrado, Greg S and Dean, Jeff},
booktitle = {Advances in Neural Information Processing Systems},
date-added = {2022-01-30 18:17:28 -0500},
date-modified = {2022-01-30 18:17:28 -0500},
editor = {C. J. C. Burges and L. Bottou and M. Welling and Z. Ghahramani and K. Q. Weinberger},
publisher = {Curran Associates, Inc.},
title = {Distributed Representations of Words and Phrases and their Compositionality},
url = {https://proceedings.neurips.cc/paper/2013/file/9aa42b31882ec039965f3c4923ce901b-Paper.pdf},
volume = {26},
year = {2013},
bdsk-url-1 = {https://proceedings.neurips.cc/paper/2013/file/9aa42b31882ec039965f3c4923ce901b-Paper.pdf}}
@inproceedings{bevilacqua-etal-2021-one,
author = {Bevilacqua, Michele and Blloshmi, Rexhina and Navigli, Roberto},
booktitle = {Proceedings of AAAI},
date-added = {2022-01-25 11:58:03 -0500},
date-modified = {2022-01-25 11:58:03 -0500},
title = {One {SPRING} to Rule Them Both: {S}ymmetric {AMR} Semantic Parsing and Generation without a Complex Pipeline},
year = {2021}}
@inproceedings{lewis-etal-2020-bart,
abstract = {We present BART, a denoising autoencoder for pretraining sequence-to-sequence models. BART is trained by (1) corrupting text with an arbitrary noising function, and (2) learning a model to reconstruct the original text. It uses a standard Tranformer-based neural machine translation architecture which, despite its simplicity, can be seen as generalizing BERT (due to the bidirectional encoder), GPT (with the left-to-right decoder), and other recent pretraining schemes. We evaluate a number of noising approaches, finding the best performance by both randomly shuffling the order of sentences and using a novel in-filling scheme, where spans of text are replaced with a single mask token. BART is particularly effective when fine tuned for text generation but also works well for comprehension tasks. It matches the performance of RoBERTa on GLUE and SQuAD, and achieves new state-of-the-art results on a range of abstractive dialogue, question answering, and summarization tasks, with gains of up to 3.5 ROUGE. BART also provides a 1.1 BLEU increase over a back-translation system for machine translation, with only target language pretraining. We also replicate other pretraining schemes within the BART framework, to understand their effect on end-task performance.},
address = {Online},
author = {Lewis, Mike and Liu, Yinhan and Goyal, Naman and Ghazvininejad, Marjan and Mohamed, Abdelrahman and Levy, Omer and Stoyanov, Veselin and Zettlemoyer, Luke},
booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
date-added = {2022-01-25 11:56:10 -0500},
date-modified = {2022-01-25 11:56:10 -0500},
doi = {10.18653/v1/2020.acl-main.703},
month = jul,
pages = {7871--7880},
publisher = {Association for Computational Linguistics},
title = {{BART}: Denoising Sequence-to-Sequence Pre-training for Natural Language Generation, Translation, and Comprehension},
url = {https://www.aclweb.org/anthology/2020.acl-main.703},
year = {2020},
bdsk-url-1 = {https://www.aclweb.org/anthology/2020.acl-main.703},
bdsk-url-2 = {https://doi.org/10.18653/v1/2020.acl-main.703}}
@article{knight2014abstract,
author = {Knight, Kevin and Banarescu, Laura and Bonial, Claire and Georgescu, Madalina and Griffitt, Kira and Hermjakob, Ulf and Marcu, Daniel and Palmer, Martha and Schneider, Nathan},
date-added = {2022-01-25 11:54:11 -0500},
date-modified = {2022-01-25 11:54:11 -0500},
journal = {Web download},
title = {Abstract Meaning Representation ({AMR}) Annotation Release 1.0},
year = {2014}}
@inproceedings{he-choi-2021-stem,
abstract = {Multi-task learning with transformer encoders (MTL) has emerged as a powerful technique to improve performance on closely-related tasks for both accuracy and efficiency while a question still remains whether or not it would perform as well on tasks that are distinct in nature. We first present MTL results on five NLP tasks, POS, NER, DEP, CON, and SRL, and depict its deficiency over single-task learning. We then conduct an extensive pruning analysis to show that a certain set of attention heads get claimed by most tasks during MTL, who interfere with one another to fine-tune those heads for their own objectives. Based on this finding, we propose the Stem Cell Hypothesis to reveal the existence of attention heads naturally talented for many tasks that cannot be jointly trained to create adequate embeddings for all of those tasks. Finally, we design novel parameter-free probes to justify our hypothesis and demonstrate how attention heads are transformed across the five tasks during MTL through label analysis.},
address = {Online and Punta Cana, Dominican Republic},
author = {He, Han and Choi, Jinho D.},
booktitle = {Proceedings of the 2021 Conference on Empirical Methods in Natural Language Processing},
date-added = {2021-11-06 18:24:44 -0400},
date-modified = {2021-11-06 18:24:44 -0400},
month = nov,
pages = {5555--5577},
publisher = {Association for Computational Linguistics},
title = {The Stem Cell Hypothesis: Dilemma behind Multi-Task Learning with Transformer Encoders},
url = {https://aclanthology.org/2021.emnlp-main.451},
year = {2021},
bdsk-url-1 = {https://aclanthology.org/2021.emnlp-main.451}}
@inproceedings{he-choi-2019,
abstract = {This paper presents new state-of-the-art models for three tasks, part-of-speech tagging, syntactic parsing, and semantic parsing, using the cutting-edge contextualized embedding framework known as BERT. For each task, we first replicate and simplify the current state-of-the-art approach to enhance its model efficiency. We then evaluate our simplified approaches on those three tasks using token embeddings generated by BERT. 12 datasets in both English and Chinese are used for our experiments. The BERT models outperform the previously best-performing models by 2.5\% on average (7.5\% for the most significant case). All models and source codes are available in public so that researchers can improve upon and utilize them to establish strong baselines for the next decade.},
author = {He, Han and Choi, Jinho D.},
booktitle = {The Thirty-Third International Flairs Conference},
conference = {Florida Artificial Intelligence Research Society Conference},
date-added = {2021-10-16 21:09:00 -0400},
date-modified = {2021-10-16 21:09:00 -0400},
keywords = {part-of-speech tagging, syntactic parsing, semantic parsing, Transformer, BERT},
title = {Establishing Strong Baselines for the New Decade: Sequence Tagging, Syntactic and Semantic Parsing with BERT},
url = {https://www.aaai.org/ocs/index.php/FLAIRS/FLAIRS20/paper/view/18438},
year = {2020},
bdsk-url-1 = {https://www.aaai.org/ocs/index.php/FLAIRS/FLAIRS20/paper/view/18438}}
@inproceedings{xiao-etal-2021-ernie,
abstract = {Coarse-grained linguistic information, such as named entities or phrases, facilitates adequately representation learning in pre-training. Previous works mainly focus on extending the objective of BERT{'}s Masked Language Modeling (MLM) from masking individual tokens to contiguous sequences of n tokens. We argue that such contiguously masking method neglects to model the intra-dependencies and inter-relation of coarse-grained linguistic information. As an alternative, we propose ERNIE-Gram, an explicitly n-gram masking method to enhance the integration of coarse-grained information into pre-training. In ERNIE-Gram, n-grams are masked and predicted directly using explicit n-gram identities rather than contiguous sequences of n tokens. Furthermore, ERNIE-Gram employs a generator model to sample plausible n-gram identities as optional n-gram masks and predict them in both coarse-grained and fine-grained manners to enable comprehensive n-gram prediction and relation modeling. We pre-train ERNIE-Gram on English and Chinese text corpora and fine-tune on 19 downstream tasks. Experimental results show that ERNIE-Gram outperforms previous pre-training models like XLNet and RoBERTa by a large margin, and achieves comparable results with state-of-the-art methods. The source codes and pre-trained models have been released at https://github.com/PaddlePaddle/ERNIE.},
address = {Online},
author = {Xiao, Dongling and Li, Yu-Kun and Zhang, Han and Sun, Yu and Tian, Hao and Wu, Hua and Wang, Haifeng},
booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
date-added = {2021-09-04 14:09:52 -0400},
date-modified = {2021-09-04 14:09:52 -0400},
doi = {10.18653/v1/2021.naacl-main.136},
month = jun,
pages = {1702--1715},
publisher = {Association for Computational Linguistics},
title = {{ERNIE}-Gram: Pre-Training with Explicitly N-Gram Masked Language Modeling for Natural Language Understanding},
url = {https://aclanthology.org/2021.naacl-main.136},
year = {2021},
bdsk-url-1 = {https://aclanthology.org/2021.naacl-main.136},
bdsk-url-2 = {https://doi.org/10.18653/v1/2021.naacl-main.136}}
@inproceedings{akbik-etal-2018-contextual,
abstract = {Recent advances in language modeling using recurrent neural networks have made it viable to model language as distributions over characters. By learning to predict the next character on the basis of previous characters, such models have been shown to automatically internalize linguistic concepts such as words, sentences, subclauses and even sentiment. In this paper, we propose to leverage the internal states of a trained character language model to produce a novel type of word embedding which we refer to as contextual string embeddings. Our proposed embeddings have the distinct properties that they (a) are trained without any explicit notion of words and thus fundamentally model words as sequences of characters, and (b) are contextualized by their surrounding text, meaning that the same word will have different embeddings depending on its contextual use. We conduct a comparative evaluation against previous embeddings and find that our embeddings are highly useful for downstream tasks: across four classic sequence labeling tasks we consistently outperform the previous state-of-the-art. In particular, we significantly outperform previous work on English and German named entity recognition (NER), allowing us to report new state-of-the-art F1-scores on the CoNLL03 shared task. We release all code and pre-trained language models in a simple-to-use framework to the research community, to enable reproduction of these experiments and application of our proposed embeddings to other tasks: https://github.com/zalandoresearch/flair},
address = {Santa Fe, New Mexico, USA},
author = {Akbik, Alan and Blythe, Duncan and Vollgraf, Roland},
booktitle = {Proceedings of the 27th International Conference on Computational Linguistics},
date-added = {2021-09-01 13:10:59 -0400},
date-modified = {2021-09-01 13:10:59 -0400},
month = aug,
pages = {1638--1649},
publisher = {Association for Computational Linguistics},
title = {Contextual String Embeddings for Sequence Labeling},
url = {https://aclanthology.org/C18-1139},
year = {2018},
bdsk-url-1 = {https://aclanthology.org/C18-1139}}
@inproceedings{he-choi-2021-levi,
abstract = {Coupled with biaffine decoders, transformers have been effectively adapted to text-to-graph transduction and achieved state-of-the-art performance on AMR parsing. Many prior works, however, rely on the biaffine decoder for either or both arc and label predictions although most features used by the decoder may be learned by the transformer already. This paper presents a novel approach to AMR parsing by combining heterogeneous data (tokens, concepts, labels) as one input to a transformer to learn attention, and use only attention matrices from the transformer to predict all elements in AMR graphs (concepts, arcs, labels). Although our models use significantly fewer parameters than the previous state-of-the-art graph parser, they show similar or better accuracy on AMR 2.0 and 3.0.},
address = {Online},
author = {He, Han and Choi, Jinho D.},
booktitle = {Proceedings of the 17th International Conference on Parsing Technologies and the IWPT 2021 Shared Task on Parsing into Enhanced Universal Dependencies (IWPT 2021)},
date-added = {2021-09-01 13:09:14 -0400},
date-modified = {2021-09-01 13:09:14 -0400},
doi = {10.18653/v1/2021.iwpt-1.5},
month = aug,
pages = {50--57},
publisher = {Association for Computational Linguistics},
title = {Levi Graph {AMR} Parser using Heterogeneous Attention},
url = {https://aclanthology.org/2021.iwpt-1.5},
year = {2021},
bdsk-url-1 = {https://aclanthology.org/2021.iwpt-1.5},
bdsk-url-2 = {https://doi.org/10.18653/v1/2021.iwpt-1.5}}
@inproceedings{conneau-etal-2020-unsupervised,
abstract = {This paper shows that pretraining multilingual language models at scale leads to significant performance gains for a wide range of cross-lingual transfer tasks. We train a Transformer-based masked language model on one hundred languages, using more than two terabytes of filtered CommonCrawl data. Our model, dubbed XLM-R, significantly outperforms multilingual BERT (mBERT) on a variety of cross-lingual benchmarks, including +14.6{\%} average accuracy on XNLI, +13{\%} average F1 score on MLQA, and +2.4{\%} F1 score on NER. XLM-R performs particularly well on low-resource languages, improving 15.7{\%} in XNLI accuracy for Swahili and 11.4{\%} for Urdu over previous XLM models. We also present a detailed empirical analysis of the key factors that are required to achieve these gains, including the trade-offs between (1) positive transfer and capacity dilution and (2) the performance of high and low resource languages at scale. Finally, we show, for the first time, the possibility of multilingual modeling without sacrificing per-language performance; XLM-R is very competitive with strong monolingual models on the GLUE and XNLI benchmarks. We will make our code and models publicly available.},
address = {Online},
author = {Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin},
booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
date-added = {2021-09-01 12:41:50 -0400},
date-modified = {2021-09-01 12:41:50 -0400},
doi = {10.18653/v1/2020.acl-main.747},
month = jul,
pages = {8440--8451},
publisher = {Association for Computational Linguistics},
title = {Unsupervised Cross-lingual Representation Learning at Scale},
url = {https://aclanthology.org/2020.acl-main.747},
year = {2020},
bdsk-url-1 = {https://aclanthology.org/2020.acl-main.747},
bdsk-url-2 = {https://doi.org/10.18653/v1/2020.acl-main.747}}
@inproceedings{xue-etal-2021-mt5,
abstract = {The recent {``}Text-to-Text Transfer Transformer{''} (T5) leveraged a unified text-to-text format and scale to attain state-of-the-art results on a wide variety of English-language NLP tasks. In this paper, we introduce mT5, a multilingual variant of T5 that was pre-trained on a new Common Crawl-based dataset covering 101 languages. We detail the design and modified training of mT5 and demonstrate its state-of-the-art performance on many multilingual benchmarks. We also describe a simple technique to prevent {``}accidental translation{''} in the zero-shot setting, where a generative model chooses to (partially) translate its prediction into the wrong language. All of the code and model checkpoints used in this work are publicly available.},
address = {Online},
author = {Xue, Linting and Constant, Noah and Roberts, Adam and Kale, Mihir and Al-Rfou, Rami and Siddhant, Aditya and Barua, Aditya and Raffel, Colin},
booktitle = {Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies},
date-added = {2021-09-01 12:40:34 -0400},
date-modified = {2021-09-01 12:40:34 -0400},
doi = {10.18653/v1/2021.naacl-main.41},
month = jun,
pages = {483--498},
publisher = {Association for Computational Linguistics},
title = {m{T}5: A Massively Multilingual Pre-trained Text-to-Text Transformer},
url = {https://aclanthology.org/2021.naacl-main.41},
year = {2021},
bdsk-url-1 = {https://aclanthology.org/2021.naacl-main.41},
bdsk-url-2 = {https://doi.org/10.18653/v1/2021.naacl-main.41}}
@misc{https://doi.org/10.35111/gvd0-xk91,
author = {Xue, Nianwen and Zhang, Xiuhong and Jiang, Zixin and Palmer, Martha and Xia, Fei and Chiou, Fu-Dong and Chang, Meiyu},
date-added = {2021-09-01 12:32:05 -0400},
date-modified = {2021-09-01 12:36:22 -0400},
doi = {10.35111/GVD0-XK91},
publisher = {Linguistic Data Consortium},
title = {Chinese Treebank 9.0},
url = {https://catalog.ldc.upenn.edu/LDC2016T13},
year = {2016},
bdsk-url-1 = {https://catalog.ldc.upenn.edu/LDC2016T13},
bdsk-url-2 = {https://doi.org/10.35111/GVD0-XK91}}
@inproceedings{clark2020electra,
author = {Kevin Clark and Minh-Thang Luong and Quoc V. Le and Christopher D. Manning},
booktitle = {International Conference on Learning Representations},
date-added = {2021-08-07 15:53:27 -0400},
date-modified = {2021-08-07 15:53:27 -0400},
title = {{ELECTRA}: Pre-training Text Encoders as Discriminators Rather Than Generators},
url = {https://openreview.net/pdf?id=r1xMH1BtvB},
year = {2020},
bdsk-url-1 = {https://openreview.net/pdf?id=r1xMH1BtvB}}
@inproceedings{chang-etal-2009-discriminative,
address = {Boulder, Colorado},
author = {Chang, Pi-Chuan and Tseng, Huihsin and Jurafsky, Dan and Manning, Christopher D.},
booktitle = {Proceedings of the Third Workshop on Syntax and Structure in Statistical Translation ({SSST}-3) at {NAACL} {HLT} 2009},
date-added = {2021-03-17 13:37:03 -0400},
date-modified = {2021-03-17 13:37:03 -0400},
month = jun,
pages = {51--59},
publisher = {Association for Computational Linguistics},
title = {Discriminative Reordering with {C}hinese Grammatical Relations Features},
url = {https://www.aclweb.org/anthology/W09-2307},
year = {2009},
bdsk-url-1 = {https://www.aclweb.org/anthology/W09-2307}}
@inproceedings{pennington-etal-2014-glove,
address = {Doha, Qatar},
author = {Pennington, Jeffrey and Socher, Richard and Manning, Christopher},
booktitle = {Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing ({EMNLP})},
date-added = {2020-12-31 15:07:29 -0500},
date-modified = {2020-12-31 15:07:29 -0500},
doi = {10.3115/v1/D14-1162},
month = oct,
pages = {1532--1543},
publisher = {Association for Computational Linguistics},
title = {{G}lo{V}e: Global Vectors for Word Representation},
url = {https://www.aclweb.org/anthology/D14-1162},
year = {2014},
bdsk-url-1 = {https://www.aclweb.org/anthology/D14-1162},
bdsk-url-2 = {https://doi.org/10.3115/v1/D14-1162}}
@incollection{he2018dual,
author = {He, Han and Wu, Lei and Yang, Xiaokun and Yan, Hua and Gao, Zhimin and Feng, Yi and Townsend, George},
booktitle = {Information Technology-New Generations},
date-added = {2020-12-31 15:03:58 -0500},
date-modified = {2020-12-31 15:03:58 -0500},
pages = {421--426},
publisher = {Springer},
title = {Dual long short-term memory networks for sub-character representation learning},
year = {2018}}
@inproceedings{devlin-etal-2019-bert,
abstract = {We introduce a new language representation model called BERT, which stands for Bidirectional Encoder Representations from Transformers. Unlike recent language representation models (Peters et al., 2018a; Radford et al., 2018), BERT is designed to pre-train deep bidirectional representations from unlabeled text by jointly conditioning on both left and right context in all layers. As a result, the pre-trained BERT model can be fine-tuned with just one additional output layer to create state-of-the-art models for a wide range of tasks, such as question answering and language inference, without substantial task-specific architecture modifications. BERT is conceptually simple and empirically powerful. It obtains new state-of-the-art results on eleven natural language processing tasks, including pushing the GLUE score to 80.5 (7.7 point absolute improvement), MultiNLI accuracy to 86.7{\%} (4.6{\%} absolute improvement), SQuAD v1.1 question answering Test F1 to 93.2 (1.5 point absolute improvement) and SQuAD v2.0 Test F1 to 83.1 (5.1 point absolute improvement).},
address = {Minneapolis, Minnesota},
author = {Devlin, Jacob and Chang, Ming-Wei and Lee, Kenton and Toutanova, Kristina},
booktitle = {Proceedings of the 2019 Conference of the North {A}merican Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)},
date-added = {2020-12-31 14:46:54 -0500},
date-modified = {2020-12-31 14:46:54 -0500},
doi = {10.18653/v1/N19-1423},
month = jun,
pages = {4171--4186},
publisher = {Association for Computational Linguistics},
title = {{BERT}: Pre-training of Deep Bidirectional Transformers for Language Understanding},
url = {https://www.aclweb.org/anthology/N19-1423},
year = {2019},
bdsk-url-1 = {https://www.aclweb.org/anthology/N19-1423},
bdsk-url-2 = {https://doi.org/10.18653/v1/N19-1423}}
@inproceedings{Lan2020ALBERT:,
author = {Zhenzhong Lan and Mingda Chen and Sebastian Goodman and Kevin Gimpel and Piyush Sharma and Radu Soricut},
booktitle = {International Conference on Learning Representations},
date-added = {2020-12-31 14:44:52 -0500},
date-modified = {2020-12-31 14:44:52 -0500},
title = {ALBERT: A Lite BERT for Self-supervised Learning of Language Representations},
url = {https://openreview.net/forum?id=H1eA7AEtvS},
year = {2020},
bdsk-url-1 = {https://openreview.net/forum?id=H1eA7AEtvS}}
@inproceedings{wang-xu-2017-convolutional,
abstract = {Character-based sequence labeling framework is flexible and efficient for Chinese word segmentation (CWS). Recently, many character-based neural models have been applied to CWS. While they obtain good performance, they have two obvious weaknesses. The first is that they heavily rely on manually designed bigram feature, i.e. they are not good at capturing $n$-gram features automatically. The second is that they make no use of full word information. For the first weakness, we propose a convolutional neural model, which is able to capture rich $n$-gram features without any feature engineering. For the second one, we propose an effective approach to integrate the proposed model with word embeddings. We evaluate the model on two benchmark datasets: PKU and MSR. Without any feature engineering, the model obtains competitive performance {---} 95.7{\%} on PKU and 97.3{\%} on MSR. Armed with word embeddings, the model achieves state-of-the-art performance on both datasets {---} 96.5{\%} on PKU and 98.0{\%} on MSR, without using any external labeled resource.},
address = {Taipei, Taiwan},
author = {Wang, Chunqi and Xu, Bo},
booktitle = {Proceedings of the Eighth International Joint Conference on Natural Language Processing (Volume 1: Long Papers)},
date-added = {2020-12-31 14:42:35 -0500},
date-modified = {2020-12-31 14:42:35 -0500},
month = nov,
pages = {163--172},
publisher = {Asian Federation of Natural Language Processing},
title = {Convolutional Neural Network with Word Embeddings for {C}hinese Word Segmentation},
url = {https://www.aclweb.org/anthology/I17-1017},
year = {2017},
bdsk-url-1 = {https://www.aclweb.org/anthology/I17-1017}}
@article{bojanowski2017enriching,
author = {Bojanowski, Piotr and Grave, Edouard and Joulin, Armand and Mikolov, Tomas},
date-added = {2020-12-25 22:31:59 -0500},
date-modified = {2020-12-25 22:31:59 -0500},
issn = {2307-387X},
journal = {Transactions of the Association for Computational Linguistics},
pages = {135--146},
title = {Enriching Word Vectors with Subword Information},
volume = {5},
year = {2017}}
@article{collins-koo-2005-discriminative,
author = {Collins, Michael and Koo, Terry},
date-added = {2020-12-25 17:25:59 -0500},
date-modified = {2020-12-25 17:25:59 -0500},
doi = {10.1162/0891201053630273},
journal = {Computational Linguistics},
number = {1},
pages = {25--70},
title = {Discriminative Reranking for Natural Language Parsing},
url = {https://www.aclweb.org/anthology/J05-1003},
volume = {31},
year = {2005},
bdsk-url-1 = {https://www.aclweb.org/anthology/J05-1003},
bdsk-url-2 = {https://doi.org/10.1162/0891201053630273}}
@inproceedings{zhang-clark-2008-tale,
address = {Honolulu, Hawaii},
author = {Zhang, Yue and Clark, Stephen},
booktitle = {Proceedings of the 2008 Conference on Empirical Methods in Natural Language Processing},
date-added = {2020-12-25 15:10:10 -0500},
date-modified = {2020-12-25 15:10:10 -0500},
month = oct,
pages = {562--571},
publisher = {Association for Computational Linguistics},
title = {A Tale of Two Parsers: {I}nvestigating and Combining Graph-based and Transition-based Dependency Parsing},
url = {https://www.aclweb.org/anthology/D08-1059},
year = {2008},
bdsk-url-1 = {https://www.aclweb.org/anthology/D08-1059}}
@inproceedings{pradhan-etal-2012-conll,
address = {Jeju Island, Korea},
author = {Pradhan, Sameer and Moschitti, Alessandro and Xue, Nianwen and Uryupina, Olga and Zhang, Yuchen},
booktitle = {Joint Conference on {EMNLP} and {C}o{NLL} - Shared Task},
date-added = {2020-12-24 23:42:41 -0500},
date-modified = {2020-12-24 23:42:41 -0500},
month = jul,
pages = {1--40},
publisher = {Association for Computational Linguistics},
title = {{C}o{NLL}-2012 Shared Task: Modeling Multilingual Unrestricted Coreference in {O}nto{N}otes},
url = {https://www.aclweb.org/anthology/W12-4501},
year = {2012},
bdsk-url-1 = {https://www.aclweb.org/anthology/W12-4501}}
@inproceedings{levow-2006-third,
address = {Sydney, Australia},
author = {Levow, Gina-Anne},
booktitle = {Proceedings of the Fifth {SIGHAN} Workshop on {C}hinese Language Processing},
date-added = {2020-12-24 23:21:14 -0500},
date-modified = {2020-12-24 23:21:14 -0500},
month = jul,
pages = {108--117},
publisher = {Association for Computational Linguistics},
title = {The Third International {C}hinese Language Processing Bakeoff: Word Segmentation and Named Entity Recognition},
url = {https://www.aclweb.org/anthology/W06-0115},
year = {2006},
bdsk-url-1 = {https://www.aclweb.org/anthology/W06-0115}}
@inproceedings{tjong-kim-sang-de-meulder-2003-introduction,
author = {Tjong Kim Sang, Erik F. and De Meulder, Fien},
booktitle = {Proceedings of the Seventh Conference on Natural Language Learning at {HLT}-{NAACL} 2003},
date-added = {2020-12-24 23:19:00 -0500},
date-modified = {2020-12-24 23:19:00 -0500},
pages = {142--147},
title = {Introduction to the {C}o{NLL}-2003 Shared Task: Language-Independent Named Entity Recognition},
url = {https://www.aclweb.org/anthology/W03-0419},
year = {2003},
bdsk-url-1 = {https://www.aclweb.org/anthology/W03-0419}}
@inproceedings{koehn2005europarl,
author = {Koehn, Philipp},
booktitle = {MT Summit},
date-added = {2020-12-24 23:06:03 -0500},
date-modified = {2020-12-24 23:06:03 -0500},
organization = {Citeseer},
pages = {79--86},
title = {Europarl: A parallel corpus for statistical machine translation},
volume = {5},
year = {2005}}
@inproceedings{Schweter:Ahmed:2019,
author = {Stefan Schweter and Sajawel Ahmed},
booktitle = {Proceedings of the 15th Conference on Natural Language Processing (KONVENS)},
date-added = {2020-12-24 23:03:23 -0500},
date-modified = {2020-12-24 23:03:23 -0500},
location = {Erlangen, Germany},
note = {accepted},
title = {{Deep-EOS: General-Purpose Neural Networks for Sentence Boundary Detection}},
year = 2019}
@incollection{he2019effective,
author = {He, Han and Wu, Lei and Yan, Hua and Gao, Zhimin and Feng, Yi and Townsend, George},
booktitle = {Smart Intelligent Computing and Applications},
date-added = {2020-12-24 19:35:03 -0500},
date-modified = {2020-12-24 19:35:03 -0500},
pages = {133--142},
publisher = {Springer},
title = {Effective neural solution for multi-criteria word segmentation},
year = {2019}}
@inproceedings{dozat2017stanford,
author = {Dozat, Timothy and Qi, Peng and Manning, Christopher D},
booktitle = {Proceedings of the CoNLL 2017 Shared Task: Multilingual Parsing from Raw Text to Universal Dependencies},
date-added = {2020-12-24 15:02:18 -0500},
date-modified = {2020-12-24 15:02:18 -0500},
pages = {20--30},
title = {{S}tanford's Graph-based Neural Dependency Parser at the {C}o{NLL} 2017 Shared Task},
year = {2017}}
@inproceedings{he-etal-2018-jointly,
abstract = {Recent BIO-tagging-based neural semantic role labeling models are very high performing, but assume gold predicates as part of the input and cannot incorporate span-level features. We propose an end-to-end approach for jointly predicting all predicates, arguments spans, and the relations between them. The model makes independent decisions about what relationship, if any, holds between every possible word-span pair, and learns contextualized span representations that provide rich, shared input features for each decision. Experiments demonstrate that this approach sets a new state of the art on PropBank SRL without gold predicates.},
address = {Melbourne, Australia},
author = {He, Luheng and Lee, Kenton and Levy, Omer and Zettlemoyer, Luke},
booktitle = {Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 2: Short Papers)},
date-added = {2020-12-24 14:23:45 -0500},
date-modified = {2020-12-24 14:23:45 -0500},
doi = {10.18653/v1/P18-2058},
month = jul,
pages = {364--369},
publisher = {Association for Computational Linguistics},
title = {Jointly Predicting Predicates and Arguments in Neural Semantic Role Labeling},
url = {https://www.aclweb.org/anthology/P18-2058},
year = {2018},
bdsk-url-1 = {https://www.aclweb.org/anthology/P18-2058},
bdsk-url-2 = {https://doi.org/10.18653/v1/P18-2058}}
@inproceedings{yu-etal-2020-named,
abstract = {Named Entity Recognition (NER) is a fundamental task in Natural Language Processing, concerned with identifying spans of text expressing references to entities. NER research is often focused on flat entities only (flat NER), ignoring the fact that entity references can be nested, as in [Bank of [China]] (Finkel and Manning, 2009). In this paper, we use ideas from graph-based dependency parsing to provide our model a global view on the input via a biaffine model (Dozat and Manning, 2017). The biaffine model scores pairs of start and end tokens in a sentence which we use to explore all spans, so that the model is able to predict named entities accurately. We show that the model works well for both nested and flat NER through evaluation on 8 corpora and achieving SoTA performance on all of them, with accuracy gains of up to 2.2 percentage points.},
address = {Online},
author = {Yu, Juntao and Bohnet, Bernd and Poesio, Massimo},
booktitle = {Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics},
date-added = {2020-12-24 13:35:09 -0500},
date-modified = {2020-12-24 13:35:09 -0500},
doi = {10.18653/v1/2020.acl-main.577},
month = jul,
pages = {6470--6476},
publisher = {Association for Computational Linguistics},
title = {Named Entity Recognition as Dependency Parsing},
url = {https://www.aclweb.org/anthology/2020.acl-main.577},
year = {2020},
bdsk-url-1 = {https://www.aclweb.org/anthology/2020.acl-main.577},
bdsk-url-2 = {https://doi.org/10.18653/v1/2020.acl-main.577}}
@inproceedings{10.1145/1457838.1457895,
abstract = {Many computer applications require the storage of large amounts of information within the computer's memory where it will be readily available for reference and updating. Quite commonly, more storage space is required than is available in the computer's high-speed working memory. It is, therefore, a common practice to equip computers with magnetic tapes, disks, or drums, or a combination of these to provide additional storage. This additional storage is always slower in operation than the computer's working memory and therefore care must be taken when using it to avoid excessive operating time.},
address = {New York, NY, USA},
author = {De La Briandais, Rene},
booktitle = {Papers Presented at the the March 3-5, 1959, Western Joint Computer Conference},
date-added = {2020-12-24 13:07:31 -0500},
date-modified = {2020-12-24 13:07:31 -0500},
doi = {10.1145/1457838.1457895},
isbn = {9781450378659},
location = {San Francisco, California},
numpages = {4},
pages = {295--298},
publisher = {Association for Computing Machinery},
series = {IRE-AIEE-ACM '59 (Western)},
title = {File Searching Using Variable Length Keys},
url = {https://doi.org/10.1145/1457838.1457895},
year = {1959},
bdsk-url-1 = {https://doi.org/10.1145/1457838.1457895}}
@article{lafferty2001conditional,
author = {Lafferty, John and McCallum, Andrew and Pereira, Fernando C. N.},
date-added = {2020-12-24 11:46:30 -0500},
date-modified = {2020-12-24 12:08:29 -0500},
journal = {Departmental Papers (CIS)},
title = {Conditional random fields: Probabilistic models for segmenting and labeling sequence data},
year = {2001}}
@inproceedings{clark-etal-2019-bam,
abstract = {It can be challenging to train multi-task neural networks that outperform or even match their single-task counterparts. To help address this, we propose using knowledge distillation where single-task models teach a multi-task model. We enhance this training with teacher annealing, a novel method that gradually transitions the model from distillation to supervised learning, helping the multi-task model surpass its single-task teachers. We evaluate our approach by multi-task fine-tuning BERT on the GLUE benchmark. Our method consistently improves over standard single-task and multi-task training.},
address = {Florence, Italy},
author = {Clark, Kevin and Luong, Minh-Thang and Khandelwal, Urvashi and Manning, Christopher D. and Le, Quoc V.},
booktitle = {Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics},
date-added = {2020-12-24 11:26:54 -0500},
date-modified = {2020-12-24 11:26:54 -0500},
doi = {10.18653/v1/P19-1595},
month = jul,
pages = {5931--5937},
publisher = {Association for Computational Linguistics},
title = {{BAM}! Born-Again Multi-Task Networks for Natural Language Understanding},
url = {https://www.aclweb.org/anthology/P19-1595},
year = {2019},
bdsk-url-1 = {https://www.aclweb.org/anthology/P19-1595},
bdsk-url-2 = {https://doi.org/10.18653/v1/P19-1595}}
@inproceedings{kondratyuk-straka-2019-75,
address = {Hong Kong, China},
author = {Kondratyuk, Dan and Straka, Milan},
booktitle = {Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)},
date-added = {2020-12-23 23:51:07 -0500},
date-modified = {2020-12-23 23:51:07 -0500},
pages = {2779--2795},
publisher = {Association for Computational Linguistics},
title = {75 Languages, 1 Model: Parsing Universal Dependencies Universally},
url = {https://www.aclweb.org/anthology/D19-1279},
year = {2019},
bdsk-url-1 = {https://www.aclweb.org/anthology/D19-1279}}
@inproceedings{dozat:17a,
author = {Dozat, Timothy and Manning, Christopher D.},
booktitle = {Proceedings of the 5th International Conference on Learning Representations},
date-added = {2020-12-23 23:46:20 -0500},
date-modified = {2020-12-23 23:46:20 -0500},
series = {ICLR'17},
title = {{Deep Biaffine Attention for Neural Dependency Parsing}},
url = {https://openreview.net/pdf?id=Hk95PK9le},
year = {2017},
bdsk-url-1 = {http://arxiv.org/abs/1611.01734},
bdsk-url-2 = {https://openreview.net/pdf?id=Hk95PK9le}}
@inproceedings{smith-smith-2007-probabilistic,
address = {Prague, Czech Republic},
author = {Smith, David A. and Smith, Noah A.},
booktitle = {Proceedings of the 2007 Joint Conference on Empirical Methods in Natural Language Processing and Computational Natural Language Learning ({EMNLP}-{C}o{NLL})},
date-added = {2020-12-23 21:46:06 -0500},
date-modified = {2020-12-23 21:46:06 -0500},
month = jun,
pages = {132--140},
publisher = {Association for Computational Linguistics},
title = {Probabilistic Models of Nonprojective Dependency Trees},
url = {https://www.aclweb.org/anthology/D07-1014},
year = {2007},
bdsk-url-1 = {https://www.aclweb.org/anthology/D07-1014}}
@inproceedings{ijcai2020-560,
author = {Zhang, Yu and Zhou, Houquan and Li, Zhenghua},
booktitle = {Proceedings of the Twenty-Ninth International Joint Conference on Artificial Intelligence, {IJCAI-20}},
date-added = {2020-12-23 21:36:56 -0500},
date-modified = {2020-12-23 21:36:56 -0500},
doi = {10.24963/ijcai.2020/560},
editor = {Christian Bessiere},
month = {7},
note = {Main track},
pages = {4046--4053},
publisher = {International Joint Conferences on Artificial Intelligence Organization},
title = {Fast and Accurate Neural CRF Constituency Parsing},
url = {https://doi.org/10.24963/ijcai.2020/560},
year = {2020},
bdsk-url-1 = {https://doi.org/10.24963/ijcai.2020/560}}
@inproceedings{buchholz-marsi-2006-conll,
address = {New York City},
author = {Buchholz, Sabine and Marsi, Erwin},
booktitle = {Proceedings of the Tenth Conference on Computational Natural Language Learning ({C}o{NLL}-X)},
date-added = {2020-12-22 22:57:41 -0500},
date-modified = {2020-12-22 22:57:41 -0500},
month = jun,
pages = {149--164},
publisher = {Association for Computational Linguistics},
title = {{C}o{NLL}-{X} Shared Task on Multilingual Dependency Parsing},
url = {https://www.aclweb.org/anthology/W06-2920},
year = {2006},
bdsk-url-1 = {https://www.aclweb.org/anthology/W06-2920}}