-
Notifications
You must be signed in to change notification settings - Fork 449
/
Copy pathNMT_model_architecture.bib
337 lines (337 loc) · 10.3 KB
/
NMT_model_architecture.bib
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
@inproceedings{D13-1176,
  author    = {Kalchbrenner, Nal and Blunsom, Phil},
  title     = {Recurrent Continuous Translation Models},
  booktitle = {Proceedings of EMNLP},
  year      = {2013},
}
@inproceedings{seq2seq,
  author    = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V.},
  title     = {Sequence to Sequence Learning with Neural Networks},
  booktitle = {Proceedings of NIPS},
  year      = {2014},
}
@inproceedings{rnnsearch,
  author    = {Bahdanau, Dzmitry and Cho, KyungHyun and Bengio, Yoshua},
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  booktitle = {Proceedings of ICLR},
  year      = {2015},
}
% GNMT was released as an arXiv preprint (1609.08144); it is not part of the NIPS proceedings.
@misc{gnmt,
  author        = {Wu, Yonghui and
                   Schuster, Mike and
                   Chen, Zhifeng and
                   Le, Quoc V. and
                   Norouzi, Mohammad and
                   Macherey, Wolfgang and
                   Krikun, Maxim and
                   Cao, Yuan and
                   Gao, Qin and
                   Macherey, Klaus and
                   Klingner, Jeff and
                   Shah, Apurva and
                   Johnson, Melvin and
                   Liu, Xiaobing and
                   Kaiser, {\L}ukasz and
                   Gouws, Stephan and
                   Kato, Yoshikiyo and
                   Kudo, Taku and
                   Kazawa, Hideto and
                   Stevens, Keith and
                   Kurian, George and
                   Patil, Nishant and
                   Wang, Wei and
                   Young, Cliff and
                   Smith, Jason and
                   Riesa, Jason and
                   Rudnick, Alex and
                   Vinyals, Oriol and
                   Corrado, Greg and
                   Hughes, Macduff and
                   Dean, Jeffrey},
  title         = {{Google}'s Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  year          = {2016},
  eprint        = {1609.08144},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/1609.08144},
}
@article{Q16-1027,
  author  = {Zhou, Jie and Cao, Ying and Wang, Xuguang and Li, Peng and Xu, Wei},
  title   = {Deep Recurrent Models with Fast-Forward Connections for Neural Machine Translation},
  journal = {Transactions of the Association for Computational Linguistics},
  year    = {2016},
  volume  = {4},
  pages   = {371--383},
}
@inproceedings{P16-1154,
  author    = {Gu, Jiatao and Lu, Zhengdong and Li, Hang and Li, Victor O. K.},
  title     = {Incorporating Copying Mechanism in Sequence-to-Sequence Learning},
  booktitle = {Proceedings of ACL},
  year      = {2016},
}
@inproceedings{D16-1050,
  author    = {Zhang, Biao and Xiong, Deyi and Su, Jinsong and Duan, Hong and Zhang, Min},
  title     = {Variational Neural Machine Translation},
  booktitle = {Proceedings of EMNLP},
  year      = {2016},
}
@inproceedings{convs2s,
  author    = {Gehring, Jonas and Auli, Michael and Grangier, David and Yarats, Denis and Dauphin, Yann N.},
  title     = {Convolutional Sequence to Sequence Learning},
  booktitle = {Proceedings of ICML},
  year      = {2017},
}
@inproceedings{P17-1012,
  author    = {Gehring, Jonas and Auli, Michael and Grangier, David and Dauphin, Yann},
  title     = {A Convolutional Encoder Model for Neural Machine Translation},
  booktitle = {Proceedings of ACL},
  year      = {2017},
}
@inproceedings{P17-1013,
  author    = {Wang, Mingxuan and Lu, Zhengdong and Zhou, Jie and Liu, Qun},
  title     = {Deep Neural Machine Translation with Linear Associative Unit},
  booktitle = {Proceedings of ACL},
  year      = {2017},
}
@inproceedings{D17-1145,
  author    = {Sperber, Matthias and Neubig, Graham and Niehues, Jan and Waibel, Alex},
  title     = {Neural Lattice-to-Sequence Models for Uncertain Inputs},
  booktitle = {Proceedings of EMNLP},
  year      = {2017},
}
@inproceedings{D17-1151,
  author    = {Britz, Denny and Goldie, Anna and Luong, Minh-Thang and Le, Quoc},
  title     = {Massive Exploration of Neural Machine Translation Architectures},
  booktitle = {Proceedings of EMNLP},
  year      = {2017},
}
@inproceedings{transformer,
  author    = {Vaswani, Ashish and Shazeer, Noam and Parmar, Niki and Uszkoreit, Jakob and Jones, Llion and Gomez, Aidan N. and Kaiser, {\L}ukasz and Polosukhin, Illia},
  title     = {Attention Is All You Need},
  booktitle = {Proceedings of NIPS},
  year      = {2017},
}
@inproceedings{tu2017neural,
  author    = {Tu, Zhaopeng and Liu, Yang and Shang, Lifeng and Liu, Xiaohua and Li, Hang},
  title     = {Neural Machine Translation with Reconstruction},
  booktitle = {Proceedings of AAAI},
  year      = {2017},
}
@inproceedings{kaiser2017depthwise,
  author    = {Kaiser, {\L}ukasz and Gomez, Aidan N. and Chollet, Fran{\c{c}}ois},
  title     = {Depthwise Separable Convolutions for Neural Machine Translation},
  booktitle = {Proceedings of ICLR},
  year      = {2018},
}
@inproceedings{kaiser2018fast,
  author    = {Kaiser, {\L}ukasz and Roy, Aurko and Vaswani, Ashish and Parmar, Niki and Bengio, Samy and Uszkoreit, Jakob and Shazeer, Noam},
  title     = {Fast Decoding in Sequence Models Using Discrete Latent Variables},
  booktitle = {Proceedings of ICML},
  year      = {2018},
}
@inproceedings{N18-1117,
  author    = {Shen, Yanyao and Tan, Xu and He, Di and Qin, Tao and Liu, Tie-Yan},
  title     = {Dense Information Flow for Neural Machine Translation},
  booktitle = {Proceedings of NAACL},
  year      = {2018},
}
@inproceedings{N18-1154,
  author    = {Chen, Wenhu and Li, Guanlin and Ren, Shuo and Liu, Shujie and Zhang, Zhirui and Li, Mu and Zhou, Ming},
  title     = {Generative Bridging Network for Neural Sequence Prediction},
  booktitle = {Proceedings of NAACL},
  year      = {2018},
}
@inproceedings{P18-1008,
  author    = {Chen, Mia Xu and
               Firat, Orhan and
               Bapna, Ankur and
               Johnson, Melvin and
               Macherey, Wolfgang and
               Foster, George and
               Jones, Llion and
               Schuster, Mike and
               Shazeer, Noam and
               Parmar, Niki and
               Vaswani, Ashish and
               Uszkoreit, Jakob and
               Kaiser, {\L}ukasz and
               Chen, Zhifeng and
               Wu, Yonghui and
               Hughes, Macduff},
  title     = {The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation},
  booktitle = {Proceedings of ACL},
  year      = {2018},
}
@inproceedings{P18-2060,
  author    = {Wang, Weiyue and Zhu, Derui and Alkhouli, Tamer and Gan, Zixuan and Ney, Hermann},
  title     = {Neural Hidden Markov Model for Machine Translation},
  booktitle = {Proceedings of ACL},
  year      = {2018},
}
@inproceedings{C18-1232,
  author    = {Gong, Jingjing and Qiu, Xipeng and Wang, Shaojing and Huang, Xuanjing},
  title     = {Information Aggregation via Dynamic Routing for Sequence Encoding},
  booktitle = {Proceedings of COLING},
  year      = {2018},
}
@inproceedings{C18-1255,
  author    = {Wang, Qiang and Li, Fuxue and Xiao, Tong and Li, Yanyang and Li, Yinqiao and Zhu, Jingbo},
  title     = {Multi-layer Representation Fusion for Neural Machine Translation},
  booktitle = {Proceedings of COLING},
  year      = {2018},
}
@inproceedings{C18-1257,
  author    = {Li, Yachao and Li, Junhui and Zhang, Min},
  title     = {Adaptive Weighting for Neural Machine Translation},
  booktitle = {Proceedings of COLING},
  year      = {2018},
}
@inproceedings{C18-1259,
  author    = {Song, Kaitao and Tan, Xu and He, Di and Lu, Jianfeng and Qin, Tao and Liu, Tie-Yan},
  title     = {Double Path Networks for Sequence to Sequence Learning},
  booktitle = {Proceedings of COLING},
  year      = {2018},
}
@inproceedings{D18-1457,
  author    = {Dou, Zi-Yi and Tu, Zhaopeng and Wang, Xing and Shi, Shuming and Zhang, Tong},
  title     = {Exploiting Deep Representations for Neural Machine Translation},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
@inproceedings{D18-1459,
  author    = {Zhang, Biao and Xiong, Deyi and Su, Jinsong and Lin, Qian and Zhang, Huiji},
  title     = {Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated Recurrent Networks},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
@inproceedings{D18-1458,
  author    = {Tang, Gongbo and M{\"u}ller, Mathias and Rios, Annette and Sennrich, Rico},
  title     = {Why Self-Attention? A Targeted Evaluation of Neural Machine Translation Architectures},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
@inproceedings{D18-1503,
  author    = {Tran, Ke and Bisazza, Arianna and Monz, Christof},
  title     = {The Importance of Being Recurrent for Modeling Hierarchical Structure},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
@inproceedings{D18-1335,
  author    = {Bahar, Parnia and Brix, Christopher and Ney, Hermann},
  title     = {Towards Two-Dimensional Sequence to Sequence Model in Neural Machine Translation},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
@inproceedings{he2018layer,
  author    = {He, Tianyu and Tan, Xu and Xia, Yingce and He, Di and Qin, Tao and Chen, Zhibo and Liu, Tie-Yan},
  title     = {Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation},
  booktitle = {Proceedings of NeurIPS},
  year      = {2018},
}
% Technical report: the issuing body belongs in `institution`, and `year` must be a plain four-digit year.
@techreport{hassan2018achieving,
  author      = {Hassan, Hany and Aue, Anthony and Chen, Chang and Chowdhary, Vishal and Clark, Jonathan and Federmann, Christian and Huang, Xuedong and Junczys-Dowmunt, Marcin and Lewis, William and Li, Mu and others},
  title       = {Achieving Human Parity on Automatic {Chinese} to {English} News Translation},
  institution = {Microsoft AI \& Research},
  year        = {2018},
}
@inproceedings{dehghani2018universal,
  author    = {Dehghani, Mostafa and Gouws, Stephan and Vinyals, Oriol and Uszkoreit, Jakob and Kaiser, {\L}ukasz},
  title     = {Universal Transformers},
  booktitle = {Proceedings of ICLR},
  year      = {2019},
}
@inproceedings{wu2019pay,
  author    = {Wu, Felix and Fan, Angela and Baevski, Alexei and Dauphin, Yann N. and Auli, Michael},
  title     = {Pay Less Attention with Lightweight and Dynamic Convolutions},
  booktitle = {Proceedings of ICLR},
  year      = {2019},
}
@inproceedings{dou2019dynamic,
  author    = {Dou, Zi-Yi and Tu, Zhaopeng and Wang, Xing and Wang, Longyue and Shi, Shuming and Zhang, Tong},
  title     = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement},
  booktitle = {Proceedings of AAAI},
  year      = {2019},
}
@inproceedings{wang-2019-towards,
  author    = {Wang, Mingxuan},
  title     = {Towards Linear Time Neural Machine Translation with Capsule Networks},
  booktitle = {Proceedings of EMNLP},
  publisher = {Association for Computational Linguistics},
  year      = {2019},
}
@inproceedings{zhang-etal-2019-improving,
  author    = {Zhang, Biao and Titov, Ivan and Sennrich, Rico},
  title     = {Improving Deep Transformer with Depth-Scaled Initialization and Merged Attention},
  booktitle = {Proceedings of EMNLP},
  year      = {2019},
}