bib_citations/NMT_model_architecture.bib

% Kalchbrenner and Blunsom, EMNLP 2013.
@inproceedings{D13-1176,
  author    = {Kalchbrenner, Nal and Blunsom, Phil},
  title     = {Recurrent Continuous Translation Models},
  booktitle = {Proceedings of EMNLP},
  year      = {2013},
}
% Sutskever, Vinyals and Le, NIPS 2014.
% Venue abbreviated and title capitalised to match the other entries in this file.
@inproceedings{seq2seq,
  author    = {Sutskever, Ilya and Vinyals, Oriol and Le, Quoc V.},
  title     = {Sequence to Sequence Learning with Neural Networks},
  booktitle = {Proceedings of NIPS},
  year      = {2014},
}
% Bahdanau, Cho and Bengio, ICLR 2015.
@inproceedings{rnnsearch,
  author    = {Bahdanau, Dzmitry
               and Cho, KyungHyun
               and Bengio, Yoshua},
  title     = {Neural Machine Translation by Jointly Learning to Align and Translate},
  booktitle = {Proceedings of ICLR},
  year      = {2015},
}
% Wu et al., 2016 (GNMT). This report was released on arXiv only; it did not
% appear in the NIPS proceedings, so it is recorded as a preprint.
@misc{gnmt,
  author        = {Wu, Yonghui
                   and Schuster, Mike
                   and Chen, Zhifeng
                   and Le, Quoc V.
                   and Norouzi, Mohammad
                   and Macherey, Wolfgang
                   and Krikun, Maxim
                   and Cao, Yuan
                   and Gao, Qin
                   and Macherey, Klaus
                   and Klingner, Jeff
                   and Shah, Apurva
                   and Johnson, Melvin
                   and Liu, Xiaobing
                   and Kaiser, {\L}ukasz
                   and Gouws, Stephan
                   and Kato, Yoshikiyo
                   and Kudo, Taku
                   and Kazawa, Hideto
                   and Stevens, Keith
                   and Kurian, George
                   and Patil, Nishant
                   and Wang, Wei
                   and Young, Cliff
                   and Smith, Jason
                   and Riesa, Jason
                   and Rudnick, Alex
                   and Vinyals, Oriol
                   and Corrado, Greg
                   and Hughes, Macduff
                   and Dean, Jeffrey},
  title         = {{Google's} Neural Machine Translation System: Bridging the Gap between Human and Machine Translation},
  year          = {2016},
  eprint        = {1609.08144},
  archiveprefix = {arXiv},
  primaryclass  = {cs.CL},
  url           = {https://arxiv.org/abs/1609.08144},
}
% Zhou et al., TACL volume 4 (2016).
@article{Q16-1027,
  author  = {Zhou, Jie and Cao, Ying and Wang, Xuguang and Li, Peng and Xu, Wei},
  title   = {Deep Recurrent Models with Fast-Forward Connections for Neural Machine Translation},
  journal = {Transactions of the Association for Computational Linguistics},
  volume  = {4},
  pages   = {371--383},
  year    = {2016},
}
% Gu et al., ACL 2016.
@inproceedings{P16-1154,
  author    = {Gu, Jiatao and Lu, Zhengdong and Li, Hang and Li, Victor O. K.},
  title     = {Incorporating Copying Mechanism in Sequence-to-Sequence Learning},
  booktitle = {Proceedings of ACL},
  year      = {2016},
}
% Zhang et al., EMNLP 2016.
@inproceedings{D16-1050,
  author    = {Zhang, Biao and Xiong, Deyi and Su, Jinsong and Duan, Hong and Zhang, Min},
  title     = {Variational Neural Machine Translation},
  booktitle = {Proceedings of EMNLP},
  year      = {2016},
}
% Gehring et al., ICML 2017.
@inproceedings{convs2s,
  author    = {Gehring, Jonas
               and Auli, Michael
               and Grangier, David
               and Yarats, Denis
               and Dauphin, Yann N.},
  title     = {Convolutional Sequence to Sequence Learning},
  booktitle = {Proceedings of ICML},
  year      = {2017},
}
% Gehring et al., ACL 2017.
@inproceedings{P17-1012,
  author    = {Gehring, Jonas and Auli, Michael and Grangier, David and Dauphin, Yann},
  title     = {A Convolutional Encoder Model for Neural Machine Translation},
  booktitle = {Proceedings of ACL},
  year      = {2017},
}
% Wang et al., ACL 2017.
@inproceedings{P17-1013,
  author    = {Wang, Mingxuan and Lu, Zhengdong and Zhou, Jie and Liu, Qun},
  title     = {Deep Neural Machine Translation with Linear Associative Unit},
  booktitle = {Proceedings of ACL},
  year      = {2017},
}
% Sperber et al., EMNLP 2017.
@inproceedings{D17-1145,
  author    = {Sperber, Matthias and Neubig, Graham and Niehues, Jan and Waibel, Alex},
  title     = {Neural Lattice-to-Sequence Models for Uncertain Inputs},
  booktitle = {Proceedings of EMNLP},
  year      = {2017},
}
% Britz et al., EMNLP 2017.
@inproceedings{D17-1151,
  author    = {Britz, Denny and Goldie, Anna and Luong, Minh-Thang and Le, Quoc},
  title     = {Massive Exploration of Neural Machine Translation Architectures},
  booktitle = {Proceedings of EMNLP},
  year      = {2017},
}
% Vaswani et al., NIPS 2017 (the Transformer).
@inproceedings{transformer,
  author    = {Vaswani, Ashish
               and Shazeer, Noam
               and Parmar, Niki
               and Uszkoreit, Jakob
               and Jones, Llion
               and Gomez, Aidan N.
               and Kaiser, {\L}ukasz
               and Polosukhin, Illia},
  title     = {Attention Is All You Need},
  booktitle = {Proceedings of NIPS},
  year      = {2017},
}
% Tu et al., AAAI 2017.
% Venue abbreviated and title capitalised to match the other entries in this file.
@inproceedings{tu2017neural,
  author    = {Tu, Zhaopeng and Liu, Yang and Shang, Lifeng and Liu, Xiaohua and Li, Hang},
  title     = {Neural Machine Translation with Reconstruction},
  booktitle = {Proceedings of AAAI},
  year      = {2017},
}
% Kaiser, Gomez and Chollet, ICLR 2018 (the citation key keeps its original 2017 stamp).
@inproceedings{kaiser2017depthwise,
  author    = {Kaiser, {\L}ukasz and Gomez, Aidan N. and Chollet, Fran{\c{c}}ois},
  title     = {Depthwise Separable Convolutions for Neural Machine Translation},
  booktitle = {Proceedings of ICLR},
  year      = {2018},
}
% Kaiser et al., ICML 2018.
@inproceedings{kaiser2018fast,
  author    = {Kaiser, {\L}ukasz
               and Roy, Aurko
               and Vaswani, Ashish
               and Parmar, Niki
               and Bengio, Samy
               and Uszkoreit, Jakob
               and Shazeer, Noam},
  title     = {Fast Decoding in Sequence Models Using Discrete Latent Variables},
  booktitle = {Proceedings of ICML},
  year      = {2018},
}
% Shen et al., NAACL 2018.
@inproceedings{N18-1117,
  author    = {Shen, Yanyao and Tan, Xu and He, Di and Qin, Tao and Liu, Tie-Yan},
  title     = {Dense Information Flow for Neural Machine Translation},
  booktitle = {Proceedings of NAACL},
  year      = {2018},
}
% Chen et al., NAACL 2018 (Anthology ID N18-1154 is a NAACL paper, not ACL).
@inproceedings{N18-1154,
  author    = {Chen, Wenhu
               and Li, Guanlin
               and Ren, Shuo
               and Liu, Shujie
               and Zhang, Zhirui
               and Li, Mu
               and Zhou, Ming},
  title     = {Generative Bridging Network for Neural Sequence Prediction},
  booktitle = {Proceedings of NAACL},
  year      = {2018},
}
% Chen et al., ACL 2018.
@inproceedings{P18-1008,
  author    = {Chen, Mia Xu
               and Firat, Orhan
               and Bapna, Ankur
               and Johnson, Melvin
               and Macherey, Wolfgang
               and Foster, George
               and Jones, Llion
               and Schuster, Mike
               and Shazeer, Noam
               and Parmar, Niki
               and Vaswani, Ashish
               and Uszkoreit, Jakob
               and Kaiser, {\L}ukasz
               and Chen, Zhifeng
               and Wu, Yonghui
               and Hughes, Macduff},
  title     = {The Best of Both Worlds: Combining Recent Advances in Neural Machine Translation},
  booktitle = {Proceedings of ACL},
  year      = {2018},
}
% Wang et al., ACL 2018.
@inproceedings{P18-2060,
  author    = {Wang, Weiyue and Zhu, Derui and Alkhouli, Tamer and Gan, Zixuan and Ney, Hermann},
  title     = {Neural Hidden Markov Model for Machine Translation},
  booktitle = {Proceedings of ACL},
  year      = {2018},
}
% Gong et al., COLING 2018.
@inproceedings{C18-1232,
  author    = {Gong, Jingjing and Qiu, Xipeng and Wang, Shaojing and Huang, Xuanjing},
  title     = {Information Aggregation via Dynamic Routing for Sequence Encoding},
  booktitle = {Proceedings of COLING},
  year      = {2018},
}
% Wang et al., COLING 2018.
@inproceedings{C18-1255,
  author    = {Wang, Qiang and Li, Fuxue and Xiao, Tong and Li, Yanyang and Li, Yinqiao and Zhu, Jingbo},
  title     = {Multi-layer Representation Fusion for Neural Machine Translation},
  booktitle = {Proceedings of COLING},
  year      = {2018},
}
% Li, Li and Zhang, COLING 2018.
@inproceedings{C18-1257,
  author    = {Li, Yachao and Li, Junhui and Zhang, Min},
  title     = {Adaptive Weighting for Neural Machine Translation},
  booktitle = {Proceedings of COLING},
  year      = {2018},
}
% Song et al., COLING 2018.
@inproceedings{C18-1259,
  author    = {Song, Kaitao and Tan, Xu and He, Di and Lu, Jianfeng and Qin, Tao and Liu, Tie-Yan},
  title     = {Double Path Networks for Sequence to Sequence Learning},
  booktitle = {Proceedings of COLING},
  year      = {2018},
}
% Dou et al., EMNLP 2018.
@inproceedings{D18-1457,
  author    = {Dou, Zi-Yi and Tu, Zhaopeng and Wang, Xing and Shi, Shuming and Zhang, Tong},
  title     = {Exploiting Deep Representations for Neural Machine Translation},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
% Zhang et al., EMNLP 2018.
@inproceedings{D18-1459,
  author    = {Zhang, Biao and Xiong, Deyi and Su, Jinsong and Lin, Qian and Zhang, Huiji},
  title     = {Simplifying Neural Machine Translation with Addition-Subtraction Twin-Gated Recurrent Networks},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
% Tang et al., EMNLP 2018.
@inproceedings{D18-1458,
  author    = {Tang, Gongbo and M{\"u}ller, Mathias and Rios, Annette and Sennrich, Rico},
  title     = {Why Self-Attention? A Targeted Evaluation of Neural Machine Translation Architectures},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
% Tran, Bisazza and Monz, EMNLP 2018.
@inproceedings{D18-1503,
  author    = {Tran, Ke and Bisazza, Arianna and Monz, Christof},
  title     = {The Importance of Being Recurrent for Modeling Hierarchical Structure},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
% Bahar, Brix and Ney, EMNLP 2018.
@inproceedings{D18-1335,
  author    = {Bahar, Parnia and Brix, Christopher and Ney, Hermann},
  title     = {Towards Two-Dimensional Sequence to Sequence Model in Neural Machine Translation},
  booktitle = {Proceedings of EMNLP},
  year      = {2018},
}
% He et al., NeurIPS 2018.
% Title capitalised to match the other entries in this file.
@inproceedings{he2018layer,
  author    = {He, Tianyu
               and Tan, Xu
               and Xia, Yingce
               and He, Di
               and Qin, Tao
               and Chen, Zhibo
               and Liu, Tie-Yan},
  title     = {Layer-Wise Coordination between Encoder and Decoder for Neural Machine Translation},
  booktitle = {Proceedings of NeurIPS},
  year      = {2018},
}
% Hassan et al., 2018: Microsoft technical report.
% The report description belongs in the entry type and institution field,
% not in the year field, which must hold a four-digit year.
@techreport{hassan2018achieving,
  author      = {Hassan, Hany
                 and Aue, Anthony
                 and Chen, Chang
                 and Chowdhary, Vishal
                 and Clark, Jonathan
                 and Federmann, Christian
                 and Huang, Xuedong
                 and Junczys-Dowmunt, Marcin
                 and Lewis, William
                 and Li, Mu
                 and others},
  title       = {Achieving Human Parity on Automatic {Chinese} to {English} News Translation},
  institution = {Microsoft AI \& Research},
  year        = {2018},
}
% Dehghani et al., ICLR 2019.
@inproceedings{dehghani2018universal,
  author    = {Dehghani, Mostafa
               and Gouws, Stephan
               and Vinyals, Oriol
               and Uszkoreit, Jakob
               and Kaiser, {\L}ukasz},
  title     = {Universal Transformers},
  booktitle = {Proceedings of ICLR},
  year      = {2019},
}
% Wu et al., ICLR 2019.
@inproceedings{wu2019pay,
  author    = {Wu, Felix
               and Fan, Angela
               and Baevski, Alexei
               and Dauphin, Yann N.
               and Auli, Michael},
  title     = {Pay Less Attention with Lightweight and Dynamic Convolutions},
  booktitle = {Proceedings of ICLR},
  year      = {2019},
}
% Dou et al., AAAI 2019.
@inproceedings{dou2019dynamic,
  author    = {Dou, Zi-Yi
               and Tu, Zhaopeng
               and Wang, Xing
               and Wang, Longyue
               and Shi, Shuming
               and Zhang, Tong},
  title     = {Dynamic Layer Aggregation for Neural Machine Translation with Routing-by-Agreement},
  booktitle = {Proceedings of AAAI},
  year      = {2019},
}
% Wang, EMNLP 2019. The year is taken from the citation key.
@inproceedings{wang-2019-towards,
  author    = {Wang, Mingxuan},
  title     = {Towards Linear Time Neural Machine Translation with Capsule Networks},
  booktitle = {Proceedings of EMNLP},
  publisher = {Association for Computational Linguistics},
  year      = {2019},
}
% Zhang, Titov and Sennrich, EMNLP 2019.
@inproceedings{zhang-etal-2019-improving,
  author    = {Zhang, Biao and Titov, Ivan and Sennrich, Rico},
  title     = {Improving Deep Transformer with Depth-Scaled Initialization and Merged Attention},
  booktitle = {Proceedings of EMNLP},
  year      = {2019},
}