% PHYRE physical-reasoning benchmark; NeurIPS proceedings, volume 32 (2019).
% The noopsort wrapper in the author list relies on the macro supplied by this file's preamble entry.
@inproceedings{bakhtinPHYRENewBenchmark2019,
  author    = {Bakhtin, Anton and {\noopsort{maaten}}{van der Maaten}, Laurens and Johnson, Justin and Gustafson, Laura and Girshick, Ross},
  title     = {{{PHYRE}}: {{A New Benchmark}} for {{Physical Reasoning}}},
  booktitle = {Adv. {{Neural Inf}}. {{Process}}. {{Syst}}.},
  year      = {2019},
  volume    = {32},
  url       = {https://proceedings.neurips.cc/paper/2019/hash/4191ef5f6c1576762869ac49281130c9-Abstract.html},
  urldate   = {2022-09-26}
}

% CoPhy: counterfactual learning of physical dynamics; ICLR paper hosted on OpenReview (2020).
@inproceedings{baradelCoPhyCounterfactualLearning2020a,
  author    = {Baradel, Fabien and Neverova, Natalia and Mille, Julien and Mori, Greg and Wolf, Christian},
  title     = {{{CoPhy}}: {{Counterfactual Learning}} of {{Physical Dynamics}}},
  booktitle = {International {{Conference}} on {{Learning Representations}}},
  year      = {2020},
  url       = {https://openreview.net/forum?id=SkeyppEFvS},
  urldate   = {2022-10-25}
}

% Abstract reasoning in neural networks; 35th ICML proceedings on PMLR (2018).
@inproceedings{barrettMeasuringabstractreasoning2018,
  author    = {Barrett, David and Hill, Felix and Santoro, Adam and Morcos, Ari and Lillicrap, Timothy},
  title     = {Measuring Abstract Reasoning in Neural Networks},
  booktitle = {Proc. 35th {{Int}}. {{Conf}}. {{Mach}}. {{Learn}}.},
  year      = {2018},
  pages     = {511--520},
  issn      = {2640-3498},
  url       = {https://proceedings.mlr.press/v80/barrett18a.html},
  urldate   = {2022-10-25}
}

% Scale-localized abstract reasoning; CVPR 2021 paper (IEEE Xplore).
@inproceedings{bennyScaleLocalizedAbstractReasoning2021,
  author    = {Benny, Yaniv and Pekar, Niv and Wolf, Lior},
  title     = {Scale-{{Localized Abstract Reasoning}}},
  booktitle = {2021 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2021},
  pages     = {12552--12560},
  url       = {https://ieeexplore.ieee.org/document/9577474/},
  urldate   = {2022-10-25}
}

% WinoGAViL vision-and-language association benchmark; NeurIPS Datasets and Benchmarks Track (2022).
@inproceedings{bittonWinoGAViLGamifiedAssociation2022,
  author    = {Bitton, Yonatan and Guetta, Nitzan Bitton and Yosef, Ron and Elovici, Yuval and Bansal, Mohit and Stanovsky, Gabriel and Schwartz, Roy},
  title     = {{{WinoGAViL}}: {{Gamified Association Benchmark}} to {{Challenge Vision-and-Language Models}}},
  booktitle = {Thirty-Sixth {{Conference}} on {{Neural Information Processing Systems Datasets}} and {{Benchmarks Track}}},
  year      = {2022},
  url       = {https://openreview.net/forum?id=aJtVdI251Vv},
  urldate   = {2022-10-25}
}

% REX: reasoning-aware and grounded explanation; CVPR 2022 paper (IEEE Xplore).
@inproceedings{chenREXReasoningawareGrounded2022,
  author    = {Chen, Shi and Zhao, Qi},
  title     = {{{REX}}: {{Reasoning-aware}} and {{Grounded Explanation}}},
  booktitle = {2022 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2022},
  pages     = {15565--15574},
  url       = {https://ieeexplore.ieee.org/document/9879365/},
  urldate   = {2022-11-01}
}

% CATER diagnostic dataset for compositional actions and temporal reasoning; ICLR paper on OpenReview (2020).
@inproceedings{girdharCATERdiagnosticdataset2020,
  author    = {Girdhar, Rohit and Ramanan, Deva},
  title     = {{{CATER}}: {{A}} Diagnostic Dataset for {{Compositional Actions}} \& {{TEmporal Reasoning}}},
  booktitle = {International {{Conference}} on {{Learning Representations}}},
  year      = {2020},
  url       = {https://openreview.net/forum?id=HJgzt2VKPB},
  urldate   = {2022-09-26}
}

% Sherlock dataset for visual abductive reasoning; ECCV 2022 paper with arXiv eprint 2202.04800.
@inproceedings{hesselAbductionSherlockHolmes2022,
  author        = {Hessel, Jack and Hwang, Jena D. and Park, Jae Sung and Zellers, Rowan and Bhagavatula, Chandra and Rohrbach, Anna and Saenko, Kate and Choi, Yejin},
  title         = {The {{Abduction}} of {{Sherlock Holmes}}: {{A Dataset}} for {{Visual Abductive Reasoning}}},
  booktitle     = {{{ECCV}} 2022},
  year          = {2022},
  eprint        = {2202.04800},
  eprinttype    = {arxiv},
  archiveprefix = {arXiv},
  url           = {http://arxiv.org/abs/2202.04800},
  urldate       = {2022-02-22},
  keywords      = {Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition}
}

% Transformation driven visual reasoning; CVPR 2021 paper (IEEE Xplore).
@inproceedings{hongTransformationDrivenVisual2021,
  author    = {Hong, Xin and Lan, Yanyan and Pang, Liang and Guo, Jiafeng and Cheng, Xueqi},
  title     = {Transformation {{Driven Visual Reasoning}}},
  booktitle = {2021 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2021},
  pages     = {6899--6908},
  url       = {https://ieeexplore.ieee.org/document/9578722/},
  urldate   = {2022-07-12}
}

% Visual storytelling; NAACL-HLT 2016 proceedings (ACL Anthology N16-1147).
@inproceedings{huangVisualStorytelling2016,
  author    = {Huang, Ting-Hao Kenneth and Ferraro, Francis and Mostafazadeh, Nasrin and Misra, Ishan and Agrawal, Aishwarya and Devlin, Jacob and Girshick, Ross and He, Xiaodong and Kohli, Pushmeet and Batra, Dhruv and Zitnick, C. Lawrence and Parikh, Devi and Vanderwende, Lucy and Galley, Michel and Mitchell, Margaret},
  title     = {Visual {{Storytelling}}},
  booktitle = {Proc. 2016 {{Conf}}. {{North Am}}. {{Chapter Assoc}}. {{Comput}}. {{Linguist}}. {{Hum}}. {{Lang}}. {{Technol}}.},
  year      = {2016},
  pages     = {1233--1239},
  url       = {https://aclanthology.org/N16-1147},
  urldate   = {2022-10-25}
}

% GQA dataset for real-world visual reasoning and compositional question answering; CVPR 2019 paper (IEEE Xplore).
@inproceedings{hudsonGQANewDataset2019,
  author    = {Hudson, Drew A. and Manning, Christopher D.},
  title     = {{{GQA}}: {{A New Dataset}} for {{Real-World Visual Reasoning}} and {{Compositional Question Answering}}},
  booktitle = {2019 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2019},
  pages     = {6693--6702},
  url       = {https://ieeexplore.ieee.org/document/8953451/},
  urldate   = {2022-09-26}
}

% Stratified rule-aware network for abstract visual reasoning; AAAI proceedings, volume 35, issue 2 (2021).
@article{huStratifiedRuleAwareNetwork2021,
  author    = {Hu, Sheng and Ma, Yuqing and Liu, Xianglong and Wei, Yanlu and Bai, Shihao},
  title     = {Stratified {{Rule-Aware Network}} for {{Abstract Visual Reasoning}}},
  journal   = {Proc. AAAI Conf. Artif. Intell.},
  year      = {2021},
  volume    = {35},
  number    = {2},
  pages     = {1567--1574},
  issn      = {2374-3468},
  url       = {https://ojs.aaai.org/index.php/AAAI/article/view/16248},
  urldate   = {2022-10-25},
  copyright = {Copyright (c) 2021 Association for the Advancement of Artificial Intelligence},
  keywords  = {Visual Reasoning \& Symbolic Representations}
}

% Bongard-HOI few-shot visual reasoning benchmark for human-object interactions; CVPR 2022 paper (IEEE Xplore).
@inproceedings{jiangBongardHOIBenchmarkingFewShot2022a,
  author    = {Jiang, Huaizu and Ma, Xiaojian and Nie, Weili and Yu, Zhiding and Zhu, Yuke and Anandkumar, Anima},
  title     = {Bongard-{{HOI}}: {{Benchmarking Few-Shot Visual Reasoning}} for {{Human-Object Interactions}}},
  booktitle = {2022 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2022},
  pages     = {19034--19043},
  url       = {https://ieeexplore.ieee.org/document/9878697/},
  urldate   = {2022-11-06}
}

% Reasoning consistency in compositional visual question answering; CVPR 2022 paper (IEEE Xplore).
@inproceedings{jingMaintainingReasoningConsistency2022,
  author    = {Jing, Chenchen and Jia, Yunde and Wu, Yuwei and Liu, Xinyu and Wu, Qi},
  title     = {Maintaining {{Reasoning Consistency}} in {{Compositional Visual Question Answering}}},
  booktitle = {2022 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2022},
  pages     = {5089--5098},
  url       = {https://ieeexplore.ieee.org/document/9879826/},
  urldate   = {2022-11-01}
}

% CLEVR diagnostic dataset for compositional language and elementary visual reasoning; CVPR 2017 paper (IEEE Xplore).
% The noopsort wrapper in the author list relies on the macro supplied by this file's preamble entry.
@inproceedings{johnsonCLEVRDiagnosticDataset2017,
  author    = {Johnson, Justin and Hariharan, Bharath and {\noopsort{maaten}}{van der Maaten}, Laurens and Fei-Fei, Li and Zitnick, C. Lawrence and Girshick, Ross},
  title     = {{{CLEVR}}: {{A Diagnostic Dataset}} for {{Compositional Language}} and {{Elementary Visual Reasoning}}},
  booktitle = {2017 {{IEEE Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2017},
  pages     = {1988--1997},
  url       = {https://ieeexplore.ieee.org/document/8099698/},
  urldate   = {2021-12-31}
}

% Visual abductive reasoning; CVPR 2022 conference paper (IEEE Xplore) with arXiv eprint 2203.14040.
@inproceedings{liangVisualAbductiveReasoning2022,
  author        = {Liang, Chen and Wang, Wenguan and Zhou, Tianfei and Yang, Yi},
  title         = {Visual {{Abductive Reasoning}}},
  booktitle     = {2022 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year          = {2022},
  pages         = {15544--15554},
  eprint        = {2203.14040},
  eprinttype    = {arxiv},
  archiveprefix = {arXiv},
  primaryclass  = {cs},
  url           = {https://ieeexplore.ieee.org/document/9880226/},
  urldate       = {2022-05-22},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition}
}

% QLEVR diagnostic dataset for quantificational language and visual reasoning; Findings of NAACL 2022 (ACL Anthology).
@inproceedings{liQLEVRDiagnosticDataset2022,
  author    = {Li, Zechen and Søgaard, Anders},
  title     = {{{QLEVR}}: {{A Diagnostic Dataset}} for {{Quantificational Language}} and {{Elementary Visual Reasoning}}},
  booktitle = {Find. {{Assoc}}. {{Comput}}. {{Linguist}}. {{NAACL}} 2022},
  year      = {2022},
  pages     = {980--996},
  url       = {https://aclanthology.org/2022.findings-naacl.73},
  urldate   = {2022-10-25}
}

% Evidence and commonsense reasoning for video question answering; CVPR 2022 paper (IEEE Xplore).
@inproceedings{liRepresentationReasoningboth2022,
  author    = {Li, Jiangtong and Niu, Li and Zhang, Liqing},
  title     = {From {{Representation}} to {{Reasoning}}: {{Towards}} Both {{Evidence}} and {{Commonsense Reasoning}} for {{Video Question-Answering}}},
  booktitle = {2022 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2022},
  pages     = {21241--21250},
  url       = {https://ieeexplore.ieee.org/document/9878800/},
  urldate   = {2022-11-01}
}

% CLEVR-Ref+: diagnosing visual reasoning with referring expressions; CVPR 2019 paper (IEEE Xplore).
@inproceedings{liuCLEVRRefDiagnosingVisual2019,
  author    = {Liu, Runtao and Liu, Chenxi and Bai, Yutong and Yuille, Alan L.},
  title     = {{{CLEVR-Ref}}+: {{Diagnosing Visual Reasoning With Referring Expressions}}},
  booktitle = {2019 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2019},
  pages     = {4180--4189},
  url       = {https://ieeexplore.ieee.org/document/8954348/},
  urldate   = {2022-10-24}
}

% Visual spatial reasoning; arXiv preprint 2205.00363 (2022).
@misc{liuVisualSpatialReasoning2022,
  author        = {Liu, Fangyu and Emerson, Guy and Collier, Nigel},
  title         = {Visual {{Spatial Reasoning}}},
  year          = {2022},
  number        = {arXiv:2205.00363},
  eprint        = {2205.00363},
  eprinttype    = {arxiv},
  archiveprefix = {arXiv},
  primaryclass  = {cs},
  url           = {http://arxiv.org/abs/2205.00363},
  urldate       = {2022-10-25},
  keywords      = {Computer Science - Artificial Intelligence,Computer Science - Computation and Language,Computer Science - Computer Vision and Pattern Recognition}
}

% Bongard-LOGO benchmark for human-level concept learning and reasoning; NeurIPS proceedings, volume 33 (2020).
@inproceedings{nieBongardLOGONewBenchmark2020a,
  author    = {Nie, Weili and Yu, Zhiding and Mao, Lei and Patel, Ankit B. and Zhu, Yuke and Anandkumar, Anima},
  title     = {Bongard-{{LOGO}}: {{A New Benchmark}} for {{Human-Level Concept Learning}} and {{Reasoning}}},
  booktitle = {Adv. {{Neural Inf}}. {{Process}}. {{Syst}}.},
  year      = {2020},
  volume    = {33},
  pages     = {16468--16480},
  url       = {https://proceedings.neurips.cc/paper/2020/hash/bf15e9bbff22c7719020f9df4badc20a-Abstract.html},
  urldate   = {2022-10-25}
}

% Robust change captioning; ICCV 2019 paper (IEEE Xplore).
@inproceedings{parkRobustChangeCaptioning2019b,
  author    = {Park, Dong Huk and Darrell, Trevor and Rohrbach, Anna},
  title     = {Robust {{Change Captioning}}},
  booktitle = {2019 {{IEEE/CVF Int}}. {{Conf}}. {{Comput}}. {{Vis}}. {{ICCV}}},
  year      = {2019},
  pages     = {4623--4632},
  url       = {https://ieeexplore.ieee.org/document/9008523/},
  urldate   = {2022-10-24}
}

% VisualCOMET: reasoning about the dynamic context of a still image; ECCV 2020 proceedings, Part V.
@inproceedings{parkVisualCOMETReasoningDynamic2020c,
  author    = {Park, Jae Sung and Bhagavatula, Chandra and Mottaghi, Roozbeh and Farhadi, Ali and Choi, Yejin},
  title     = {{{VisualCOMET}}: {{Reasoning About}} the {{Dynamic Context}} of a {{Still Image}}},
  booktitle = {Comput. {{Vis}}. – {{ECCV}} 2020 16th {{Eur}}. {{Conf}}. {{Glasg}}. {{UK August}} 23–28 2020 {{Proc}}. {{Part V}}},
  year      = {2020},
  pages     = {508--524},
  url       = {https://doi.org/10.1007/978-3-030-58558-7_30},
  urldate   = {2022-10-18}
}

% IntPhys 2019 benchmark for visual intuitive physics understanding; IEEE TPAMI, volume 44, issue 9 (2022).
@article{riochetIntPhys2019Benchmark2022,
  author   = {Riochet, Ronan and Castro, Mario Ynocente and Bernard, Mathieu and Lerer, Adam and Fergus, Rob and Izard, Véronique and Dupoux, Emmanuel},
  title    = {{{IntPhys}} 2019: {{A Benchmark}} for {{Visual Intuitive Physics Understanding}}},
  journal  = {IEEE Trans. Pattern Anal. Mach. Intell.},
  year     = {2022},
  volume   = {44},
  number   = {9},
  pages    = {5016--5025},
  issn     = {1939-3539},
  keywords = {Benchmark testing,Motion pictures,Physics,Predictive models,Shape,Task analysis,Visualization}
}

% Corpus of natural language for visual reasoning; 55th ACL annual meeting, short papers (ACL Anthology P17-2034).
@inproceedings{suhrCorpusNaturalLanguage2017,
  author    = {Suhr, Alane and Lewis, Mike and Yeh, James and Artzi, Yoav},
  title     = {A {{Corpus}} of {{Natural Language}} for {{Visual Reasoning}}},
  booktitle = {Proc. 55th {{Annu}}. {{Meet}}. {{Assoc}}. {{Comput}}. {{Linguist}}. {{Vol}}. 2 {{Short Pap}}.},
  year      = {2017},
  pages     = {217--223},
  url       = {https://aclanthology.org/P17-2034},
  urldate   = {2022-10-19}
}

% Corpus for reasoning about natural language grounded in photographs; 57th ACL annual meeting (ACL Anthology P19-1644).
@inproceedings{suhrCorpusReasoningNatural2019a,
  author    = {Suhr, Alane and Zhou, Stephanie and Zhang, Ally and Zhang, Iris and Bai, Huajun and Artzi, Yoav},
  title     = {A {{Corpus}} for {{Reasoning}} about {{Natural Language Grounded}} in {{Photographs}}},
  booktitle = {Proc. 57th {{Annu}}. {{Meet}}. {{Assoc}}. {{Comput}}. {{Linguist}}.},
  year      = {2019},
  pages     = {6418--6428},
  url       = {https://aclanthology.org/P19-1644},
  urldate   = {2022-10-19}
}

% V-PROM visual progressive matrices benchmark; AAAI proceedings, volume 34, issue 07 (2020).
% The noopsort wrapper in the author list relies on the macro supplied by this file's preamble entry.
@article{teneyVPROMBenchmarkVisual2020,
  author    = {Teney, Damien and Wang, Peng and Cao, Jiewei and Liu, Lingqiao and Shen, Chunhua and {\noopsort{hengel}}van den Hengel, Anton},
  title     = {V-{{PROM}}: {{A Benchmark}} for {{Visual Reasoning Using Visual Progressive Matrices}}},
  journal   = {Proc. AAAI Conf. Artif. Intell.},
  year      = {2020},
  volume    = {34},
  number    = {07},
  pages     = {12071--12078},
  issn      = {2374-3468},
  url       = {https://ojs.aaai.org/index.php/AAAI/article/view/6885},
  urldate   = {2022-09-26},
  copyright = {Copyright (c) 2020 Association for the Advancement of Artificial Intelligence}
}

% Winoground: probing vision and language models for visio-linguistic compositionality; CVPR 2022 paper (IEEE Xplore).
@inproceedings{thrushWinogroundProbingVision2022a,
  author    = {Thrush, Tristan and Jiang, Ryan and Bartolo, Max and Singh, Amanpreet and Williams, Adina and Kiela, Douwe and Ross, Candace},
  title     = {Winoground: {{Probing Vision}} and {{Language Models}} for {{Visio-Linguistic Compositionality}}},
  booktitle = {2022 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2022},
  pages     = {5228--5238},
  url       = {https://ieeexplore.ieee.org/document/9878945/},
  urldate   = {2022-10-25}
}

% Self-supervised spatial reasoning on multi-view line drawings; CVPR 2022 paper (IEEE Xplore).
@inproceedings{xiangSelfsupervisedSpatialReasoning2022,
  author    = {Xiang, Siyuan and Yang, Anbang and Xue, Yanfei and Yang, Yaoqing and Feng, Chen},
  title     = {Self-Supervised {{Spatial Reasoning}} on {{Multi-View Line Drawings}}},
  booktitle = {2022 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2022},
  pages     = {12735--12744},
  url       = {https://ieeexplore.ieee.org/document/9879170/},
  urldate   = {2022-11-01}
}

% Visual entailment task for fine-grained image understanding; arXiv preprint 1901.06706 (2019).
@misc{xieVisualEntailmentNovel2019,
  author        = {Xie, Ning and Lai, Farley and Doran, Derek and Kadav, Asim},
  title         = {Visual {{Entailment}}: {{A Novel Task}} for {{Fine-Grained Image Understanding}}},
  year          = {2019},
  number        = {arXiv:1901.06706},
  eprint        = {1901.06706},
  eprinttype    = {arxiv},
  archiveprefix = {arXiv},
  primaryclass  = {cs},
  url           = {http://arxiv.org/abs/1901.06706},
  urldate       = {2022-10-25},
  keywords      = {Computer Science - Computer Vision and Pattern Recognition}
}

% CLEVRER: collision events for video representation and reasoning; ICLR paper on OpenReview (2020).
@inproceedings{yiCLEVRERCollisionEvents2020a,
  author    = {Yi, Kexin and Gan, Chuang and Li, Yunzhu and Kohli, Pushmeet and Wu, Jiajun and Torralba, Antonio and Tenenbaum, Joshua B.},
  title     = {{{CLEVRER}}: {{Collision Events}} for {{Video Representation}} and {{Reasoning}}},
  booktitle = {International {{Conference}} on {{Learning Representations}}},
  year      = {2020},
  url       = {https://openreview.net/forum?id=HkxYzANYDB},
  urldate   = {2022-10-18}
}

% From recognition to cognition: visual commonsense reasoning; CVPR 2019 paper (IEEE Xplore).
@inproceedings{zellersRecognitionCognitionVisual2019,
  author    = {Zellers, Rowan and Bisk, Yonatan and Farhadi, Ali and Choi, Yejin},
  title     = {From {{Recognition}} to {{Cognition}}: {{Visual Commonsense Reasoning}}},
  booktitle = {2019 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2019},
  pages     = {6713--6724},
  url       = {https://ieeexplore.ieee.org/document/8953217/},
  urldate   = {2022-09-25}
}

% RAVEN dataset for relational and analogical visual reasoning; CVPR 2019 paper (IEEE Xplore).
@inproceedings{zhangRAVENDatasetRelational2019,
  author    = {Zhang, Chi and Gao, Feng and Jia, Baoxiong and Zhu, Yixin and Zhu, Song-Chun},
  title     = {{{RAVEN}}: {{A Dataset}} for {{Relational}} and {{Analogical Visual REasoNing}}},
  booktitle = {2019 {{IEEE/CVF Conf}}. {{Comput}}. {{Vis}}. {{Pattern Recognit}}. {{CVPR}}},
  year      = {2019},
  pages     = {5312--5322},
  url       = {https://ieeexplore.ieee.org/document/8953364/},
  urldate   = {2022-09-26}
}

% VideoABC real-world video dataset for abductive visual reasoning; IEEE Trans. Image Process., volume 31 (2022).
@article{zhaoVideoABCRealWorldVideo2022a,
  author   = {Zhao, Wenliang and Rao, Yongming and Tang, Yansong and Zhou, Jie and Lu, Jiwen},
  title    = {{{VideoABC}}: {{A Real-World Video Dataset}} for {{Abductive Visual Reasoning}}},
  journal  = {IEEE Trans. Image Process.},
  year     = {2022},
  volume   = {31},
  pages    = {6048--6061},
  issn     = {1941-0042},
  url      = {https://ieeexplore.ieee.org/abstract/document/9893026},
  keywords = {abductive reasoning,Benchmark testing,Cognition,Convolutional neural networks,instruction video,Machine vision,Question answering (information retrieval),Task analysis,video understanding,Visual reasoning,Visualization}
}

% Preamble: supplies a fallback definition of the noopsort macro, a one-argument command
% that expands to nothing, so the argument wrapped in it is never typeset.
% providecommand leaves any definition the document already has untouched.
% Author fields in this file (the van der Maaten and van den Hengel names) use this macro.
@preamble{ "\providecommand{\noopsort}[1]{} " }