Last active
March 4, 2022 15:27
-
-
Save mjpost/bc281ceaa87a457eae04cb869cbcb5e5 to your computer and use it in GitHub Desktop.
XML file submitted to DOI for EMNLP 2020 main conference papers
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version='1.0' encoding='UTF-8'?> | |
<doi_batch xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="http://www.crossref.org/schema/4.4.1" xsi:schemaLocation="http://www.crossref.org/schema/4.4.1 http://www.crossref.org/schema/deposit/crossref4.4.1.xsd" version="4.4.1"> | |
<head> | |
<doi_batch_id>1646395517</doi_batch_id> | |
<timestamp>1646395517</timestamp> | |
<depositor> | |
<depositor_name>Matt Post</depositor_name> | |
<email_address>[email protected]</email_address> | |
</depositor> | |
<registrant>Association for Computational Linguistics</registrant> | |
</head> | |
<body> | |
<conference> | |
<contributors> | |
<person_name contributor_role="chair" sequence="first"> | |
<given_name>Bonnie</given_name> | |
<surname>Webber</surname> | |
</person_name> | |
<person_name contributor_role="chair" sequence="additional"> | |
<given_name>Trevor</given_name> | |
<surname>Cohn</surname> | |
</person_name> | |
<person_name contributor_role="chair" sequence="additional"> | |
<given_name>Yulan</given_name> | |
<surname>He</surname> | |
</person_name> | |
<person_name contributor_role="chair" sequence="additional"> | |
<given_name>Yang</given_name> | |
<surname>Liu</surname> | |
</person_name> | |
</contributors> | |
<event_metadata> | |
<conference_name>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</conference_name> | |
<conference_location>Online</conference_location> | |
<conference_date start_year="2020" end_year="2020" start_month="11" end_month="11"/> | |
</event_metadata> | |
<proceedings_metadata language="en"> | |
<proceedings_title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</proceedings_title> | |
<publisher> | |
<publisher_name>Association for Computational Linguistics</publisher_name> | |
<publisher_place>Stroudsburg, PA, USA</publisher_place> | |
</publisher> | |
<publication_date> | |
<year>2020</year> | |
</publication_date> | |
<noisbn reason="simple_series"/> | |
<doi_data> | |
<doi>10.18653/v1/2020.emnlp-main</doi> | |
<resource>https://aclanthology.org/2020.emnlp-main</resource> | |
</doi_data> | |
</proceedings_metadata> | |
<conference_paper> | |
<contributors> | |
<person_name contributor_role="author" sequence="first"> | |
<given_name>Yohan</given_name> | |
<surname>Jo</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Seojin</given_name> | |
<surname>Bang</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Emaad</given_name> | |
<surname>Manzoor</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Eduard</given_name> | |
<surname>Hovy</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Chris</given_name> | |
<surname>Reed</surname> | |
</person_name> | |
</contributors> | |
<titles> | |
<title>Detecting Attackable Sentences in Arguments</title> | |
</titles> | |
<publication_date> | |
<year>2020</year> | |
</publication_date> | |
<pages> | |
<first_page>1</first_page> | |
<last_page>23</last_page> | |
</pages> | |
<doi_data> | |
<doi>10.18653/v1/2020.emnlp-main.1</doi> | |
<resource>https://aclanthology.org/2020.emnlp-main.1</resource> | |
</doi_data> | |
</conference_paper> | |
<conference_paper> | |
<contributors> | |
<person_name contributor_role="author" sequence="first"> | |
<given_name>Yohan</given_name> | |
<surname>Jo</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Jacky</given_name> | |
<surname>Visser</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Chris</given_name> | |
<surname>Reed</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Eduard</given_name> | |
<surname>Hovy</surname> | |
</person_name> | |
</contributors> | |
<titles> | |
<title>Extracting Implicitly Asserted Propositions in Argumentation</title> | |
</titles> | |
<publication_date> | |
<year>2020</year> | |
</publication_date> | |
<pages> | |
<first_page>24</first_page> | |
<last_page>38</last_page> | |
</pages> | |
<doi_data> | |
<doi>10.18653/v1/2020.emnlp-main.2</doi> | |
<resource>https://aclanthology.org/2020.emnlp-main.2</resource> | |
</doi_data> | |
</conference_paper> | |
<conference_paper> | |
<contributors> | |
<person_name contributor_role="author" sequence="first"> | |
<given_name>Roy</given_name> | |
<surname>Bar-Haim</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Yoav</given_name> | |
<surname>Kantor</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Lilach</given_name> | |
<surname>Eden</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Roni</given_name> | |
<surname>Friedman</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Dan</given_name> | |
<surname>Lahav</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Noam</given_name> | |
<surname>Slonim</surname> | |
</person_name> | |
</contributors> | |
<titles> | |
<title>Quantitative argument summarization and beyond: Cross-domain key point analysis</title> | |
</titles> | |
<publication_date> | |
<year>2020</year> | |
</publication_date> | |
<pages> | |
<first_page>39</first_page> | |
<last_page>49</last_page> | |
</pages> | |
<doi_data> | |
<doi>10.18653/v1/2020.emnlp-main.3</doi> | |
<resource>https://aclanthology.org/2020.emnlp-main.3</resource> | |
</doi_data> | |
</conference_paper> | |
<conference_paper> | |
<contributors> | |
<person_name contributor_role="author" sequence="first"> | |
<given_name>Jonathan</given_name> | |
<surname>Kobbe</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Ioana</given_name> | |
<surname>Hulpuș</surname> | |
</person_name> | |
<person_name contributor_role="author" sequence="additional"> | |
<given_name>Heiner</given_name> | |
<surname>Stuckenschmidt</surname> | |
</person_name> | |
</contributors> | |
<titles> | |
<title>Unsupervised stance detection for arguments from consequences</title> | |
</titles> | |
<publication_date> | |
<year>2020</year> | |
</publication_date> | |
<pages> | |
<first_page>50</first_page> | |
<last_page>60</last_page> | |
</pages> | |
<doi_data> | |
<doi>10.18653/v1/2020.emnlp-main.4</doi> | |
<resource>https://aclanthology.org/2020.emnlp-main.4</resource> | |
</doi_data> | |
</conference_paper> |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<?xml version="1.0" encoding="UTF-8"?> | |
<doi_batch xmlns="http://www.crossref.org/schema/4.4.2" | |
xmlns:ai="http://www.crossref.org/AccessIndicators.xsd" | |
xmlns:ali="http://www.niso.org/schemas/ali/1.0/" | |
xmlns:fr="http://www.crossref.org/fundref.xsd" | |
xmlns:jats="http://www.ncbi.nlm.nih.gov/JATS1" | |
xmlns:rel="http://www.crossref.org/relations.xsd" | |
xmlns:xlink="http://www.w3.org/1999/xlink" | |
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" | |
version="4.4.2" | |
xsi:schemaLocation="http://www.crossref.org/schema/4.4.2 http://www.crossref.org/schema/deposit/crossref4.4.2.xsd"> | |
<head> | |
<doi_batch_id>10.1162/tacl_a_00443</doi_batch_id> | |
<timestamp>2022012712115621100</timestamp> | |
<depositor> | |
<depositor_name>Silverchair</depositor_name> | |
<email_address>[email protected]</email_address> | |
</depositor> | |
<registrant>MIT Press</registrant> | |
</head> | |
<body> | |
<journal> | |
<journal_metadata language="en"> | |
<full_title>Transactions of the Association for Computational Linguistics</full_title> | |
<issn media_type="electronic">2307-387X</issn> | |
</journal_metadata> | |
<journal_issue> | |
<publication_date media_type="other"> | |
<year>2021</year> | |
</publication_date> | |
<publication_date media_type="print"> | |
<month>12</month> | |
<day>30</day> | |
<year>2021</year> | |
</publication_date> | |
<journal_volume> | |
<volume>9</volume> | |
</journal_volume> | |
</journal_issue> | |
<journal_article publication_type="full_text"> | |
<titles> | |
<title>Word Representation Learning in Multimodal Pre-Trained Transformers: An Intrinsic Evaluation</title> | |
</titles> | |
<contributors> | |
<person_name sequence="first" contributor_role="author"> | |
<given_name>Sandro</given_name> | |
<surname>Pezzelle</surname> | |
<affiliation>Institute for Logic, Language and Computation, University of Amsterdam, The Netherlands. [email protected]</affiliation> | |
</person_name> | |
<person_name sequence="additional" contributor_role="author"> | |
<given_name>Ece</given_name> | |
<surname>Takmaz</surname> | |
<affiliation>Institute for Logic, Language and Computation, University of Amsterdam, The Netherlands. [email protected]</affiliation> | |
</person_name> | |
<person_name sequence="additional" contributor_role="author"> | |
<given_name>Raquel</given_name> | |
<surname>Fernández</surname> | |
<affiliation>Institute for Logic, Language and Computation, University of Amsterdam, The Netherlands. [email protected]</affiliation> | |
</person_name> | |
</contributors> | |
<jats:abstract> | |
<jats:title>Abstract</jats:title> | |
<jats:p>This study carries out a systematic intrinsic evaluation of the semantic representations learned by state-of-the-art pre-trained multimodal Transformers. These representations are claimed to be task-agnostic and shown to help on many downstream language-and-vision tasks. However, the extent to which they align with human semantic intuitions remains unclear. We experiment with various models and obtain static word representations from the contextualized ones they learn. We then evaluate them against the semantic judgments provided by human speakers. In line with previous evidence, we observe a generalized advantage of multimodal representations over language-only ones on concrete word pairs, but not on abstract ones. On the one hand, this confirms the effectiveness of these models to align language and vision, which results in better semantic representations for concepts that are grounded in images. On the other hand, models are shown to follow different representation learning patterns, which sheds some light on how and when they perform multimodal integration.</jats:p> | |
</jats:abstract> | |
<publication_date media_type="other"> | |
<year>2021</year> | |
</publication_date> | |
<publication_date media_type="print"> | |
<month>12</month> | |
<day>30</day> | |
<year>2021</year> | |
</publication_date> | |
<publication_date media_type="online"> | |
<month>12</month> | |
<day>30</day> | |
<year>2021</year> | |
</publication_date> | |
<pages> | |
<first_page>1563</first_page> | |
<last_page>1579</last_page> | |
</pages> | |
<crossmark> | |
<crossmark_version>2</crossmark_version> | |
<crossmark_policy>10.1162/mitpressjournals.corrections.policy</crossmark_policy> | |
<crossmark_domains> | |
<crossmark_domain> | |
<domain>direct.mit.edu</domain> | |
</crossmark_domain> | |
</crossmark_domains> | |
<crossmark_domain_exclusive>true</crossmark_domain_exclusive> | |
<custom_metadata> | |
<ai:program name="AccessIndicators"> | |
<ai:license_ref applies_to="vor" start_date="2022-01-03">https://creativecommons.org/licenses/by/4.0/</ai:license_ref> | |
</ai:program> | |
</custom_metadata> | |
</crossmark> | |
<doi_data> | |
<doi>10.1162/tacl_a_00443</doi> | |
<resource>https://direct.mit.edu/tacl/article/doi/10.1162/tacl_a_00443/108935/Word-Representation-Learning-in-Multimodal-Pre</resource> | |
<collection property="syndication"> | |
<item> | |
<resource mime_type="application/pdf" content_version="vor">https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00443/1979754/tacl_a_00443.pdf</resource> | |
</item> | |
</collection> | |
<collection property="crawler-based"> | |
<item crawler="iParadigms"> | |
<resource>https://direct.mit.edu/tacl/article-pdf/doi/10.1162/tacl_a_00443/1979754/tacl_a_00443.pdf</resource> | |
</item> | |
</collection> | |
</doi_data> | |
<citation_list> | |
<citation key="2022012712115621100_bib1"> | |
<volume_title>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</volume_title> | |
<author>Anderson</author> | |
<first_page>6077</first_page> | |
<cYear>2018</cYear> | |
<doi>10.1109/CVPR.2018.00636</doi> | |
<article_title>Bottom-up and top-down attention for image captioning and visual question answering</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib2"> | |
<volume_title>Proceedings of the IEEE International Conference on Computer Vision</volume_title> | |
<author>Antol</author> | |
<first_page>2425</first_page> | |
<cYear>2015</cYear> | |
<doi>10.1109/ICCV.2015.279</doi> | |
<article_title>VQA: Visual question answering</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib3"> | |
<journal_title>Language and Linguistics Compass</journal_title> | |
<author>Baroni</author> | |
<volume>10</volume> | |
<issue>1</issue> | |
<first_page>3</first_page> | |
<cYear>2016</cYear> | |
<doi>10.1111/lnc3.12170</doi> | |
<article_title>Grounding distributional semantics in the visual world</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib4"> | |
<journal_title>Annual Review of Psychology</journal_title> | |
<author>Barsalou</author> | |
<volume>59</volume> | |
<first_page>617</first_page> | |
<cYear>2008</cYear> | |
<doi>10.1146/annurev.psych.59.103006.093639</doi> | |
<article_title>Grounded cognition</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib5"> | |
<volume_title>Proceedings of the 27th International Conference on Computational Linguistics</volume_title> | |
<author>Beinborn</author> | |
<first_page>2325</first_page> | |
<cYear>2018</cYear> | |
<article_title>Multimodal grounding for language processing</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib6"> | |
<volume_title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</volume_title> | |
<author>Bommasani</author> | |
<first_page>4758</first_page> | |
<cYear>2020</cYear> | |
<doi>10.18653/v1/2020.acl-main.431</doi> | |
<article_title>Interpreting pretrained contextualized representations via reductions to static embeddings</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib7"> | |
<journal_title>Journal of Artificial Intelligence Research</journal_title> | |
<author>Bruni</author> | |
<volume>49</volume> | |
<first_page>1</first_page> | |
<cYear>2014</cYear> | |
<doi>10.1613/jair.4135</doi> | |
<article_title>Multimodal distributional semantics</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib8"> | |
<volume_title>Proceedings of the 20th ACM International Conference on Multimedia</volume_title> | |
<author>Bruni</author> | |
<first_page>1219</first_page> | |
<cYear>2012</cYear> | |
<doi>10.1145/2393347.2396422</doi> | |
<article_title>Distributional semantics with eyes: Using image analysis to improve computational representations of word meaning</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib9"> | |
<journal_title>Behavior Research Methods</journal_title> | |
<author>Brysbaert</author> | |
<volume>46</volume> | |
<issue>3</issue> | |
<first_page>904</first_page> | |
<cYear>2014</cYear> | |
<doi>10.3758/s13428-013-0403-5</doi> | |
<article_title>Concreteness ratings for 40 thousand generally known English word lemmas</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib10"> | |
<journal_title>Transactions of the Association for Computational Linguistics</journal_title> | |
<author>Bugliarello</author> | |
<cYear>2021</cYear> | |
<doi>10.1162/tacl_a_00408</doi> | |
<article_title>Multimodal pretraining unmasked: A meta-analysis and a unified framework of vision-and-language BERTs</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib11"> | |
<volume_title>European Conference on Computer Vision</volume_title> | |
<author>Cao</author> | |
<first_page>565</first_page> | |
<cYear>2020</cYear> | |
<doi>10.1007/978-3-030-58539-6_34</doi> | |
<article_title>Behind the scene: Revealing the secrets of pre-trained vision-and-language models</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib12"> | |
<volume_title>European Conference on Computer Vision</volume_title> | |
<author>Chen</author> | |
<first_page>104</first_page> | |
<cYear>2020</cYear> | |
<doi>10.1007/978-3-030-58577-8_7</doi> | |
<article_title>UNITER: Universal image-text representation learning</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib13"> | |
<volume_title>Proceedings of the 26th International Conference on Computational Linguistics</volume_title> | |
<author>Collell Talleda</author> | |
<first_page>2807</first_page> | |
<cYear>2016</cYear> | |
<article_title>Is an image worth more than a thousand words? On the fine-grain semantic differences between visual and linguistic representations</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib14"> | |
<volume_title>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition (CVPR)</volume_title> | |
<author>Das</author> | |
<cYear>2017</cYear> | |
<article_title>Visual dialog</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib15"> | |
<volume_title>Proceedings of the Eighth Joint Conference on Lexical and Computational Semantics (*SEM 2019)</volume_title> | |
<author>Davis</author> | |
<first_page>118</first_page> | |
<cYear>2019</cYear> | |
<article_title>Deconstructing multimodality: Visual properties and visual context in human semantic processing</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib16"> | |
<volume_title>Symbols and Embodiment: Debates on Meaning and Cognition</volume_title> | |
<author>Vega</author> | |
<cYear>2012</cYear> | |
</citation> | |
<citation key="2022012712115621100_bib17"> | |
<volume_title>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</volume_title> | |
<author>Vries</author> | |
<first_page>5503</first_page> | |
<cYear>2017</cYear> | |
<doi>10.1109/CVPR.2017.475</doi> | |
<article_title>GuessWhat?! Visual object discovery through multi-modal dialogue</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib18"> | |
<volume_title>Proceedings of the 2019 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long and Short Papers)</volume_title> | |
<author>Devlin</author> | |
<first_page>4171</first_page> | |
<cYear>2019</cYear> | |
<article_title>BERT: Pre-training of deep bidirectional transformers for language understanding</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib19"> | |
<volume_title>Proceedings of the 1st Workshop on Evaluating Vector-Space Representations for NLP</volume_title> | |
<author>Faruqui</author> | |
<first_page>30</first_page> | |
<cYear>2016</cYear> | |
<doi>10.18653/v1/W16-2506</doi> | |
<article_title>Problems with evaluation of word embeddings using word similarity tasks</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib20"> | |
<journal_title>ACM Transactions on Information Systems</journal_title> | |
<author>Finkelstein</author> | |
<volume>20</volume> | |
<issue>1</issue> | |
<first_page>116</first_page> | |
<cYear>2002</cYear> | |
<doi>10.1145/503104.503110</doi> | |
<article_title>Placing search in context: The concept revisited</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib21"> | |
<journal_title>Studies in Linguistic Analysis</journal_title> | |
<author>Firth</author> | |
<cYear>1957</cYear> | |
<article_title>A synopsis of linguistic theory, 1930–1955</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib22"> | |
<volume_title>Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing</volume_title> | |
<author>Gerz</author> | |
<first_page>2173</first_page> | |
<cYear>2016</cYear> | |
<doi>10.18653/v1/D16-1235</doi> | |
<article_title>SimVerb-3500: A large-scale evaluation set of verb similarity</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib23"> | |
<journal_title>Physica D: Nonlinear Phenomena</journal_title> | |
<author>Harnad</author> | |
<volume>42</volume> | |
<issue>1–3</issue> | |
<first_page>335</first_page> | |
<cYear>1990</cYear> | |
<doi>10.1016/0167-2789(90)90087-6</doi> | |
<article_title>The symbol grounding problem</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib24"> | |
<journal_title>Word</journal_title> | |
<author>Harris</author> | |
<volume>10</volume> | |
<issue>2–3</issue> | |
<first_page>146</first_page> | |
<cYear>1954</cYear> | |
<doi>10.1080/00437956.1954.11659520</doi> | |
<article_title>Distributional structure</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib25"> | |
<journal_title>Transactions of the Association for Computational Linguistics</journal_title> | |
<author>Hendricks</author> | |
<cYear>2021</cYear> | |
<doi>10.1162/tacl_a_00385</doi> | |
<article_title>Decoupling the role of data, attention, and losses in multimodal Transformers</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib26"> | |
<journal_title>arXiv preprint arXiv:2106.09141</journal_title> | |
<author>Hendricks</author> | |
<cYear>2021</cYear> | |
<doi>10.18653/v1/2021.findings-acl.318</doi> | |
<article_title>Probing image-language Transformers for verb understanding</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib27"> | |
<volume_title>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</volume_title> | |
<author>Hill</author> | |
<first_page>255</first_page> | |
<cYear>2014</cYear> | |
<doi>10.3115/v1/D14-1032</doi> | |
<article_title>Learning abstract concept embeddings from multi-modal data: Since you probably can’t see what I mean</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib28"> | |
<journal_title>Computational Linguistics</journal_title> | |
<author>Hill</author> | |
<volume>41</volume> | |
<issue>4</issue> | |
<first_page>665</first_page> | |
<cYear>2015</cYear> | |
<doi>10.1162/COLI_a_00237</doi> | |
<article_title>Simlex-999: Evaluating semantic models with (genuine) similarity estimation</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib29"> | |
<volume_title>Proceedings of the 2016 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</volume_title> | |
<author>Huang</author> | |
<first_page>1233</first_page> | |
<cYear>2016</cYear> | |
<doi>10.18653/v1/N16-1147</doi> | |
<article_title>Visual storytelling</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib30"> | |
<volume_title>Proceedings of the 2021 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</volume_title> | |
<author>Ilharco</author> | |
<first_page>5367</first_page> | |
<cYear>2021</cYear> | |
<doi>10.18653/v1/2021.naacl-main.422</doi> | |
<article_title>Probing contextual language models for common ground with visual representations</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib31"> | |
<volume_title>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</volume_title> | |
<author>Kiela</author> | |
<first_page>36</first_page> | |
<cYear>2014</cYear> | |
<doi>10.3115/v1/D14-1005</doi> | |
<article_title>Learning image embeddings using convolutional neural networks for improved multi-modal semantics</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib32"> | |
<volume_title>Proceedings of the 2016 Conference on Empirical Methods in Natural Language Processing</volume_title> | |
<author>Kiela</author> | |
<first_page>447</first_page> | |
<cYear>2016</cYear> | |
<doi>10.18653/v1/D16-1043</doi> | |
<article_title>Comparing data sources and architectures for deep visual representation learning in semantics</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib33"> | |
<volume_title>Proceedings of the IEEE Conference on Computer Vision and Pattern Recognition</volume_title> | |
<author>Kottur</author> | |
<first_page>4985</first_page> | |
<cYear>2016</cYear> | |
<doi>10.1109/CVPR.2016.539</doi> | |
<article_title>Visual word2vec (vis-w2v): Learning visually grounded word embeddings using abstract scenes</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib34"> | |
<journal_title>Psychological Review</journal_title> | |
<author>Landauer</author> | |
<volume>104</volume> | |
<issue>2</issue> | |
<first_page>211</first_page> | |
<cYear>1997</cYear> | |
<doi>10.1037/0033-295X.104.2.211</doi> | |
<article_title>A solution to Plato’s problem: The latent semantic analysis theory of acquisition, induction, and representation of knowledge</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib35"> | |
<journal_title>Engineering Applications of Artificial Intelligence</journal_title> | |
<author>Lastra-Díaz</author> | |
<volume>85</volume> | |
<first_page>645</first_page> | |
<cYear>2019</cYear> | |
<doi>10.1016/j.engappai.2019.07.010</doi> | |
<article_title>A reproducible survey on word embeddings and ontology-based methods for word similarity: linear combinations outperform the state of the art</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib36"> | |
<volume_title>Proceedings of the 2015 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies</volume_title> | |
<author>Lazaridou</author> | |
<first_page>153</first_page> | |
<cYear>2015</cYear> | |
<doi>10.3115/v1/N15-1016</doi> | |
<article_title>Combining language and vision with a multimodal skip-gram model</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib37"> | |
<journal_title>arXiv preprint arXiv:1908.03557</journal_title> | |
<author>Li</author> | |
<cYear>2019</cYear> | |
<article_title>VisualBERT: A simple and performant baseline for vision and language</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib38"> | |
<volume_title>Proceedings of the 58th Annual Meeting of the Association for Computational Linguistics</volume_title> | |
<author>Li</author> | |
<first_page>5265</first_page> | |
<cYear>2020</cYear> | |
<doi>10.18653/v1/2020.acl-main.469</doi> | |
<article_title>What does BERT with vision look at?</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib39"> | |
<volume_title>European Conference on Computer Vision</volume_title> | |
<author>Lin</author> | |
<first_page>740</first_page> | |
<cYear>2014</cYear> | |
<doi>10.1007/978-3-319-10602-1_48</doi> | |
<article_title>Microsoft COCO: Common objects in context</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib40"> | |
<volume_title>Advances in Neural Information Processing Systems</volume_title> | |
<author>Lu</author> | |
<cYear>2019</cYear> | |
<article_title>ViLBERT: Pretraining task-agnostic visiolinguistic representations for vision-and-language tasks</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib41"> | |
<journal_title>Artificial Intelligence</journal_title> | |
<author>Lüddecke</author> | |
<volume>274</volume> | |
<first_page>44</first_page> | |
<cYear>2019</cYear> | |
<doi>10.1016/j.artint.2018.12.009</doi> | |
<article_title>Distributional semantics of objects in visual scenes in comparison to text</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib42"> | |
<volume_title>Proceedings of the 16th Conference of the European Chapter of the Association for Computational Linguistics: Main Volume</volume_title> | |
<author>Ma</author> | |
<first_page>42</first_page> | |
<cYear>2021</cYear> | |
<article_title>On the (in)effectiveness of images for text classification</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib43"> | |
<journal_title>Cortex</journal_title> | |
<author>Meteyard</author> | |
<volume>48</volume> | |
<issue>7</issue> | |
<first_page>788</first_page> | |
<cYear>2012</cYear> | |
<doi>10.1016/j.cortex.2010.11.002</doi> | |
<article_title>Coming of age: A review of embodiment and the neuroscience of semantics</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib44"> | |
<journal_title>Proceedings of the Society for Computation in Linguistics</journal_title> | |
<author>Mickus</author> | |
<volume>3</volume> | |
<cYear>2020</cYear> | |
<article_title>What do you mean, BERT? Assessing BERT as a Distributional Semantics Model</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib45"> | |
<volume_title>1st International Conference on Learning Representations, ICLR 2013, Scottsdale, Arizona, USA, May 2–4, 2013, Workshop Track Proceedings</volume_title> | |
<author>Mikolov</author> | |
<cYear>2013</cYear> | |
<article_title>Efficient estimation of word representations in vector space</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib46"> | |
<journal_title>Natural Language Engineering</journal_title> | |
<author>Navigli</author> | |
<volume>25</volume> | |
<issue>6</issue> | |
<first_page>693</first_page> | |
<cYear>2019</cYear> | |
<doi>10.1017/S1351324919000305</doi> | |
<article_title>An overview of word and sense similarity</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib47"> | |
<volume_title>Proceedings of the ‘Beyond Language: Multimodal Semantic Representations’ Workshop</volume_title> | |
<author>Parcalabescu</author> | |
<cYear>2021</cYear> | |
<article_title>Seeing past words: Testing the cross-modal capabilities of pretrained V&amp;L models on counting tasks</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib48"> | |
<volume_title>Proceedings of the 2014 Conference on Empirical Methods in Natural Language Processing (EMNLP)</volume_title> | |
<author>Pennington</author> | |
<first_page>1532</first_page> | |
<cYear>2014</cYear> | |
<doi>10.3115/v1/D14-1162</doi> | |
<article_title>GloVe: Global Vectors for word representation</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib49"> | |
<volume_title>Proceedings of the 2018 Conference of the North American Chapter of the Association for Computational Linguistics: Human Language Technologies, Volume 1 (Long Papers)</volume_title> | |
<author>Peters</author> | |
<first_page>2227</first_page> | |
<cYear>2018</cYear> | |
<doi>10.18653/v1/N18-1202</doi> | |
<article_title>Deep contextualized word representations</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib50"> | |
<journal_title>Transactions of the Association for Computational Linguistics</journal_title> | |
<author>Rogers</author> | |
<volume>8</volume> | |
<first_page>842</first_page> | |
<cYear>2020</cYear> | |
<doi>10.1162/tacl_a_00349</doi> | |
<article_title>A primer in BERTology: What we know about how BERT works</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib51"> | |
<journal_title>Cognitive Science</journal_title> | |
<author>Rotaru</author> | |
<volume>44</volume> | |
<issue>4</issue> | |
<first_page>e12830</first_page> | |
<cYear>2020</cYear> | |
<doi>10.1111/cogs.12830</doi> | |
<article_title>Constructing semantic models from words, images, and emojis</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib52"> | |
<journal_title>Communications of the ACM</journal_title> | |
<author>Rubenstein</author> | |
<volume>8</volume> | |
<issue>10</issue> | |
<first_page>627</first_page> | |
<cYear>1965</cYear> | |
<doi>10.1145/365628.365657</doi> | |
<article_title>Contextual correlates of synonymy</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib53"> | |
<volume_title>Proceedings of the 56th Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</volume_title> | |
<author>Sharma</author> | |
<first_page>2556</first_page> | |
<cYear>2018</cYear> | |
<doi>10.18653/v1/P18-1238</doi> | |
<article_title>Conceptual captions: A cleaned, hypernymed, image alt-text dataset for automatic image captioning</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib54"> | |
<volume_title>Proceedings of the 52nd Annual Meeting of the Association for Computational Linguistics (Volume 1: Long Papers)</volume_title> | |
<author>Silberer</author> | |
<first_page>721</first_page> | |
<cYear>2014</cYear> | |
<doi>10.3115/v1/P14-1068</doi> | |
<article_title>Learning grounded meaning representations with autoencoders</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib55"> | |
<journal_title>arXiv preprint arXiv:2004.08744</journal_title> | |
<author>Singh</author> | |
<cYear>2020</cYear> | |
<article_title>Are we pretraining it right? Digging deeper into visio-linguistic pretraining</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib56"> | |
<journal_title>Artificial Intelligence Review</journal_title> | |
<author>Taieb</author> | |
<volume>53</volume> | |
<issue>6</issue> | |
<first_page>4407</first_page> | |
<cYear>2020</cYear> | |
<doi>10.1007/s10462-019-09796-3</doi> | |
<article_title>A survey of semantic relatedness evaluation datasets and procedures</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib57"> | |
<volume_title>Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing and the 9th International Joint Conference on Natural Language Processing (EMNLP-IJCNLP)</volume_title> | |
<author>Tan</author> | |
<first_page>5100</first_page> | |
<cYear>2019</cYear> | |
<doi>10.18653/v1/D19-1514</doi> | |
<article_title>LXMERT: Learning cross-modality encoder representations from transformers</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib58"> | |
<volume_title>Proceedings of the 2020 Conference on Empirical Methods in Natural Language Processing (EMNLP)</volume_title> | |
<author>Tan</author> | |
<first_page>2066</first_page> | |
<cYear>2020</cYear> | |
<doi>10.18653/v1/2020.emnlp-main.162</doi> | |
<article_title>Vokenization: Improving language understanding via contextualized, visually-grounded supervision</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib59"> | |
<volume_title>Proceedings of the 57th Annual Meeting of the Association for Computational Linguistics</volume_title> | |
<author>Tenney</author> | |
<first_page>4593</first_page> | |
<cYear>2019</cYear> | |
<doi>10.18653/v1/P19-1452</doi> | |
<article_title>BERT Rediscovers the Classical NLP Pipeline</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib60"> | |
<journal_title>Journal of Artificial Intelligence Research</journal_title> | |
<author>Turney</author> | |
<volume>37</volume> | |
<first_page>141</first_page> | |
<cYear>2010</cYear> | |
<doi>10.1613/jair.2934</doi> | |
<article_title>From frequency to meaning: Vector space models of semantics</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib61"> | |
<volume_title>7th International Conference on Learning Representations, ICLR 2019</volume_title> | |
<author>Wang</author> | |
<cYear>2019</cYear> | |
<doi>10.18653/v1/W18-5446</doi> | |
<article_title>GLUE: A multi-task benchmark and analysis platform for natural language understanding</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib62"> | |
<volume_title>Proceedings of the AAAI Conference on Artificial Intelligence</volume_title> | |
<author>Wang</author> | |
<cYear>2018</cYear> | |
<article_title>Learning multimodal word representation via dynamic fusion methods</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib63"> | |
<volume_title>Proceedings of the 13th International Conference on Computational Semantics - Long Papers</volume_title> | |
<author>Westera</author> | |
<first_page>120</first_page> | |
<cYear>2019</cYear> | |
<doi>10.18653/v1/W19-0410</doi> | |
<article_title>Don’t blame distributional semantics if it can’t do entailment</article_title> | |
</citation> | |
<citation key="2022012712115621100_bib64"> | |
<volume_title>Proceedings of the AAAI Conference on Artificial Intelligence</volume_title> | |
<author>Zablocki</author> | |
<cYear>2018</cYear> | |
<article_title>Learning multi-modal word representation grounded in visual context</article_title> | |
</citation> | |
</citation_list> | |
</journal_article> | |
</journal> | |
</body> | |
</doi_batch> |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment