[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"sidebar-data":3,"breadcrumb-conf-2026":849,"conference-2026":850,"papers-2026":851},{"conferences":4,"tutorials":254,"workshops":260},[5,27,47,65,79,96,113,130,147,164,180,195,211,225,240],{"conference_id":6,"year":7,"proceedings_title":8,"venue_ids":9,"isbn":10,"issn":11,"doi":12,"publisher":13,"editors":14,"conference_name":15,"conference_acronym":16,"conference_number":17,"conference_location":18,"conference_city":19,"conference_country":20,"conference_start_date":21,"conference_end_date":22,"conference_url":23,"pdf_url":24,"img_conf_url":25,"paperCount":26},"lrec2026","2026","Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026)","lrec","978-2-493814-49-4","2522-2686","10.63317\u002F4fxzgre27xzj","European Language Resources Association (ELRA)","Stelios Piperidis, Núria Bel, Henk van den Heuvel, Nancy Ide, Simon Krek, Antonio Toral","The Fifteenth Language Resources and Evaluation Conference (LREC 2026)","LREC","15","Palau de Congressos de Palma","Palma, Mallorca","Spain","2026-05-11","2026-05-16","https:\u002F\u002Flrec2026.info","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002FLREC-2026.pdf",null,944,{"conference_id":28,"year":29,"proceedings_title":30,"venue_ids":31,"isbn":32,"issn":11,"doi":33,"publisher":34,"editors":35,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_city":40,"conference_country":41,"conference_start_date":42,"conference_end_date":43,"conference_url":44,"pdf_url":45,"img_conf_url":25,"paperCount":46},"lrec2024","2024","Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)","lrec|coling","979-10-95546-34-4","10.63317\u002F375ba8vd9q2v","European Language Resources Association (ELRA) and ICCL","Nicoletta Calzolari, Min-Yen Kan, Veronique Hoste, Alessandro Lenci, Sakriani Sakti, Nianwen 
Xue","Joint International Conference on Computational Linguistics, Language Resources and Evaluation","LREC-COLING","14","Lingotto Conference Centre","Turin","Italy","2024-05-20","2024-05-25","https:\u002F\u002Flrec-coling-2024.org","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002FLREC-2024.pdf",1554,{"conference_id":48,"year":49,"proceedings_title":50,"venue_ids":9,"isbn":51,"issn":11,"doi":52,"publisher":13,"editors":53,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_city":57,"conference_country":58,"conference_start_date":59,"conference_end_date":60,"conference_url":61,"pdf_url":62,"img_conf_url":63,"paperCount":64},"lrec2022","2022","Proceedings of the Thirteenth International Conference on Language Resources and Evaluation (LREC 2022)","979-10-95546-38-2","10.63317\u002F296vkvmh42ye","Nicoletta Calzolari, Frédéric Béchet, Philippe Blache, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Jan Odijk, Stelios Piperidis","Thirteenth Language Resources and Evaluation Conference","13","Palais du Pharo","Marseille","France","2022-06-20","2022-06-25","https:\u002F\u002Flrec2022.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2022\u002FLREC-2022.pdf","",804,{"conference_id":66,"year":67,"proceedings_title":68,"venue_ids":9,"isbn":69,"issn":11,"doi":70,"publisher":13,"editors":71,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_city":57,"conference_country":58,"conference_start_date":74,"conference_end_date":75,"conference_url":76,"pdf_url":77,"img_conf_url":63,"paperCount":78},"lrec2020","2020","Proceedings of the Twelfth International Conference on Language Resources and Evaluation (LREC 2020)","979-10-95546-34-4","10.63317\u002F4j46u44gnpwr","Nicoletta Calzolari, Frédéric Béchet, Philippe Blache, Khalid 
Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Twelfth Language Resources and Evaluation Conference","12","2020-05-11","2020-05-16","https:\u002F\u002Flrec2020.lrec-conf.org\u002Fen\u002Findex.html","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002FLREC-2020.pdf",895,{"conference_id":80,"year":81,"proceedings_title":82,"venue_ids":9,"isbn":83,"issn":11,"doi":84,"publisher":13,"editors":85,"conference_name":86,"conference_acronym":16,"conference_number":87,"conference_location":88,"conference_city":89,"conference_country":90,"conference_start_date":91,"conference_end_date":92,"conference_url":93,"pdf_url":94,"img_conf_url":63,"paperCount":95},"lrec2018","2018","Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)","979-10-95546-00-9","10.63317\u002F25jzjyk647iz","Nicoletta Calzolari, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Koiti Hasida, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis, Takenobu Tokunaga","Eleventh International Conference on Language Resources and Evaluation","11","Phoenix Seagaia Resort","Miyazaki","Japan","2018-05-07","2018-05-12","http:\u002F\u002Flrec2018.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2018\u002FLREC2018_Proceedings.zip",728,{"conference_id":97,"year":98,"proceedings_title":99,"venue_ids":9,"isbn":100,"issn":11,"doi":101,"publisher":13,"editors":102,"conference_name":103,"conference_acronym":16,"conference_number":104,"conference_location":105,"conference_city":106,"conference_country":107,"conference_start_date":108,"conference_end_date":109,"conference_url":110,"pdf_url":111,"img_conf_url":63,"paperCount":112},"lrec2016","2016","Proceedings of the Tenth International Conference on Language Resources and Evaluation 
(LREC 2016)","978-2-9517408-9-1","10.63317\u002F5mruwrazrwbg","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Sara Goggi, Marko Grobelnik, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asunción Moreno, Jan Odijk, Stelios Piperidis","Tenth International Conference on Language Resources and Evaluation","10","Bernardinsko Naselje","Portorož","Slovenia","2016-05-23","2016-05-28","http:\u002F\u002Flrec2016.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2016\u002FLREC2016_Proceedings.zip",745,{"conference_id":114,"year":115,"proceedings_title":116,"venue_ids":9,"isbn":117,"issn":11,"doi":118,"publisher":13,"editors":119,"conference_name":120,"conference_acronym":16,"conference_number":121,"conference_location":122,"conference_city":123,"conference_country":124,"conference_start_date":125,"conference_end_date":126,"conference_url":127,"pdf_url":128,"img_conf_url":63,"paperCount":129},"lrec2014","2014","Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014)","978-2-9517408-8-4","10.63317\u002F3ebxpiqq4ikp","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Hrafn Loftsson, Bente Maegaard, Joseph Mariani, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Ninth International Conference on Language Resources and Evaluation","9","Harpa Concert Hall and Conference 
Centre","Reykjavik","Iceland","2014-05-26","2014-05-31","http:\u002F\u002Flrec2014.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2014\u002FLREC2014_Proceedings.zip",746,{"conference_id":131,"year":132,"proceedings_title":133,"venue_ids":9,"isbn":134,"issn":11,"doi":135,"publisher":13,"editors":136,"conference_name":137,"conference_acronym":16,"conference_number":138,"conference_location":139,"conference_city":140,"conference_country":141,"conference_start_date":142,"conference_end_date":143,"conference_url":144,"pdf_url":145,"img_conf_url":63,"paperCount":146},"lrec2012","2012","Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC 2012)","978-2-9517408-7-7","10.63317\u002F42za3jv29xvs","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Mehmet Doğan, Bente Maegaard, Joseph Mariani, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Eighth International Conference on Language Resources and Evaluation","8","Istanbul Convention & Exhibition Centre (ICEC) (Lütfi Kırdar)","Istanbul","Turkey","2012-05-21","2012-05-27","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2012\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2012\u002FLREC2012_Proceedings.zip",670,{"conference_id":148,"year":149,"proceedings_title":150,"venue_ids":9,"isbn":151,"issn":11,"doi":152,"publisher":13,"editors":153,"conference_name":154,"conference_acronym":16,"conference_number":155,"conference_location":156,"conference_city":157,"conference_country":158,"conference_start_date":159,"conference_end_date":160,"conference_url":161,"pdf_url":162,"img_conf_url":63,"paperCount":163},"lrec2010","2010","Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC 2010)","2-9517408-6-7","10.63317\u002F32m6vov78mmv","Nicoletta Calzolari, Khalid Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis, Mike Rosner, Daniel Tapias","Seventh International Conference 
on Language Resources and Evaluation","7","Mediterranean Conference Centre (MCC)","Valletta","Malta","2010-05-17","2010-05-23","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2010\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2010\u002FLREC2010_Proceedings.zip",645,{"conference_id":165,"year":166,"proceedings_title":167,"venue_ids":9,"isbn":168,"issn":11,"doi":169,"publisher":13,"editors":170,"conference_name":171,"conference_acronym":16,"conference_number":172,"conference_location":173,"conference_city":174,"conference_country":175,"conference_start_date":176,"conference_end_date":177,"conference_url":178,"pdf_url":63,"img_conf_url":63,"paperCount":179},"lrec2008","2008","Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC 2008)","2-9517408-4-0","10.63317\u002F3c6xa89msnta","Nicoletta Calzolari, Khalid Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis, Daniel Tapias","Sixth International Conference on Language Resources and Evaluation","6","Palais des Congrès","Marrakech","Morocco","2008-05-28","2008-05-30","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2008\u002F",620,{"conference_id":181,"year":182,"proceedings_title":183,"venue_ids":9,"isbn":184,"issn":11,"doi":185,"publisher":13,"editors":186,"conference_name":187,"conference_acronym":16,"conference_number":188,"conference_location":189,"conference_city":190,"conference_country":41,"conference_start_date":191,"conference_end_date":192,"conference_url":193,"pdf_url":63,"img_conf_url":63,"paperCount":194},"lrec2006","2006","Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC 2006)","2-9517408-2-4","10.63317\u002F2xx3x75ppppa","Nicoletta Calzolari, Khalid Choukri, Aldo Gangemi, Bente Maegaard, Joseph Mariani, Jan Odijk, Daniel Tapias","Fifth International Conference on Language Resources and Evaluation","5","Magazzini del 
Cotone","Genoa","2006-05-24","2006-05-26","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2006\u002F",513,{"conference_id":196,"year":197,"proceedings_title":198,"venue_ids":9,"isbn":199,"issn":11,"doi":200,"publisher":13,"editors":201,"conference_name":202,"conference_acronym":16,"conference_number":203,"conference_location":204,"conference_city":205,"conference_country":206,"conference_start_date":207,"conference_end_date":208,"conference_url":209,"pdf_url":63,"img_conf_url":63,"paperCount":210},"lrec2004","2004","Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004)","2-9517408-1-6","10.63317\u002F2s47745g6zhw","Maria Teresa Lino, Maria Francisca Xavier, Fatima Ferreira, Rute Costa, Raquel Silva","Fourth International Conference on Language Resources and Evaluation","4","Centro Cultural de Belém","Lisbon","Portugal","2004-05-26","2004-05-28","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2004\u002F",524,{"conference_id":212,"year":213,"proceedings_title":214,"venue_ids":9,"isbn":63,"issn":11,"doi":215,"publisher":13,"editors":216,"conference_name":217,"conference_acronym":16,"conference_number":218,"conference_location":219,"conference_city":220,"conference_country":20,"conference_start_date":221,"conference_end_date":222,"conference_url":223,"pdf_url":63,"img_conf_url":63,"paperCount":224},"lrec2002","2002","Proceedings of the Third International Conference on Language Resources and Evaluation (LREC 2002)","10.63317\u002F3ha6dpna2o97","Manuel González Rodríguez, Carmen Paz Suarez Araujo","Third International Conference on Language Resources and Evaluation","3","Auditorio Alfredo Kraus","Las 
Palmas","2002-05-29","2002-05-31","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2002\u002F",354,{"conference_id":226,"year":227,"proceedings_title":228,"venue_ids":9,"isbn":63,"issn":11,"doi":229,"publisher":13,"editors":230,"conference_name":231,"conference_acronym":16,"conference_number":232,"conference_location":233,"conference_city":234,"conference_country":235,"conference_start_date":236,"conference_end_date":237,"conference_url":238,"pdf_url":63,"img_conf_url":63,"paperCount":239},"lrec2000","2000","Proceedings of the Second International Conference on Language Resources and Evaluation (LREC 2000)","10.63317\u002F3yosukd7w6sn","Maria Gavrilidou, George Carayannis, Stella Markantonatou, Stelios Piperidis, Greg Stainhauer","Second International Conference on Language Resources and Evaluation","2","Zappeion Megaron","Athens","Greece","2000-05-31","2000-06-02","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2000\u002F",280,{"conference_id":241,"year":242,"proceedings_title":243,"venue_ids":9,"isbn":63,"issn":11,"doi":244,"publisher":13,"editors":245,"conference_name":246,"conference_acronym":16,"conference_number":247,"conference_location":248,"conference_city":249,"conference_country":20,"conference_start_date":250,"conference_end_date":251,"conference_url":252,"pdf_url":63,"img_conf_url":63,"paperCount":253},"lrec1998","1998","Proceedings of the First International Conference on Language Resources and Evaluation (LREC 1998)","10.63317\u002F5a986fnjefzm","Antonio Rubio, Natividad Gallardo, Rosa Castro, Antonio Tejada","Language Resources and Evaluation Conference","1","Palacio de Congresos de 
Granada","Granada","1998-05-28","1998-05-30","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec1998\u002F",212,[255],{"year":29,"proceedings_title":256,"paperCount":257,"doi":258,"pdf_url":259,"venue_ids":31,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024): Tutorial Summaries",13,"10.63317\u002F3piy8jnqffp3","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftutorials\u002FLREC-2024-Tutorials.pdf",{"2020":261,"2022":452,"2024":617},[262,269,276,282,289,296,302,308,315,322,328,335,341,348,354,359,364,370,376,382,387,392,397,402,407,413,419,425,431,436,442,447],{"workshop_id":263,"year":67,"full_workshop_id":264,"proceedings_title":265,"paperCount":266,"doi":267,"pdf_url":268,"venue_ids":263,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"aespen","lrec2020_ws_aespen","Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020",11,"10.63317\u002F58onsa8rnrrz","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FAESPEN2020\u002FAESPEN-2020.pdf",{"workshop_id":270,"year":67,"full_workshop_id":271,"proceedings_title":272,"paperCount":273,"doi":274,"pdf_url":275,"venue_ids":270,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"ai4hi","lrec2020_ws_ai4hi","Proceedings of the 1st International Workshop on Artificial Intelligence for Historical Image Enrichment and 
Access",5,"10.63317\u002F3m5ep69cw7jj","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FAI4HI2020\u002FAI4HI-2020.pdf",{"workshop_id":277,"year":67,"full_workshop_id":278,"proceedings_title":279,"paperCount":266,"doi":280,"pdf_url":281,"venue_ids":277,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"bucc","lrec2020_ws_bucc","Proceedings of the 13th Workshop on Building and Using Comparable Corpora","10.63317\u002F2fx83jms4c9r","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FBUCC2020\u002FBUCC-2020.pdf",{"workshop_id":283,"year":67,"full_workshop_id":284,"proceedings_title":285,"paperCount":286,"doi":287,"pdf_url":288,"venue_ids":283,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"calcs","lrec2020_ws_calcs","Proceedings of the 4th Workshop on Computational Approaches to Code Switching",9,"10.63317\u002F3jbxrkvj6qkv","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCS2020\u002FCALCS-2020.pdf",{"workshop_id":290,"year":67,"full_workshop_id":291,"proceedings_title":292,"paperCount":293,"doi":294,"pdf_url":295,"venue_ids":290,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"cllrd","lrec2020_ws_cllrd","Proceedings of the LREC 2020 Workshop on \"Citizen Linguistics in Language Resource 
Development\"",8,"10.63317\u002F3qo9e6q6vq5f","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002Fcllrd2020\u002FCLLRD-2020.pdf",{"workshop_id":297,"year":67,"full_workshop_id":298,"proceedings_title":299,"paperCount":266,"doi":300,"pdf_url":301,"venue_ids":297,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"clssts","lrec2020_ws_clssts","Proceedings of the workshop on Cross-Language Search and Summarization of Text and Speech (CLSSTS2020)","10.63317\u002F3p6twmv6mhc5","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCLSSTS2020\u002FCLSSTS-2020.pdf",{"workshop_id":303,"year":67,"full_workshop_id":304,"proceedings_title":305,"paperCount":286,"doi":306,"pdf_url":307,"venue_ids":303,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"cmlc","lrec2020_ws_cmlc","Proceedings of the 8th Workshop on Challenges in the Management of Large Corpora","10.63317\u002F236pt6g4g4s4","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCMLC-8\u002FCMLC-2020.pdf",{"workshop_id":309,"year":67,"full_workshop_id":310,"proceedings_title":311,"paperCount":312,"doi":313,"pdf_url":314,"venue_ids":309,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"computerm","lrec2020_ws_computerm","Proceedings of the 6th International Workshop on Computational 
Terminology",15,"10.63317\u002F4jp43md9xe2q","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCOMPUTERM2020\u002FCOMPUTERM-2020.pdf",{"workshop_id":316,"year":67,"full_workshop_id":317,"proceedings_title":318,"paperCount":319,"doi":320,"pdf_url":321,"venue_ids":316,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"framenet","lrec2020_ws_framenet","Proceedings of the International FrameNet Workshop 2020: Towards a Global, Multilingual FrameNet",12,"10.63317\u002F4tjynpg2ohf3","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002Fframenet2020\u002FFrameNet-2020.pdf",{"workshop_id":323,"year":67,"full_workshop_id":324,"proceedings_title":325,"paperCount":319,"doi":326,"pdf_url":327,"venue_ids":323,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"gamnlp","lrec2020_ws_gamnlp","Proceedings of the Workshop on Games and Natural Language Processing","10.63317\u002F5ahttrxdfnza","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FGames-NLP\u002FGAMNLP-2020.pdf",{"workshop_id":329,"year":67,"full_workshop_id":330,"proceedings_title":331,"paperCount":332,"doi":333,"pdf_url":334,"venue_ids":329,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"globalex","lrec2020_ws_globalex","Proceedings of the 2020 Globalex Workshop on Linked 
Lexicography",18,"10.63317\u002F34yjjfrnwvj8","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FGLOBALEX2020\u002FGLOBALEX-2020.pdf",{"workshop_id":336,"year":67,"full_workshop_id":337,"proceedings_title":338,"paperCount":319,"doi":339,"pdf_url":340,"venue_ids":336,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"isa","lrec2020_ws_isa","Proceedings of the 16th Joint ACL-ISO Workshop on Interoperable Semantic Annotation","10.63317\u002F5id7rv8izjcd","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FISA16\u002FISA-2020.pdf",{"workshop_id":342,"year":67,"full_workshop_id":343,"proceedings_title":344,"paperCount":345,"doi":346,"pdf_url":347,"venue_ids":342,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"iwltp","lrec2020_ws_iwltp","Proceedings of the 1st International Workshop on Language Technology Platforms",17,"10.63317\u002F4hc34do825yz","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FIWLTP2020\u002FIWLTP-2020.pdf",{"workshop_id":349,"year":67,"full_workshop_id":350,"proceedings_title":351,"paperCount":319,"doi":352,"pdf_url":353,"venue_ids":349,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"ldl","lrec2020_ws_ldl","Proceedings of the 7th Workshop on Linked Data in Linguistics 
(LDL-2020)","10.63317\u002F3mn9ttzvdbxs","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FLDL2020\u002FLDL-2020.pdf",{"workshop_id":355,"year":67,"full_workshop_id":356,"proceedings_title":357,"paperCount":293,"doi":358,"pdf_url":63,"venue_ids":355,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lincr","lrec2020_ws_lincr","Proceedings of the Second Workshop on Linguistic and Neurocognitive Resources","10.63317\u002F24gnv8q9cz94",{"workshop_id":360,"year":67,"full_workshop_id":361,"proceedings_title":362,"paperCount":286,"doi":363,"pdf_url":63,"venue_ids":360,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lr4sshoc","lrec2020_ws_lr4sshoc","Proceedings of the Workshop about Language Resources for the SSH Cloud","10.63317\u002F5j7vesdm7yia",{"workshop_id":365,"year":67,"full_workshop_id":366,"proceedings_title":367,"paperCount":368,"doi":369,"pdf_url":63,"venue_ids":365,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lt4gov","lrec2020_ws_lt4gov","Proceedings of the 1st Workshop on Language Technologies for Government and Public Administration (LT4Gov)",6,"10.63317\u002F5i8su82ish3i",{"workshop_id":371,"year":67,"full_workshop_id":372,"proceedings_title":373,"paperCount":374,"doi":375,"pdf_url":63,"venue_ids":371,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lt4hala","lrec2020_ws_lt4hala","Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient 
Languages",21,"10.63317\u002F4jnfg39ctsra",{"workshop_id":377,"year":67,"full_workshop_id":378,"proceedings_title":379,"paperCount":380,"doi":381,"pdf_url":63,"venue_ids":377,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"mmw","lrec2020_ws_mmw","Proceedings of the LREC 2020 Workshop on Multimodal Wordnets (MMW2020)",7,"10.63317\u002F5pcp4c88d6n8",{"workshop_id":383,"year":67,"full_workshop_id":384,"proceedings_title":385,"paperCount":368,"doi":386,"pdf_url":63,"venue_ids":383,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"multilingualbio","lrec2020_ws_multilingualbio","Proceedings of the LREC 2020 Workshop on Multilingual Biomedical Text Processing (MultilingualBIO 2020)","10.63317\u002F4pfckaywoxxa",{"workshop_id":388,"year":67,"full_workshop_id":389,"proceedings_title":390,"paperCount":273,"doi":391,"pdf_url":63,"venue_ids":388,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"onion","lrec2020_ws_onion","Proceedings of LREC2020 Workshop \"People in language, vision and the mind\" (ONION2020)","10.63317\u002F2oxdsr8tue27",{"workshop_id":393,"year":67,"full_workshop_id":394,"proceedings_title":395,"paperCount":332,"doi":396,"pdf_url":63,"venue_ids":393,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"osact","lrec2020_ws_osact","Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language 
Detection","10.63317\u002F2xjmcg9vsxcp",{"workshop_id":398,"year":67,"full_workshop_id":399,"proceedings_title":400,"paperCount":257,"doi":401,"pdf_url":63,"venue_ids":398,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"parlaclarin","lrec2020_ws_parlaclarin","Proceedings of the Second ParlaCLARIN Workshop","10.63317\u002F3qhkh6dmemmn",{"workshop_id":403,"year":67,"full_workshop_id":404,"proceedings_title":405,"paperCount":286,"doi":406,"pdf_url":63,"venue_ids":403,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"rail","lrec2020_ws_rail","Proceedings of the first workshop on Resources for African Indigenous Languages","10.63317\u002F3fjhbkudhcmc",{"workshop_id":408,"year":67,"full_workshop_id":409,"proceedings_title":410,"paperCount":411,"doi":412,"pdf_url":63,"venue_ids":408,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"readi","lrec2020_ws_readi","Proceedings of the 1st Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI)",14,"10.63317\u002F4p5m2euxriim",{"workshop_id":414,"year":67,"full_workshop_id":415,"proceedings_title":416,"paperCount":417,"doi":418,"pdf_url":63,"venue_ids":414,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"restup","lrec2020_ws_restup","Proceedings of the Workshop on Resources and Techniques for User and Author Profiling in Abusive 
Language",4,"10.63317\u002F3y3vzhsp3qb7",{"workshop_id":420,"year":67,"full_workshop_id":421,"proceedings_title":422,"paperCount":423,"doi":424,"pdf_url":63,"venue_ids":420,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"signlang","lrec2020_ws_signlang","Proceedings of the LREC2020 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives",36,"10.63317\u002F3nocn9xntuki",{"workshop_id":426,"year":67,"full_workshop_id":427,"proceedings_title":428,"paperCount":429,"doi":430,"pdf_url":63,"venue_ids":426,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"sltu","lrec2020_ws_sltu","Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)",52,"10.63317\u002F25p2yts6fk3q",{"workshop_id":432,"year":67,"full_workshop_id":433,"proceedings_title":434,"paperCount":293,"doi":435,"pdf_url":63,"venue_ids":432,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"stoc","lrec2020_ws_stoc","Proceedings for the First International Workshop on Social Threats in Online Conversations: Understanding and Management","10.63317\u002F4p7j6t9bjg8m",{"workshop_id":437,"year":67,"full_workshop_id":438,"proceedings_title":439,"paperCount":440,"doi":441,"pdf_url":63,"venue_ids":437,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"trac","lrec2020_ws_trac","Proceedings 
of the Second Workshop on Trolling, Aggression and Cyberbullying",25,"10.63317\u002F27yyhn22v2fc",{"workshop_id":443,"year":67,"full_workshop_id":444,"proceedings_title":445,"paperCount":293,"doi":446,"pdf_url":63,"venue_ids":443,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"wac","lrec2020_ws_wac","Proceedings of the 12th Web as Corpus Workshop","10.63317\u002F2va68regv5ni",{"workshop_id":448,"year":67,"full_workshop_id":449,"proceedings_title":450,"paperCount":319,"doi":451,"pdf_url":63,"venue_ids":448,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"wildre","lrec2020_ws_wildre","Proceedings of the WILDRE5– 5th Workshop on Indian Language Data: Resources and Evaluation","10.63317\u002F2ydivss2veo9",[453,458,463,467,472,478,483,488,494,499,504,509,514,520,525,530,535,540,545,550,554,559,564,569,573,577,582,587,593,598,603,608,613],{"workshop_id":277,"year":49,"full_workshop_id":454,"proceedings_title":455,"paperCount":286,"doi":456,"pdf_url":457,"venue_ids":277,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_bucc","Proceedings of the BUCC Workshop within LREC 2022","10.63317\u002F2mqwgvrp7zkn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2022\u002Fworkshops\u002FBUCC\u002F2022.bucc-1.0.pdf",{"workshop_id":459,"year":49,"full_workshop_id":460,"proceedings_title":461,"paperCount":332,"doi":462,"pdf_url":63,"venue_ids":459,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"cltw","lrec2022_ws_cltw","Proceedings of the 4th Celtic Language Technology 
Workshop within LREC2022","10.63317\u002F3x8fjtq6m25s",{"workshop_id":303,"year":49,"full_workshop_id":464,"proceedings_title":465,"paperCount":368,"doi":466,"pdf_url":63,"venue_ids":303,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_cmlc","Proceedings of the Workshop on Challenges in the Management of Large Corpora (CMLC-10)","10.63317\u002F2ajestpwy3c8",{"workshop_id":468,"year":49,"full_workshop_id":469,"proceedings_title":470,"paperCount":293,"doi":471,"pdf_url":63,"venue_ids":468,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"csrnlp","lrec2022_ws_csrnlp","Proceedings of the First Computing Social Responsibility Workshop within the 13th Language Resources and Evaluation Conference","10.63317\u002F3xphwxosghv8",{"workshop_id":473,"year":49,"full_workshop_id":474,"proceedings_title":475,"paperCount":476,"doi":477,"pdf_url":63,"venue_ids":473,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"dclrl","lrec2022_ws_dclrl","Proceedings of the Workshop on Dataset Creation for Lower-Resourced Languages within the 13th Language Resources and Evaluation Conference",10,"10.63317\u002F4652bsvzarmy",{"workshop_id":479,"year":49,"full_workshop_id":480,"proceedings_title":481,"paperCount":368,"doi":482,"pdf_url":63,"venue_ids":479,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"digitam","lrec2022_ws_digitam","Proceedings of the Workshop on Processing Language Variation: Digital Armenian (DigitAm) within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F369nz2tcm6qc",{"workshop_id":484,"year":49,"full_workshop_id":485,"proceedings_title":486,"paperCount":332,"doi":487,"pdf_url":63,"venue_ids":484,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"eurali","lrec2022_ws_eurali","Proceedings of the Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia within the 13th Language Resources and Evaluation Conference","10.63317\u002F4dhjcavy7q7y",{"workshop_id":489,"year":49,"full_workshop_id":490,"proceedings_title":491,"paperCount":492,"doi":493,"pdf_url":63,"venue_ids":489,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"fnp","lrec2022_ws_fnp","Proceedings of the 4th Financial Narrative Processing Workshop @LREC2022",24,"10.63317\u002F29xpoafy85p4",{"workshop_id":495,"year":49,"full_workshop_id":496,"proceedings_title":497,"paperCount":380,"doi":498,"pdf_url":63,"venue_ids":495,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"games","lrec2022_ws_games","Proceedings of the 9th Workshop on Games and Natural Language Processing within the 13th Language Resources and Evaluation Conference","10.63317\u002F5om6f5meam4s",{"workshop_id":500,"year":49,"full_workshop_id":501,"proceedings_title":502,"paperCount":257,"doi":503,"pdf_url":63,"venue_ids":500,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"gwll","lrec2022_ws_gwll","Proceedings of Globalex Workshop on Linked Lexicography within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F5knvvemaz9uw",{"workshop_id":336,"year":49,"full_workshop_id":505,"proceedings_title":506,"paperCount":507,"doi":508,"pdf_url":63,"venue_ids":336,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_isa","Proceedings of the 18th Joint ACL - ISO Workshop on Interoperable Semantic Annotation within LREC2022",19,"10.63317\u002F4h3ue6m3sam4",{"workshop_id":510,"year":49,"full_workshop_id":511,"proceedings_title":512,"paperCount":368,"doi":513,"pdf_url":63,"venue_ids":510,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lateraisse","lrec2022_ws_lateraisse","Proceedings of the First Workshop on Language Technology and Resources for a Fair, Inclusive, and Safe Society within the 13th Language Resources and Evaluation Conference","10.63317\u002F5osn5jjjbomp",{"workshop_id":515,"year":49,"full_workshop_id":516,"proceedings_title":517,"paperCount":518,"doi":519,"pdf_url":63,"venue_ids":515,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"law","lrec2022_ws_law","Proceedings of the 16th Linguistic Annotation Workshop (LAW-XVI) within LREC2022",20,"10.63317\u002F3fysdho22dbb",{"workshop_id":521,"year":49,"full_workshop_id":522,"proceedings_title":523,"paperCount":312,"doi":524,"pdf_url":63,"venue_ids":521,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"legal","lrec2022_ws_legal","Proceedings of the Workshop on Ethical and Legal Issues in Human Language Technologies and Multilingual De-Identification of Sensitive Data In Language Resources within the 
13th Language Resources and Evaluation Conference","10.63317\u002F273whfjsjapd",{"workshop_id":371,"year":49,"full_workshop_id":526,"proceedings_title":527,"paperCount":528,"doi":529,"pdf_url":63,"venue_ids":371,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_lt4hala","Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",31,"10.63317\u002F3dte53mz4zvu",{"workshop_id":531,"year":49,"full_workshop_id":532,"proceedings_title":533,"paperCount":345,"doi":534,"pdf_url":63,"venue_ids":531,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"mwe","lrec2022_ws_mwe","Proceedings of the 18th Workshop on Multiword Expressions @LREC2022","10.63317\u002F2fftdmypb747",{"workshop_id":536,"year":49,"full_workshop_id":537,"proceedings_title":538,"paperCount":286,"doi":539,"pdf_url":63,"venue_ids":536,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"nidcp","lrec2022_ws_nidcp","Proceedings of the 2nd Workshop on Novel Incentives in Data Collection from People: models, implementations, challenges and results within LREC 2022","10.63317\u002F2dox4kgfq3mg",{"workshop_id":541,"year":49,"full_workshop_id":542,"proceedings_title":543,"paperCount":312,"doi":544,"pdf_url":63,"venue_ids":541,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"nlperspectives","lrec2022_ws_nlperspectives","Proceedings of the 1st Workshop on Perspectivist Approaches to NLP 
@LREC2022","10.63317\u002F5nzs42fwjimz",{"workshop_id":393,"year":49,"full_workshop_id":546,"proceedings_title":547,"paperCount":548,"doi":549,"pdf_url":63,"venue_ids":393,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_osact","Proceedinsg of the 5th Workshop on Open-Source Arabic Corpora and Processing Tools with Shared Tasks on Qur'an QA and Fine-Grained Hate Speech Detection",28,"10.63317\u002F4u4quhegagc5",{"workshop_id":398,"year":49,"full_workshop_id":551,"proceedings_title":552,"paperCount":507,"doi":553,"pdf_url":63,"venue_ids":398,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_parlaclarin","Proceedings of the Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference","10.63317\u002F4zzb69hz9ebb",{"workshop_id":555,"year":49,"full_workshop_id":556,"proceedings_title":557,"paperCount":411,"doi":558,"pdf_url":63,"venue_ids":555,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"politicalnlp","lrec2022_ws_politicalnlp","Proceedings of the LREC 2022 workshop on Natural Language Processing for Political Sciences","10.63317\u002F5h778npybpti",{"workshop_id":560,"year":49,"full_workshop_id":561,"proceedings_title":562,"paperCount":368,"doi":563,"pdf_url":63,"venue_ids":560,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"pvlam","lrec2022_ws_pvlam","Proceedings of the 2nd Workshop on People in Vision, Language, and the 
Mind","10.63317\u002F52rissdzp475",{"workshop_id":565,"year":49,"full_workshop_id":566,"proceedings_title":567,"paperCount":319,"doi":568,"pdf_url":63,"venue_ids":565,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"rapid","lrec2022_ws_rapid","Proceedings of the RaPID Workshop - Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive\u002Fpsychiatric\u002Fdevelopmental impairments - within the 13th Language Resources and Evaluation Conference","10.63317\u002F4jch25mm92pa",{"workshop_id":408,"year":49,"full_workshop_id":570,"proceedings_title":571,"paperCount":286,"doi":572,"pdf_url":63,"venue_ids":408,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_readi","Proceedings of the 2nd Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI) within the 13th Language Resources and Evaluation Conference","10.63317\u002F56pm6ipunttk",{"workshop_id":414,"year":49,"full_workshop_id":574,"proceedings_title":575,"paperCount":417,"doi":576,"pdf_url":63,"venue_ids":414,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_restup","Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language 
Analysis","10.63317\u002F53due7cg7w44",{"workshop_id":578,"year":49,"full_workshop_id":579,"proceedings_title":580,"paperCount":368,"doi":581,"pdf_url":63,"venue_ids":578,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"salld","lrec2022_ws_salld","Proceedings of the 2nd Workshop on Sentiment Analysis and Linguistic Linked Data","10.63317\u002F2ueor4yvpz4s",{"workshop_id":420,"year":49,"full_workshop_id":583,"proceedings_title":584,"paperCount":585,"doi":586,"pdf_url":63,"venue_ids":420,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_signlang","Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources",32,"10.63317\u002F2rifm6bf4efz",{"workshop_id":588,"year":49,"full_workshop_id":589,"proceedings_title":590,"paperCount":591,"doi":592,"pdf_url":63,"venue_ids":588,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"sigul","lrec2022_ws_sigul","Proceedings of the 1st Annual Meeting of the ELRA\u002FISCA Special Interest Group on Under-Resourced Languages",27,"10.63317\u002F5nb3qu29q9zi",{"workshop_id":594,"year":49,"full_workshop_id":595,"proceedings_title":596,"paperCount":507,"doi":597,"pdf_url":63,"venue_ids":594,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"sltat","lrec2022_ws_sltat","Proceedings of the 7th International Workshop on Sign Language Translation and Avatar Technology: The Junction of the Visual and the Textual: Challenges and 
Perspectives","10.63317\u002F3xfoevzar6ig",{"workshop_id":599,"year":49,"full_workshop_id":600,"proceedings_title":601,"paperCount":476,"doi":602,"pdf_url":63,"venue_ids":599,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"smila","lrec2022_ws_smila","Proceedings of the Workshop on Smiling and Laughter across Contexts and the Life-span within the 13th Language Resources and Evaluation Conference","10.63317\u002F47g2oou8nqdu",{"workshop_id":604,"year":49,"full_workshop_id":605,"proceedings_title":606,"paperCount":368,"doi":607,"pdf_url":63,"venue_ids":604,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"tdle","lrec2022_ws_tdle","Proceedings of the Workshop Towards Digital Language Equality within the 13th Language Resources and Evaluation Conference","10.63317\u002F3cx3opcocn9i",{"workshop_id":609,"year":49,"full_workshop_id":610,"proceedings_title":611,"paperCount":380,"doi":612,"pdf_url":63,"venue_ids":609,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"term","lrec2022_ws_term","Proceedings of the Workshop on Terminology in the 21st century: many faces, many places","10.63317\u002F23pdrqa3onr3",{"workshop_id":448,"year":49,"full_workshop_id":614,"proceedings_title":615,"paperCount":345,"doi":616,"pdf_url":63,"venue_ids":448,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_wildre","Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F34agbocrmxe4",[618,623,630,638,644,649,656,663,670,677,683,690,696,703,711,717,723,729,735,742,748,755,762,768,774,780,786,792,798,805,812,818,825,831,837,843],{"workshop_id":277,"year":29,"full_workshop_id":619,"proceedings_title":620,"paperCount":312,"doi":621,"pdf_url":622,"venue_ids":277,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_bucc","Proceedings of the 17th Workshop on Building and Using Comparable Corpora (BUCC) @ LREC-COLING 2024","10.63317\u002F3tk8bqt3knqn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fbucc\u002F2024.bucc-1.0.pdf",{"workshop_id":624,"year":29,"full_workshop_id":625,"proceedings_title":626,"paperCount":293,"doi":627,"pdf_url":628,"venue_ids":629,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cawl","lrec2024_ws_cawl","Proceedings of the Second Workshop on Computation and Written Language (CAWL) @ LREC-COLING 2024","10.63317\u002F5jv5da4ct2px","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fcawl\u002F2024.cawl-1.0.pdf","cawl|ws",{"workshop_id":631,"year":29,"full_workshop_id":632,"proceedings_title":633,"paperCount":634,"doi":635,"pdf_url":636,"venue_ids":637,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cl4health","lrec2024_ws_cl4health","Proceedings of the First Workshop on Patient-Oriented Language Processing (CL4Health) @ LREC-COLING 
2024",33,"10.63317\u002F3keuurbv54de","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fpolp\u002F2024.cl4health-1.0.pdf","cl4health|ws",{"workshop_id":639,"year":29,"full_workshop_id":640,"proceedings_title":641,"paperCount":507,"doi":642,"pdf_url":643,"venue_ids":639,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cogalex","lrec2024_ws_cogalex","Proceedings of the Workshop on Cognitive Aspects of the Lexicon @ LREC-COLING 2024","10.63317\u002F2gq7359pqznx","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdelite\u002F2024.delite-1.0.pdf",{"workshop_id":645,"year":29,"full_workshop_id":646,"proceedings_title":647,"paperCount":380,"doi":648,"pdf_url":643,"venue_ids":645,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"delite","lrec2024_ws_delite","Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024","10.63317\u002F3pcpupr4j9wb",{"workshop_id":650,"year":29,"full_workshop_id":651,"proceedings_title":652,"paperCount":332,"doi":653,"pdf_url":654,"venue_ids":655,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"determit","lrec2024_ws_determit","Proceedings of the Workshop on DeTermIt! 
Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024","10.63317\u002F32qtrrr46eau","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdetermit\u002F2024.determit-1.0.pdf","determit|ws",{"workshop_id":657,"year":29,"full_workshop_id":658,"proceedings_title":659,"paperCount":293,"doi":660,"pdf_url":661,"venue_ids":662,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"dlnld","lrec2024_ws_dlnld","Proceedings of the Workshop on Deep Learning and Linked Data (DLnLD) @ LREC-COLING 2024","10.63317\u002F543pjjgkbst9","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdlnld\u002F2024.dlnld-1.0.pdf","dlnld|ws",{"workshop_id":664,"year":29,"full_workshop_id":665,"proceedings_title":666,"paperCount":345,"doi":667,"pdf_url":668,"venue_ids":669,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"dmr","lrec2024_ws_dmr","Proceedings of the Fifth International Workshop on Designing Meaning Representations @ LREC-COLING 2024","10.63317\u002F5q4wbidauaxn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdmr\u002F2024.dmr-1.0.pdf","dmr|ws",{"workshop_id":671,"year":29,"full_workshop_id":672,"proceedings_title":673,"paperCount":312,"doi":674,"pdf_url":675,"venue_ids":676,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"ecnlp","lrec2024_ws_ecnlp","Proceedings of the Seventh Workshop on e-Commerce and NLP @ LREC-COLING 
2024","10.63317\u002F4upp3i6m57nt","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fecnlp\u002F2024.ecnlp-1.0.pdf","ecnlp|ws",{"workshop_id":484,"year":29,"full_workshop_id":678,"proceedings_title":679,"paperCount":293,"doi":680,"pdf_url":681,"venue_ids":682,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_eurali","Proceedings of the 2nd Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia (EURALI) @ LREC-COLING 2024","10.63317\u002F3z633pd4tyg2","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Feurali\u002F2024.eurali-1.0.pdf","eurali|ws",{"workshop_id":684,"year":29,"full_workshop_id":685,"proceedings_title":686,"paperCount":687,"doi":688,"pdf_url":689,"venue_ids":684,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"finnlp","lrec2024_ws_finnlp","Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing",34,"10.63317\u002F46uvxxoj8prq","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ffinnlp\u002F2024.finnlp-1.0.pdf",{"workshop_id":495,"year":29,"full_workshop_id":691,"proceedings_title":692,"paperCount":319,"doi":693,"pdf_url":694,"venue_ids":695,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_games","Proceedings of the 10th Workshop on Games and Natural Language Processing @ LREC-COLING 
2024","10.63317\u002F4d46836qy76p","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fgames\u002F2024.games-1.0.pdf","games|ws",{"workshop_id":697,"year":29,"full_workshop_id":698,"proceedings_title":699,"paperCount":286,"doi":700,"pdf_url":701,"venue_ids":702,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"htres","lrec2024_ws_htres","Proceedings of the First Workshop on Holocaust Testimonies as Language Resources (HTRes) @ LREC-COLING 2024","10.63317\u002F47iakwwytvs8","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fhtres\u002F2024.htres-1.0.pdf","htres|ws",{"workshop_id":704,"year":29,"full_workshop_id":705,"proceedings_title":706,"paperCount":707,"doi":708,"pdf_url":709,"venue_ids":710,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"humeval","lrec2024_ws_humeval","Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024",26,"10.63317\u002F3jfrug2yvkgc","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fhumeval\u002F2024.humeval-1.0.pdf","humeval|ws",{"workshop_id":336,"year":29,"full_workshop_id":712,"proceedings_title":713,"paperCount":332,"doi":714,"pdf_url":715,"venue_ids":716,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_isa","Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @ LREC-COLING 
2024","10.63317\u002F5g5ddg8i3y47","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fisa\u002F2024.isa-1.0.pdf","isa|ws",{"workshop_id":349,"year":29,"full_workshop_id":718,"proceedings_title":719,"paperCount":312,"doi":720,"pdf_url":721,"venue_ids":722,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_ldl","Proceedings of the 9th Workshop on Linked Data in Linguistics @ LREC-COLING 2024","10.63317\u002F4gz96nfw2gdk","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fldl\u002F2024.ldl-1.0.pdf","ldl|ws",{"workshop_id":521,"year":29,"full_workshop_id":724,"proceedings_title":725,"paperCount":266,"doi":726,"pdf_url":727,"venue_ids":728,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_legal","Proceedings of the Workshop on Legal and Ethical Issues in Human Language Technologies @ LREC-COLING 2024","10.63317\u002F2wkziwv5fb97","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Flegal\u002F2024.legal-1.0.pdf","legal|ws",{"workshop_id":371,"year":29,"full_workshop_id":730,"proceedings_title":731,"paperCount":634,"doi":732,"pdf_url":733,"venue_ids":734,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_lt4hala","Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ 
LREC-COLING-2024","10.63317\u002F2vavxjcscp8z","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Flt4hala\u002F2024.lt4hala-1.0.pdf","lt4hala|ws",{"workshop_id":736,"year":29,"full_workshop_id":737,"proceedings_title":738,"paperCount":273,"doi":739,"pdf_url":740,"venue_ids":741,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"mathnlp","lrec2024_ws_mathnlp","Proceedings of the 2nd Workshop on Mathematical Natural Language Processing @ LREC-COLING 2024","10.63317\u002F2ydwrzo67zpj","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fmathnlp\u002F2024.mathnlp-1.0.pdf","mathnlp|ws",{"workshop_id":531,"year":29,"full_workshop_id":743,"proceedings_title":744,"paperCount":591,"doi":745,"pdf_url":746,"venue_ids":747,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_mwe","Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024","10.63317\u002F42csaq87z39r","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fmwe\u002F2024.mweud-1.0.pdf","mwe|udw|ws",{"workshop_id":749,"year":29,"full_workshop_id":750,"proceedings_title":751,"paperCount":273,"doi":752,"pdf_url":753,"venue_ids":754,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"neusymbridge","lrec2024_ws_neusymbridge","Proceedings of the Workshop: Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning (NeusymBridge) @ 
LREC-COLING-2024","10.63317\u002F2vsheftp3ti9","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fneusymbridge\u002F2024.neusymbridge-1.0.pdf","neusymbridge|ws",{"workshop_id":541,"year":29,"full_workshop_id":756,"proceedings_title":757,"paperCount":758,"doi":759,"pdf_url":760,"venue_ids":761,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_nlperspectives","Proceedings of the 3rd Workshop on Perspectivist Approaches to NLP (NLPerspectives) @ LREC-COLING 2024",16,"10.63317\u002F2cojnfknheph","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fnlperspectives\u002F2024.nlperspectives-1.0.pdf","nlperspectives|ws",{"workshop_id":393,"year":29,"full_workshop_id":763,"proceedings_title":764,"paperCount":345,"doi":765,"pdf_url":766,"venue_ids":767,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_osact","Proceedings of the 6th Workshop on Open-Source Arabic Corpora and Processing Tools (OSACT) with Shared Tasks on Arabic LLMs Hallucination and Dialect to MSA Machine Translation @ LREC-COLING 2024","10.63317\u002F5d5qxytkajay","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fosact\u002F2024.osact-1.0.pdf","osact|ws",{"workshop_id":398,"year":29,"full_workshop_id":769,"proceedings_title":770,"paperCount":440,"doi":771,"pdf_url":772,"venue_ids":773,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_parlaclarin","Proceedings of the IV Workshop on Creating, Analysing, and Increasing Accessibility of Parliamentary Corpora (ParlaCLARIN) @ LREC-COLING 
2024","10.63317\u002F46c8xka7m8f7","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fparlaclarin\u002F2024.parlaclarin-1.0.pdf","parlaclarin|ws",{"workshop_id":555,"year":29,"full_workshop_id":775,"proceedings_title":776,"paperCount":476,"doi":777,"pdf_url":778,"venue_ids":779,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_politicalnlp","Proceedings of the Second Workshop on Natural Language Processing for Political Sciences @ LREC-COLING 2024","10.63317\u002F3qf3r8pwtkvp","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fpoliticalnlp\u002F2024.politicalnlp-1.0.pdf","politicalnlp|ws",{"workshop_id":403,"year":29,"full_workshop_id":781,"proceedings_title":782,"paperCount":345,"doi":783,"pdf_url":784,"venue_ids":785,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_rail","Proceedings of the Fifth Workshop on Resources for African Indigenous Languages @ LREC-COLING 2024","10.63317\u002F2iyqymd34fup","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frail\u002F2024.rail-1.0.pdf","rail|ws",{"workshop_id":565,"year":29,"full_workshop_id":787,"proceedings_title":788,"paperCount":266,"doi":789,"pdf_url":790,"venue_ids":791,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_rapid","Proceedings of the Fifth Workshop on Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive\u002Fpsychiatric\u002Fdevelopmental impairments @LREC-COLING 
2024","10.63317\u002F5pc4wtot6r3x","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frapid\u002F2024.rapid-1.0.pdf","rapid|ws",{"workshop_id":408,"year":29,"full_workshop_id":793,"proceedings_title":794,"paperCount":286,"doi":795,"pdf_url":796,"venue_ids":797,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_readi","Proceedings of the 3rd Workshop on Tools and Resources for People with REAding DIfficulties (READI) @ LREC-COLING 2024","10.63317\u002F4b546asxrjr6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Freadi\u002F2024.readi-1.0.pdf","readi|ws",{"workshop_id":799,"year":29,"full_workshop_id":800,"proceedings_title":801,"paperCount":273,"doi":802,"pdf_url":803,"venue_ids":804,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"rfp","lrec2024_ws_rfp","Proceedings of the First Workshop on Reference, Framing, and Perspective @ LREC-COLING 2024","10.63317\u002F4xwx3twp9qoy","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frfp\u002F2024.rfp-1.0.pdf","rfp|ws",{"workshop_id":806,"year":29,"full_workshop_id":807,"proceedings_title":808,"paperCount":273,"doi":809,"pdf_url":810,"venue_ids":811,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"safety4convai","lrec2024_ws_safety4convai","Proceedings of Safety4ConvAI: The Third Workshop on Safety for Conversational AI @ LREC-COLING 
2024","10.63317\u002F4johe7jpagg6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsafeai\u002F2024.safety4convai-1.0.pdf","safety4convai|ws",{"workshop_id":420,"year":29,"full_workshop_id":813,"proceedings_title":814,"paperCount":815,"doi":816,"pdf_url":817,"venue_ids":420,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_signlang","Proceedings of the LREC-COLING 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources",45,"10.63317\u002F4e7aayu2htd6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsignlang\u002F2024.signlang-1.0.pdf",{"workshop_id":588,"year":29,"full_workshop_id":819,"proceedings_title":820,"paperCount":821,"doi":822,"pdf_url":823,"venue_ids":824,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_sigul","Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024",50,"10.63317\u002F55wjiy53vy99","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsigul\u002F2024.sigul-1.0.pdf","sigul|ws",{"workshop_id":604,"year":29,"full_workshop_id":826,"proceedings_title":827,"paperCount":368,"doi":828,"pdf_url":829,"venue_ids":830,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_tdle","Proceedings of the Second International Workshop Towards Digital Language Equality (TDLE): Focusing on Sustainability @ LREC-COLING 
2024","10.63317\u002F3p5nrhhwdhbe","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftdle\u002F2024.tdle-1.0.pdf","tdle|ws",{"workshop_id":437,"year":29,"full_workshop_id":832,"proceedings_title":833,"paperCount":345,"doi":834,"pdf_url":835,"venue_ids":836,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_trac","Proceedings of the Fourth Workshop on Threat, Aggression & Cyberbullying @ LREC-COLING-2024","10.63317\u002F2ev2ox49nijy","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftrac\u002F2024.trac-1.0.pdf","trac|ws",{"workshop_id":838,"year":29,"full_workshop_id":839,"proceedings_title":840,"paperCount":758,"doi":841,"pdf_url":842,"venue_ids":838,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"unlp","lrec2024_ws_unlp","Proceedings of the Third Ukrainian Natural Language Processing Workshop (UNLP) @ LREC-COLING 2024","10.63317\u002F5bwu58575ghh","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Funlp\u002F2024.unlp-1.0.pdf",{"workshop_id":448,"year":29,"full_workshop_id":844,"proceedings_title":845,"paperCount":266,"doi":846,"pdf_url":847,"venue_ids":848,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_wildre","Proceedings of the 7th Workshop on Indian Language Data: Resources and 
Evaluation","10.63317\u002F52j5bum2j3fk","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fwildre\u002F2024.wildre-1.0.pdf","wildre|ws",{"conference_id":6,"year":7,"proceedings_title":8,"venue_ids":9,"isbn":10,"issn":11,"doi":12,"publisher":13,"editors":14,"conference_name":15,"conference_acronym":16,"conference_number":17,"conference_location":18,"conference_city":19,"conference_country":20,"conference_start_date":21,"conference_end_date":22,"conference_url":23,"pdf_url":24,"img_conf_url":25,"paperCount":26},{"conference_id":6,"year":7,"proceedings_title":8,"venue_ids":9,"isbn":10,"issn":11,"doi":12,"publisher":13,"editors":14,"conference_name":15,"conference_acronym":16,"conference_number":17,"conference_location":18,"conference_city":19,"conference_country":20,"conference_start_date":21,"conference_end_date":22,"conference_url":23,"pdf_url":24,"img_conf_url":25,"paperCount":26},[852,872,895,924,944,967,984,1001,1027,1047,1067,1084,1119,1133,1162,1182,1205,1222,1254,1279,1302,1324,1344,1363,1383,1399,1425,1437,1475,1495,1524,1542,1589,1637,1654,1677,1694,1715,1735,1758,1778,1801,1821,1842,1859,1882,1901,1921,1941,1964,1981,2029,2055,2078,2098,2114,2135,2164,2185,2218,2235,2258,2276,2297,2335,2351,2368,2383,2412,2426,2447,2470,2490,2512,2534,2551,2568,2588,2602,2652,2672,2689,2712,2731,2751,2779,2799,2823,2849,2863,2880,2916,3012,3030,3043,3066,3086,3103,3118,3153,3178,3194,3208,3237,3272,3298,3326,3340,3357,3391,3490,3509,3529,3555,3581,3632,3662,3680,3716,3739,3758,3785,3846,3865,3904,3922,3942,3973,3988,4006,4047,4077,4094,4119,4164,4184,4207,4232,4267,4288,4310,4327,4344,4366,4395,4434,4452,4470,4486,4512,4535,4557,4574,4591,4615,4636,4667,4691,4710,4726,4747,4766,4784,4809,4832,4859,4876,4890,4907,4925,4949,4966,4983,5012,5026,5053,5077,5096,5117,5135,5152,5178,5198,5219,5247,5278,5307,5329,5352,5375,5404,5425,5445,5464,5484,5506,5528,5580,5604,5627,5648,5670,5690,5709,5750,5768,5787,5805,5830,5857,5874,5891,5906,5927,5945,5969,59
91,6018,6039,6062,6082,6104,6120,6146,6161,6184,6207,6226,6259,6283,6302,6320,6336,6362,6384,6401,6414,6428,6448,6478,6501,6528,6553,6577,6611,6634,6660,6682,6711,6728,6742,6779,6802,6827,6844,6864,6882,6898,6920,6939,6958,6974,7011,7043,7062,7078,7095,7114,7134,7150,7167,7185,7202,7228,7257,7275,7303,7323,7336,7352,7371,7385,7401,7416,7435,7458,7480,7508,7525,7552,7573,7595,7620,7637,7661,7681,7697,7722,7742,7757,7777,7823,7839,7856,7869,7901,7920,7934,7951,7981,7999,8023,8044,8074,8092,8117,8136,8153,8178,8195,8214,8236,8265,8285,8300,8330,8346,8360,8383,8398,8415,8428,8444,8464,8481,8498,8513,8534,8552,8574,8593,8615,8643,8662,8677,8712,8736,8749,8763,8782,8804,8825,8845,8863,8881,8902,8922,8939,8958,8994,9027,9049,9068,9089,9104,9120,9159,9194,9211,9237,9254,9272,9292,9311,9336,9363,9384,9404,9487,9501,9516,9535,9554,9574,9592,9609,9642,9660,9682,9698,9714,9742,9759,9782,9805,9825,9852,9873,9893,9911,9927,9949,9970,9993,10014,10038,10055,10072,10088,10111,10128,10153,10172,10190,10225,10248,10262,10284,10307,10336,10367,10388,10411,10431,10450,10467,10483,10507,10527,10547,10570,10584,10597,10618,10638,10676,10696,10723,10744,10763,10784,10801,10825,10853,10876,10909,10923,10945,10963,10981,10997,11017,11037,11070,11101,11127,11163,11180,11208,11243,11265,11282,11306,11320,11337,11363,11382,11402,11418,11441,11457,11481,11497,11520,11543,11562,11583,11604,11619,11646,11665,11681,11699,11736,11756,11769,11785,11806,11822,11838,11857,11879,11902,11922,11944,11958,11973,11989,12003,12017,12030,12050,12068,12082,12107,12128,12149,12207,12227,12247,12267,12289,12316,12355,12384,12448,12465,12482,12503,12539,12564,12594,12617,12638,12654,12696,12713,12733,12746,12773,12797,12812,12843,12864,13227,13245,13258,13286,13302,13331,13348,13365,13388,13406,13429,13448,13478,13496,13521,13550,13573,13599,13622,13644,13665,13689,13703,13718,13735,13750,13771,13791,13812,13835,13851,13869,13894,13918,13933,13949,13967,13987,14003,14020,14035,14051,14068,14088,14114,14143,14164,
14183,14209,14226,14246,14273,14293,14313,14335,14353,14371,14386,14416,14435,14458,14487,14509,14525,14540,14554,14571,14594,14621,14650,14668,14682,14710,14725,14748,14764,14781,14796,14816,14849,14872,14892,14905,14924,14941,14957,14977,14994,15011,15036,15056,15073,15088,15118,15134,15152,15175,15193,15212,15227,15242,15257,15272,15285,15302,15331,15355,15375,15393,15412,15430,15450,15467,15484,15501,15518,15536,15554,15579,15596,15628,15657,15677,15694,15721,15735,15754,15772,15801,15814,15837,15863,15881,15901,15919,15951,15966,15992,16010,16032,16051,16079,16097,16112,16130,16145,16164,16185,16198,16245,16264,16284,16298,16318,16333,16354,16370,16387,16403,16422,16437,16453,16488,16503,16522,16539,16553,16569,16581,16599,16619,16637,16654,16672,16696,16718,16735,16750,16768,16786,16809,16830,16847,16863,16885,16908,16920,16941,16966,17000,17018,17034,17061,17075,17096,17110,17128,17149,17175,17190,17224,17240,17257,17271,17290,17309,17326,17504,17526,17542,17567,17586,17603,17619,17637,17659,17677,17694,17711,17737,17758,17775,17793,17807,17822,17840,17862,17880,17900,17917,17942,17957,17972,17988,18010,18029,18048,18069,18088,18120,18139,18159,18185,18208,18225,18242,18263,18280,18298,18311,18331,18348,18372,18390,18406,18423,18443,18465,18484,18504,18523,18552,18570,18586,18604,18668,18688,18701,18718,18737,18753,18775,18800,18814,18831,18852,18874,18899,18917,18931,18949,18968,18985,19028,19050,19067,19092,19111,19136,19152,19177,19192,19209,19225,19243,19276,19292,19308,19325,19350,19384,19404,19420,19443,19474,19491,19515,19533,19551,19584,19603,19620,19639,19654,19669,19695,19716,19732,19750,19768,19783,19809,19832,19857,19873,19889,19910,19929,19962,19984,20007,20022,20039,20057,20076,20094,20112,20133,20150,20166,20178,20200,20218,20238,20258,20275,20296,20320,20338,20355,20376,20394,20413,20436,20453,20474,20495,20512,20526,20542,20560,20576,20602,20623,20639,20659,20678,20691,20705,20723,20755,20769,20790,20813,20832,20850,20865,20878,20895,20916,20
931,20945,20957,20971,20985,21010,21025,21046,21060,21081,21102,21115,21154,21175,21200,21215,21233,21253,21275,21290,21305,21321,21344,21380,21398,21412,21424,21440,21459,21476],{"paper_id":853,"title":854,"year":7,"month":188,"day":63,"doi":855,"resource_url":856,"first_page":247,"last_page":857,"pdf_url":858,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":859,"paper_type":860,"authors":861,"abstract":871},"lrec2026-main-001","Beyond Generic Responses: Target-Aware Strategies for Countering Hate Speech","10.63317\u002F2mhddxjwnbmu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-001","18","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.1.pdf","chang-etal-2026-beyond","main",[862,865,868],{"paper_id":853,"author_seq":247,"given_name":863,"surname":864,"affiliation":63,"orcid":63},"Yen-Yu","Chang",{"paper_id":853,"author_seq":232,"given_name":866,"surname":867,"affiliation":63,"orcid":63},"Daryna","Dementieva",{"paper_id":853,"author_seq":218,"given_name":869,"surname":870,"affiliation":63,"orcid":63},"Alexander","Fraser","Effective counter-narratives (CNs) are essential for combating online hate speech, yet generic responses often fail to address the specific needs of targeted groups. This paper proposes a target-aware CN generation framework that incorporates demographic-specific tokens into transformer-based models. Our approach enhances the contextual relevance by introducing target-group tokens into the model’s vocabulary. To assess CN quality, we employ a multifaceted evaluation framework, including automatic metrics and LLM as Judges (JudgeLM). Evaluation with a wide range of language models demonstrates that target group tokens markedly improve contextual relevance of generated CN, particularly in small and medium models, with measurable gains in validity as CN and contextual relevance. 
Even for large instruction-tuned models, such as LLaMA-3, incorporating target-specific information proves effective in enhancing contextual relevance of generated responses. Warning: This paper contains offensive texts that are only used for combating online hate.",{"paper_id":873,"title":874,"year":7,"month":188,"day":63,"doi":875,"resource_url":876,"first_page":877,"last_page":878,"pdf_url":879,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":880,"paper_type":860,"authors":881,"abstract":894},"lrec2026-main-002","Topic-Initiator: A Proactive Chatbot with Personalized Topic RAG for Enhancing Willingness to Converse","10.63317\u002F5incc586qu8f","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-002","19","34","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.2.pdf","matsuo-etal-2026-topic",[882,885,888,891],{"paper_id":873,"author_seq":247,"given_name":883,"surname":884,"affiliation":63,"orcid":63},"Kazuya","Matsuo",{"paper_id":873,"author_seq":232,"given_name":886,"surname":887,"affiliation":63,"orcid":63},"Atsushi","Otsuka",{"paper_id":873,"author_seq":218,"given_name":889,"surname":890,"affiliation":63,"orcid":63},"Narichika","Nomoto",{"paper_id":873,"author_seq":203,"given_name":892,"surname":893,"affiliation":63,"orcid":63},"Makoto","Nakatsuji","Stimulating users’ conversational willingness to converse remains a major challenge in chatbot research. Most existing chatbots respond passively to user inputs, relying on users to select conversation topics, which often reduces their willingness. To address this issue, we propose, Topic-Initiator, a proactive chatbot that initiates conversations with new topics aligned to user interests. It gathers information from external sources (e.g., the web) to obtain potentially novel and engaging topics. 
To support this capability, we also introduce a novel Retrieval-Augmented Generation (RAG) framework, Personalized-Topic RAG (PT-RAG), designed to retrieve new and interesting topics for each user. Unlike existing RAG methods that fails to surface unseen information, PT-RAG leverages the inference capabilities of Large Language Models (LLMs) to identify content that matches the user’s interests but is not yet known to them. Specifically, PT-RAG estimates a user’s interests and knowledge from past interactions and organizes collected information into categories. Then, it uses an LLM to select a category that matches their interests and obtain information not seen in their knowledge from the selected category. Automatic and human evaluations demonstrate that PT-RAG retrieves new and interesting information more accurately and that Topic-Initiator significantly enhances users’ willingness to converse compared to existing methods.",{"paper_id":896,"title":897,"year":7,"month":188,"day":63,"doi":898,"resource_url":899,"first_page":900,"last_page":901,"pdf_url":902,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":903,"paper_type":860,"authors":904,"abstract":923},"lrec2026-main-003","CoachLah: A Singlish–English Parallel Corpus of Health Coaching Conversations with Behavior Goal Annotations","10.63317\u002F54des2aenesc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-003","35","49","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.3.pdf","bojic-etal-2026-coachlah",[905,908,911,914,917,920],{"paper_id":896,"author_seq":247,"given_name":906,"surname":907,"affiliation":63,"orcid":63},"Iva","Bojic",{"paper_id":896,"author_seq":232,"given_name":909,"surname":910,"affiliation":63,"orcid":63},"Mathieu","Ravaut",{"paper_id":896,"author_seq":218,"given_name":912,"surname":913,"affiliation":63,"orcid":63},"Stephanie Hilary 
Xinyi","Ma",{"paper_id":896,"author_seq":203,"given_name":915,"surname":916,"affiliation":63,"orcid":63},"Doreen","Tan",{"paper_id":896,"author_seq":188,"given_name":918,"surname":919,"affiliation":63,"orcid":63},"Andy Hau Yan","Ho",{"paper_id":896,"author_seq":172,"given_name":921,"surname":922,"affiliation":63,"orcid":63},"Andy","Khong","Health coaching (HC) aims to promote sustainable behavior change through goal-oriented dialogue, but research in this area is limited by the scarcity of authentic, transcript-based corpora. Existing datasets are small, English-only, and Western-centric, overlooking cultural and linguistic factors that shape real-world HC interactions. We introduce CoachLah, the first Singlish–English parallel corpus of HC conversations collected from a randomized controlled trial in Singapore. The dataset comprises 36,852 utterances transcribed from almost 160 hours of recorded HC sessions with 51 clients and 4 professional health coaches. Each dialogue is speaker-labeled, transcribed in Singlish, and aligned with high-quality English translations to preserve linguistic and cultural nuances. All sessions include HC summaries written by health coaches after each HC session, from which behavioral goals were manually annotated. To demonstrate the dataset’s utility, we benchmark two downstream tasks: (i) Singlish-to-English translation using fine-tuned open-weight models (e.g., Gemma-2-9B-it) with Low-Rank Adaptation, and (ii) behavioral goal extraction from unstructured HC summaries using span-based modeling (e.g., DeBERTa-v3-base). Together, these contributions establish the first culturally grounded benchmark for low-resource, goal-oriented dialogue research in HC. 
Both the code and the dataset are available at: https:\u002F\u002Fgithub.com\u002FIvaBojic\u002FCoachLah.",{"paper_id":925,"title":926,"year":7,"month":188,"day":63,"doi":927,"resource_url":928,"first_page":929,"last_page":930,"pdf_url":931,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":932,"paper_type":860,"authors":933,"abstract":943},"lrec2026-main-004","Faithful Medical Dialogue Generation Using Homo-Heterogeneous Exemplar-based In-Context Knowledge Grounding","10.63317\u002F43dzmqsdheef","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-004","50","63","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.4.pdf","priya-etal-2026-faithful",[934,937,940],{"paper_id":925,"author_seq":247,"given_name":935,"surname":936,"affiliation":63,"orcid":63},"Priyanshu","Priya",{"paper_id":925,"author_seq":232,"given_name":938,"surname":939,"affiliation":63,"orcid":63},"Hardik","Goyal",{"paper_id":925,"author_seq":218,"given_name":941,"surname":942,"affiliation":63,"orcid":63},"Asif","Ekbal","The growing reliance on tele-healthcare has heightened the demand for accessible and professional health support. Artificial Intelligence (AI)-assisted medical dialogue systems have emerged as key solutions, with Large Language Models (LLMs) advancing the generation tasks. However, their susceptibility to hallucination leads to inaccurate and unreliable information, posing major challenges. To address this, we propose a novel approach to mitigate hallucinations in LLMs by integrating external knowledge and in-context learning mechanisms for faithful medical dialogue generation (MDG). In particular, we devise an In-context Medical Knowledge-grounded Dialogue Generator (IMKDG), a novel plug-and-play retrieval-based framework that leverages external medical knowledge, in-context learning (ICL), and retrieval methods to enable LLMs to generate faithful responses, thereby enhancing their performance on the MDG task. 
We utilize large-scale medical knowledge based on the Unified Medical Language System (UMLS) to retrieve knowledge pertinent to the dialogue context. Further, to enhance the LLMs’ ICL capability for the MDG task, we propose the Homo-Heterogeneous Exemplar Selection (H2ES) method, a novel in-context exemplar retrieval method based on both dialogue context and medical knowledge. Automatic and human evaluations on the MedDialog-EN and CDialog datasets across various LLMs demonstrate the efficacy of the proposed framework in mitigating hallucinations.",{"paper_id":945,"title":946,"year":7,"month":188,"day":63,"doi":947,"resource_url":948,"first_page":949,"last_page":950,"pdf_url":951,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":952,"paper_type":860,"authors":953,"abstract":966},"lrec2026-main-005","Investigating Proactivity in Multimodal Task-Guidance Dialogues","10.63317\u002F48vfm547f6wc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-005","64","76","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.5.pdf","brenna-etal-2026-investigating",[954,957,960,963],{"paper_id":945,"author_seq":247,"given_name":955,"surname":956,"affiliation":63,"orcid":63},"Sofia","Brenna",{"paper_id":945,"author_seq":232,"given_name":958,"surname":959,"affiliation":63,"orcid":63},"Elisabetta","Jezek",{"paper_id":945,"author_seq":218,"given_name":961,"surname":962,"affiliation":63,"orcid":63},"Matthias","Kraus",{"paper_id":945,"author_seq":203,"given_name":964,"surname":965,"affiliation":63,"orcid":63},"Bernardo","Magnini","While proactivity, i.e., the ability to take the initiative and anticipate requests in order to improve the effectiveness of a conversation, has been traditionally investigated in task-oriented dialogues (e.g., booking a restaurant), less work addresses proactive behaviours in task-guidance dialogues (e.g., guide to execute recipes), where the expert instructor is supposed to interact and 
supervise a user in a real-world setting. We analyse a corpus of video-recorded task-guided dialogues and explore two key features of proactivity in this context: (i) the impact of multimodal features, with respect to chat-based dialogues; (ii) the impact of instructions and actions grounded in a real situation. Through a comparison between task-oriented and task-guidance annotated dialogues, we find that task-guided dialogues are highly collaborative interactions, where preventing mistakes and maintaining the correct process order is essential for achieving the dialogue goal. In addition, the video information available in the task-guidance setting can be corrective for false positive proactive behaviours, although without introducing substantial differences. To support our analysis and to foster further research we provide a corpus of multimodal task-guidance dialogues annotated according to proactivity.",{"paper_id":968,"title":969,"year":7,"month":188,"day":63,"doi":970,"resource_url":971,"first_page":972,"last_page":973,"pdf_url":974,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":975,"paper_type":860,"authors":976,"abstract":983},"lrec2026-main-006","Investigating How LLMs Propagate Female Stereotypes: Comparing What Models Say via Prompts with What They Represent in Their Embeddings","10.63317\u002F48i8c4bmbog3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-006","77","92","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.6.pdf","nuez-etal-2026-investigating",[977,980],{"paper_id":968,"author_seq":247,"given_name":978,"surname":979,"affiliation":63,"orcid":63},"Andrea Valderrey","Nuñez",{"paper_id":968,"author_seq":232,"given_name":981,"surname":982,"affiliation":63,"orcid":63},"Jelke","Bloem","As Large Language Models (LLMs) are increasingly deployed in sensitive domains, concerns about their encoding and reproduction of social bias have intensified. 
We examine how gender stereotypes are represented in embeddings and expressed in outputs across three models: BERT, base LLaMA-2-7b, and instruction-tuned LLaMA-2-7b-Chat. Focusing on seven female-oriented stereotype categories, we compare embedding-level bias using Directional Embedding Probing with output-level behavior measured via masked token prediction (BERT) and narrative prompt completions (LLaMA models). LLaMA-2-Chat showed the strongest representational–behavioral alignment, with female-aligned scores ranging from 60% to 100% and a significant point-biserial correlation (r = 0.55, p = 0.0008). BERT exhibited weaker alignment (0%–60%; r = 0.39, p = 0.054), while base LLaMA-2 showed intermediate but inconsistent patterns. These findings suggest that instruction tuning is associated with clearer alignment between internal representations and generated outputs, while prompt design plays a critical role in surfacing latent bias. The study contributes to fairness research by emphasizing the need to assess both internal representations and their behavioral expression in LLMs.",{"paper_id":985,"title":986,"year":7,"month":188,"day":63,"doi":987,"resource_url":988,"first_page":989,"last_page":990,"pdf_url":991,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":992,"paper_type":860,"authors":993,"abstract":1000},"lrec2026-main-007","Why So Separate: Analyzing In-Context Learning from a Vector Space Perspective","10.63317\u002F2o2ek4komzqz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-007","93","106","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.7.pdf","kalmbach-etal-2026-why",[994,997],{"paper_id":985,"author_seq":247,"given_name":995,"surname":996,"affiliation":63,"orcid":63},"Tobias","Kalmbach",{"paper_id":985,"author_seq":232,"given_name":998,"surname":999,"affiliation":63,"orcid":63},"Sandipan","Sikdar","In-context learning (ICL) is a popular prompting strategy for large 
language models. ICL allows models to learn tasks using demonstrative examples alone, without any weight updates or training. Nevertheless, it is still largely unclear why ICL works. In this paper, we investigate ICL from a new viewpoint, namely a vector space perspective, and extract insights for ICL from this analysis. In our experiments, we extract the hidden representations, i.e., embeddings, created by a large language model when passing an ICL prompt through it. We find that these embeddings generated by large language models are separable in the vector space when applying ICL. The degree of separability is dependent on the difficulty of the task, the size of the model and other factors, like the labels of demonstrative examples. We also find that, especially for large models, the separability is indicative of the classification performance. As an application, we utilize our findings to explain peculiarities of ICL and to select demonstrative examples for ICL. Experiments across multiple datasets show that this way of selecting examples consistently outperforms the commonly used random selection method.",{"paper_id":1002,"title":1003,"year":7,"month":188,"day":63,"doi":1004,"resource_url":1005,"first_page":1006,"last_page":1007,"pdf_url":1008,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1009,"paper_type":860,"authors":1010,"abstract":1026},"lrec2026-main-008","Explaining Explanations: Interpretability Methods for Discourse Analysis of Transformer Attention 
Maps","10.63317\u002F3mygtrz7g6vj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-008","107","116","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.8.pdf","escouflaire-etal-2026-explaining",[1011,1014,1017,1020,1023],{"paper_id":1002,"author_seq":247,"given_name":1012,"surname":1013,"affiliation":63,"orcid":63},"Louis","Escouflaire",{"paper_id":1002,"author_seq":232,"given_name":1015,"surname":1016,"affiliation":63,"orcid":63},"Jérémie","Bogaert",{"paper_id":1002,"author_seq":218,"given_name":1018,"surname":1019,"affiliation":63,"orcid":63},"Antonin","Descampe",{"paper_id":1002,"author_seq":203,"given_name":1021,"surname":1022,"affiliation":63,"orcid":63},"Cédrick","Fairon",{"paper_id":1002,"author_seq":188,"given_name":1024,"surname":1025,"affiliation":63,"orcid":63},"Francois-Xavier","Standaert","While LLMs have achieved state-of-the-art performance in NLP, their opacity hinders a human understanding of their predictions. Standard explainability techniques often prioritize technical faithfulness over linguistic plausibility. This paper argues for an interdisciplinary approach that integrates discourse analysis to critically interpret model explanations. We conduct a case study using CamemBERT, fine-tuned to classify French journalistic texts as news or opinion. We employ Layer-wise Relevance Propagation to generate attention maps for 1,000 test articles and analyze the token-level relevance scores through both in-depth qualitative analysis and a quantitative ranking of high-attention tokens. Our findings reveal that CamemBERT successfully captures genre-specific linguistic markers: it attends to cues of reported speech and temporal anchors in news, and to expressive punctuation, evaluative adjectives, and first-person pronouns in opinion. The discourse-analytic lens moves us beyond superficial observations, demonstrating how the model interprets features like punctuation as structural or stylistic conventions. 
We argue that integrating linguistic expertise into the explainability pipeline yields more nuanced, human-readable explanations.",{"paper_id":1028,"title":1029,"year":7,"month":188,"day":63,"doi":1030,"resource_url":1031,"first_page":1032,"last_page":1033,"pdf_url":1034,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1035,"paper_type":860,"authors":1036,"abstract":1046},"lrec2026-main-009","TempPerturb-Eval: On the Joint Effects of Internal Temperature and External Perturbations in RAG Robustness","10.63317\u002F3v28zfbo6agu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-009","117","127","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.9.pdf","zhou-etal-2026-tempperturb",[1037,1040,1043],{"paper_id":1028,"author_seq":247,"given_name":1038,"surname":1039,"affiliation":63,"orcid":63},"Yongxin","Zhou",{"paper_id":1028,"author_seq":232,"given_name":1041,"surname":1042,"affiliation":63,"orcid":63},"Philippe","Mulhem",{"paper_id":1028,"author_seq":218,"given_name":1044,"surname":1045,"affiliation":63,"orcid":63},"Didier","Schwab","The evaluation of Retrieval-Augmented Generation (RAG) systems typically examines retrieval quality and generation parameters like temperature in isolation, overlooking their interaction. This work presents a systematic investigation of how text perturbations (simulating noisy retrieval) interact with temperature settings across multiple LLM runs. We propose a comprehensive RAG Perturbation-Temperature Analysis Framework that subjects retrieved documents to three distinct perturbation types across varying temperature settings. Through extensive experiments on HotpotQA with both open-source and proprietary LLMs, we demonstrate that performance degradation follows distinct patterns: high-temperature settings consistently amplify vulnerability to perturbations, while certain perturbation types exhibit non-linear sensitivity across the temperature range. 
Our work yields three key contributions: (1) a diagnostic benchmark for assessing RAG robustness, (2) an analytical framework for quantifying perturbation-temperature interactions, and (3) practical guidelines for model selection and parameter tuning under noisy retrieval conditions.",{"paper_id":1048,"title":1049,"year":7,"month":188,"day":63,"doi":1050,"resource_url":1051,"first_page":1052,"last_page":1053,"pdf_url":1054,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1055,"paper_type":860,"authors":1056,"abstract":1066},"lrec2026-main-010","Refusal Steering: Fine-grained Control over LLM Refusal Behaviour for Sensitive Topics","10.63317\u002F2zypcp2r538o","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-010","128","151","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.10.pdf","garcaferrero-etal-2026-refusal",[1057,1060,1063],{"paper_id":1048,"author_seq":247,"given_name":1058,"surname":1059,"affiliation":63,"orcid":63},"Iker","García-Ferrero",{"paper_id":1048,"author_seq":232,"given_name":1061,"surname":1062,"affiliation":63,"orcid":63},"David","Montero",{"paper_id":1048,"author_seq":218,"given_name":1064,"surname":1065,"affiliation":63,"orcid":63},"Roman","Orus","We introduce Refusal Steering, an inference-time method to exercise fine-grained control over Large Language Models refusal behaviour on politically sensitive topics without retraining. We replace fragile pattern-based refusal detection with an LLM-as-a-judge that assigns refusal confidence scores and we propose a ridge-regularized variant to compute steering vectors that better isolate the refusal–compliance direction. On Qwen3-Next-80B-A3B-Thinking, our method removes the refusal behaviour of the model around politically sensitive topics while maintaining safety on JailbreakBench and near-baseline performance on general benchmarks. 
The approach generalizes across 4B and 80B models and can also induce targeted refusals when desired. We analyze the steering vectors and show that refusal signals concentrate in deeper layers of the transformer and are distributed across many dimensions. Together, these results demonstrate that activation steering can remove political refusal behaviour while retaining safety alignment for harmful content, offering a practical path to controllable, transparent moderation at inference time.",{"paper_id":1068,"title":1069,"year":7,"month":188,"day":63,"doi":1070,"resource_url":1071,"first_page":1072,"last_page":1073,"pdf_url":1074,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1075,"paper_type":860,"authors":1076,"abstract":1083},"lrec2026-main-011","To Predict or Not to Predict? Towards Reliable Uncertainty Estimation in the Presence of Noise","10.63317\u002F5ecb4bj2cv4v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-011","152","168","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.11.pdf","khallaf-etal-2026-predict",[1077,1080],{"paper_id":1068,"author_seq":247,"given_name":1078,"surname":1079,"affiliation":63,"orcid":63},"Nouran","Khallaf",{"paper_id":1068,"author_seq":232,"given_name":1081,"surname":1082,"affiliation":63,"orcid":63},"Serge","Sharoff","This study examines the role of uncertainty estimation (UE) methods in multilingual text classification under noisy and non-topical conditions. Using a complex-vs-simple sentence classification task across several languages, we evaluate a range of UE techniques against a range of metrics to assess their quality. Results indicate that while methods relying on softmax outputs remain competitive in high-resource in-domain settings, their reliability declines in low-resource or domain-shift scenarios. 
In contrast, Monte Carlo dropout approaches demonstrate consistently strong performance across all languages, offering more robust calibration, stable decision thresholds, and greater discriminative power even under adverse conditions. We further demonstrate the positive impact of UE on non-topical classification: selectively abstaining from predicting the 10% most uncertain instances increases the macro F1 score from 0.81 to 0.85 in the Readme task. By integrating UE with trustworthiness metrics, this study provides actionable insights for developing more reliable NLP systems in real-world multilingual environments.",{"paper_id":1085,"title":1086,"year":7,"month":188,"day":63,"doi":1087,"resource_url":1088,"first_page":1089,"last_page":1090,"pdf_url":1091,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1092,"paper_type":860,"authors":1093,"abstract":1118},"lrec2026-main-012","An Extreme Multi-label Text Classification (XMTC) Library Dataset: What If We Took \"Use of Practical AI in Digital Libraries\" 
Seriously?","10.63317\u002F5kag6gjg636f","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-012","169","184","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.12.pdf","dsouza-etal-2026-extreme",[1094,1097,1100,1103,1106,1109,1112,1115],{"paper_id":1085,"author_seq":247,"given_name":1095,"surname":1096,"affiliation":63,"orcid":63},"Jennifer","D'Souza",{"paper_id":1085,"author_seq":232,"given_name":1098,"surname":1099,"affiliation":63,"orcid":63},"Sameer","Sadruddin",{"paper_id":1085,"author_seq":218,"given_name":1101,"surname":1102,"affiliation":63,"orcid":63},"Maximilian","Kaehler",{"paper_id":1085,"author_seq":203,"given_name":1104,"surname":1105,"affiliation":63,"orcid":63},"Andrea","Salfinger",{"paper_id":1085,"author_seq":188,"given_name":1107,"surname":1108,"affiliation":63,"orcid":63},"Luca","Zaccagna",{"paper_id":1085,"author_seq":172,"given_name":1110,"surname":1111,"affiliation":63,"orcid":63},"Francesca","Incitti",{"paper_id":1085,"author_seq":155,"given_name":1113,"surname":1114,"affiliation":63,"orcid":63},"Lauro","Snidaro",{"paper_id":1085,"author_seq":138,"given_name":1116,"surname":1117,"affiliation":63,"orcid":63},"Osma","Suominen","Subject indexing is vital for discovery but hard to sustain at scale and across languages. We release a large bilingual (English\u002FGerman) corpus of catalog records annotated with the Integrated Authority File (GND), plus a machine-actionable GND taxonomy. The resource enables ontology-aware multi-label classification, mapping text to authority terms, and agent-assisted cataloging with reproducible, authority-grounded evaluation. We provide a brief statistical profile and qualitative error analyses of three systems. 
We invite the community to assess not only accuracy but usefulness and transparency, toward authority-anchored AI co-pilots that amplify catalogers’ work.",{"paper_id":1120,"title":1121,"year":7,"month":188,"day":63,"doi":1122,"resource_url":1123,"first_page":1124,"last_page":1125,"pdf_url":1126,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1127,"paper_type":860,"authors":1128,"abstract":1132},"lrec2026-main-013","A Historical Database for the Study of Obstruent-Lateral Palatalization in Ibero-Romance","10.63317\u002F3wedhjzs4jt9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-013","185","193","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.13.pdf","covelo-2026-historical",[1129],{"paper_id":1120,"author_seq":247,"given_name":1130,"surname":1131,"affiliation":63,"orcid":63},"Andrea García","Covelo","Studying irregular sound changes requires documenting not only words that underwent the change but also those that did not. Obstruent-lateral (OL) palatalization in Ibero-Romance, i.e., Galician, Portuguese, and Spanish, is one such change, exhibiting three distinctive patterns: unusual distribution (\u002Fpl fl kl\u002F typically palatalized but \u002Fbl gl\u002F rarely did), irregular implementation (not all eligible words underwent palatalization), and variable outcomes (dependent on obstruent voicing and cluster word position). This paper presents a cross-linguistic historical dataset of 659 inherited words from principally Galician, Portuguese, and Spanish, with and without palatalization, traceable to etyma containing OL clusters. The dataset draws on etymological dictionaries, philological works, and historical corpora. A digitalized version of the Diccionario Crítico Etimológico Castellano e Hispánico (Corominas and Pascual, 2012) served as the backbone for systematically identifying etyma containing OL clusters. 
The compiled corpus contains 473 words with certain etymologies and comparable coverage across the three languages. By providing the first comprehensive compilation of both palatal and non-palatal historical evidence, this dataset enables the systematic study of OL palatalization in Ibero-Romance.",{"paper_id":1134,"title":1135,"year":7,"month":188,"day":63,"doi":1136,"resource_url":1137,"first_page":1138,"last_page":1139,"pdf_url":1140,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1141,"paper_type":860,"authors":1142,"abstract":1161},"lrec2026-main-014","Is Clinical Text Enough? A Multimodal Study on Mortality Prediction in Heart Failure Patients","10.63317\u002F47hsfchk79n6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-014","194","206","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.14.pdf","khettari-etal-2026-is",[1143,1146,1149,1152,1155,1158],{"paper_id":1134,"author_seq":247,"given_name":1144,"surname":1145,"affiliation":63,"orcid":63},"Oumaima El","Khettari",{"paper_id":1134,"author_seq":232,"given_name":1147,"surname":1148,"affiliation":63,"orcid":63},"Virgile","Barthet",{"paper_id":1134,"author_seq":218,"given_name":1150,"surname":1151,"affiliation":63,"orcid":63},"Guillaume","Hocquet",{"paper_id":1134,"author_seq":203,"given_name":1153,"surname":1154,"affiliation":63,"orcid":63},"Joconde","Weller",{"paper_id":1134,"author_seq":188,"given_name":1156,"surname":1157,"affiliation":63,"orcid":63},"Emmanuel","Morin",{"paper_id":1134,"author_seq":172,"given_name":1159,"surname":1160,"affiliation":63,"orcid":63},"Pierre","Zweigenbaum","Accurate short-term mortality prediction in heart failure (HF) remains challenging, particularly when relying on structured electronic health record (EHR) data alone. We evaluate transformer-based models on a French HF cohort, comparing text-only, structured-only, multimodal, and LLM-based approaches. 
Our results show that enriching clinical text with entity-level representations improves prediction over CLS embeddings alone, and that supervised multimodal fusion of text and structured variables achieves the best overall performance. In contrast, large language models perform inconsistently across modalities and decoding strategies, with text-only prompts outperforming structured or multimodal inputs. These findings highlight that entity-aware multimodal transformers offer the most reliable solution for short-term HF outcome prediction, while current LLM prompting remains limited for clinical decision support.",{"paper_id":1163,"title":1164,"year":7,"month":188,"day":63,"doi":1165,"resource_url":1166,"first_page":1167,"last_page":1168,"pdf_url":1169,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1170,"paper_type":860,"authors":1171,"abstract":1181},"lrec2026-main-015","HistoriQA-ThirdRepublic: Multi-Hop Question Answering Corpus for Historical Research, Parliamentary Debates from the French Third Republic (1870-1940)","10.63317\u002F3xibvwpihfoe","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-015","207","223","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.15.pdf","pellet-etal-2026-historiqa",[1172,1175,1178],{"paper_id":1163,"author_seq":247,"given_name":1173,"surname":1174,"affiliation":63,"orcid":63},"Aurelien","Pellet",{"paper_id":1163,"author_seq":232,"given_name":1176,"surname":1177,"affiliation":63,"orcid":63},"Marie Anna","Puren",{"paper_id":1163,"author_seq":218,"given_name":1179,"surname":1180,"affiliation":63,"orcid":63},"Julien","Perez","We present HistoriQA-ThirdRepublic: a French-language dataset of multi-hop historical questions derived from parliamentary debates and newspapers of the French Third Republic. 
Designed in collaboration with a historian, the corpus captures complex reasoning patterns typical of historical inquiry, including cross-source synthesis, temporal reasoning, and the integration of sparse evidence. The dataset is made of 1782 questions and emphasizes multi-hop connections across heterogeneous historical documents, providing a resource for evaluating retrieval-augmented and large language model systems in domain-specific contexts. We describe the methodology for constructing the corpus, including the selection and alignment of sources, question validation, and metadata integration. While the dataset focuses on French historical documents, our methodology can be readily adapted to other languages and national corpora. Finally, we demonstrate how the corpus can support realistic evaluation scenarios for multi-hop question answering, bridging the gap between NLP benchmarks and the needs of historical scholarship.",{"paper_id":1183,"title":1184,"year":7,"month":188,"day":63,"doi":1185,"resource_url":1186,"first_page":1187,"last_page":1188,"pdf_url":1189,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1190,"paper_type":860,"authors":1191,"abstract":1204},"lrec2026-main-016","BURMESE-SAN: Burmese NLP Benchmark for Evaluating Large Language Models","10.63317\u002F54dxgzy8h77c","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-016","224","245","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.16.pdf","aung-etal-2026-burmese",[1192,1195,1198,1201],{"paper_id":1183,"author_seq":247,"given_name":1193,"surname":1194,"affiliation":63,"orcid":63},"Thura","Aung",{"paper_id":1183,"author_seq":232,"given_name":1196,"surname":1197,"affiliation":63,"orcid":63},"Jann Railey","Montalan",{"paper_id":1183,"author_seq":218,"given_name":1199,"surname":1200,"affiliation":63,"orcid":63},"Jian 
Gang","Ngui",{"paper_id":1183,"author_seq":203,"given_name":1202,"surname":1203,"affiliation":63,"orcid":63},"Peerat","Limkonchotiwat","We introduce BURMESE-SAN, the first holistic benchmark that systematically evaluates large language models (LLMs) for Burmese across three core NLP competencies: understanding (NLU), reasoning (NLR), and generation (NLG). BURMESE-SAN consolidates seven subtasks spanning these competencies, including Question Answering, Sentiment Analysis, Toxicity Detection, Causal Reasoning, Natural Language Inference, Abstractive Summarization, and Machine Translation, several of which were previously unavailable for Burmese. The benchmark is constructed through a rigorous native-speaker-driven process to ensure linguistic naturalness, fluency, and cultural authenticity while minimizing translation-induced artifacts. We conduct a large-scale evaluation of both open-weight and commercial LLMs to examine challenges in Burmese modeling arising from limited pretraining coverage, rich morphology, and syntactic variation. Our results show that Burmese performance depends more on architectural design, language representation, and instruction tuning than on model scale alone. In particular, Southeast Asia regional fine-tuning and newer model generations yield substantial gains. Finally, we release BURMESE-SAN as a public leaderboard to support systematic evaluation and sustained progress in Burmese and other low-resource languages. 
https:\u002F\u002Fleaderboard.sea-lion.ai\u002Fdetailed\u002FMY",{"paper_id":1206,"title":1207,"year":7,"month":188,"day":63,"doi":1208,"resource_url":1209,"first_page":1210,"last_page":1211,"pdf_url":1212,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1213,"paper_type":860,"authors":1214,"abstract":1221},"lrec2026-main-017","Assessing the Political Fairness of Multilingual LLMs: A Case Study Based on a 21-Way Multiparallel EuroParl Dataset","10.63317\u002F3wwi6bzcsd86","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-017","246","265","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.17.pdf","lerner-etal-2026-assessing",[1215,1218],{"paper_id":1206,"author_seq":247,"given_name":1216,"surname":1217,"affiliation":63,"orcid":63},"Paul","Lerner",{"paper_id":1206,"author_seq":232,"given_name":1219,"surname":1220,"affiliation":63,"orcid":63},"François","Yvon","The political biases of Large Language Models (LLMs) are usually assessed by simulating their answers to English surveys. In this work, we propose an alternative framing of political biases, relying on principles of fairness in multilingual translation. We systematically compare the translation quality of speeches in the European Parliament (EP), observing systematic differences with majority parties from left and right being better translated than outsider parties. This study is made possible by a new, 21-way multiparallel version of EuroParl, the parliamentary proceedings of the EP, which includes the political affiliations of each speaker. The dataset consists of 1.5M sentences for a total of 40M words and 249M characters. 
It covers three years, 1000+ speakers, 7 countries, 12 EU parties, 25 EU committees, and hundreds of national parties.",{"paper_id":1223,"title":1224,"year":7,"month":188,"day":63,"doi":1225,"resource_url":1226,"first_page":1227,"last_page":1228,"pdf_url":1229,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1230,"paper_type":860,"authors":1231,"abstract":1253},"lrec2026-main-018","AfriStereo: A Culturally Grounded Dataset for Evaluating Stereotypical Bias in Large Language Models","10.63317\u002F58oaqcxpogdy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-018","266","280","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.18.pdf","beux-etal-2026-afristereo",[1232,1235,1238,1241,1244,1247,1250],{"paper_id":1223,"author_seq":247,"given_name":1233,"surname":1234,"affiliation":63,"orcid":63},"Yann Le","Beux",{"paper_id":1223,"author_seq":232,"given_name":1236,"surname":1237,"affiliation":63,"orcid":63},"Oluchi","Audu",{"paper_id":1223,"author_seq":218,"given_name":1239,"surname":1240,"affiliation":63,"orcid":63},"Oche David","Ankeli",{"paper_id":1223,"author_seq":203,"given_name":1242,"surname":1243,"affiliation":63,"orcid":63},"Dhananjay","Balakrishnan",{"paper_id":1223,"author_seq":188,"given_name":1245,"surname":1246,"affiliation":63,"orcid":63},"Melissah","Weya",{"paper_id":1223,"author_seq":172,"given_name":1248,"surname":1249,"affiliation":63,"orcid":63},"Marie Daniella","Ralaiarinosy",{"paper_id":1223,"author_seq":155,"given_name":1251,"surname":1252,"affiliation":63,"orcid":63},"Ignatius","Ezeani","Existing AI bias evaluation benchmarks largely reflect Western perspectives, leaving African contexts underrepresented and enabling harmful stereotypes in applications across various domains. To address this gap, we introduce AfriStereo, the first open-source African stereotype dataset and evaluation framework grounded in local socio-cultural contexts. 
Through community engaged efforts across Senegal, Kenya, and Nigeria, we collect 1,163 stereotypes spanning gender, ethnicity, religion, age, and profession. Using few-shot prompting with human-in-the-loop validation, we augment the dataset to over 5,000 stereotype–antistereotype pairs. Entries are validated through semantic clustering and manual annotation by culturally informed reviewers. Preliminary evaluation of language models reveals that nine of eleven models exhibit statistically significant bias in our setup, with Bias Preference Ratios (BPR) ranging from 0.63 to 0.78 (p ≤ 0.05), indicating systematic preferences for stereotypes over antistereotypes, particularly across age, profession, and gender dimensions. Domain-specific models appear to show weaker bias in our setup, suggesting task-specific training may mitigate some associations. Looking ahead, AfriStereo opens pathways for future research on culturally grounded bias evaluation and mitigation, offering key methodologies for the AI community on building more equitable, context-aware, and globally inclusive NLP technologies.",{"paper_id":1255,"title":1256,"year":7,"month":188,"day":63,"doi":1257,"resource_url":1258,"first_page":1259,"last_page":1260,"pdf_url":1261,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1262,"paper_type":860,"authors":1263,"abstract":1278},"lrec2026-main-019","Judging Instruction Responses in a Low-Resource Language: A Case Study on 
Basque","10.63317\u002F32e7ij6myh5i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-019","281","298","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.19.pdf","ponce-etal-2026-judging",[1264,1266,1269,1272,1275],{"paper_id":1255,"author_seq":247,"given_name":1061,"surname":1265,"affiliation":63,"orcid":63},"Ponce",{"paper_id":1255,"author_seq":232,"given_name":1267,"surname":1268,"affiliation":63,"orcid":63},"Harritxu","Gete",{"paper_id":1255,"author_seq":218,"given_name":1270,"surname":1271,"affiliation":63,"orcid":63},"Thierry","Etchegoyhen",{"paper_id":1255,"author_seq":203,"given_name":1273,"surname":1274,"affiliation":63,"orcid":63},"Irune","Zubiaga",{"paper_id":1255,"author_seq":188,"given_name":1276,"surname":1277,"affiliation":63,"orcid":63},"Aitor","Soroa","Evaluating the quality of answers to a given instruction is a demanding and time-consuming task, limiting the scalability of human assessment. Large language models (LLMs) have been proposed as automatic judges to reduce this effort, but their reliability in low-resource contexts remains uncertain. Additionally, the premise that humans are reliable judges of fine-grained response quality needs to be assessed as well, if correlation with automated judges on this task is to be considered a gold standard. In this work, we investigate the performance of various LLM-as-a-judge in a low-resource scenario, namely Basque, and evaluate its correlation with human judgements. Additionally, we measure the agreement between human judgments themselves, to assess their viability as a valid reference. To perform our experiments, we translated and manually post-edited the Just-Eval benchmark, a suite of benchmarks tackling fine-grained aspects of response quality. We also extend the evaluation with a novel category aimed at judging both language consistency and grammaticality. 
Our results show that state of the art models exhibit fairly poor correlations with humans and amongst themselves, calling for the development of dedicated LLM-as-a-judge models for this language.",{"paper_id":1280,"title":1281,"year":7,"month":188,"day":63,"doi":1282,"resource_url":1283,"first_page":1284,"last_page":1285,"pdf_url":1286,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1287,"paper_type":860,"authors":1288,"abstract":1301},"lrec2026-main-020","Appeal, Align, Divide? Stance Detection for Group-Directed Messages in German Parliamentary Debates","10.63317\u002F3grc7kgkrm24","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-020","299","318","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.20.pdf","rehbein-etal-2026-appeal",[1289,1292,1295,1298],{"paper_id":1280,"author_seq":247,"given_name":1290,"surname":1291,"affiliation":63,"orcid":63},"Ines","Rehbein",{"paper_id":1280,"author_seq":232,"given_name":1293,"surname":1294,"affiliation":63,"orcid":63},"Maris Leander","Buttmann",{"paper_id":1280,"author_seq":218,"given_name":1296,"surname":1297,"affiliation":63,"orcid":63},"Julian","Schlenker",{"paper_id":1280,"author_seq":203,"given_name":1299,"surname":1300,"affiliation":63,"orcid":63},"Simone Paolo","Ponzetto","This paper presents a new benchmark for detecting group-based appeals, i.e., positive or negative references towards social groups, in German parliamentary debates. In the first step, group mentions are identified as targets for stance detection. In the next step, three human annotators assign stance labels to the group mentions, coding the speaker’s perspective towards the specific group. The created benchmark data is then used to investigate the capacity of Large Language Models (LLMs) for detecting politicians’ stances towards social groups. 
We explore the potential of different prompting strategies (zero-shot prompting, few-shot prompting, Chain-of-Thought) for this task and compare the results to a supervised BERT baseline, showing that in low-resource scenarios LLMs can outperform smaller fine-tuned models without the need for annotating large datasets.",{"paper_id":1303,"title":1304,"year":7,"month":188,"day":63,"doi":1305,"resource_url":1306,"first_page":1307,"last_page":1308,"pdf_url":1309,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1310,"paper_type":860,"authors":1311,"abstract":1323},"lrec2026-main-021","Report-based Recommendations for Policy Making and Agency Operations: Dataset and LLM Evaluation","10.63317\u002F22rdsbxtnqe5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-021","319","332","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.21.pdf","edwards-etal-2026-report",[1312,1315,1317,1320],{"paper_id":1303,"author_seq":247,"given_name":1313,"surname":1314,"affiliation":63,"orcid":63},"Aleksandra","Edwards",{"paper_id":1303,"author_seq":232,"given_name":1316,"surname":1314,"affiliation":63,"orcid":63},"Thomas",{"paper_id":1303,"author_seq":218,"given_name":1318,"surname":1319,"affiliation":63,"orcid":63},"Jose","Camacho-Collados",{"paper_id":1303,"author_seq":203,"given_name":1321,"surname":1322,"affiliation":63,"orcid":63},"Alun","Preece","Large Language Models (LLMs) are extensively used in text generation tasks. These generative capabilities bring us to a point where LLMs could potentially provide useful insights in policy making or agency operations. In this paper, we introduce a new task consisting of generating recommendations which can be used to inform future actions and improvements of agencies work within private and public organisations. In particular, we present the first benchmark and coherent evaluation for developing recommendation systems to inform organisation policies. 
This task is clearly different from usual product or user recommendation systems, but rather aims at providing a basis to suggest policy improvements based on the conclusions drawn from reports. Our results demonstrate that state-of-the-art LLMs have the potential to emphasize and reflect on key issues and learning points within generated recommendations.",{"paper_id":1325,"title":1326,"year":7,"month":188,"day":63,"doi":1327,"resource_url":1328,"first_page":1329,"last_page":1330,"pdf_url":1331,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1332,"paper_type":860,"authors":1333,"abstract":1343},"lrec2026-main-022","ConceptKT: A Benchmark for Concept-Level Deficiency Prediction in Knowledge Tracing","10.63317\u002F34ncpc8ovxbz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-022","333","343","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.22.pdf","kang-etal-2026-conceptkt",[1334,1337,1340],{"paper_id":1325,"author_seq":247,"given_name":1335,"surname":1336,"affiliation":63,"orcid":63},"Yu-Chen","Kang",{"paper_id":1325,"author_seq":232,"given_name":1338,"surname":1339,"affiliation":63,"orcid":63},"Yu-Chien","Tang",{"paper_id":1325,"author_seq":218,"given_name":1341,"surname":1342,"affiliation":63,"orcid":63},"An-Zi","Yen","Knowledge Tracing (KT) is a critical technique for modeling student knowledge to support personalized learning. However, most KT systems focus on binary correctness prediction and cannot diagnose the underlying conceptual misunderstandings that lead to errors. Such fine-grained diagnostic feedback is essential for designing targeted instruction and effective remediation. In this work, we introduce the task of concept-level deficiency prediction, which extends traditional KT by identifying the specific concepts a student is likely to struggle with on future problems. 
We present ConceptKT, a dataset annotated with labels that capture both the concepts required to solve each question and the missing concepts underlying incorrect responses. We investigate in-context learning approaches to KT and evaluate the diagnostic capabilities of various Large Language Models (LLMs) and Large Reasoning Models (LRMs). Different strategies for selecting informative historical records are explored. Experimental results demonstrate that selecting response histories based on conceptual alignment and semantic similarity leads to improved performance on both correctness prediction and concept-level deficiency identification.",{"paper_id":1345,"title":1346,"year":7,"month":188,"day":63,"doi":1347,"resource_url":1348,"first_page":1349,"last_page":1350,"pdf_url":1351,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1352,"paper_type":860,"authors":1353,"abstract":1362},"lrec2026-main-023","Open-access Dataset on Acceptability Ratings of Korean Clausal Constructions by Humans and GPT Models","10.63317\u002F2icd7h29b849","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-023","344","356","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.23.pdf","shin-etal-2026-open",[1354,1357,1360],{"paper_id":1345,"author_seq":247,"given_name":1355,"surname":1356,"affiliation":63,"orcid":63},"Gyu-Ho","Shin",{"paper_id":1345,"author_seq":232,"given_name":1358,"surname":1359,"affiliation":63,"orcid":63},"Soo-Hwan","Lee",{"paper_id":1345,"author_seq":218,"given_name":1361,"surname":1359,"affiliation":63,"orcid":63},"Chanyoung","The present study introduces a new, open-access dataset on acceptability ratings of Korean clausal constructions at the morphosyntax–semantics interface (dative, passive, and negative polarity item). 
The dataset comprises (i) linguistically controlled sentence materials, (ii) ratings from targeted adult populations (individuals in their 20s), and (iii) parallel ratings from GPT variants (including ChatGPT). Alongside the release, we assess the alignment between GPT- and human-derived ratings to probe the extent to which GPT architectures can approximate patterns of human sentence comprehension.",{"paper_id":1364,"title":1365,"year":7,"month":188,"day":63,"doi":1366,"resource_url":1367,"first_page":1368,"last_page":1369,"pdf_url":1370,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1371,"paper_type":860,"authors":1372,"abstract":1382},"lrec2026-main-024","Talk2Ref: A Dataset for Reference Prediction from Scientific Talks","10.63317\u002F5axffrnj6tm8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-024","357","371","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.24.pdf","broy-etal-2026-talk2ref",[1373,1376,1379],{"paper_id":1364,"author_seq":247,"given_name":1374,"surname":1375,"affiliation":63,"orcid":63},"Frederik Yannick","Broy",{"paper_id":1364,"author_seq":232,"given_name":1377,"surname":1378,"affiliation":63,"orcid":63},"Maike","Züfle",{"paper_id":1364,"author_seq":218,"given_name":1380,"surname":1381,"affiliation":63,"orcid":63},"Jan","Niehues","Scientific talks are a growing medium for disseminating research, and automatically identifying relevant literature that grounds or enriches a talk would be highly valuable for researchers and students alike. We introduce Reference Prediction from Talks (RPT), a new task that maps long, and unstructured scientific presentations to relevant papers. To support research on RPT, we present Talk2Ref, the first large-scale dataset of its kind, containing 6,279 talks and 43,429 cited papers (26 per talk on average), where relevance is approximated by the papers cited in the talk’s corresponding source publication. 
We establish strong baselines by evaluating state-of-the-art text embedding models in zero-shot retrieval scenarios, and propose a dual-encoder architecture trained on Talk2Ref. We further explore strategies for handling long transcripts, as well as training for domain adaptation. Our results show that fine-tuning on Talk2Ref significantly improves citation prediction performance, demonstrating both the challenges of the task and the effectiveness of our dataset for learning semantic representations from spoken scientific content. The dataset and trained models are released under an open license to foster future research on integrating spoken scientific communication into citation recommendation systems.",{"paper_id":1384,"title":1385,"year":7,"month":188,"day":63,"doi":1386,"resource_url":1387,"first_page":1388,"last_page":1389,"pdf_url":1390,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1391,"paper_type":860,"authors":1392,"abstract":1398},"lrec2026-main-025","MuSaG: A Multimodal German Sarcasm Dataset with Full-Modal Annotations","10.63317\u002F2dc7oajwrnmt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-025","372","392","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.25.pdf","scott-etal-2026-musag",[1393,1396,1397],{"paper_id":1384,"author_seq":247,"given_name":1394,"surname":1395,"affiliation":63,"orcid":63},"Aaron Robert","Scott",{"paper_id":1384,"author_seq":232,"given_name":1377,"surname":1378,"affiliation":63,"orcid":63},{"paper_id":1384,"author_seq":218,"given_name":1380,"surname":1381,"affiliation":63,"orcid":63},"Sarcasm is a complex form of figurative language in which the intended meaning contradicts the literal one. Its prevalence in social media and popular culture poses persistent challenges for natural language understanding, sentiment analysis, and content moderation. 
With the emergence of multimodal large language models, sarcasm detection extends beyond text and requires integrating cues from audio and vision. We present MuSaG, the first German multimodal sarcasm detection dataset, consisting of 33 minutes of manually selected and human-annotated statements from German television shows. Each instance provides aligned text, audio, and video modalities, annotated separately by humans, enabling evaluation in unimodal and multimodal settings. We benchmark nine open-source and commercial models, spanning text, audio, vision, and multimodal architectures, and compare their performance to human annotations. Our results show that while humans rely heavily on audio in conversational settings, models perform best on text. This highlights a gap in current multimodal models and motivates the use of MuSaG for developing models better suited to realistic scenarios. We release MuSaG publicly to support future research on multimodal sarcasm detection and human–model alignment.",{"paper_id":1400,"title":1401,"year":7,"month":188,"day":63,"doi":1402,"resource_url":1403,"first_page":1404,"last_page":1405,"pdf_url":1406,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1407,"paper_type":860,"authors":1408,"abstract":1424},"lrec2026-main-026","Icelandic Math Eval: A Competitive Mathematics Benchmark for Large Language Models","10.63317\u002F43xpv5rqyhr6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-026","393","406","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.26.pdf","einarsson-etal-2026-icelandic",[1409,1412,1415,1418,1421],{"paper_id":1400,"author_seq":247,"given_name":1410,"surname":1411,"affiliation":63,"orcid":63},"Hafsteinn","Einarsson",{"paper_id":1400,"author_seq":232,"given_name":1413,"surname":1414,"affiliation":63,"orcid":63},"Jökull 
Ari","Haraldsson",{"paper_id":1400,"author_seq":218,"given_name":1416,"surname":1417,"affiliation":63,"orcid":63},"Ívar Armin","Derayat",{"paper_id":1400,"author_seq":203,"given_name":1419,"surname":1420,"affiliation":63,"orcid":63},"Sigrún Helga","Lund",{"paper_id":1400,"author_seq":188,"given_name":1422,"surname":1423,"affiliation":63,"orcid":63},"Benedikt Steinar","Magnússon","We introduce Icelandic Math Eval, the first comprehensive benchmark for evaluating large language models (LLMs) on competitive mathematics problems in Icelandic. Our dataset comprises 1,027 problems from Icelandic mathematics competitions spanning from 1984 to 2025, covering algebra, geometry, number theory, and combinatorics across ten difficulty levels. We evaluate three state-of-the-art models, Claude Sonnet 4.5, Gemini 2.5 Pro, and GPT-5, using a dual evaluation methodology that tests both with and without multiple-choice options. Our results reveal several key findings: (1) models achieve 81-93% overall accuracy, demonstrating substantial cross-lingual transfer of mathematical reasoning capabilities; (2) a dramatic 17.5 percentage point performance drop on problems containing images highlights persistent challenges in multimodal mathematical reasoning; (3) a 6.7 percentage point gap between evaluation modes suggests that multiple-choice formats may overestimate genuine reasoning capabilities; and (4) systematic performance degradation with increasing difficulty, dropping to 43% on the most challenging problems. Using an LLM-as-judge evaluation approach, we provide detailed analysis across problem types, difficulty levels, and model capabilities. 
This work contributes to multilingual AI evaluation and demonstrates the importance of developing rigorous benchmarks for diverse languages to ensure comprehensive assessment of AI capabilities.",{"paper_id":1426,"title":1427,"year":7,"month":188,"day":63,"doi":1428,"resource_url":1429,"first_page":1430,"last_page":1431,"pdf_url":1432,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1433,"paper_type":860,"authors":1434,"abstract":1436},"lrec2026-main-027","MazeEval: A Benchmark for Testing Sequential Decision-Making in Language Models","10.63317\u002F4nm93hckcaf2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-027","407","418","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.27.pdf","einarsson-2026-mazeeval",[1435],{"paper_id":1426,"author_seq":247,"given_name":1410,"surname":1411,"affiliation":63,"orcid":63},"As Large Language Models (LLMs) increasingly power autonomous agents in robotics and embodied AI, understanding their spatial reasoning capabilities becomes crucial for reliable deployment. We introduce MazeEval, a benchmark designed to evaluate pure spatial reasoning in LLMs through coordinate-based maze navigation tasks without visual input. Using a function-calling interface, models navigate mazes of varying complexity (5 x 5 to 15 x 15 grids) using only coordinate feedback and distance-to-wall information. We evaluate eight state-of-the-art LLMs across identical mazes in both English and Icelandic to assess cross-linguistic transfer of spatial abilities. Our findings reveal striking disparities: while OpenAI’s O3 achieves perfect navigation up to 30 x 30 mazes, other models exhibit catastrophic failure beyond 9 x 9 mazes, with 100% of failures attributed to excessive looping behavior. 
We document significant performance degradation in Icelandic, with models solving mazes 3-4 sizes smaller than in English, suggesting spatial reasoning emerges from linguistic patterns rather than language-agnostic mechanisms. These results highlight that spatial intelligence remains fundamentally constrained by training data availability, with important implications for global deployment of LLM-powered autonomous systems.",{"paper_id":1438,"title":1439,"year":7,"month":188,"day":63,"doi":1440,"resource_url":1441,"first_page":1442,"last_page":1443,"pdf_url":1444,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1445,"paper_type":860,"authors":1446,"abstract":1474},"lrec2026-main-028","J-ClinicalBench: A Benchmark for Evaluating Large Language Models on Practical Clinical Tasks in Japanese","10.63317\u002F2uwf25atuoom","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-028","419","430","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.28.pdf","shimizu-etal-2026-clinicalbench",[1447,1450,1453,1456,1459,1462,1465,1468,1471],{"paper_id":1438,"author_seq":247,"given_name":1448,"surname":1449,"affiliation":63,"orcid":63},"Seiji","Shimizu",{"paper_id":1438,"author_seq":232,"given_name":1451,"surname":1452,"affiliation":63,"orcid":63},"Tomohiro","Nishiyama",{"paper_id":1438,"author_seq":218,"given_name":1454,"surname":1455,"affiliation":63,"orcid":63},"Hisada","Shohei",{"paper_id":1438,"author_seq":203,"given_name":1457,"surname":1458,"affiliation":63,"orcid":63},"Yamato","Himi",{"paper_id":1438,"author_seq":188,"given_name":1460,"surname":1461,"affiliation":63,"orcid":63},"Shoko","Wakamiya",{"paper_id":1438,"author_seq":172,"given_name":1463,"surname":1464,"affiliation":63,"orcid":63},"Yuki","Yanagisawa",{"paper_id":1438,"author_seq":155,"given_name":1466,"surname":1467,"affiliation":63,"orcid":63},"Masami","Tsuchiya",{"paper_id":1438,"author_seq":138,"given_name":1469,"surname":1470,"affiliation"
:63,"orcid":63},"Satoko","Hori",{"paper_id":1438,"author_seq":121,"given_name":1472,"surname":1473,"affiliation":63,"orcid":63},"Eiji","Aramaki","Recent advances in large language models (LLMs) have accelerated the NLP applications in the medical and clinical domains. However, evaluations remain limited for non-English languages, such as Japanese, where clinical corpora are particularly scarce. To address this gap, we present J-ClinicalBench, a publicly available benchmark designed to reflect realistic Japanese clinical tasks. We first created 227 expert-authored clinical documents and newly constructed five datasets for core clinical tasks. Building on these datasets, J-ClinicalBench comprises nine clinical tasks spanning clinical language reasoning, generation, and understanding. We establish baseline performance on J-ClinicalBench by evaluating state-of-the-art proprietary and Japanese open-source LLMs, providing the first assessment of their utility in practical clinical scenarios. By releasing this benchmark, we aim to foster the development and evaluation of clinically applicable LLMs in Japanese healthcare, bridging the current gap between clinical NLP research and clinical practice.",{"paper_id":1476,"title":1477,"year":7,"month":188,"day":63,"doi":1478,"resource_url":1479,"first_page":1480,"last_page":1481,"pdf_url":1482,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1483,"paper_type":860,"authors":1484,"abstract":1494},"lrec2026-main-029","Is One Dataset Enough for Evaluation? 
Studying Generalizability of Automated Essay Scoring Models","10.63317\u002F4sepdcv3iix7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-029","431","440","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.29.pdf","eltanbouly-etal-2026-is",[1485,1488,1491],{"paper_id":1476,"author_seq":247,"given_name":1486,"surname":1487,"affiliation":63,"orcid":63},"Sohaila","Eltanbouly",{"paper_id":1476,"author_seq":232,"given_name":1489,"surname":1490,"affiliation":63,"orcid":63},"Marwan","Sayed",{"paper_id":1476,"author_seq":218,"given_name":1492,"surname":1493,"affiliation":63,"orcid":63},"Tamer","Elsayed","Automated Essay Scoring (AES) has made significant advancements in writing assessment. Recently, cross-prompt AES has gained attention because of its focus on generalizing to unseen prompts. Despite the promise of these advancements, a critical question remains: how generalizable and robust are those models when applied to diverse datasets? This study assesses the generalizability of eight cross-prompt AES models across three different datasets. We employ two experimental setups: the within-dataset approach, where both training and testing occur on the same dataset, and the cross-dataset approach, which challenges the models by evaluating their performance on previously unseen datasets. 
The experimental results show significant performance inconsistencies, highlighting that relying on a single dataset is insufficient for building robust and generalizable AES systems.",{"paper_id":1496,"title":1497,"year":7,"month":188,"day":63,"doi":1498,"resource_url":1499,"first_page":1500,"last_page":1501,"pdf_url":1502,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1503,"paper_type":860,"authors":1504,"abstract":1523},"lrec2026-main-030","HiFi-KPI: A Dataset for Hierarchical KPI Extraction from Earnings Filings","10.63317\u002F2nbsp7zzfb3g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-030","441","455","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.30.pdf","jensen-etal-2026-hifi",[1505,1508,1511,1514,1517,1520],{"paper_id":1496,"author_seq":247,"given_name":1506,"surname":1507,"affiliation":63,"orcid":63},"Rasmus Thyge Aavang","Jensen",{"paper_id":1496,"author_seq":232,"given_name":1509,"surname":1510,"affiliation":63,"orcid":63},"Giovanni","Rizzi",{"paper_id":1496,"author_seq":218,"given_name":1512,"surname":1513,"affiliation":63,"orcid":63},"Rasmus","Tjalk-Bøggild",{"paper_id":1496,"author_seq":203,"given_name":1515,"surname":1516,"affiliation":63,"orcid":63},"Alexandre","Iolov",{"paper_id":1496,"author_seq":188,"given_name":1518,"surname":1519,"affiliation":63,"orcid":63},"Mike","Zhang",{"paper_id":1496,"author_seq":172,"given_name":1521,"surname":1522,"affiliation":63,"orcid":63},"Johannes","Bjerva","Accurate tagging of earnings reports can yield significant short-term returns for stakeholders. The machine-readable inline eXtensible Business Reporting Language (iXBRL) is mandated for public financial filings. Yet, its complex, fine-grained taxonomy limits the cross-company transferability of tagged Key Performance Indicators (KPIs). 
To address this, we introduce the Hierarchical Financial Key Performance Indicator (HiFi-KPI) dataset, a large-scale corpus of 1.65M paragraphs and 198k unique, hierarchically organized labels linked to iXBRL taxonomies. HiFi-KPI supports multiple tasks and we evaluate three: KPI classification, KPI extraction, and structured KPI extraction. For rapid evaluation, we also release HiFi-KPI-Lite, a manually curated 2.5K-instance subset. Baselines on HiFi-KPI-Lite show that encoder-based models achieve over 0.906 macro-F1 on classification, while Large Language Models (LLMs) reach 0.440 F1 on structured extraction. Finally, a qualitative analysis reveals that extraction errors primarily relate to dates. We open-source all code and data at Anonymous.",{"paper_id":1525,"title":1526,"year":7,"month":188,"day":63,"doi":1527,"resource_url":1528,"first_page":1529,"last_page":1530,"pdf_url":1531,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1532,"paper_type":860,"authors":1533,"abstract":1541},"lrec2026-main-031","UniSkill: A Dataset for Matching University Curricula to Professional Competencies","10.63317\u002F2n39qzvk2eqe","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-031","456","469","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.31.pdf","musazade-etal-2026-uniskill",[1534,1537,1540],{"paper_id":1525,"author_seq":247,"given_name":1535,"surname":1536,"affiliation":63,"orcid":63},"Nurlan","Musazade",{"paper_id":1525,"author_seq":232,"given_name":1538,"surname":1539,"affiliation":63,"orcid":63},"József","Mezei",{"paper_id":1525,"author_seq":218,"given_name":1518,"surname":1519,"affiliation":63,"orcid":63},"Skill extraction and recommendation systems have been studied from recruiter, applicant, and education perspectives. While AI applications in job advertisements have received broad attention, deficiencies in the instructed skills side remain a challenge. 
In this work, we address the scarcity of publicly available datasets by releasing both manually annotated and synthetic datasets of skills from the European Skills, Competences, Qualifications and Occupations (ESCO) taxonomy and university course pairs and publishing corresponding annotation guidelines. Specifically, we match graduate-level university courses with skills from the Systems Analysts and Management and Organization Analyst ESCO occupation groups at two granularities: course title with a skill, and course sentence with a skill. We train language models on this dataset to serve as a baseline for retrieval and recommendation systems for course-to-skill and skill-to-course matching. We evaluate the models on a portion of the annotated data. Our BERT model achieves 87% F1-score, showing that course and skill matching is a feasible task.",{"paper_id":1543,"title":1544,"year":7,"month":188,"day":63,"doi":1545,"resource_url":1546,"first_page":1547,"last_page":1548,"pdf_url":1549,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1550,"paper_type":860,"authors":1551,"abstract":1588},"lrec2026-main-032","A Dataset for Evaluating ASR on Specialized Vocabulary","10.63317\u002F568sthbwdhap","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-032","470","480","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.32.pdf","klering-etal-2026-dataset",[1552,1555,1558,1561,1564,1567,1570,1573,1576,1579,1582,1585],{"paper_id":1543,"author_seq":247,"given_name":1553,"surname":1554,"affiliation":63,"orcid":63},"Emily Haubert","Klering",{"paper_id":1543,"author_seq":232,"given_name":1556,"surname":1557,"affiliation":63,"orcid":63},"Eduardo Gabriel","Cortes",{"paper_id":1543,"author_seq":218,"given_name":1559,"surname":1560,"affiliation":63,"orcid":63},"Tatjana","Chernenko",{"paper_id":1543,"author_seq":203,"given_name":1562,"surname":1563,"affiliation":63,"orcid":63},"Mariana 
Vargas","Trarbach",{"paper_id":1543,"author_seq":188,"given_name":1565,"surname":1566,"affiliation":63,"orcid":63},"Gabriel de Oliveira","Ramos",{"paper_id":1543,"author_seq":172,"given_name":1568,"surname":1569,"affiliation":63,"orcid":63},"Sandro José","Rigo",{"paper_id":1543,"author_seq":155,"given_name":1571,"surname":1572,"affiliation":63,"orcid":63},"Maitê","Dupont",{"paper_id":1543,"author_seq":138,"given_name":1574,"surname":1575,"affiliation":63,"orcid":63},"Ana Luiza Treichel","Vianna",{"paper_id":1543,"author_seq":121,"given_name":1577,"surname":1578,"affiliation":63,"orcid":63},"Gabriela Krause dos","Santos",{"paper_id":1543,"author_seq":104,"given_name":1580,"surname":1581,"affiliation":63,"orcid":63},"Vinicius Meirelles","Pereira",{"paper_id":1543,"author_seq":87,"given_name":1583,"surname":1584,"affiliation":63,"orcid":63},"Denis Andrei de","Araujo",{"paper_id":1543,"author_seq":73,"given_name":1586,"surname":1587,"affiliation":63,"orcid":63},"Rafael","Kunst","Evaluating the ability of Automatic Speech Recognition (ASR) models to transcribe specialized vocabulary remains a persistent challenge, as standard datasets predominantly feature common words and thus obscure weaknesses on rare or out-of-vocabulary (OOV) terms. To address this limitation, we introduce a linguistically curated bilingual dataset (English and Portuguese) comprising 13,846 utterances (18.7 hours) distributed across synthetic and literature-derived subsets, with OOV rates reaching up to 100%. We further propose a diagnostic evaluation framework that partitions recognition performance into Biased Word Error Rate (B-WER), targeting domain-specific jargon, and Unbiased Word Error Rate (U-WER), focusing on general vocabulary. Baseline evaluations using Whisper models (medium, large-v3, and large-v3-turbo) confirm the necessity of this framework. 
On the most challenging datasets, B-WER reaches 0.88–0.90, whereas U-WER remains as low as 0.06–0.19, demonstrating that conventional WER masks critical failure modes in jargon recognition. Additionally, an oracle upper bound experiment shows that providing correct jargon via prompting reduces B-WER by 0.50–0.70 absolute, quantifying the considerable potential for contextual biasing. We release the datasets and evaluation scripts as a reproducible benchmark to foster research on domain-aware contextual biasing and OOV handling in ASR systems.",{"paper_id":1590,"title":1591,"year":7,"month":188,"day":63,"doi":1592,"resource_url":1593,"first_page":1594,"last_page":1595,"pdf_url":1596,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1597,"paper_type":860,"authors":1598,"abstract":1636},"lrec2026-main-033","SommBench: Assessing Sommelier Expertise of Language Models","10.63317\u002F3i8wafseg64a","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-033","481","497","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.33.pdf","brach-etal-2026-sommbench",[1599,1602,1605,1608,1610,1612,1615,1618,1621,1624,1627,1630,1633],{"paper_id":1590,"author_seq":247,"given_name":1600,"surname":1601,"affiliation":63,"orcid":63},"William","Brach",{"paper_id":1590,"author_seq":232,"given_name":1603,"surname":1604,"affiliation":63,"orcid":63},"Tomas","Bedej",{"paper_id":1590,"author_seq":218,"given_name":1606,"surname":1607,"affiliation":63,"orcid":63},"Jacob","Nielsen",{"paper_id":1590,"author_seq":203,"given_name":1606,"surname":1609,"affiliation":63,"orcid":63},"Pichna",{"paper_id":1590,"author_seq":188,"given_name":1611,"surname":1604,"affiliation":63,"orcid":63},"Juraj",{"paper_id":1590,"author_seq":172,"given_name":1613,"surname":1614,"affiliation":63,"orcid":63},"Eemeli","Saarensilta",{"paper_id":1590,"author_seq":155,"given_name":1616,"surname":1617,"affiliation":63,"orcid":63},"Julie","Dupouy",{"paper_id":159
0,"author_seq":138,"given_name":1619,"surname":1620,"affiliation":63,"orcid":63},"Gianluca","Barmina",{"paper_id":1590,"author_seq":121,"given_name":1622,"surname":1623,"affiliation":63,"orcid":63},"Andrea Blasi","Núñez",{"paper_id":1590,"author_seq":104,"given_name":1625,"surname":1626,"affiliation":63,"orcid":63},"Peter","Schneider-Kamp",{"paper_id":1590,"author_seq":87,"given_name":1628,"surname":1629,"affiliation":63,"orcid":63},"Kristian","Košťál",{"paper_id":1590,"author_seq":73,"given_name":1631,"surname":1632,"affiliation":63,"orcid":63},"Michal","Ries",{"paper_id":1590,"author_seq":55,"given_name":1634,"surname":1635,"affiliation":63,"orcid":63},"Lukas Galke","Poech","With the rapid advances of large language models, it becomes increasingly important to systematically evaluate their multilingual and multicultural capabilities. Previous cultural evaluation benchmarks focus mainly on basic cultural knowledge that can be encoded in linguistic form. Here, we propose SommBench, a multilingual benchmark to assess sommelier expertise, a domain deeply grounded in the senses of smell and taste. While language models learn about sensory properties exclusively through textual descriptions, SommBench tests whether this textual grounding is sufficient to emulate expert-level sensory judgment. SommBench comprises three main tasks: Wine Theory Question Answering (WTQA), Wine Feature Completion (WFC), and Food-Wine Pairing (FWP). SommBench is available in multiple languages: English, Slovak, Swedish, Finnish, German, Danish, Italian, and Spanish. This helps separate a language model’s wine expertise from its language skills. The benchmark datasets were developed in close collaboration with a professional sommelier and native speakers of the respective languages, resulting in 1,024 questions for wine theory question answering, 1,000 examples for wine feature completion, and 1,000 examples of food-wine pairing. 
We provide results for the most popular language models, including closed-weights models such as Gemini 2.5, and open-weights models, such as GPT-OSS and Qwen 3. Our results show that the most capable models perform well on wine theory question answering (up to 97% correct with a closed-weights model), yet feature completion (peaking at 65%) and food-wine pairing (MCC ranging between 0 and 0.39) turn out to be more challenging. These results position SommBench as an interesting and challenging benchmark for evaluating the sommelier expertise of language models. The benchmark is publicly available at https:\u002F\u002Fgithub.com\u002Fsommify\u002Fsommbench.",{"paper_id":1638,"title":1639,"year":7,"month":188,"day":63,"doi":1640,"resource_url":1641,"first_page":1642,"last_page":1643,"pdf_url":1644,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1645,"paper_type":860,"authors":1646,"abstract":1653},"lrec2026-main-034","CzechDocs: A Multiway Parallel Dataset of Formatted Documents for Minority Languages in Czechia","10.63317\u002F55v27paqpusz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-034","498","504","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.34.pdf","jon-etal-2026-czechdocs",[1647,1650],{"paper_id":1638,"author_seq":247,"given_name":1648,"surname":1649,"affiliation":63,"orcid":63},"Josef","Jon",{"paper_id":1638,"author_seq":232,"given_name":1651,"surname":1652,"affiliation":63,"orcid":63},"Ondřej","Bojar","We present CzechDocs, a multiway parallel dataset of formatted documents (HTML, DOCX, and PDF) covering Czech and minority languages used in Czechia—primarily Ukrainian and English, with smaller portions of Vietnamese, Russian and other languages. The dataset is designed to support the evaluation of machine translation systems that aim to preserve document formatting during translation. 
We provide a comparison of the most common approaches to format-preserving machine translation on a validation subset of the dataset. This validation split, together with the evaluation toolkit, is publicly released for further research. A held-out test split will be reserved for a future shared task focused on document-level translation with formatting preservation.",{"paper_id":1655,"title":1656,"year":7,"month":188,"day":63,"doi":1657,"resource_url":1658,"first_page":1659,"last_page":1660,"pdf_url":1661,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1662,"paper_type":860,"authors":1663,"abstract":1676},"lrec2026-main-035","An LLM-Based Assistant for Debt Waiver Court Procedures","10.63317\u002F57sfssjpk6di","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-035","505","514","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.35.pdf","padro-etal-2026-llm",[1664,1667,1670,1673],{"paper_id":1655,"author_seq":247,"given_name":1665,"surname":1666,"affiliation":63,"orcid":63},"Lluis","Padro",{"paper_id":1655,"author_seq":232,"given_name":1668,"surname":1669,"affiliation":63,"orcid":63},"Daniel","Ferrés",{"paper_id":1655,"author_seq":218,"given_name":1671,"surname":1672,"affiliation":63,"orcid":63},"Roser","Saurí",{"paper_id":1655,"author_seq":203,"given_name":1674,"surname":1675,"affiliation":63,"orcid":63},"Mireia","Artigot","Spanish Insolvency Law 1\u002F2020 of the 5th of May enables individuals to apply for debt waiver under certain conditions. The large number of applications submitted each year places a heavy burden on judges and court officers, who must examine heterogeneous documentation before issuing a ruling. This paper presents an AI-based assistant designed to support the processing of debt waiver cases. 
The system integrates PDF-to-text conversion, rule-based document classification, large language model (LLM)-based information extraction, and post-processing to consolidate fragmented or duplicated records. A front-end interface provides structured summaries of the application content, and can automatically generate draft rulings. Evaluated on a set of real applications, the system achieves over 92% F1 in document classification and up to 91% F1 in personal data extraction, showing the potential of open-source LLMs to reduce administrative workload and accelerate judicial procedures, while keeping the final decision with the judge.",{"paper_id":1678,"title":1679,"year":7,"month":188,"day":63,"doi":1680,"resource_url":1681,"first_page":1682,"last_page":1683,"pdf_url":1684,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1685,"paper_type":860,"authors":1686,"abstract":1693},"lrec2026-main-036","Enhancing Clinical Trial Analysis through Large Language Models for Multi-Evidence Natural Language Inference","10.63317\u002F4askncgswnyv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-036","515","523","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.36.pdf","chandrasegaran-etal-2026-enhancing",[1687,1690],{"paper_id":1678,"author_seq":247,"given_name":1688,"surname":1689,"affiliation":63,"orcid":63},"Shobanapriyan","Chandrasegaran",{"paper_id":1678,"author_seq":232,"given_name":1691,"surname":1692,"affiliation":63,"orcid":63},"Amal","Htait","The exponential growth of clinical trial reports (CTRs) presents a critical challenge for evidence-based medicine, with manual systematic reviews requiring months to synthesise findings. This paper evaluates Large Language Models (LLMs) and retrieval methods for automated Natural Language Inference (NLI) and evidence extraction from CTRs, and seeks to improve upon previously reported results in this domain. 
Using the NLI4CT dataset containing 2,400 annotated statement-evidence pairs from breast cancer trials, we conducted a comparative evaluation of general-purpose LLMs, domain-specific LLMs, and transformer-based baselines across entailment classification and evidence retrieval tasks. Reasoning-capable, general-purpose LLMs (such as Qwen-32B) demonstrated superior performance in the entailment classification task, exceeding both the performance of other models evaluated in this study and the previously reported state-of-the-art results. Although domain-specific adaptations showed improvements at comparable scale, larger general-purpose language models maintained superior absolute performance. For evidence retrieval, Large Language embedding models (such as bge-large-en-v1.5) surpassed classic transformer-based ranking approaches. These findings demonstrate that modern LLMs with reasoning capabilities can effectively support real-time clinical evidence synthesis without task-specific fine-tuning, offering a pathway toward scalable automated systems for clinical trial interpretation that could substantially reduce the evidence-to-practice gap in medical decision-making.",{"paper_id":1695,"title":1696,"year":7,"month":188,"day":63,"doi":1697,"resource_url":1698,"first_page":1699,"last_page":1700,"pdf_url":1701,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":1702,"bibkey":1703,"paper_type":860,"authors":1704,"abstract":1714},"lrec2026-main-037","A Systematic Comparison of Large Language Models for Data Annotation in NER 
Tasks","10.63317\u002F4qnuuw7rjs24","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-037","524","548","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.37.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.37_OptionalSupplementaryMaterial.zip","haq-etal-2026-systematic",[1705,1708,1711],{"paper_id":1695,"author_seq":247,"given_name":1706,"surname":1707,"affiliation":63,"orcid":63},"Muhammad Uzair Ul","Haq",{"paper_id":1695,"author_seq":232,"given_name":1709,"surname":1710,"affiliation":63,"orcid":63},"Davide","Rigoni",{"paper_id":1695,"author_seq":218,"given_name":1712,"surname":1713,"affiliation":63,"orcid":63},"Alessandro","Sperduti","High-quality annotated data is essential for training effective machine learning models, especially for fine-grained tasks like Named Entity Recognition (NER), where each token in a sentence must be tagged with a golden annotation. While Large Language Models (LLMs) show strong potential in automating data annotation, existing literature lacks extensive evaluations that systematically compare different models, embedding strategies, and context selection methods, particularly on complex, real-world datasets. This paper fills this gap by conducting a comprehensive study of LLMs for NER annotation across four diverse datasets. It benchmarks both proprietary and open-source LLMs at the 7B to 70B parameter scale, including a 32B reasoning-optimized model, and explores multiple context selection strategies. Two evaluations are performed: (i) the assessment of the practical utility of LLM-generated annotations by fine-tuning a RoBERTa model on LLM-generated annotations and measuring downstream performance; (ii) the assessment of only LLM-generated annotations using token-level metrics, like Precision, Recall, F1, and agreement with human annotations (Cohen’s κ). 
Empirical results, supported by statistical tests, highlight the importance of choosing suitable LLMs and embedding models and reveal key trade-offs between model scale and annotation quality. Challenging datasets like SKILLSPAN further expose the limitations of current LLM-based annotation pipelines, emphasizing the need for benchmarking on difficult, real-world tasks.",{"paper_id":1716,"title":1717,"year":7,"month":188,"day":63,"doi":1718,"resource_url":1719,"first_page":1720,"last_page":1721,"pdf_url":1722,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1723,"paper_type":860,"authors":1724,"abstract":1734},"lrec2026-main-038","Toward Generalized Cross-Lingual Hateful Language Detection with Web-Scale Data and Ensemble LLM Annotations","10.63317\u002F22xjdmqtv6mk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-038","549","559","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.38.pdf","dang-etal-2026-generalized",[1725,1728,1731],{"paper_id":1716,"author_seq":247,"given_name":1726,"surname":1727,"affiliation":63,"orcid":63},"Dang Hai","Dang",{"paper_id":1716,"author_seq":232,"given_name":1729,"surname":1730,"affiliation":63,"orcid":63},"Jelena","Mitrović",{"paper_id":1716,"author_seq":218,"given_name":1732,"surname":1733,"affiliation":63,"orcid":63},"Michael","Granitzer","We study whether large-scale unlabelled web data and LLM-based synthetic annotations can improve multilingual hate speech detection. Starting from texts crawled via OpenWebSearch (OWS) in four languages (English, German, Spanish, Vietnamese), we pursue two complementary strategies. First, we apply continued pre-training to BERT models by continuing masked language modelling on unlabelled OWS texts before supervised fine-tuning, and show that this yields an average macro-F1 gain of approximately 3% over standard baselines across sixteen benchmarks, with stronger gains in low-resource settings. 
Second, we use four open-source LLMs (Mistral-7B, Llama3.1-8B, Gemma2-9B, Qwen2.5-14B) to produce synthetic annotations through three ensemble strategies: mean averaging, majority voting, and a LightGBM meta-learner. The LightGBM ensemble consistently outperforms the other strategies. Fine-tuning on these synthetic labels substantially benefits a small model Llama3.2-1B (+11% pooled F1), but provides only a modest gain for the larger Qwen2.5-14B (+0.6%). Our results indicate that the combination of web-scale unlabelled data and LLM-ensemble annotations is most valuable for smaller models and low-data languages.",{"paper_id":1736,"title":1737,"year":7,"month":188,"day":63,"doi":1738,"resource_url":1739,"first_page":1740,"last_page":1741,"pdf_url":1742,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1743,"paper_type":860,"authors":1744,"abstract":1757},"lrec2026-main-039","Can LLMs Faithfully Explain Themselves in Low-Resource Languages? A Case Study on Emotion Detection in Persian","10.63317\u002F4tds2kffmg7j","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-039","560","579","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.39.pdf","mehrazar-etal-2026-can",[1745,1748,1751,1754],{"paper_id":1736,"author_seq":247,"given_name":1746,"surname":1747,"affiliation":63,"orcid":63},"Mobina","Mehrazar",{"paper_id":1736,"author_seq":232,"given_name":1749,"surname":1750,"affiliation":63,"orcid":63},"Mohammad Amin","Yousefi",{"paper_id":1736,"author_seq":218,"given_name":1752,"surname":1753,"affiliation":63,"orcid":63},"Parisa","Beygi",{"paper_id":1736,"author_seq":203,"given_name":1755,"surname":1756,"affiliation":63,"orcid":63},"Behnam","Bahrak","Large language models (LLMs) are increasingly used to generate self-explanations alongside their predictions, a practice that raises concerns about the faithfulness of these explanations, especially in low-resource languages. 
This study evaluates the faithfulness of LLM-generated explanations in the context of emotion classification in Persian, a low-resource language, by comparing the influential words identified by the model against those identified by human annotators. We assess faithfulness using confidence scores derived from token-level log-probabilities. Two prompting strategies, differing in the order of explanation and prediction (Predict-then-Explain and Explain-then-Predict), are tested for their impact on explanation faithfulness. Our results reveal that while LLMs achieve strong classification performance, their generated explanations often diverge from faithful reasoning, showing greater agreement with each other than with human judgments. These results highlight the limitations of current explanation methods and metrics, emphasizing the need for more robust approaches to ensure LLM reliability in multilingual and low-resource contexts.",{"paper_id":1759,"title":1760,"year":7,"month":188,"day":63,"doi":1761,"resource_url":1762,"first_page":1763,"last_page":1764,"pdf_url":1765,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1766,"paper_type":860,"authors":1767,"abstract":1777},"lrec2026-main-040","Are LLMs Good Text Diacritizers? 
An Arabic and Yoruba Case Study","10.63317\u002F5cm5exvnmd9r","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-040","580","589","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.40.pdf","toyin-etal-2026-are",[1768,1771,1774],{"paper_id":1759,"author_seq":247,"given_name":1769,"surname":1770,"affiliation":63,"orcid":63},"Hawau Olamide","Toyin",{"paper_id":1759,"author_seq":232,"given_name":1772,"surname":1773,"affiliation":63,"orcid":63},"Samar Mohamed","Magdy",{"paper_id":1759,"author_seq":218,"given_name":1775,"surname":1776,"affiliation":63,"orcid":63},"Hanan","Aldarmaki","We investigate the effectiveness of large language models (LLMs) for text diacritization in two typologically distinct languages: Arabic and Yoruba. To enable a rigorous evaluation, we introduce a novel multilingual dataset MultiDiac, with diverse samples that capture a range of diacritic ambiguities. We evaluate 12 LLMs varying in size, accessibility, and language coverage, and benchmark them against 4 specialized diacritization models. Additionally, we fine-tune four small open-source models using LoRA for Yoruba. Our results show that many off-the-shelf LLMs outperform specialized diacritization models for both Arabic and Yoruba, but smaller models suffer from hallucinations. 
We find that fine-tuning on a small dataset can help improve diacritization performance and reduce hallucination rates for Yoruba.",{"paper_id":1779,"title":1780,"year":7,"month":188,"day":63,"doi":1781,"resource_url":1782,"first_page":1783,"last_page":1784,"pdf_url":1785,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1786,"paper_type":860,"authors":1787,"abstract":1800},"lrec2026-main-041","Automatic Suggestions of Supplements in the Herculaneum Papyri: Language Models and RESTful API","10.63317\u002F3httqkjtey6v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-041","590","598","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.41.pdf","grosso-etal-2026-automatic",[1788,1791,1794,1797],{"paper_id":1779,"author_seq":247,"given_name":1789,"surname":1790,"affiliation":63,"orcid":63},"Angelo Mario Del","Grosso",{"paper_id":1779,"author_seq":232,"given_name":1792,"surname":1793,"affiliation":63,"orcid":63},"Gabriele","Giannessi",{"paper_id":1779,"author_seq":218,"given_name":1795,"surname":1796,"affiliation":63,"orcid":63},"Simone","Zenzaro",{"paper_id":1779,"author_seq":203,"given_name":1798,"surname":1799,"affiliation":63,"orcid":63},"Federico","Boschetti","This paper addresses a computational philology task focused on the automatic restoration of textual gaps (i.e., lacunae) in the Herculaneum Papyri, whose Ancient Greek texts are inherently fragmentary due to damage caused by carbonization. The objective of this work is to show the preliminary results concerning the development of a web-based suggestion service for proposing plausible supplements to fill lacunae, thereby supporting the philological process of producing new critical editions within a new web-based digital scholarly editing environment. 
To automatically provide such suggestions, we have developed systems that generate textual supplements in Ancient Greek, employing both neural (BERT-like) and statistical (n-gram) language modeling approaches.",{"paper_id":1802,"title":1803,"year":7,"month":188,"day":63,"doi":1804,"resource_url":1805,"first_page":1806,"last_page":1807,"pdf_url":1808,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1809,"paper_type":860,"authors":1810,"abstract":1820},"lrec2026-main-042","Designing LLM Agents for User-Centered Language Service Selection","10.63317\u002F59qnh9w46bv6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-042","599","608","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.42.pdf","ogawa-etal-2026-designing",[1811,1814,1817],{"paper_id":1802,"author_seq":247,"given_name":1812,"surname":1813,"affiliation":63,"orcid":63},"Ryoichiro","Ogawa",{"paper_id":1802,"author_seq":232,"given_name":1815,"surname":1816,"affiliation":63,"orcid":63},"Donghui","Lin",{"paper_id":1802,"author_seq":218,"given_name":1818,"surname":1819,"affiliation":63,"orcid":63},"Fumito","Uwano","With the rapid expansion of language resources and services across repositories and platforms, users face an overwhelming number of options. While this diversity promises flexibility, non-experts struggle to compose appropriate resource pipelines and select services that satisfy both functional and non-functional requirements. We propose a user-centered framework of LLM agents that interprets natural-language requests and performs end-to-end language service selection. The agents extract functional requirements to form coherent task compositions and select suitable language services for each component by interpreting non-functional quality aspects embedded in contextual cues. 
To ensure reliable and explainable decisions, we employ a four-step structured reasoning procedure that combines Few-Shot exemplars and Chain-of-Thought reasoning: extracting functional requirements, inducing non-functional evaluation axes, applying these axes as constraints in candidate retrieval, and determining a final composition. We construct a benchmark dataset pairing diverse user requests with standardized language service profiles containing metadata and quality indicators, and evaluate our framework against representative prompting-based baselines. Results show consistent gains in Precision, Recall, and F1-score, demonstrating improved capture of both functional intent and quality preferences. These findings demonstrate that structured LLM agents can bridge natural-language user intents and language service configurations, enabling end-to-end selection and composition in a transparent and user-centered manner.",{"paper_id":1822,"title":1823,"year":7,"month":188,"day":63,"doi":1824,"resource_url":1825,"first_page":1826,"last_page":1827,"pdf_url":1828,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1829,"paper_type":860,"authors":1830,"abstract":1841},"lrec2026-main-043","User Profiling for Specification-Sensitive Recommendations with Large Language Model 
Prompting","10.63317\u002F5csm2qecy6jh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-043","609","618","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.43.pdf","chien-etal-2026-user",[1831,1834,1835,1838],{"paper_id":1822,"author_seq":247,"given_name":1832,"surname":1833,"affiliation":63,"orcid":63},"Chih-Yu","Chien",{"paper_id":1822,"author_seq":232,"given_name":1341,"surname":1342,"affiliation":63,"orcid":63},{"paper_id":1822,"author_seq":218,"given_name":1836,"surname":1837,"affiliation":63,"orcid":63},"Hen-Hsen","Huang",{"paper_id":1822,"author_seq":203,"given_name":1839,"surname":1840,"affiliation":63,"orcid":63},"Hsin-Hsi","Chen","Recently, there has been an increasing focus in research on the potential applications of large language models (LLMs) for personalized recommendations. Previous studies utilize LLMs to analyze the interaction between users and products to establish various personalized recommendation systems. However, recommendation becomes particularly challenging when items are associated with varied attributes, influenced by personal preferences, and described primarily through unstructured data. Moreover, analyzing implicit user preferences with product specifications for specification-sensitive recommendations remains largely unexplored. In this paper, we propose a framework that fully leverages prompting-based strategies to analyze user reviews and item attributes for the generation of user and product profiles, respectively. These profiles capture users’ implicit preferences and enable rating prediction or product recommendation, which are crucial for personalized recommendations. 
Experimental results show that our proposed framework effectively handles complex item attributes and user preferences to achieve promising performances in rating prediction.",{"paper_id":1843,"title":1844,"year":7,"month":188,"day":63,"doi":1845,"resource_url":1846,"first_page":1847,"last_page":1848,"pdf_url":1849,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1850,"paper_type":860,"authors":1851,"abstract":1858},"lrec2026-main-044","Comparing Traditional and LLM-based Approaches for Automated Scoring of Dutch Writing Products","10.63317\u002F3raujfonf7cv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-044","619","630","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.44.pdf","kruijsbergen-etal-2026-comparing",[1852,1855],{"paper_id":1843,"author_seq":247,"given_name":1853,"surname":1854,"affiliation":63,"orcid":63},"Joni","Kruijsbergen",{"paper_id":1843,"author_seq":232,"given_name":1856,"surname":1857,"affiliation":63,"orcid":63},"Orphee De","Clercq","This research examines several traditional and recent approaches for automated grading of Dutch texts written by adolescent L1 speakers. We relied on a proprietary dataset comprising human-scored texts. Following recent paradigms in NLP research, we compared training a feature-based model to fine-tuning both mono- and multilingual BERT-based and generative large language models. The latter were also prompted directly in a zero-shot setting. The results reveal that the feature-based and BERT-based approaches are promising for the task at hand and even complementary, although there is still room for improvement. The error analysis demonstrates that the generative models do not only make more errors in classification, but that these errors are also more problematic. 
We therefore conclude that especially generative LLMs are not directly employable in this educational context.",{"paper_id":1860,"title":1861,"year":7,"month":188,"day":63,"doi":1862,"resource_url":1863,"first_page":1864,"last_page":1865,"pdf_url":1866,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1867,"paper_type":860,"authors":1868,"abstract":1881},"lrec2026-main-045","Decode the Law: Towards Legal Text Simplification with Large Language Models","10.63317\u002F2i7en3xycmhi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-045","631","641","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.45.pdf","rabbani-etal-2026-decode",[1869,1872,1875,1878],{"paper_id":1860,"author_seq":247,"given_name":1870,"surname":1871,"affiliation":63,"orcid":63},"Mohammed Danish","Rabbani",{"paper_id":1860,"author_seq":232,"given_name":1873,"surname":1874,"affiliation":63,"orcid":63},"Subhadeep","Roy",{"paper_id":1860,"author_seq":218,"given_name":1876,"surname":1877,"affiliation":63,"orcid":63},"Sayantan","Mitra",{"paper_id":1860,"author_seq":203,"given_name":1879,"surname":1880,"affiliation":63,"orcid":63},"Tulika","Saha","Legal documents are often verbose and structurally complex, posing significant barriers to public understanding and equitable access to justice. Despite growing interest in text simplification, efforts targeting the legal domain remain limited by a lack of robust, high-quality resources. In this paper, we address this gap by introducing SIMPLE-LAW, a curated benchmark dataset of over 6,000 aligned pairs of original and simplified legal passages, specifically constructed to facilitate research in legal text simplification by leveraging large language models (LLMs). We evaluate this dataset across both in-context learning and parameter-efficient fine-tuning paradigms using a range of state-of-the-art LLMs, with Unsloth variants of Mistral, LLaMA-3.2, Gemma, and Qwen-2.5. 
We assess performance using BERTScore, ROUGE, SARI, and a hallucination detection score, to capture both fidelity and readability. Results show that fine-tuned models significantly outperform in-context learners in terms of simplification quality and factual consistency. By offering a new dataset, rigorous evaluation, and baseline comparisons, our work provides a critical foundation for developing transparent and accessible AI systems in the legal domain.",{"paper_id":1883,"title":1884,"year":7,"month":188,"day":63,"doi":1885,"resource_url":1886,"first_page":1887,"last_page":1888,"pdf_url":1889,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1890,"paper_type":860,"authors":1891,"abstract":1900},"lrec2026-main-046","CLASE: A Hybrid Method for Chinese Legalese Stylistic Evaluation","10.63317\u002F2xrbbj7oaghv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-046","642","653","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.46.pdf","ma-etal-2026-clase",[1892,1894,1897],{"paper_id":1883,"author_seq":247,"given_name":1893,"surname":913,"affiliation":63,"orcid":63},"Yiran Rex",{"paper_id":1883,"author_seq":232,"given_name":1895,"surname":1896,"affiliation":63,"orcid":63},"Yuxiao","Ye",{"paper_id":1883,"author_seq":218,"given_name":1898,"surname":1899,"affiliation":63,"orcid":63},"Huiyuan","Xie","Legal text generated by large language models (LLMs) can usually achieve reasonable factual accuracy, but it frequently fails to adhere to the specialised stylistic norms and linguistic conventions of legal writing. In order to improve stylistic quality, a crucial first step is to establish a reliable evaluation method. However, having legal experts manually develop such a metric is impractical, as the implicit stylistic requirements in legal writing practice are difficult to formalise into explicit rubrics. 
Meanwhile, existing automatic evaluation methods also fall short: reference-based metrics conflate semantic accuracy with stylistic fidelity, and LLM-as-a-judge evaluations suffer from opacity and inconsistency. To address these challenges, we introduce CLASE (Chinese LegAlese Stylistic Evaluation), a hybrid evaluation method that focuses on the stylistic performance of legal text. The method incorporates a hybrid scoring mechanism that combines 1) linguistic feature-based scores and 2) experience-guided LLM-as-a-judge scores. Both the feature coefficients and the LLM scoring experiences are learned from contrastive pairs of authentic legal documents and their LLM-restored counterparts. This hybrid design captures both surface-level features and implicit stylistic norms in a transparent, reference-free manner. Experiments on 200 Chinese legal documents show that CLASE achieves substantially higher alignment with human judgments than traditional metrics and pure LLM-as-a-judge methods. Beyond improved alignment, CLASE provides interpretable score breakdowns and suggestions for improvements, offering a scalable and practical solution for professional stylistic evaluation in legal text generation (Code and data for CLASE is available at: https:\u002F\u002Fgithub.com\u002Frexera\u002FCLASE).",{"paper_id":1902,"title":1903,"year":7,"month":188,"day":63,"doi":1904,"resource_url":1905,"first_page":1906,"last_page":1907,"pdf_url":1908,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1909,"paper_type":860,"authors":1910,"abstract":1920},"lrec2026-main-047","Neural Network-assisted Analysis of Tube Vocal Tract 
Models","10.63317\u002F3qp8kma6ohov","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-047","654","663","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.47.pdf","song-etal-2026-neural",[1911,1914,1917],{"paper_id":1902,"author_seq":247,"given_name":1912,"surname":1913,"affiliation":63,"orcid":63},"Runhui","Song",{"paper_id":1902,"author_seq":232,"given_name":1915,"surname":1916,"affiliation":63,"orcid":63},"Johan","Sjons",{"paper_id":1902,"author_seq":218,"given_name":1918,"surname":1919,"affiliation":63,"orcid":63},"Axel G.","Ekstrom","We present a pipeline for deep neural network assisted modeling and analysis of the behavior of an acoustic tube. The vocal tract is represented as a series of cylindrical tube segments, each characterized by fixed length and variable cross-sectional area. A large synthetic dataset of such tube configurations is generated, and a circuit theory–based algorithm predicts corresponding formant frequencies. To explore mapping between vocal tract shapes and formant values, the pipeline integrates both linear regression and nonlinear machine learning models - including multilayer perceptrons. Model interpretability is measured using Shapley Additive Explanations (SHAP), which quantifies the contribution of each segment to predicted formant frequencies. The proposed framework enables detailed exploration of the articulatory-acoustic relationships inherent to an acoustic tube and vocal tract simulacrum. We present and describe the pipeline in the context of modeling effects of perturbations on the first three formants for a 16-cm tube, divided into 1 cm segments. 
Our pipeline can be applied to any method that models predictions of behavior of an acoustic tube, where the tube is conceived as a series of segmented units.",{"paper_id":1922,"title":1923,"year":7,"month":188,"day":63,"doi":1924,"resource_url":1925,"first_page":1926,"last_page":1927,"pdf_url":1928,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1929,"paper_type":860,"authors":1930,"abstract":1940},"lrec2026-main-048","Central Kurdish Text-to-Speech and Its Application in Speech-to-Text Translation","10.63317\u002F4hfwowidu34u","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-048","664","673","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.48.pdf","mohammadamini-etal-2026-central",[1931,1934,1937],{"paper_id":1922,"author_seq":247,"given_name":1932,"surname":1933,"affiliation":63,"orcid":63},"Mohammad","Mohammadamini",{"paper_id":1922,"author_seq":232,"given_name":1935,"surname":1936,"affiliation":63,"orcid":63},"Meysam","Shamsi",{"paper_id":1922,"author_seq":218,"given_name":1938,"surname":1939,"affiliation":63,"orcid":63},"Marie","Tahon","In this study, we show how from available resources develop high-quality TTS models for low-resource scenarios that according to our extensive evaluation surpass the models trained on dedicated TTS data recorded in the studio. We develop three Text-to-Speech (TTS) models for Central Kurdish as a low-resource language using F5-TTS architecture. The models are trained on Central Kurdish TTS datasets in which two of them are curated from audiobooks during this study and the third one is evaluated for the first time. We also demonstrate the potential of TTS models for developing other speech technologies in low-resource languages by proposing a speech synthesis framework used in a speech-to-text translation application, achieving promising results on standard speech translation benchmarks. 
The curated TTS resources and models will be publicly available under CC BY-NC-ND 4.0 license",{"paper_id":1942,"title":1943,"year":7,"month":188,"day":63,"doi":1944,"resource_url":1945,"first_page":1946,"last_page":1947,"pdf_url":1948,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1949,"paper_type":860,"authors":1950,"abstract":1963},"lrec2026-main-049","QuALA-NL: Question & Answer with Legal Attribution in Dutch","10.63317\u002F5i9bqybga69e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-049","674","684","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.49.pdf","drie-etal-2026-quala",[1951,1954,1957,1960],{"paper_id":1942,"author_seq":247,"given_name":1952,"surname":1953,"affiliation":63,"orcid":63},"Romy A.N. van","Drie",{"paper_id":1942,"author_seq":232,"given_name":1955,"surname":1956,"affiliation":63,"orcid":63},"Roos M.","Bakker",{"paper_id":1942,"author_seq":218,"given_name":1958,"surname":1959,"affiliation":63,"orcid":63},"Daan L. Di","Scala",{"paper_id":1942,"author_seq":203,"given_name":1961,"surname":1962,"affiliation":63,"orcid":63},"Maaike de","Boer","Ensuring trustworthy and traceable outputs from Large Language Models (LLMs) is crucial in high-stakes domains such as law. Retrieval-Augmented Generation (RAG) offers a way to enhance LLMs with domain-specific or updated information and provide attribution to the source, and recent work has focused on knowledge-based RAG (K-RAG) for improved factual grounding. However, proper evaluation of such systems requires high-quality datasets. To address this need, we introduce QuALA-NL: a dataset that provides attributions to legal formalizations, enabling experiments with K-RAG in the legal domain. The dataset contains 101 QA pairs on three Dutch laws, with attributions to the law text and a formalization of the interpretation of the legal text. 
To demonstrate the capabilities of the dataset, we perform experiments using four configurations: LLM-only, RAG using legal texts, K-RAG using a formalization of the legal texts, and RAG combining both legal texts and the formalizations. The results show that K-RAG has the highest retrieval scores, but that this method is outperformed by text-based RAG on generation. A qualitative analysis shows that the use of the knowledge graph for the generation of answers can be improved. QuALA-NL can be used in future work to experiment with knowledge-based Retrieval Augmented Generation methods.",{"paper_id":1965,"title":1966,"year":7,"month":188,"day":63,"doi":1967,"resource_url":1968,"first_page":1969,"last_page":1970,"pdf_url":1971,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1972,"paper_type":860,"authors":1973,"abstract":1980},"lrec2026-main-050","SouDeC: Source Detection and Classification in Czech","10.63317\u002F3xvp6edtnr5g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-050","685","693","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.50.pdf","mrovsk-etal-2026-soudec",[1974,1977],{"paper_id":1965,"author_seq":247,"given_name":1975,"surname":1976,"affiliation":63,"orcid":63},"Jiří","Mírovský",{"paper_id":1965,"author_seq":232,"given_name":1978,"surname":1979,"affiliation":63,"orcid":63},"Barbora","Hladka","We present a method of attribution source detection and classification in Czech. A plain text (typically, a newspaper article) enters the SouDeC system, gets parsed with the external tool UDPipe into Universal-Dependencies style of sentence representation, and then is analyzed for occurrences of attribution signals and sources. 
The list of attribution signals has been extracted from a corpus of Czech newspaper articles annotated with interlinked attribution signals and sources, and has been complemented with context and syntax information to help distinguish relevant occurrences of the signals. The SouDeC system further classifies the attribution sources in one of five classes: anonymous, partially anonymous, unofficial, official non-political and official political, using information from another external tool, a recognizer and classifier of named entities, NameTag 3. While our source detection method gets results comparable to existing systems for other languages, further improvements can be achieved by incorporating fully-fledged automatic coreference resolution into the classification method. In a focused case study, we test a possible usage of SouDeC for distinguishing domain-specific texts of less vs. more reputable origin.",{"paper_id":1982,"title":1983,"year":7,"month":188,"day":63,"doi":1984,"resource_url":1985,"first_page":1986,"last_page":1987,"pdf_url":1988,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1989,"paper_type":860,"authors":1990,"abstract":2028},"lrec2026-main-051","Frame Semantic Patterns for Identifying Underreporting of Notifiable Events in Healthcare: The Case of Gender-Based 
Violence","10.63317\u002F47vpwbo6vbxc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-051","694","704","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.51.pdf","dutra-etal-2026-frame",[1991,1994,1997,2000,2003,2006,2009,2012,2015,2018,2021,2023,2025],{"paper_id":1982,"author_seq":247,"given_name":1992,"surname":1993,"affiliation":63,"orcid":63},"Lívia","Dutra",{"paper_id":1982,"author_seq":232,"given_name":1995,"surname":1996,"affiliation":63,"orcid":63},"Arthur","Lorenzi",{"paper_id":1982,"author_seq":218,"given_name":1998,"surname":1999,"affiliation":63,"orcid":63},"Lais","Berno",{"paper_id":1982,"author_seq":203,"given_name":2001,"surname":2002,"affiliation":63,"orcid":63},"Franciany","Campos",{"paper_id":1982,"author_seq":188,"given_name":2004,"surname":2005,"affiliation":63,"orcid":63},"Karoline","Biscardi",{"paper_id":1982,"author_seq":172,"given_name":2007,"surname":2008,"affiliation":63,"orcid":63},"Kenneth","Brown",{"paper_id":1982,"author_seq":155,"given_name":2010,"surname":2011,"affiliation":63,"orcid":63},"Marcelo","Viridiano",{"paper_id":1982,"author_seq":138,"given_name":2013,"surname":2014,"affiliation":63,"orcid":63},"Frederico","Belcavello",{"paper_id":1982,"author_seq":121,"given_name":2016,"surname":2017,"affiliation":63,"orcid":63},"Ely E.","Matos",{"paper_id":1982,"author_seq":104,"given_name":2019,"surname":2020,"affiliation":63,"orcid":63},"Olivia","Guaranha",{"paper_id":1982,"author_seq":87,"given_name":2022,"surname":1578,"affiliation":63,"orcid":63},"Erik",{"paper_id":1982,"author_seq":73,"given_name":955,"surname":2024,"affiliation":63,"orcid":63},"Reinach",{"paper_id":1982,"author_seq":55,"given_name":2026,"surname":2027,"affiliation":63,"orcid":63},"Tiago Timponi","Torrent","We introduce a methodology for the identification of notifiable events in the domain of healthcare. 
The methodology harnesses semantic frames to define fine-grained patterns and search them in unstructured data, namely, open-text fields in e-medical records. We apply the methodology to the problem of underreporting of gender-based violence (GBV) in e-medical records produced during patients’ visits to primary care units. A total of eight patterns are defined and searched on a corpus of 21 million sentences in Brazilian Portuguese extracted from e-SUS APS. The results are manually evaluated by linguists and the precision of each pattern measured. Our findings reveal that the methodology effectively identifies reports of violence with a precision of 0.726, confirming its robustness. Designed as a transparent, efficient, low-carbon, and language-agnostic pipeline, the approach can be easily adapted to other health surveillance contexts, contributing to the broader, ethical, and explainable use of NLP in public health systems.",{"paper_id":2030,"title":2031,"year":7,"month":188,"day":63,"doi":2032,"resource_url":2033,"first_page":2034,"last_page":2035,"pdf_url":2036,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2037,"paper_type":860,"authors":2038,"abstract":2054},"lrec2026-main-052","PrePPER: A Preference Pattern-based Profiling Framework for Explainable 
Recommendation","10.63317\u002F45ibtz44yq3h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-052","705","715","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.52.pdf","usumi-etal-2026-prepper",[2039,2042,2045,2048,2051],{"paper_id":2030,"author_seq":247,"given_name":2040,"surname":2041,"affiliation":63,"orcid":63},"Taisuke","Usumi",{"paper_id":2030,"author_seq":232,"given_name":2043,"surname":2044,"affiliation":63,"orcid":63},"Akiko","Masaki",{"paper_id":2030,"author_seq":218,"given_name":2046,"surname":2047,"affiliation":63,"orcid":63},"Sanae","Muramatsu",{"paper_id":2030,"author_seq":203,"given_name":2049,"surname":2050,"affiliation":63,"orcid":63},"Akira","Sakamoto",{"paper_id":2030,"author_seq":188,"given_name":2052,"surname":2053,"affiliation":63,"orcid":63},"Takeharu","Eda","Large Language Models (LLMs) have demonstrated remarkable performance across diverse tasks, drawing increasing attention to their application in recommendation systems. In particular, recommendation systems using natural language-based user profiles have attracted attention for improving transparency and scrutability. However, existing methods fail to fully leverage the recommendation capabilities of LLMs due to the unspecified importance of user preferences within user profiles and unmatched preference types between user profiles and item profiles. To address these challenges, we propose PrePPER, a novel preference pattern-based profiling framework designed to explicitly capture the importance of user preferences and enhance the alignment between user profiles and item profiles. PrePPER enables the extraction of users’ preference patterns, which denote characteristic tendencies in user preferences, and the determination of their importance by clustering users’ preferences. Specifically, we first extract users’ preferences from their reviews and perform clustering on the extracted preferences. 
Based on the clustered preferences, we then infer users’ preference patterns along with their relative importance, and construct user and item profiles using this information. Our proposed profiles incorporate the importance of user preferences and enhance the relatedness between user and item profiles, thereby improving the recommendation performance of existing recommender systems.",{"paper_id":2056,"title":2057,"year":7,"month":188,"day":63,"doi":2058,"resource_url":2059,"first_page":2060,"last_page":2061,"pdf_url":2062,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2063,"paper_type":860,"authors":2064,"abstract":2077},"lrec2026-main-053","Evaluating the Impact of Source Diversity for RAG in Historical Research","10.63317\u002F4yz9z3uvzasd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-053","716","734","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.53.pdf","mahadeshwar-etal-2026-evaluating",[2065,2068,2071,2074],{"paper_id":2056,"author_seq":247,"given_name":2066,"surname":2067,"affiliation":63,"orcid":63},"Ruhi","Mahadeshwar",{"paper_id":2056,"author_seq":232,"given_name":2069,"surname":2070,"affiliation":63,"orcid":63},"Andreas van","Cranenburgh",{"paper_id":2056,"author_seq":218,"given_name":2072,"surname":2073,"affiliation":63,"orcid":63},"Tommaso","Caselli",{"paper_id":2056,"author_seq":203,"given_name":2075,"surname":2076,"affiliation":63,"orcid":63},"Malvina","Nissim","Historical research increasingly benefits from large language models (LLMs). However, LLMs are prone to factual inaccuracy, unreliability, and biased interpretations of data. Retrieval-augmented generation (RAG) approaches have emerged as solutions, but may inadvertently perpetuate biased perspectives embedded in historical archives. This paper investigates how source diversity in RAG impacts perspective variation in historical question answering. 
We compile a multilingual corpus (English, French, Dutch) of historical documents spanning multiple countries and focus on Napoleon Bonaparte. We evaluate three Qwen3 models across ten questions using a multi-layered framework combining traditional metrics (BERTScore, ROUGE-L), frame semantics analysis, and syntactic profiling. Our results highlight that, while traditional similarity metrics suggest high semantic consistency, frame-semantic analysis exposes substantial perspective shifts. Baseline answers present \"flattened\" cross-lingual perspectives, whereas RAG introduces diversity. Critically, this diversity manifests differently across languages, demonstrating language-specific patterns. Our findings highlight limitations of traditional evaluation metrics for perspective-sensitive tasks and demonstrate that RAG constitutes active perspective transformation rather than neutral augmentation.",{"paper_id":2079,"title":2080,"year":7,"month":188,"day":63,"doi":2081,"resource_url":2082,"first_page":2083,"last_page":2084,"pdf_url":2085,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2086,"paper_type":860,"authors":2087,"abstract":2097},"lrec2026-main-054","Automatic Essay Scoring and Feedback Generation in Basque Language Learning","10.63317\u002F2vf49six8jhv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-054","735","746","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.54.pdf","azurmendi-etal-2026-automatic",[2088,2091,2094],{"paper_id":2079,"author_seq":247,"given_name":2089,"surname":2090,"affiliation":63,"orcid":63},"Ekhi","Azurmendi",{"paper_id":2079,"author_seq":232,"given_name":2092,"surname":2093,"affiliation":63,"orcid":63},"Xabier","Arregi",{"paper_id":2079,"author_seq":218,"given_name":2095,"surname":2096,"affiliation":63,"orcid":63},"Oier Lopez de","Lacalle","This paper introduces the first publicly available dataset for Automatic Essay Scoring (AES) and feedback 
generation in Basque, targeting the CEFR C1 proficiency level. The dataset comprises 3,200 essays from HABE, each annotated by expert evaluators with criterion specific scores covering correctness, richness, coherence, cohesion, and task alignment enriched with detailed feedback and error examples. We fine-tune open-source models, including RoBERTa-EusCrawl and Latxa 8B\u002F70B, for scoring. We focused on correctness criteria for the explanation generation, adapting Latxa to correctly predict both, scores and explanations. Our experiments show that encoder models remain highly reliable for AES, while supervised fine-tuning (SFT) of Latxa significantly enhances performance, surpassing state-of-the-art (SoTA) closed-source systems such as GPT-5 and Claude Sonnet 4.5 in scoring consistency and feedback quality. We also propose a novel evaluation methodology for assessing feedback generation, combining automatic consistency metrics with expert-based validation of extracted learner errors. Results demonstrate that the fine-tuned Latxa model produces criterion-aligned, pedagogically meaningful feedback and identifies a wider range of error types than proprietary models. This resource and benchmark establish a foundation for transparent, reproducible, and educationally grounded NLP research in low-resource languages such as Basque. 
The dataset, models and manual evaluation results are available here: https:\u002F\u002Fhuggingface.co\u002Fcollections\u002FEkhiAzur\u002Fhabe-hitz-c1",{"paper_id":2099,"title":2100,"year":7,"month":188,"day":63,"doi":2101,"resource_url":2102,"first_page":2103,"last_page":2104,"pdf_url":2105,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2106,"paper_type":860,"authors":2107,"abstract":2113},"lrec2026-main-055","Paragraph Segmentation Revisited: Towards a Standard Task for Structuring Speech","10.63317\u002F3eczsids4mek","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-055","747","759","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.55.pdf","retkowski-etal-2026-paragraph",[2108,2111],{"paper_id":2099,"author_seq":247,"given_name":2109,"surname":2110,"affiliation":63,"orcid":63},"Fabian","Retkowski",{"paper_id":2099,"author_seq":232,"given_name":869,"surname":2112,"affiliation":63,"orcid":63},"Waibel","Automatic speech transcripts are often delivered as unstructured word streams that impede readability and repurposing. We recast paragraph segmentation as the missing structuring step and fill three gaps at the intersection of speech processing and text segmentation. First, we establish TEDPara (human-annotated TED talks) and YTSegPara (YouTube videos with synthetic labels) as the first benchmarks for the paragraph segmentation task. The benchmarks focus on the underexplored speech domain, where paragraph segmentation has traditionally not been part of post-processing, while also contributing to the wider text segmentation field, which still lacks robust and naturalistic benchmarks. Second, we propose a constrained-decoding formulation that lets large language models insert paragraph breaks while preserving the original transcript, enabling faithful, sentence-aligned evaluation. 
Third, we show that a compact model (MiniSeg) attains state-of-the-art accuracy and, when extended hierarchically, jointly predicts chapters and paragraphs with minimal computational cost. Together, our resources and methods establish paragraph segmentation as a standardized, practical task in speech processing.",{"paper_id":2115,"title":2116,"year":7,"month":188,"day":63,"doi":2117,"resource_url":2118,"first_page":2119,"last_page":2120,"pdf_url":2121,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2122,"paper_type":860,"authors":2123,"abstract":2134},"lrec2026-main-056","High-Order Question Generation in a Multilingual Educational Context","10.63317\u002F56rihjwb6jq7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-056","760","769","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.56.pdf","uar-etal-2026-high",[2124,2127,2130,2133],{"paper_id":2115,"author_seq":247,"given_name":2125,"surname":2126,"affiliation":63,"orcid":63},"Suna Şeyma","Uçar",{"paper_id":2115,"author_seq":232,"given_name":2128,"surname":2129,"affiliation":63,"orcid":63},"Itziar","Aldabe",{"paper_id":2115,"author_seq":218,"given_name":2131,"surname":2132,"affiliation":63,"orcid":63},"Nora","Aranberri",{"paper_id":2115,"author_seq":203,"given_name":1856,"surname":1857,"affiliation":63,"orcid":63},"Critical thinking is a fundamental skill that helps learners move beyond simple memorization. One way to develop this skill is through high-order questioning. However, crafting such questions remains a challenge for educators, and classroom practices tend to rely on low-order questions. Large Language Models have demonstrated strong capabilities in generating high-order questions, especially when guided by prompts based on Bloom’s Taxonomy. Yet, existing research has largely centered on this framework and focused only on English. 
This study addresses these gaps by introducing prompts grounded in two alternative frameworks: Claim-Evidence-Reasoning and Divergent Questioning within a multilingual context using Basque, Spanish, and English. Results indicate that while both an open-source and a proprietary model rather effectively generate questions in all three languages, only about half of the answerable questions are recognized by teachers as high-order. A positive finding is that the alternative frameworks produce structurally and conceptually varied questions, suggesting they could complement each other and provide viable alternatives to Bloom’s Taxonomy.",{"paper_id":2136,"title":2137,"year":7,"month":188,"day":63,"doi":2138,"resource_url":2139,"first_page":2140,"last_page":2141,"pdf_url":2142,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2143,"paper_type":860,"authors":2144,"abstract":2163},"lrec2026-main-057","From Print to Digital and beyond: The Retrodigitization of a Historical Dictionary of Italian as a Hybrid Lexical 
Resource","10.63317\u002F338howsz93sg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-057","770","777","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.57.pdf","biffi-etal-2026-print",[2145,2148,2151,2154,2157,2160],{"paper_id":2136,"author_seq":247,"given_name":2146,"surname":2147,"affiliation":63,"orcid":63},"Marco","Biffi",{"paper_id":2136,"author_seq":232,"given_name":2149,"surname":2150,"affiliation":63,"orcid":63},"Sebastiana","Cucurullo",{"paper_id":2136,"author_seq":218,"given_name":2152,"surname":2153,"affiliation":63,"orcid":63},"Manuel","Favaro",{"paper_id":2136,"author_seq":203,"given_name":2155,"surname":2156,"affiliation":63,"orcid":63},"Elisa","Guadagnini",{"paper_id":2136,"author_seq":188,"given_name":2158,"surname":2159,"affiliation":63,"orcid":63},"Simonetta","Montemagni",{"paper_id":2136,"author_seq":172,"given_name":2161,"surname":2162,"affiliation":63,"orcid":63},"Eva","Sassolini","This paper presents the retrodigitization project of the Grande Dizionario della Lingua Italiana (GDLI), the largest and most comprehensive historical dictionary of the Italian language. The GDLI’s 23,000 pages — originally designed for human consultation — constitute an exceptional repository of linguistic and cultural-historical information, while posing significant challenges to large-scale digitization and data structuring. The project, still ongoing, will result in the development of a set of interoperable and interlinked resources: (i) a TEI-XML edition of the dictionary text, encoding its complex lexicographic and citation structure; (ii) an annotated corpus of the quoted examples, enabling linguistic and historical research across centuries; and (iii) a database of cited authors and works. 
Together, these components form a hybrid lexical resource that establishes the foundations for innovative and advanced modes of accessing and exploring the rich and multifaceted content of this historical dictionary.",{"paper_id":2165,"title":2166,"year":7,"month":188,"day":63,"doi":2167,"resource_url":2168,"first_page":2169,"last_page":2170,"pdf_url":2171,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2172,"paper_type":860,"authors":2173,"abstract":2184},"lrec2026-main-058","Learning through News: Bridging the Gap between Algorithmic Recommendation and Human Curation","10.63317\u002F29rc2j4yc3un","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-058","778","794","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.58.pdf","debaene-etal-2026-learning",[2174,2177,2180,2181],{"paper_id":2165,"author_seq":247,"given_name":2175,"surname":2176,"affiliation":63,"orcid":63},"Florian","Debaene",{"paper_id":2165,"author_seq":232,"given_name":2178,"surname":2179,"affiliation":63,"orcid":63},"Loic De","Langhe",{"paper_id":2165,"author_seq":218,"given_name":1856,"surname":1857,"affiliation":63,"orcid":63},{"paper_id":2165,"author_seq":203,"given_name":2182,"surname":2183,"affiliation":63,"orcid":63},"Veronique","Hoste","News recommendation systems play a central role in how readers access and process current events. Most recommenders’ underlying algorithmic strategies, however, prioritize user engagement over comprehension, amplifying risks of misinformation and filter bubbles. This study investigates whether fine-grained content-based recommendation strategies favor human knowledge retention and explores how such a content-based recommendation can be operationalized using event coreference–based document modeling. To this purpose, we first measure the effect of manually curated content-based news recommendation on knowledge retention across five news topics with 126 Dutch speaking participants. 
Next, we investigate document retrieval by comparing a state-of-the-art event coreference resolution system for Dutch which recommends news articles based on event chains with a document similarity retrieval baseline using state-of-the-art embedding models in three increasingly more complex test settings. The results demonstrate that human-curated content-based recommendation can positively and significantly impact readers’ knowledge retention. Moreover, we show that a fine-grained coreference system can approach said level of human curation better than state-of-the-art document retrieval methods. In general, this holds potential for scalable, comprehension-oriented news recommendation.",{"paper_id":2186,"title":2187,"year":7,"month":188,"day":63,"doi":2188,"resource_url":2189,"first_page":2190,"last_page":2191,"pdf_url":2192,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":2193,"bibkey":2194,"paper_type":860,"authors":2195,"abstract":2217},"lrec2026-main-059","MaskedVerbalizer: Automatic Verbalizer Construction for Few-Shot Text Classification in Low-Resource Right-to-Left 
Languages","10.63317\u002F2ijipj4r77bn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-059","795","804","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.59.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.59_OptionalSupplementaryMaterial.zip","ullah-etal-2026-maskedverbalizer",[2196,2199,2202,2205,2208,2211,2214],{"paper_id":2186,"author_seq":247,"given_name":2197,"surname":2198,"affiliation":63,"orcid":63},"Faizad","Ullah",{"paper_id":2186,"author_seq":232,"given_name":2200,"surname":2201,"affiliation":63,"orcid":63},"Furqan","Sikandar",{"paper_id":2186,"author_seq":218,"given_name":2203,"surname":2204,"affiliation":63,"orcid":63},"Areeba","Waqar",{"paper_id":2186,"author_seq":203,"given_name":2206,"surname":2207,"affiliation":63,"orcid":63},"Faizan","Ali",{"paper_id":2186,"author_seq":188,"given_name":2209,"surname":2210,"affiliation":63,"orcid":63},"Muhammad Sohaib","Ayub",{"paper_id":2186,"author_seq":172,"given_name":2212,"surname":2213,"affiliation":63,"orcid":63},"Mubashar","Mushtaq",{"paper_id":2186,"author_seq":155,"given_name":2215,"surname":2216,"affiliation":63,"orcid":63},"Asim","Karim","Text classification in low-resource right-to-left languages faces significant challenges due to the scarcity of annotated data and the morphological richness of languages such as Arabic, Urdu, Sindhi, and Pashto. Arabic and Urdu alone are spoken by over 380+ million and 246+ million people worldwide, respectively. Pashto is the national language of Afghanistan, highlighting the importance of effective language technologies. While multilingual Pre-trained Language Models (PLMs) have shown promising results, they typically require extensive labeled datasets and computationally expensive fine-tuning to achieve better performance. Such limitations make these PLMs impractical for the low-resource settings described above. 
Therefore, we employ a few-shot strategy (zero, 4, or 8 shots) to achieve results comparable to those of standard fine-tuning. In this work, we propose MaskedVerbalizer, a novel technique designed for few-shot text classification. Our method introduces an automatic verbalizer construction approach that generates class-specific label words in 4-shot settings, eliminating the need for extensive manual intervention. Despite maintaining a simple model architecture, MaskedVerbalizer achieves effective performance in classification benchmarks. Experimental results demonstrate that our method effectively addresses the core challenges of low-resource text classification, providing a practical, computationally efficient solution. We achieved accuracies of 90.43% and 92.72% with mBERT and XLM-RoBERTa, respectively, representing improvements of 25–30% over soft and automatic verbalizers. The code for MaskedVerbalizer is publicly available at https:\u002F\u002Fgithub.com\u002FFurqann-hue\u002FMV.",{"paper_id":2219,"title":2220,"year":7,"month":188,"day":63,"doi":2221,"resource_url":2222,"first_page":2223,"last_page":2224,"pdf_url":2225,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2226,"paper_type":860,"authors":2227,"abstract":2234},"lrec2026-main-060","RBR: RAG-Based Open-Domain Question Answering Using a Ranking Approach to Document Retrieval","10.63317\u002F23z52omk7zss","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-060","805","817","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.60.pdf","naravajhula-etal-2026-rbr",[2228,2231],{"paper_id":2219,"author_seq":247,"given_name":2229,"surname":2230,"affiliation":63,"orcid":63},"Priyatam Sai","Naravajhula",{"paper_id":2219,"author_seq":232,"given_name":2232,"surname":2233,"affiliation":63,"orcid":63},"Vincent","Ng","Retrieval-Augmented Generation (RAG) has emerged as a promising approach to ODQA. 
A RAG-based ODQA system is typically composed of two components: a retriever that retrieves the passages that are most relevant to a given query, and a generator that generates the answer to the query by combining the information from the retrieved passages. Existing retrievers typically identify the most relevant passages by computing the similarity between the query and each passage in a given collection. In other words, they do not compare which of two passages is more relevant to the given query. We hypothesize, however, that we can improve RAG-based ODQA systems by modeling the relationship among the passages to be retrieved, specifically by learning which passages are more relevant than the others to the given query. To do so, we propose a ranking-based approach to passage retrieval, where we first rank the candidate passages w.r.t. the query and subsequently refine the score associated with each of these passages using a Graph Attention Network. We evaluate our approach to ODQA, RBR (Ranking-Based Retrieval), on two commonly-used ODQA datasets, Natural Questions and TriviaQA. 
Experimental results show that RBR slightly outperforms PA-RAG, a state-of-the-art ODQA system, by 0.45 points and 1.01 points in Exact Match score on Natural Questions and TriviaQA, respectively.",{"paper_id":2236,"title":2237,"year":7,"month":188,"day":63,"doi":2238,"resource_url":2239,"first_page":2240,"last_page":2241,"pdf_url":2242,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2243,"paper_type":860,"authors":2244,"abstract":2257},"lrec2026-main-061","Sentence-Level Back-Transliteration of Romanized Indian Languages: Performance Analysis and Challenges","10.63317\u002F2fe72eyjbmj3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-061","818","827","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.61.pdf","kumar-etal-2026-sentence",[2245,2248,2251,2254],{"paper_id":2236,"author_seq":247,"given_name":2246,"surname":2247,"affiliation":63,"orcid":63},"Saurabh","Kumar",{"paper_id":2236,"author_seq":232,"given_name":2249,"surname":2250,"affiliation":63,"orcid":63},"Dhruvkumar Babubhai","Kakadiya",{"paper_id":2236,"author_seq":218,"given_name":2252,"surname":2253,"affiliation":63,"orcid":63},"Sanasam Ranbir","Singh",{"paper_id":2236,"author_seq":203,"given_name":2255,"surname":2256,"affiliation":63,"orcid":63},"Sukumar","Nandi","The widespread use of Romanized text for Indian languages, particularly on social media platforms, poses significant challenges for natural language processing due to the lack of standardized orthography and the presence of contextual ambiguities. In this study, we explore sentence-level back-transliteration for 13 Indian languages, focusing on addressing the limitations of word-level models that fail to capture contextual dependencies. We evaluate state-of-the-art models, including fine-tuned LLaMA, mT5, and Multilingual Transformer models, comparing their performance against the baseline IndicXlit model. 
In addition, we conduct a comprehensive error analysis to gain deeper insights into model performance. Our results demonstrate that fine-tuned LLaMA and the proposed IndiXform model, specifically designed to leverage sentence-level context, significantly outperform zero-shot LLaMA and the IndicXlit baseline. These findings provide valuable insights into handling contextual ambiguities and enhancing the accuracy of back-transliteration systems for Indian languages.",{"paper_id":2259,"title":2260,"year":7,"month":188,"day":63,"doi":2261,"resource_url":2262,"first_page":2263,"last_page":2264,"pdf_url":2265,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2266,"paper_type":860,"authors":2267,"abstract":2275},"lrec2026-main-062","Cross-Corpus CEFR Classification through Artificial Learners Perplexities","10.63317\u002F2bvpcczgao2t","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-062","828","837","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.62.pdf","stearns-etal-2026-cross",[2268,2270,2273],{"paper_id":2259,"author_seq":247,"given_name":964,"surname":2269,"affiliation":63,"orcid":63},"Stearns",{"paper_id":2259,"author_seq":232,"given_name":2271,"surname":2272,"affiliation":63,"orcid":63},"John P.","McCrae",{"paper_id":2259,"author_seq":218,"given_name":1316,"surname":2274,"affiliation":63,"orcid":63},"Gaillat","The complexity of neural methods for automatic proficiency assessment often sacrifices interpretability and robustness. This paper presents a competitive alternative for CEFR classification using optimized statistical models with a novel perplexity-based feature engineering pipeline. We introduce LLM-derived perplexity features as a proxy for how unexpected a learner’s word choices are: native model perplexity measures unexpectedness relative to native language use, while Artificial Learner model perplexity quantifies relative to a specific proficiency level. 
While recent work favors end-to-end neural architectures, we demonstrate that traditional pipelines enhanced with these interpretable perplexity features can achieve comparable performance on established benchmarks. We evaluate two transfer scenarios: zero-shot (trained on EFCAMDAT, tested on external corpora) and 90-10 split (same features, in-domain classifier training). On KUPA-KEYS, perplexity features achieve RMSE 0.707 (zero-shot) and 0.660 (90-10 split), outperforming fine-tuned BERT and prompt-based LLMs. On CELVA-SP, zero-shot perplexity shows limited generalization (RMSE 1.437 vs. LLM’s 1.016), but statistical models close this gap in the 90-10 split (RMSE 0.872). Across all three evaluation datasets, perplexity-based models achieve the best average macro F1 in the 90-10 split (0.446 vs. 0.287 for BERT and 0.175 for prompting), demonstrating that interpretable features paired with domain-adapted classifiers provide the most robust cross-domain representations. We contribute: (1) state-of-the-art KUPA-KEYS results with interpretable models, (2) the first comprehensive CELVA-SP benchmark, and (3) evidence that feature-level transfer outperforms both end-to-end fine-tuning and zero-shot prompting.",{"paper_id":2277,"title":2278,"year":7,"month":188,"day":63,"doi":2279,"resource_url":2280,"first_page":2281,"last_page":2282,"pdf_url":2283,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":2284,"bibkey":2285,"paper_type":860,"authors":2286,"abstract":2296},"lrec2026-main-063","CorpusClues: Scalable Unsupervised Similarity Search for Historical Texts Using 
MinHash-LSH","10.63317\u002F2ufpfohnmg3i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-063","838","847","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.63.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.63_OptionalSupplementaryMaterial.zip","lemay-etal-2026-corpusclues",[2287,2290,2293],{"paper_id":2277,"author_seq":247,"given_name":2288,"surname":2289,"affiliation":63,"orcid":63},"Paulien","Lemay",{"paper_id":2277,"author_seq":232,"given_name":2291,"surname":2292,"affiliation":63,"orcid":63},"Klaas","Bentein",{"paper_id":2277,"author_seq":218,"given_name":2294,"surname":2295,"affiliation":63,"orcid":63},"Els","Lefever","CorpusClues is a prototype web-based platform for large-scale, unsupervised clustering of textual data, designed to address the specific challenges of historical corpora. It leverages the well-established computational techniques of MinHash and Locality-Sensitive Hashing (LSH) at the character level in order to detect structural similarities between texts even when exact patterns diverge. This approach makes CorpusClues robust to orthographic variation, such as historical spelling differences, while remaining fast and language-agnostic, capable of processing large and heterogeneous corpora without relying on language-specific models or preprocessing. Researchers can explore resulting clusters through interactive visualizations and exportable data, gaining access to patterns that would otherwise require the slow and uncertain process of manual collation. Evaluation against labeled gold standards shows that the system consistently produces high-quality clustering, accurately reconstructing relationships between texts despite substantial orthographic variation. 
By combining computational efficiency with user-friendly design, CorpusClues provides an accessible yet rigorous means of uncovering formulaicity and textual transmission at scale, opening new possibilities for the study of historical textual traditions.",{"paper_id":2298,"title":2299,"year":7,"month":188,"day":63,"doi":2300,"resource_url":2301,"first_page":2302,"last_page":2303,"pdf_url":2304,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2305,"paper_type":860,"authors":2306,"abstract":2334},"lrec2026-main-064","BenCSSmark: Making the Social Sciences Count in LLM Research","10.63317\u002F3cd2b8m83z28","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-064","848","859","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.64.pdf","chatelain-etal-2026-bencssmark",[2307,2310,2313,2316,2319,2322,2325,2328,2330,2333],{"paper_id":2298,"author_seq":247,"given_name":2308,"surname":2309,"affiliation":63,"orcid":63},"Arnault","Chatelain",{"paper_id":2298,"author_seq":232,"given_name":2311,"surname":2312,"affiliation":63,"orcid":63},"Etienne","Ollion",{"paper_id":2298,"author_seq":218,"given_name":2314,"surname":2315,"affiliation":63,"orcid":63},"Qianwen","Guan",{"paper_id":2298,"author_seq":203,"given_name":2317,"surname":2318,"affiliation":63,"orcid":63},"Diandra","Fabre",{"paper_id":2298,"author_seq":188,"given_name":2320,"surname":2321,"affiliation":63,"orcid":63},"Lorraine","Goeuriot",{"paper_id":2298,"author_seq":172,"given_name":2323,"surname":2324,"affiliation":63,"orcid":63},"Emile","Chapuis",{"paper_id":2298,"author_seq":155,"given_name":2326,"surname":2327,"affiliation":63,"orcid":63},"Abdelkrim","Beloued",{"paper_id":2298,"author_seq":138,"given_name":1938,"surname":2329,"affiliation":63,"orcid":63},"Candito",{"paper_id":2298,"author_seq":121,"given_name":2331,"surname":2332,"affiliation":63,"orcid":63},"Nicolas","Hervé",{"paper_id":2298,"author_seq":104,"given_name":1044,"surname":1045,"a
ffiliation":63,"orcid":63},"This position paper argues that the under-representation of social science tasks in contemporary LLM benchmarks limits advances in both LLM evaluation and social scientific inquiry. Benchmarks — standardized tools for assessing computational systems — are pivotal in the development of artificial intelligence (AI), including large language models (LLMs). Benchmarks do more than measure progress — they actively structure it, shaping reputations, research agendas, and commercial outcomes. Despite this central role, the social sciences are largely absent from mainstream evaluation frameworks, even though scholars in these fields generate dozens of rigorously annotated, context-sensitive datasets each year. Integrating this work into benchmark design could significantly improve the generalization and robustness of AI models. In turn, models trained on social scientific tasks would likely yield better performance on classic and contemporary tasks in disciplines as diverse as history, sociology, political science or economics. This is all the more pressing as these disciplines are quickly turning to LLMs for assistance. To address this gap, we introduce BenCSSmark, a benchmark composed of datasets annotated by computational social scientists. 
By integrating social scientific perspectives into benchmarking, BenCSSmark seeks to promote more robust, transparent, and socially relevant AI systems and to foster efficient collaboration.",{"paper_id":2336,"title":2337,"year":7,"month":188,"day":63,"doi":2338,"resource_url":2339,"first_page":2340,"last_page":2341,"pdf_url":2342,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2343,"paper_type":860,"authors":2344,"abstract":2350},"lrec2026-main-065","Predicting Topic (Co-)Occurrence Using Topic Networks Built from the Project Gutenberg Corpus","10.63317\u002F58x3h7gjbpb4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-065","860","869","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.65.pdf","verma-etal-2026-predicting",[2345,2348],{"paper_id":2336,"author_seq":247,"given_name":2346,"surname":2347,"affiliation":63,"orcid":63},"Bhuvanesh","Verma",{"paper_id":2336,"author_seq":232,"given_name":869,"surname":2349,"affiliation":63,"orcid":63},"Mehler","Although temporal topic modeling has been widely applied to scientific and legal texts, literary corpora have largely been overlooked in this regard. To address this issue, we analyze topic evolution in a subset of the Project Gutenberg (PG) corpus. We model this subset as a sequence of topic networks that capture the emergence, persistence, and interaction of thematic structures over decades. Using supervised topic representations, we predict nodes (topics) and edges (topic pairings) to forecast future topics and their co-occurrence. Our experiments demonstrate moderate to strong temporal persistence in topic connectivity patterns across three topic systems, with ROC-AUC and AP values consistently above 0.85. We find that the temporal span of topic networks significantly impacts predictive performance: longer spans improve the stability and recall of topic presence, while shorter spans better capture evolving topic relationships. 
Overall, our findings demonstrate the predictability of topics in literary texts over time.",{"paper_id":2352,"title":2353,"year":7,"month":188,"day":63,"doi":2354,"resource_url":2355,"first_page":2356,"last_page":2357,"pdf_url":2358,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2359,"paper_type":860,"authors":2360,"abstract":2367},"lrec2026-main-066","AraHopeCorpus: Annotation Guidelines and Dataset for Hope Speech in Arabic Social Media Crisis Discourse","10.63317\u002F4sz7jad3jug5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-066","870","880","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.66.pdf","sharqawi-etal-2026-arahopecorpus",[2361,2364],{"paper_id":2352,"author_seq":247,"given_name":2362,"surname":2363,"affiliation":63,"orcid":63},"Esra'a Ahmad","Sharqawi",{"paper_id":2352,"author_seq":232,"given_name":2365,"surname":2366,"affiliation":63,"orcid":63},"Wajdi","Zaghouani","Social media has become a crucial arena for shaping public narratives during armed conflicts, providing space for both harmful and constructive communication. While hate speech and misinformation have been widely studied, expressions that promote resilience, solidarity, and optimism remain underexplored, particularly in Arabic contexts. This paper introduces AraHopeCorpus, the first annotated dataset of Arabic hope speech collected from ten thousand YouTube comments related to the war on Gaza between 2023 and 2024. Using a detailed annotation framework, comments were classified into three categories: hope speech, no hope speech, and neutral or unclear discourse. The dataset shows that hopeful language dominates, accounting for more than sixty four percent of all comments. These expressions of hope appear mainly as religious encouragement, collective solidarity, and optimism for endurance and justice. 
No hope speech, representing about thirteen percent, reflects despair and disillusionment, while the rest of the comments contain neutral or mixed content. Inter-Annotator Agreement reached substantial levels (Cohen’s Kappa equals 0.71), though dialectal variation, sarcasm, and implicit meaning posed annotation challenges. A comparative analysis between human annotators and ChatGPT revealed that large language models can support annotation but remain limited in handling dialectal and culturally embedded expressions. AraHopeCorpus will be released for research purposes under an open and non commercial license. It provides a valuable resource for studying constructive digital discourse, enabling further research on hope speech detection, crisis communication, and resilience in Arabic social media.",{"paper_id":2369,"title":2370,"year":7,"month":188,"day":63,"doi":2371,"resource_url":2372,"first_page":2373,"last_page":2374,"pdf_url":2375,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2376,"paper_type":860,"authors":2377,"abstract":2382},"lrec2026-main-067","Cohesion-6K: An Arabic Dataset for Analyzing Social Cohesion and Conflict in Online Discourse","10.63317\u002F4twhjaneai4o","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-067","881","892","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.67.pdf","alathba-etal-2026-cohesion",[2378,2381],{"paper_id":2369,"author_seq":247,"given_name":2379,"surname":2380,"affiliation":63,"orcid":63},"Aisha Ali","Al-Athba",{"paper_id":2369,"author_seq":232,"given_name":2365,"surname":2366,"affiliation":63,"orcid":63},"The study of online discourse has become central to understanding societal polarization. While much research has focused on detecting overt toxicity, the subtle dynamics of social cohesion, meaning the interaction between divisive and unifying narratives, remain computationally underexplored. 
This paper presents Cohesion-6K, a manually and ChatGPT-assisted annotated dataset of six thousand Arabic public Facebook posts related to the Israeli Occupation of Palestine. Each post is assigned to one of five discourse categories that represent a continuum from conflict to cohesion: Conflict, Resolution, Community Engagement, Supportive Interactions, and Shared Values. The annotation process combines expert human judgment with model-assisted pre-labeling verified by trained annotators, achieving substantial inter-annotator agreement (Cohen’s kappa = 0.85). Quantitative analysis reveals a consistent engagement gap, where conflict-oriented posts receive between two and four times more user interaction than resolution-oriented ones (p \u003C 0.01). This pattern illustrates how divisive discourse tends to attract disproportionate visibility in Arabic social media spaces. Cohesion-6K provides a transparent and reproducible resource for the study of online cohesion and polarization. The dataset, annotation guidelines, and preprocessing code will be released for research use under an open license, supporting future work in computational social science, digital communication, and Arabic natural language processing.",{"paper_id":2384,"title":2385,"year":7,"month":188,"day":63,"doi":2386,"resource_url":2387,"first_page":2388,"last_page":2389,"pdf_url":2390,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2391,"paper_type":860,"authors":2392,"abstract":2411},"lrec2026-main-068","Reference-free Evaluation at Inference for NER\u002FNEL over OCRed Historical 
Texts","10.63317\u002F24wx62fjkprg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-068","893","904","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.68.pdf","nguyen-etal-2026-reference",[2393,2396,2399,2402,2405,2408],{"paper_id":2384,"author_seq":247,"given_name":2394,"surname":2395,"affiliation":63,"orcid":63},"Tien-Nam","Nguyen",{"paper_id":2384,"author_seq":232,"given_name":2397,"surname":2398,"affiliation":63,"orcid":63},"Adam","Jatowt",{"paper_id":2384,"author_seq":218,"given_name":2400,"surname":2401,"affiliation":63,"orcid":63},"Ahmed","Hamdi",{"paper_id":2384,"author_seq":203,"given_name":2403,"surname":2404,"affiliation":63,"orcid":63},"Mickael","Coustaty",{"paper_id":2384,"author_seq":188,"given_name":2406,"surname":2407,"affiliation":63,"orcid":63},"Thi Hong Hanh","Tran",{"paper_id":2384,"author_seq":172,"given_name":2409,"surname":2410,"affiliation":63,"orcid":63},"Antoine","Doucet","Named Entity Recognition (NER) and Named Entity Linking (NEL) are core tasks in entity extraction, yet their robustness is limited when applied to noisy documents, such as those generated by Optical Character Recognition (OCR) over historical documents. Although large language models (LLMs) have shown strong zero-shot and few-shot performance on NER and NEL tasks, prior work has largely focused on using LLMs as direct predictors rather than evaluating extraction performance. In this study, we explore the feasibility of using LLMs as learned evaluators to estimate the quality of NER\u002FNEL outputs, especially in settings where human-annotated references are unavailable at inference time. We propose supervised approaches that fine-tune LLMs to predict quality scores based on training data with gold annotations, enabling reference-free quality estimation once trained. 
Experiments on the HIPE-2020 benchmark across English, French, and German languages demonstrate that fine-tuned LLMs provide reliable estimates of output quality. Our findings suggest that LLM-based evaluation can support quality control and enable evaluation in noisy setting.",{"paper_id":2413,"title":2414,"year":7,"month":188,"day":63,"doi":2415,"resource_url":2416,"first_page":2417,"last_page":2418,"pdf_url":2419,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2420,"paper_type":860,"authors":2421,"abstract":2425},"lrec2026-main-069","Echoes of the Troubadours: A Corpus of Troubadour Poetry for Stylometric Analysis and Authorship Attribution","10.63317\u002F5kd5docuu6qb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-069","905","918","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.69.pdf","langhe-etal-2026-echoes",[2422,2423,2424],{"paper_id":2413,"author_seq":247,"given_name":2178,"surname":2179,"affiliation":63,"orcid":63},{"paper_id":2413,"author_seq":232,"given_name":1856,"surname":1857,"affiliation":63,"orcid":63},{"paper_id":2413,"author_seq":218,"given_name":2182,"surname":2183,"affiliation":63,"orcid":63},"We present TrobaCor, a curated corpus of medieval troubadour poetry, which comprises 1668 unique Old Occitan texts by a large variety of authors. Clustering and stylometric experiments show that we can accurately model authorial style beyond topical content, even though formulaic or topically diverse genres remain challenging. Furthermore, we can model and detect traces of an author’s stylistic \"DNA\" even in short-form collaborative poetry, offering a uniquely fine-grained perspective in the field. In addition, we provide self-organizing map visualizations in order to provide an interpretable view of stylistic patterns across authors. 
TrobaCor is publicly released to support reproducible research in NLP and digital humanities on this low-resource historical corpus.",{"paper_id":2427,"title":2428,"year":7,"month":188,"day":63,"doi":2429,"resource_url":2430,"first_page":2431,"last_page":2432,"pdf_url":2433,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2434,"paper_type":860,"authors":2435,"abstract":2446},"lrec2026-main-070","Gretino: A Greek and Latin Dataset to Benchmark Retrieval Systems in Classical Languages","10.63317\u002F3ipryhrqmwvi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-070","919","928","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.70.pdf","toyin-etal-2026-gretino",[2436,2437,2439,2442,2444],{"paper_id":2427,"author_seq":247,"given_name":1769,"surname":1770,"affiliation":63,"orcid":63},{"paper_id":2427,"author_seq":232,"given_name":1798,"surname":2438,"affiliation":63,"orcid":63},"Iezzi",{"paper_id":2427,"author_seq":218,"given_name":2440,"surname":2441,"affiliation":63,"orcid":63},"Elia","Scapini",{"paper_id":2427,"author_seq":203,"given_name":2443,"surname":1798,"affiliation":63,"orcid":63},"Giulio",{"paper_id":2427,"author_seq":188,"given_name":1509,"surname":2445,"affiliation":63,"orcid":63},"Puccetti","Semantic similarity search is a method for exploring large text corpora and retrieving conceptually related content. Although widely used in modern language applications, it remains underexplored in the context of classical literature, where it could provide scholars with tools to uncover meaningful connections across authors, genres, and languages, surpassing the limitations of rule-based or keyword search systems. To promote the adoption of semantic retrieval in classical languages, we introduce Gretino, the first benchmark dataset for evaluating semantic search systems in Latin, Ancient Greek, and cross-lingual settings. 
Gretino comprises 240 carefully designed queries, each paired with five semantically relevant passages in Latin and Greek. The dataset is divided into two subsets: Gretino Silver, consisting of 200 queries and 1,000 targets (evenly split between Latin and Greek), generated with the assistance of ChatGPT and subsequently reviewed; and Gretino Gold, a manually curated high-quality subset of 40 queries and 200 targets, fully based on authentic classical texts. We evaluate four pre-trained language models: GreBERTa, LaBERTa, PhilBERTA, and SPhilBERTa and demonstrate the potential of a contrastive learning approach based on SimCSE (Gao et al., 2021) for fine-tuning, showing that training on carefully curated bilingual corpora, with texts aligned in the two languages, can improve retrieval performance.",{"paper_id":2448,"title":2449,"year":7,"month":188,"day":63,"doi":2450,"resource_url":2451,"first_page":2452,"last_page":2453,"pdf_url":2454,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2455,"paper_type":860,"authors":2456,"abstract":2469},"lrec2026-main-071","A Recipe for Adapting Multilingual Embedders to OCR-Error Robustness and Historical Texts","10.63317\u002F29rfx5wcyz3z","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-071","929","935","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.71.pdf","michail-etal-2026-recipe",[2457,2460,2463,2466],{"paper_id":2448,"author_seq":247,"given_name":2458,"surname":2459,"affiliation":63,"orcid":63},"Andrianos","Michail",{"paper_id":2448,"author_seq":232,"given_name":2461,"surname":2462,"affiliation":63,"orcid":63},"Stylianos","Psychias",{"paper_id":2448,"author_seq":218,"given_name":2464,"surname":2465,"affiliation":63,"orcid":63},"Juri","Opitz",{"paper_id":2448,"author_seq":203,"given_name":2467,"surname":2468,"affiliation":63,"orcid":63},"Simon","Clematide","Modern multilingual text embedding models excel at semantic search on contemporary text 
but their performance degrades measurably on digitized historical documents. This issue is especially pronounced for underrepresented languages such as Luxembourgish, where historical materials combine evolving spelling conventions with OCR artifacts absent from standard training data. To address these challenges, we introduce OCR M-GTE, a pair of multilingual embedding models adapted for OCR robustness and historical texts, and show that the observed degradation can be mitigated through a simple multi-step training procedure tailored to historical variants and OCR noise. We evaluate the models on standard semantic search tasks, simulated OCR degradation, and genuine historical collections, observing consistent improvements under OCR-induced noise and on genuine historical data while maintaining comparable performance on clean modern text. Our ablation findings suggest that multilingual embedding models can be effectively adapted to perform robust cross-lingual search in heterogeneous European digitized corpora. 
We release our adapted models, code, and datasets under the AGPL-3.0 license: https:\u002F\u002Fgithub.com\u002Fimpresso\u002Focr-robust-multilingual-embeddings",{"paper_id":2471,"title":2472,"year":7,"month":188,"day":63,"doi":2473,"resource_url":2474,"first_page":2475,"last_page":2476,"pdf_url":2477,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2478,"paper_type":860,"authors":2479,"abstract":2489},"lrec2026-main-072","Phrase-Level Segmentation on Medieval Corpora for Aligning Multilingual Texts","10.63317\u002F32huzuuokpfr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-072","936","946","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.72.pdf","ing-etal-2026-phrase",[2480,2483,2486],{"paper_id":2471,"author_seq":247,"given_name":2481,"surname":2482,"affiliation":63,"orcid":63},"Lucence","Ing",{"paper_id":2471,"author_seq":232,"given_name":2484,"surname":2485,"affiliation":63,"orcid":63},"Matthias Gille","Levenson",{"paper_id":2471,"author_seq":218,"given_name":2487,"surname":2488,"affiliation":63,"orcid":63},"Carolina","Macedo","This paper presents an approach to multilingual alignment for medieval languages, focusing on the prior step of \"phrase\" segmentation. It outlines the challenges posed by historical data and describes different strategies for segmenting texts in multiple languages. It releases a gold-standard segmentation corpus based on various literary and historical works from the late Middle Ages in Europe. This corpus consists of texts in seven medieval languages (French, Castilian, Catalan, Portuguese, Latin, Italian, English). 
Several architectures are tested with both in-domain and out-of-domain evaluation sets.",{"paper_id":2491,"title":2492,"year":7,"month":188,"day":63,"doi":2493,"resource_url":2494,"first_page":2495,"last_page":2496,"pdf_url":2497,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2498,"paper_type":860,"authors":2499,"abstract":2511},"lrec2026-main-073","RAGE: Roman and Greek Emotions","10.63317\u002F5dgh2dbex43c","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-073","947","957","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.73.pdf","riemenschneider-etal-2026-rage",[2500,2503,2506,2508],{"paper_id":2491,"author_seq":247,"given_name":2501,"surname":2502,"affiliation":63,"orcid":63},"Frederick","Riemenschneider",{"paper_id":2491,"author_seq":232,"given_name":2504,"surname":2505,"affiliation":63,"orcid":63},"Jonathan D.","Geiger",{"paper_id":2491,"author_seq":218,"given_name":1316,"surname":2507,"affiliation":63,"orcid":63},"Kuhn-Treichel",{"paper_id":2491,"author_seq":203,"given_name":2509,"surname":2510,"affiliation":63,"orcid":63},"Anette","Frank","The study of emotions in ancient Greek and Latin literature has largely been qualitative, relying on close reading, while existing computational methods often focus on coarse-grained sentiment polarity, which limits their use for nuanced literary analysis. To bridge this gap, we present RAGE (Roman And Greek Emotions), a new corpus of approximately 100 000 words of annotated classical literature spanning multiple genres and authors. Our multi-layered annotation framework, inspired by semantic role labeling, is designed for fine-grained analysis, capturing not only the emotion itself but also its experiencer, cause, and target. We adopt a nuanced emotion taxonomy and enrich each emotion instance with additional layers for intensity, explicitness, and negation. 
To facilitate comparative analysis, characters are linked to Wikidata or a local ontology. We demonstrate the utility of our corpus through corpus-level exploratory analyses and an in-depth case study. RAGE and its accompanying guidelines provide a valuable resource for applying quantitative methods to the study of emotions in classical texts.",{"paper_id":2513,"title":2514,"year":7,"month":188,"day":63,"doi":2515,"resource_url":2516,"first_page":2517,"last_page":2518,"pdf_url":2519,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2520,"paper_type":860,"authors":2521,"abstract":2533},"lrec2026-main-074","From Variance to Invariance: Qualitative Content Analysis for Narrative Graph Annotation","10.63317\u002F2chc9vvj5kxc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-074","958","972","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.74.pdf","huang-etal-2026-variance",[2522,2524,2527,2530],{"paper_id":2513,"author_seq":247,"given_name":2523,"surname":1837,"affiliation":63,"orcid":63},"Junbo",{"paper_id":2513,"author_seq":232,"given_name":2525,"surname":2526,"affiliation":63,"orcid":63},"Max","Weinig",{"paper_id":2513,"author_seq":218,"given_name":2528,"surname":2529,"affiliation":63,"orcid":63},"Ulrich","Fritsche",{"paper_id":2513,"author_seq":203,"given_name":2531,"surname":2532,"affiliation":63,"orcid":63},"Ricardo","Usbeck","Narratives in news discourse play a critical role in shaping public understanding of economic events, such as inflation. Annotating and evaluating these narratives in a structured manner remains a key challenge for Natural Language Processing (NLP). In this work, we introduce a narrative graph annotation framework that integrates principles from qualitative content analysis (QCA) to enhance methodological consistency. 
We present a dataset of inflation narratives annotated as directed acyclic graphs (DAGs), where nodes represent events and edges encode causal relations. To evaluate annotation quality, we employed a 6 × 3 factorial experimental design to examine the effects of narrative representation (six levels) and distance metric type (three levels) on inter-annotator agreement (Krippendorff’s α), capturing the presence of human label variation (HLV) in narrative interpretations. Our analysis shows that (1) lenient metrics (overlap-based distance) overestimate reliability; (2) locally-constrained representations (e.g., one-hop neighbors) reduce annotation variability. Our annotation and implementation of graph-based Krippendorff’s α are open-sourced. The annotation framework and evaluation results provide practical guidance for NLP research on graph-based narrative annotation.",{"paper_id":2535,"title":2536,"year":7,"month":188,"day":63,"doi":2537,"resource_url":2538,"first_page":2539,"last_page":2540,"pdf_url":2541,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2542,"paper_type":860,"authors":2543,"abstract":2550},"lrec2026-main-075","A Dataset of Historical Medical Periodicals Annotated with Textual Genre","10.63317\u002F47ayx3btr7ka","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-075","973","984","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.75.pdf","danilova-etal-2026-dataset",[2544,2547],{"paper_id":2535,"author_seq":247,"given_name":2545,"surname":2546,"affiliation":63,"orcid":63},"Vera","Danilova",{"paper_id":2535,"author_seq":232,"given_name":2548,"surname":2549,"affiliation":63,"orcid":63},"Sara","Stymne","Historical corpora, especially those compiled from magazines and periodicals, are complex due to the diversity of text types and evolving genre conventions. 
Addressing these challenges requires systematic genre annotation and well-defined classification schemes to support downstream NLP tasks. This paper introduces a dataset of historical medical periodical texts in German and Swedish annotated for textual genre and additional features that may influence genre identification, such as the presence of OCR errors. We describe the development of the genre classification, annotator recruitment and training procedures, and provide an analysis of the annotator agreement.",{"paper_id":2552,"title":2553,"year":7,"month":188,"day":63,"doi":2554,"resource_url":2555,"first_page":2556,"last_page":2557,"pdf_url":2558,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2559,"paper_type":860,"authors":2560,"abstract":2567},"lrec2026-main-076","Preserving Endangered Linguistic Heritage: Developing a Corpus for the Study of Contact-induced Changes in Corfioto","10.63317\u002F22f25qjioax5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-076","985","996","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.76.pdf","nunzio-etal-2026-preserving",[2561,2564],{"paper_id":2552,"author_seq":247,"given_name":2562,"surname":2563,"affiliation":63,"orcid":63},"Giorgio Maria Di","Nunzio",{"paper_id":2552,"author_seq":232,"given_name":2565,"surname":2566,"affiliation":63,"orcid":63},"Georgios","Vardakis","This paper presents current results of a work-in-progress project on the aims, goals, and methods for compiling a state-of-the-art morphosyntactically annotated corpus of Corfioto, the endangered Balkan Venetan variety of the Corfiot Jews. 
It gives an outline of the workflow for building, archiving, managing and annotating the first mixed-language corpus of original oral and written data of the Corfiot Jews, based on the Universal Dependencies (UD) framework and introduces the design and the implementation of an application for the Interactive MorPhosyntactic Annotation of Corfioto (IMPACT). The creation and the annotation of the corpus serves three goals: i) attain a quantitative analysis of variation in available data for the analysis of contact-induced syntactic change in clausal complementation in Corfioto; ii) enable the creation of a gold standard and the training of a model for the linguistic annotation of all data in the Universal Dependencies framework; and iii) contribute to the ever-growing research in the development of language resources and tools for endangered and low-resource contact varieties via the collaboration of computational, theoretical and fieldwork linguists.",{"paper_id":2569,"title":2570,"year":7,"month":188,"day":63,"doi":2571,"resource_url":2572,"first_page":2573,"last_page":2574,"pdf_url":2575,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2576,"paper_type":860,"authors":2577,"abstract":2587},"lrec2026-main-077","To Eat and beyond: A FrameNet-Inspired Annotation of Food and Its Uses over Time","10.63317\u002F4cvow3cune2z","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-077","997","1008","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.77.pdf","paccosi-etal-2026-eat",[2578,2581,2584],{"paper_id":2569,"author_seq":247,"given_name":2579,"surname":2580,"affiliation":63,"orcid":63},"Teresa","Paccosi",{"paper_id":2569,"author_seq":232,"given_name":2582,"surname":2583,"affiliation":63,"orcid":63},"Gauri","Bhagwat",{"paper_id":2569,"author_seq":218,"given_name":2585,"surname":2586,"affiliation":63,"orcid":63},"Marieke van","Erp","We present an annotation scheme and a manually annotated dataset 
in English, grounded in Frame Semantics and its generative extension through qualia relations, developed specifically for the food domain. Our primary goal is to capture the diverse and often less frequent uses of food in historical English texts, with a particular focus on the various processes to which food is subjected and the contexts in which it is employed. We provide the annotation scheme, describe the annotation process and release the annotated dataset for food and its uses, along with some preliminary experiments assessing the capabilities of LLMs in applying this annotation scheme.",{"paper_id":2589,"title":2590,"year":7,"month":188,"day":63,"doi":2591,"resource_url":2592,"first_page":2593,"last_page":2594,"pdf_url":2595,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2596,"paper_type":860,"authors":2597,"abstract":2601},"lrec2026-main-078","To Overfit or Not to Overfit? An Evaluation of HTR Workflow on 17th-18th Century French Corpus","10.63317\u002F23hc8mveght2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-078","1009","1016","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.78.pdf","tiger-2026-overfit",[2598],{"paper_id":2589,"author_seq":247,"given_name":2599,"surname":2600,"affiliation":63,"orcid":63},"Marine","Tiger","This paper presents the results of an evaluation of general Handwritten Text Recognition (HTR) models applied to 17th and 18th century corpus written in modern French and the fine-tuning of the models. Our aim was to transcribe a corpus from this period using existing pre-trained models and to assess their performance on such data. While these general models offer a large linguistic coverage, our results demonstrate they are often insufficiently adapted to the specific handwriting nuances and orthographic inconsistencies of early modern French. To improve the results, we fine-tuned a base model to develop a specialized version trained on our dataset. 
Although the model still encountered difficulties due to highly variable handwriting styles, it significantly improved transcription accuracy and reduced processing time. Following this step, we used a semi-automatic post-correction tool to address remaining errors and integrated Named Entity Recognition (NER) steps for automated TEI-XML encoding. This paper discusses the evaluation results of both the HTR and NER models, and how the overfitting allows to get better transcriptions on a specific corpus.",{"paper_id":2603,"title":2604,"year":7,"month":188,"day":63,"doi":2605,"resource_url":2606,"first_page":2607,"last_page":2608,"pdf_url":2609,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2610,"paper_type":860,"authors":2611,"abstract":2651},"lrec2026-main-079","Automatic Segmentation of Classical Tibetan Texts into Autochthonous and Allochthonous Regions","10.63317\u002F2iyfjjv9boc6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-079","1017","1030","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.79.pdf","bilitski-etal-2026-automatic",[2612,2615,2618,2621,2624,2627,2630,2633,2636,2639,2642,2645,2648],{"paper_id":2603,"author_seq":247,"given_name":2613,"surname":2614,"affiliation":63,"orcid":63},"Guy","Bilitski",{"paper_id":2603,"author_seq":232,"given_name":2616,"surname":2617,"affiliation":63,"orcid":63},"Lev","Shechter",{"paper_id":2603,"author_seq":218,"given_name":2619,"surname":2620,"affiliation":63,"orcid":63},"Sonam","Jamtsho",{"paper_id":2603,"author_seq":203,"given_name":2622,"surname":2623,"affiliation":63,"orcid":63},"Nir","Marciano",{"paper_id":2603,"author_seq":188,"given_name":2625,"surname":2626,"affiliation":63,"orcid":63},"Nicola","Bajetta",{"paper_id":2603,"author_seq":172,"given_name":2628,"surname":2629,"affiliation":63,"orcid":63},"Rebecca","Sunden",{"paper_id":2603,"author_seq":155,"given_name":2631,"surname":2632,"affiliation":63,"orcid":63},"Omri","Drori",{"p
aper_id":2603,"author_seq":138,"given_name":2634,"surname":2635,"affiliation":63,"orcid":63},"Kai Golan","Hashiloni",{"paper_id":2603,"author_seq":121,"given_name":2637,"surname":2638,"affiliation":63,"orcid":63},"Orr","Zwebner",{"paper_id":2603,"author_seq":104,"given_name":2640,"surname":2641,"affiliation":63,"orcid":63},"Asaf","Shina",{"paper_id":2603,"author_seq":87,"given_name":2643,"surname":2644,"affiliation":63,"orcid":63},"Orna","Almogi",{"paper_id":2603,"author_seq":73,"given_name":2646,"surname":2647,"affiliation":63,"orcid":63},"Dorji","Wangchuk",{"paper_id":2603,"author_seq":55,"given_name":2649,"surname":2650,"affiliation":63,"orcid":63},"Kfir","Bar","We introduce a new computational framework for segmenting Classical Tibetan texts into autochthonous and allochthonous regions, distinguishing between indigenous Tibetan compositions and translated materials, primarily from Sanskrit sources. To support this task, we release the first annotated Tibetan corpus for ALLO\u002FAUTO segmentation and evaluate several multilingual encoders, including mBERT and XLM-R, fine-tuned for sequence labeling. Our best model achieves strong alignment with expert annotations, showing that multilingual representations can effectively capture philological boundaries in low-resource settings. 
This work contributes new resources and methods for computational philology and sheds light on the linguistic markers that trace the intercultural transmission of Buddhist thought in Tibet.",{"paper_id":2653,"title":2654,"year":7,"month":188,"day":63,"doi":2655,"resource_url":2656,"first_page":2657,"last_page":2658,"pdf_url":2659,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2660,"paper_type":860,"authors":2661,"abstract":2671},"lrec2026-main-080","RespondeoQA: A Benchmark for Bilingual Latin-English Question Answering","10.63317\u002F58p5htfv3nad","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-080","1031","1043","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.80.pdf","hudspeth-etal-2026-respondeoqa",[2662,2665,2668],{"paper_id":2653,"author_seq":247,"given_name":2663,"surname":2664,"affiliation":63,"orcid":63},"Marisa","Hudspeth",{"paper_id":2653,"author_seq":232,"given_name":2666,"surname":2667,"affiliation":63,"orcid":63},"Patrick J.","Burns",{"paper_id":2653,"author_seq":218,"given_name":2669,"surname":2670,"affiliation":63,"orcid":63},"Brendan","O'Connor","We introduce a benchmark dataset for question answering and translation in bilingual Latin and English settings, containing about 7,800 question–answer pairs. The questions are drawn from Latin pedagogical sources, including exams, quizbowl-style trivia, and textbooks ranging from the 1800s to the present. After automated extraction, cleaning, and manual review, the dataset covers a diverse range of question types: knowledge- and skill-based, multihop reasoning, constrained translation, and mixed language pairs. To our knowledge, this is the first QA benchmark centered on Latin. As a case study, we evaluate three large language models–LLaMa 3, Qwen QwQ, and OpenAI’s o3-mini–finding that all perform worse on skill-oriented questions. 
Although the reasoning models perform better on scansion and literary-device tasks, they offer limited improvement overall. QwQ performs slightly better on questions asked in Latin, but LLaMa3 and o3-mini are more task dependent. This dataset provides a new resource for assessing model capabilities in a specialized linguistic and cultural domain, and the creation process can be easily adapted for other languages. The dataset is available at: [https:\u002F\u002Fgithub.com\u002Fslanglab\u002FRespondeoQA](https:\u002F\u002Fgithub.com\u002Fslanglab\u002FRespondeoQA)",{"paper_id":2673,"title":2674,"year":7,"month":188,"day":63,"doi":2675,"resource_url":2676,"first_page":2677,"last_page":2678,"pdf_url":2679,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2680,"paper_type":860,"authors":2681,"abstract":2688},"lrec2026-main-081","Transformer-Enabled Diachronic Analysis of Vedic Sanskrit: Neural Methods for Quantifying Types of Language Change","10.63317\u002F2h4yw3wrpuut","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-081","1044","1053","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.81.pdf","hariharan-etal-2026-transformer",[2682,2685],{"paper_id":2673,"author_seq":247,"given_name":2683,"surname":2684,"affiliation":63,"orcid":63},"Ananth A.","Hariharan",{"paper_id":2673,"author_seq":232,"given_name":2686,"surname":2687,"affiliation":63,"orcid":63},"David R.","Mortensen","This study demonstrates how hybrid neural-symbolic methods can yield significant new insights into the evolution of a morphologically rich, low-resource language. We challenge the naive assumption that linguistic change is simplification by quantitatively analyzing over 2,000 years of Sanskrit, demonstrating how weakly-supervised hybrid methods can yield new insights into the evolution of morphologically rich, low-resource languages. 
Our approach addresses data scarcity through weak supervision, using 100+ high-precision regex patterns to generate pseudo-labels for fine-tuning a multilingual BERT. We then fuse symbolic and neural outputs via a novel confidence-weighted ensemble, creating a system that is both scalable and interpretable. Applying this framework to a 1.47-million-word diachronic corpus, our ensemble achieves a 52.4% overall feature detection rate. Our findings reveal that Sanskrit’s overall morphological complexity does not decrease but is instead dynamically redistributed: while earlier verbal features show cyclical patterns of decline, complexity shifts to other domains, evidenced by a dramatic expansion in compounding and the emergence of new philosophical terminology. Critically, our system produces well-calibrated uncertainty estimates, with confidence strongly correlating with accuracy (Pearson r = 0.92) and low overall calibration error (ECE = 0.043), bolstering the reliability of these findings for computational philology.",{"paper_id":2690,"title":2691,"year":7,"month":188,"day":63,"doi":2692,"resource_url":2693,"first_page":2694,"last_page":2695,"pdf_url":2696,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2697,"paper_type":860,"authors":2698,"abstract":2711},"lrec2026-main-082","Ithaca Revisited: Benchmarking a Domain-Specific Model for Epigraphy in the Age of 
LLMs","10.63317\u002F3gucgvmwsf45","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-082","1054","1070","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.82.pdf","locaputo-etal-2026-ithaca",[2699,2701,2703,2705,2708],{"paper_id":2690,"author_seq":247,"given_name":1712,"surname":2700,"affiliation":63,"orcid":63},"Locaputo",{"paper_id":2690,"author_seq":232,"given_name":1104,"surname":2702,"affiliation":63,"orcid":63},"Brunello",{"paper_id":2690,"author_seq":218,"given_name":2625,"surname":2704,"affiliation":63,"orcid":63},"Saccomanno",{"paper_id":2690,"author_seq":203,"given_name":2706,"surname":2707,"affiliation":63,"orcid":63},"Paraskevi","Platanou",{"paper_id":2690,"author_seq":188,"given_name":2709,"surname":2710,"affiliation":63,"orcid":63},"Giuseppe","Serra","The restoration and interpretation of fragmentary inscriptions remain central challenges in epigraphy, where scholars must reconstruct missing text and determine an inscription’s provenance and chronology from limited evidence. Ithaca, a neural model introduced in 2022, represented a landmark advance in this field, achieving highly accurate results in text restoration and spatio-temporal attribution. Since then, general-purpose large language models (LLMs) such as GPT, Claude, and Gemini have achieved remarkable versatility across many domains, raising the question of whether specialized architectures like Ithaca are still required. In this paper, we revisit Ithaca with a dual focus. First, we benchmark its performance against GPT-5, finding that Ithaca continues to substantially outperform a state-of-the-art general-purpose LLM used in a retrieval-augmented in-context learning setting. Second, we conduct a systematic analysis to characterize Ithaca’s behavior under varying conditions, including lacuna size and position, inscription origin, and semantic topic. Statistical analyses highlight its systematic strengths and weaknesses. 
Taken together, our results map Ithaca’s performance profile, enabling more informed use in research and teaching.",{"paper_id":2713,"title":2714,"year":7,"month":188,"day":63,"doi":2715,"resource_url":2716,"first_page":2717,"last_page":2718,"pdf_url":2719,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2720,"paper_type":860,"authors":2721,"abstract":2730},"lrec2026-main-083","Beyond Literal Meaning: How LLMs Interpret Yemeni Proverbs","10.63317\u002F4hxnxxxq5iu2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-083","1071","1080","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.83.pdf","thmer-etal-2026-beyond",[2722,2725,2727],{"paper_id":2713,"author_seq":247,"given_name":2723,"surname":2724,"affiliation":63,"orcid":63},"Nasser","Thmer",{"paper_id":2713,"author_seq":232,"given_name":2207,"surname":2726,"affiliation":63,"orcid":63},"Al-Laith",{"paper_id":2713,"author_seq":218,"given_name":2728,"surname":2729,"affiliation":63,"orcid":63},"Muhammad","Shoaib","We present a benchmark Yemeni proverbs dataset paired with expert-annotated explanations, designed to evaluate the cultural reasoning abilities of large language models (LLMs). Using zero-shot and few-shot prompting, we assess seven LLMs through both automatic and human evaluation. Results show that instruction-tuned models like GPT-4o and Gemini 1.5 Pro outperform smaller models in both automatic and human evaluations. Few-shot prompting significantly improves performance across all models, underscoring its value for figurative and culturally grounded language tasks. Notably, ALLaM, a bilingual model trained on Arabic and English, achieves competitive results, demonstrating the potential of regionally adapted models for low-resource cultural tasks. LLM-as-a-Judge evaluation correlates strongly with human assessment (Kendall’s τ up to 0.98). 
Error analysis identifies recurring literal interpretation and cultural misalignment as key failure modes.",{"paper_id":2732,"title":2733,"year":7,"month":188,"day":63,"doi":2734,"resource_url":2735,"first_page":2736,"last_page":2737,"pdf_url":2738,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2739,"paper_type":860,"authors":2740,"abstract":2750},"lrec2026-main-084","CEFR Level Prediction for Short Russian L2 Texts: Evaluating Classifiers and Instruction-Based LLMs","10.63317\u002F27p9pbh4oods","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-084","1081","1091","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.84.pdf","glazkova-etal-2026-cefr",[2741,2744,2747],{"paper_id":2732,"author_seq":247,"given_name":2742,"surname":2743,"affiliation":63,"orcid":63},"Anna","Glazkova",{"paper_id":2732,"author_seq":232,"given_name":2745,"surname":2746,"affiliation":63,"orcid":63},"Antonina","Laposhina",{"paper_id":2732,"author_seq":218,"given_name":2748,"surname":2749,"affiliation":63,"orcid":63},"Dmitry","Morozov","This study explores the automated prediction of text complexity levels for short Russian texts on the Common European Framework of Reference for Languages (CEFR) scale. The dataset consists of 7,322 nonfictional fragments (15–30 words) extracted from textbooks for learners of Russian as a second language and filtered according to linguistic feature distributions typical of each CEFR level, with additional validation conducted by 4 human experts. Each text fragment was annotated with 127 linguistic features, including lexical, morphological, syntactic, and length-based characteristics. We evaluate several approaches to text complexity assessment: traditional machine learning classifiers, fine-tuned transformer models, and instruction-based large language models (LLMs). 
Among all models, RuBERT achieved the best strict F1-score (47.8%) and the lowest mean absolute error (0.56), while instruction-based LLMs such as YandexGPT captured overall complexity trends but underperformed in exact classification. Feature ablation experiments demonstrated that lexical features are the most informative for CEFR prediction. Our findings confirm that fine-tuned language models currently offer the most reliable results for short-text CEFR assessment in Russian, whereas instruction-based LLMs show potential for qualitative analysis of text difficulty patterns.",{"paper_id":2752,"title":2753,"year":7,"month":188,"day":63,"doi":2754,"resource_url":2755,"first_page":2756,"last_page":2757,"pdf_url":2758,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2759,"paper_type":860,"authors":2760,"abstract":2778},"lrec2026-main-085","Evaluation of Document-Level Text Simplification in Japanese","10.63317\u002F23krjsuxizo3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-085","1092","1109","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.85.pdf","yamashita-etal-2026-evaluation",[2761,2764,2767,2770,2773,2775],{"paper_id":2752,"author_seq":247,"given_name":2762,"surname":2763,"affiliation":63,"orcid":63},"Iori","Yamashita",{"paper_id":2752,"author_seq":232,"given_name":2765,"surname":2766,"affiliation":63,"orcid":63},"Hikari","Tanaka",{"paper_id":2752,"author_seq":218,"given_name":2768,"surname":2769,"affiliation":63,"orcid":63},"Hajime","Kiyama",{"paper_id":2752,"author_seq":203,"given_name":2771,"surname":2772,"affiliation":63,"orcid":63},"Kexin","Bian",{"paper_id":2752,"author_seq":188,"given_name":2774,"surname":1840,"affiliation":63,"orcid":63},"Zhousi",{"paper_id":2752,"author_seq":172,"given_name":2776,"surname":2777,"affiliation":63,"orcid":63},"Mamoru","Komachi","This study establishes an evaluation framework for document-level text simplification in Japanese by 
constructing a human-annotated dataset and examining the reliability of LLM-based automatic evaluation. We first developed detailed annotation guidelines covering four criteria—necessity, sufficiency, sentence-level simplicity, and document-level simplicity—and collected human ratings for 1,128 source–target document pairs derived from the Wikipedia part of the Japanese simplification corpus JADOS. Using this dataset, we conducted extensive experiments comparing human judgments with evaluations from large language models, including GPT, Claude, and Gemini. The results show that GPT-4o and Gemini 2.5 Pro achieve high agreement with human annotators even in the 0-shot setting, demonstrating their potential as reliable automatic evaluators for Japanese simplification. However, LLMs exhibited a consistent tendency to underestimate document-level simplicity, particularly for kanji-dense texts or texts with relatively long sentences and a small number of sentences. This work provides the first benchmark for evaluating document-level text simplification in Japanese and offers practical evidence that LLM-based evaluation can support scalable assessment for Japanese document-level simplification.",{"paper_id":2780,"title":2781,"year":7,"month":188,"day":63,"doi":2782,"resource_url":2783,"first_page":2784,"last_page":2785,"pdf_url":2786,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2787,"paper_type":860,"authors":2788,"abstract":2798},"lrec2026-main-086","Parallel Corpus Filtering Based on Semantic Similarity and Surface Dissimilarity for Japanese Text Simplification with 
LLMs","10.63317\u002F2o26gctx8fej","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-086","1110","1116","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.86.pdf","maekawa-etal-2026-parallel",[2789,2792,2795],{"paper_id":2780,"author_seq":247,"given_name":2790,"surname":2791,"affiliation":63,"orcid":63},"Daisuke","Maekawa",{"paper_id":2780,"author_seq":232,"given_name":2793,"surname":2794,"affiliation":63,"orcid":63},"Tomoyuki","Kajiwara",{"paper_id":2780,"author_seq":218,"given_name":2796,"surname":2797,"affiliation":63,"orcid":63},"Takashi","Ninomiya","We are focusing on low-cost fine-tuning for large language models (LLMs) in Japanese text simplification. LLMs have achieved high performance even with fine-tuning on small parallel corpora in tasks such as machine translation and dialogue response generation. In this study, we propose a method of parallel corpus filtering for text simplification and investigate how much the number of sentence pairs for fine-tuning LLMs can be reduced. Experimental results on Japanese corpora in three domains revealed that the ability to perform text simplification tasks can be acquired even from a very small corpus of 16 to 64 sentence pairs. 
Although more parallel corpora are needed to acquire domain knowledge, our method outperformed full fine-tuning while reducing the training corpus by approximately 70%.",{"paper_id":2800,"title":2801,"year":7,"month":188,"day":63,"doi":2802,"resource_url":2803,"first_page":2804,"last_page":2805,"pdf_url":2806,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2807,"paper_type":860,"authors":2808,"abstract":2822},"lrec2026-main-087","A Multilingual Human Annotated Corpus of Original and Easy-to-Read Texts to Support Access to Democratic Participatory Processes","10.63317\u002F4b56cza6e7zk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-087","1117","1128","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.87.pdf","riegler-etal-2026-multilingual",[2809,2812,2815,2818,2821],{"paper_id":2800,"author_seq":247,"given_name":2810,"surname":2811,"affiliation":63,"orcid":63},"Verena","Riegler",{"paper_id":2800,"author_seq":232,"given_name":2813,"surname":2814,"affiliation":63,"orcid":63},"Stefan","Bott",{"paper_id":2800,"author_seq":218,"given_name":2816,"surname":2817,"affiliation":63,"orcid":63},"Horacio","Saggion",{"paper_id":2800,"author_seq":203,"given_name":2819,"surname":2820,"affiliation":63,"orcid":63},"Almudena Rascón","Alcaina",{"paper_id":2800,"author_seq":188,"given_name":1078,"surname":1079,"affiliation":63,"orcid":63},"Being able to understand information is a key factor for a self-determined life and society. The study of automatic text simplification is often limited by the availability of high quality material for the training and evaluation on automatic simplifiers. This is true for English, but more so for less resourced languages like Spanish, Catalan and Italian. In order to fill this gap, we present a corpus of original texts with high quality simplification produced by human experts in text simplification. 
It was developed within a project to assess the impact of Easy-to-Read (E2R) language for democratic participation. The original texts were compiled from domains related to this topic. The corpus includes different text types, selected based on relevance, copyright availability, and ethical standards. All texts were simplified to Easy-to-Read level. The corpora hold significant scientific value, particularly as it includes the first annotated corpora of its kind for the Catalan language. It also represents a noteworthy contribution for Spanish and Italian, offering high-quality, human-annotated language resources that are rarely available in these domains. The corpora will be made freely accessible to the public.",{"paper_id":2824,"title":2825,"year":7,"month":188,"day":63,"doi":2826,"resource_url":2827,"first_page":2828,"last_page":2829,"pdf_url":2830,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2831,"paper_type":860,"authors":2832,"abstract":2848},"lrec2026-main-088","Proffiliadur: Welsh Language Text Profiling Toolkit","10.63317\u002F5c6yawn79s5h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-088","1129","1142","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.88.pdf","gutirrezroln-etal-2026-proffiliadur",[2833,2836,2839,2842,2845],{"paper_id":2824,"author_seq":247,"given_name":2834,"surname":2835,"affiliation":63,"orcid":63},"Nicolás","Gutiérrez-Rolón",{"paper_id":2824,"author_seq":232,"given_name":2837,"surname":2838,"affiliation":63,"orcid":63},"Jonathan","Davies",{"paper_id":2824,"author_seq":218,"given_name":2840,"surname":2841,"affiliation":63,"orcid":63},"Tomos","Williams",{"paper_id":2824,"author_seq":203,"given_name":2843,"surname":2844,"affiliation":63,"orcid":63},"Dawn","Knight",{"paper_id":2824,"author_seq":188,"given_name":2846,"surname":2847,"affiliation":63,"orcid":63},"Fernando","Alva-Manchego","We introduce Proffiliadur, a Python toolkit for text profiling and 
readability analysis in Welsh. The toolkit computes 141 surface, lexical, morphological, and syntactic indices, designed to capture linguistic variation while incorporating a Welsh-specific tokenisation process that enables accurate morphological analysis and handles phenomena such as initial consonant mutation. Proffiliadur enables systematic assessment of text accessibility and supports applications in education, healthcare, and public communication. We demonstrate the toolkit’s usefulness through two complementary analyses. First, we examine texts written in accordance with the Cymraeg Clîr (\"Clear Welsh\") principles and compare them with regular Welsh texts. Second, we analyse texts across CEFR proficiency levels to explore how linguistic complexity varies with learner ability. We also evaluate feature-based and neural classification models for automatic complexity detection, showing that interpretable linguistic indices alone achieve strong predictive performance (F1 = 0.94), comparable to a fine-tuned transformer (F1 = 0.97). 
Proffiliadur provides the first dedicated text profiling toolkit for Welsh, offering reproducible, linguistically grounded measures of readability for a low-resource language.",{"paper_id":2850,"title":2851,"year":7,"month":188,"day":63,"doi":2852,"resource_url":2853,"first_page":2854,"last_page":2855,"pdf_url":2856,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2857,"paper_type":860,"authors":2858,"abstract":2862},"lrec2026-main-089","Recovering Registers from Leveled Wordlists","10.63317\u002F54axogzishsk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-089","1143","1154","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.89.pdf","ehara-2026-recovering",[2859],{"paper_id":2850,"author_seq":247,"given_name":2860,"surname":2861,"affiliation":63,"orcid":63},"Yo","Ehara","For vocabulary learning in language acquisition, it is desirable for learners to acquire words that they are likely to need in the language environments they will encounter. Such language environments are referred to as “registers” in general corpora, which are typically designed to include diverse registers. However, the proportion of registers included, that is, which registers are included and to what extent, is determined by the circumstances under which each general corpus was compiled and is not necessarily optimized for language learning. To bridge this gap, various leveled wordlists have been created in language education using linguistic resources other than word frequency, such as expert judgment and learner responses. However, it has not been quantitatively clear what gap in register proportions in general corpora these leveled wordlists were designed to fill. This study proposes a method that, given a leveled wordlist and a general corpus, estimates the register ratio that best aligns the frequency ordering of words across registers with the leveled wordlist. 
This makes it easier for learners and educators to interpret which wordlists are appropriate for particular learning goals. Our method is formulated as a linear programming problem and yields a globally optimal solution. Unlike neural networks, it is less susceptible to variation due to initial values or approximation and is therefore easier to interpret. We evaluated the proposed method on two languages, English and Japanese, through a range of experiments. We further show that it can also be used to evaluate vocabulary lists created for specific contexts, such as those generated by Large Language Models like ChatGPT.",{"paper_id":2864,"title":2865,"year":7,"month":188,"day":63,"doi":2866,"resource_url":2867,"first_page":2868,"last_page":2869,"pdf_url":2870,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2871,"paper_type":860,"authors":2872,"abstract":2879},"lrec2026-main-090","Fill-in-the-Blanks: Automatic Generation and Evaluation of Language Models' Pseudonyms for English and Swedish Texts","10.63317\u002F42gh3j2hfmca","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-090","1155","1169","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.90.pdf","szawerna-etal-2026-fill",[2873,2876],{"paper_id":2864,"author_seq":247,"given_name":2874,"surname":2875,"affiliation":63,"orcid":63},"Maria Irena","Szawerna",{"paper_id":2864,"author_seq":232,"given_name":2877,"surname":2878,"affiliation":63,"orcid":63},"Jacob Lee","Suchardt","While considerable effort has gone into developing solutions for detecting Personally Identifiable Information (PII) in linguistic data, less research has gone into automating the generation of appropriate pseudonyms and developing evaluation methods, both relevant for the creation of privacy-friendly language resources. 
We conduct pilot experiments using Masked and Generative Large Language Models to generate predictions for redacted PII-spans in a cloze-like fashion for English legal texts and parallel news articles in Swedish and English. Furthermore, we explore metrics for automatic evaluation of the generated pseudonyms in the legal data, and investigate the effect of part-of-speech constraints on performance. For the parallel, multilingual data, we contribute our manual PII-annotation and conduct a fine-grained error analysis across two of our pseudonym generation methods and a baseline. Our results illustrate the complexity of pseudonym evaluation and the particular challenge of automatic, at-scale evaluation as well as the models’ tendency to predict prototypical and even stereotypical answers.",{"paper_id":2881,"title":2882,"year":7,"month":188,"day":63,"doi":2883,"resource_url":2884,"first_page":2885,"last_page":2886,"pdf_url":2887,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2888,"paper_type":860,"authors":2889,"abstract":2915},"lrec2026-main-091","Integrating Services, Platforms and Resources into a National Infrastructure Cluster for FAIR Language and Cultural 
Data","10.63317\u002F5gyii2myd4yz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-091","1170","1178","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.91.pdf","pedonese-etal-2026-integrating",[2890,2893,2896,2899,2902,2904,2907,2910,2911,2912],{"paper_id":2881,"author_seq":247,"given_name":2891,"surname":2892,"affiliation":63,"orcid":63},"Giulia","Pedonese",{"paper_id":2881,"author_seq":232,"given_name":2894,"surname":2895,"affiliation":63,"orcid":63},"Daniele","Melaccio",{"paper_id":2881,"author_seq":218,"given_name":2897,"surname":2898,"affiliation":63,"orcid":63},"Michele","Mallia",{"paper_id":2881,"author_seq":203,"given_name":2900,"surname":2901,"affiliation":63,"orcid":63},"Monica","Monachini",{"paper_id":2881,"author_seq":188,"given_name":1110,"surname":2903,"affiliation":63,"orcid":63},"Frontini",{"paper_id":2881,"author_seq":172,"given_name":2905,"surname":2906,"affiliation":63,"orcid":63},"Valeria","Quochi",{"paper_id":2881,"author_seq":155,"given_name":2908,"surname":2909,"affiliation":63,"orcid":63},"Fahad","Khan",{"paper_id":2881,"author_seq":138,"given_name":1789,"surname":1790,"affiliation":63,"orcid":63},{"paper_id":2881,"author_seq":121,"given_name":1798,"surname":1799,"affiliation":63,"orcid":63},{"paper_id":2881,"author_seq":104,"given_name":2913,"surname":2914,"affiliation":63,"orcid":63},"Riccardo Del","Gratta","In the context of evolving European and national policies for research infrastructure governance, this paper presents the contribution of a national consortium for language resources and technology to the construction of a national infrastructure for FAIR and interoperable language and cultural data within a broader Humanities and Heritage Open Science initiative. 
As the national node of a European research infrastructure for language resources, the consortium contributes to translating FAIR and Open Science principles into practice by integrating technical, methodological, and training dimensions. Its activities combine several coordinated components: FAIRification workflows and ontology-based metadata mediation to enhance semantic interoperability across infrastructures; the refactoring and exposure of services through a federated API gateway; and the implementation of a Linguistic Linked Open Data (LLOD) pilot for the validation, transformation, and publication of interoperable RDF datasets. A national training ecosystem — comprising a training platform and a FAIR learning library — supports capacity building and the creation of FAIR-by-design learning materials. Finally, a permanent research observatory monitors community practices and needs, providing evidence-based insights for the continuous improvement of services and training provision. Together, these components demonstrate a coherent strategy for implementing FAIR and Open Science at the national level, while ensuring alignment with major European and national initiatives in the SSH data ecosystem.",{"paper_id":2917,"title":2918,"year":7,"month":188,"day":63,"doi":2919,"resource_url":2920,"first_page":2921,"last_page":2922,"pdf_url":2923,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2924,"paper_type":860,"authors":2925,"abstract":3011},"lrec2026-main-092","Common European Language Data Space: Development, Current Status, and Future 
Perspectives","10.63317\u002F4e5kpxxu2hvv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-092","1179","1188","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.92.pdf","piperidis-etal-2026-common",[2926,2929,2932,2935,2938,2941,2944,2947,2950,2953,2956,2959,2962,2964,2967,2970,2973,2977,2980,2983,2987,2991,2995,2999,3003,3007],{"paper_id":2917,"author_seq":247,"given_name":2927,"surname":2928,"affiliation":63,"orcid":63},"Stelios","Piperidis",{"paper_id":2917,"author_seq":232,"given_name":2930,"surname":2931,"affiliation":63,"orcid":63},"Penny","Labropoulou",{"paper_id":2917,"author_seq":218,"given_name":2933,"surname":2934,"affiliation":63,"orcid":63},"Dimitrios","Galanis",{"paper_id":2917,"author_seq":203,"given_name":2936,"surname":2937,"affiliation":63,"orcid":63},"Khalid","Choukri",{"paper_id":2917,"author_seq":188,"given_name":2939,"surname":2940,"affiliation":63,"orcid":63},"Andrejs","Vasiļjevs",{"paper_id":2917,"author_seq":172,"given_name":2942,"surname":2943,"affiliation":63,"orcid":63},"Mitos","Deligiannis",{"paper_id":2917,"author_seq":155,"given_name":2945,"surname":2946,"affiliation":63,"orcid":63},"Katerina","Gkirtzou",{"paper_id":2917,"author_seq":138,"given_name":2948,"surname":2949,"affiliation":63,"orcid":63},"Dimitris","Gkoumas",{"paper_id":2917,"author_seq":121,"given_name":2951,"surname":2952,"affiliation":63,"orcid":63},"Athanasia","Kolovou",{"paper_id":2917,"author_seq":104,"given_name":2954,"surname":2955,"affiliation":63,"orcid":63},"Leon","Voukoutis",{"paper_id":2917,"author_seq":87,"given_name":2957,"surname":2958,"affiliation":63,"orcid":63},"Kanella","Pouli",{"paper_id":2917,"author_seq":73,"given_name":2960,"surname":2961,"affiliation":63,"orcid":63},"Maria","Giagkou",{"paper_id":2917,"author_seq":55,"given_name":2960,"surname":2963,"affiliation":63,"orcid":63},"Gavriilidou",{"paper_id":2917,"author_seq":38,"given_name":2965,"surname":2966,"affiliation":63,"orcid":63},"Katrin","Marhei
necke",{"paper_id":2917,"author_seq":17,"given_name":2968,"surname":2969,"affiliation":63,"orcid":63},"Elena","Leitner",{"paper_id":2917,"author_seq":2971,"given_name":2467,"surname":2972,"affiliation":63,"orcid":63},"16","Ostermann",{"paper_id":2917,"author_seq":2974,"given_name":2975,"surname":2976,"affiliation":63,"orcid":63},"17","Stefania","Raccioppa",{"paper_id":2917,"author_seq":857,"given_name":2978,"surname":2979,"affiliation":63,"orcid":63},"Kossay","Talmoudi",{"paper_id":2917,"author_seq":877,"given_name":2981,"surname":2982,"affiliation":63,"orcid":63},"Victoria","Arranz",{"paper_id":2917,"author_seq":2984,"given_name":2985,"surname":2986,"affiliation":63,"orcid":63},"20","Valérie","Mapelli",{"paper_id":2917,"author_seq":2988,"given_name":2989,"surname":2990,"affiliation":63,"orcid":63},"21","Helene","Mazo",{"paper_id":2917,"author_seq":2992,"given_name":2993,"surname":2994,"affiliation":63,"orcid":63},"22","Fernanda González","Campo",{"paper_id":2917,"author_seq":2996,"given_name":2997,"surname":2998,"affiliation":63,"orcid":63},"23","Shi","Yu",{"paper_id":2917,"author_seq":3000,"given_name":3001,"surname":3002,"affiliation":63,"orcid":63},"24","Aivars","Bērziņš",{"paper_id":2917,"author_seq":3004,"given_name":3005,"surname":3006,"affiliation":63,"orcid":63},"25","Andis","Lagzdiņš",{"paper_id":2917,"author_seq":3008,"given_name":3009,"surname":3010,"affiliation":63,"orcid":63},"26","Georg","Rehm","Common European Data Spaces (CEDS) are aimed at creating a single market for data across the EU that will power AI innovation. CEDS cover 14 sectors\u002Fdomains and will allow secure, trustworthy data\u002FAI models exchange between companies, public administrations etc. The Common European Language Data Space (LDS) is part of CEDS and is already made available in beta phase. The paper presents its technical design and implementation, its governance framework as well as use cases that demonstrate its value. 
LDS aspires to become part of the future European Language Technology ecosystem.",{"paper_id":3013,"title":3014,"year":7,"month":188,"day":63,"doi":3015,"resource_url":3016,"first_page":3017,"last_page":3018,"pdf_url":3019,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3020,"paper_type":860,"authors":3021,"abstract":3029},"lrec2026-main-093","Euskorpora: A Strategic Framework for Digital Sovereignty and Linguistic Inclusion of Basque in the Era of AI","10.63317\u002F4azrgurn3893","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-093","1189","1196","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.93.pdf","arranz-etal-2026-euskorpora",[3022,3023,3024,3027],{"paper_id":3013,"author_seq":247,"given_name":2981,"surname":2982,"affiliation":63,"orcid":63},{"paper_id":3013,"author_seq":232,"given_name":2548,"surname":2093,"affiliation":63,"orcid":63},{"paper_id":3013,"author_seq":218,"given_name":3025,"surname":3026,"affiliation":63,"orcid":63},"Leire","Barañano",{"paper_id":3013,"author_seq":203,"given_name":1276,"surname":3028,"affiliation":63,"orcid":63},"García-Pablos","Euskorpora is a pioneering initiative designed to establish a comprehensive digital infrastructure for the development of speech and language technologies in Basque. Built upon European, Spanish, and Basque strategies, it addresses the scarcity of linguistic data, foundational models, and technological resources for this non-Indo-European, low-resourced language. The project integrates large-scale data collection from public institutions and private organisations, creating extensive multimodal corpora that cover the linguistic, dialectal, and domain diversity of Basque. 
These resources support the training of open language models for speech, translation, and language understanding, as well as the establishment of an interoperable infrastructure aligned with European initiatives such as the European Language Data Space (LDS). By combining linguistic research, artificial intelligence, and data governance, Euskorpora ensures the digital sovereignty and inclusion of the Basque language within the global AI ecosystem. Beyond its regional focus, it stands as a transferable model for advancing linguistic diversity, technological innovation, and equitable digital transformation in multilingual Europe.",{"paper_id":3031,"title":3032,"year":7,"month":188,"day":63,"doi":3033,"resource_url":3034,"first_page":3035,"last_page":3036,"pdf_url":3037,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3038,"paper_type":860,"authors":3039,"abstract":3042},"lrec2026-main-094","Automating FAIRness: A FAIRification Tool within the Language Resources Infrastructure","10.63317\u002F2ryf7gasio58","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-094","1197","1204","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.94.pdf","melaccio-etal-2026-automating",[3040,3041],{"paper_id":3031,"author_seq":247,"given_name":2894,"surname":2895,"affiliation":63,"orcid":63},{"paper_id":3031,"author_seq":232,"given_name":2900,"surname":2901,"affiliation":63,"orcid":63},"In addition to technical interoperability, FAIRness encompasses governance, policy, and ethical aspects, reflecting how language data are produced, represented, and managed within research infrastructures. Ensuring FAIR compliance of language resources is essential for transparent and sustainable research in the social sciences and humanities, enabling data accessibility, quality, and long-term community reuse. 
The FAIRification Tool — created by CLARIN IT as part of the Humanities and Heritage Italian Open Science Cloud (H2IOSC) — is a modular system that automates and enhances FAIR compliance for language resources. The tool builds upon and extends existing FAIR data assessment frameworks by combining automatic and human validation, a feedback dashboard, certification thresholds, and domain-specific extensions aligned with linguistic metadata standards. It supports FAIR-by-design practices by operationalizing FAIR concepts and embedding them into repository workflows, thereby promoting interoperability across CLARIN, H2IOSC, and EOSC. The tool’s effectiveness has been demonstrated through an initial evaluation conducted on a representative set of linguistic datasets, which revealed notable improvements (30–40%) in FAIR scores, particularly in the Findable and Reusable dimensions, contributing to responsible, policy-aware, and transparent language data management within the European Open Science landscape. 
",{"paper_id":3044,"title":3045,"year":7,"month":188,"day":63,"doi":3046,"resource_url":3047,"first_page":3048,"last_page":3049,"pdf_url":3050,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3051,"paper_type":860,"authors":3052,"abstract":3065},"lrec2026-main-095","FIBER: A Multilingual Evaluation Resource for Factual Inference Bias","10.63317\u002F32j8kpkoevin","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-095","1205","1215","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.95.pdf","munis-etal-2026-fiber",[3053,3056,3059,3062],{"paper_id":3044,"author_seq":247,"given_name":3054,"surname":3055,"affiliation":63,"orcid":63},"Evren Ayberk","Munis",{"paper_id":3044,"author_seq":232,"given_name":3057,"surname":3058,"affiliation":63,"orcid":63},"Deniz","Yilmaz",{"paper_id":3044,"author_seq":218,"given_name":3060,"surname":3061,"affiliation":63,"orcid":63},"Arianna","Muti",{"paper_id":3044,"author_seq":203,"given_name":3063,"surname":3064,"affiliation":63,"orcid":63},"Cagri","Toraman","Large language models are widely used across domains, yet there are concerns about their factual reliability and biases. Factual knowledge probing offers a systematic means to evaluate these aspects. Most existing benchmarks focus on single-entity facts and monolingual data. We therefore present FIBER, a multilingual benchmark for evaluating factual knowledge in single- and multi-entity settings. The dataset includes sentence completion, question-answering, and object-count prediction tasks in English, Italian, and Turkish. 
Using FIBER, we examine whether the prompt language induces inference bias in entity selection and how large language models perform on multi-entity versus single-entity questions. The results indicate that the language of the prompt can influence the model’s generated output, particularly for entities associated with the country corresponding to that language. However, this effect varies across different topics such that 31% of the topics exhibit factual inference bias score greater than 0.5. Moreover, the level of bias differs across languages such that Turkish prompts show higher bias compared to Italian in 83% of the topics, suggesting a language-dependent pattern. Our findings also show that models face greater difficulty when handling multi-entity questions than the single-entity questions. Model performance differs across both languages and model sizes. The highest mean average precision is achieved in English, while Turkish and Italian lead to noticeably lower scores. Larger models, including Llama-3.1-8B and Qwen-2.5-7B, show consistently better performance than smaller 3B-4B models.",{"paper_id":3067,"title":3068,"year":7,"month":188,"day":63,"doi":3069,"resource_url":3070,"first_page":3071,"last_page":3072,"pdf_url":3073,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3074,"paper_type":860,"authors":3075,"abstract":3085},"lrec2026-main-096","EthiQuest: LLM-Powered Ethical Questionnaire Generation for Research 
Review","10.63317\u002F3oju9vtxpezh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-096","1216","1225","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.96.pdf","kapania-etal-2026-ethiquest",[3076,3079,3082],{"paper_id":3067,"author_seq":247,"given_name":3077,"surname":3078,"affiliation":63,"orcid":63},"Ishank","Kapania",{"paper_id":3067,"author_seq":232,"given_name":3080,"surname":3081,"affiliation":63,"orcid":63},"Radhika","Mamidi",{"paper_id":3067,"author_seq":218,"given_name":3083,"surname":3084,"affiliation":63,"orcid":63},"Rahul","Mishra","Building upon the critical importance of ethical considerations in research, we introduce a novel task of Ethical Questionnaire Generation (EQG) for research papers. Ethical review has become an indispensable component of the research process, helping identify potential risks, biases, and societal impacts that may arise from scientific work. In this paper, we present EthiQuest, a comprehensive dataset comprising 3663 research papers paired with their corresponding ethical questionnaires extracted from major conference proceedings. We explore various approaches leveraging large language models (LLMs) to automatically generate context-aware ethical questionnaires, examining the unique challenges of capturing domain-specific ethical concerns, ensuring comprehensive coverage of potential issues, and maintaining question relevance and clarity. Our experiments demonstrate the effectiveness of fine-tuned LLMs in generating pertinent ethical questions across diverse research domains. We provide detailed analysis of question quality, coverage metrics, and practical insights for deploying such systems in real-world research review processes. 
The EQG dataset and code can be accessed at https:\u002F\u002Fanonymous.4open.science\u002Fr\u002Feqg-979C\u002F.",{"paper_id":3087,"title":3088,"year":7,"month":188,"day":63,"doi":3089,"resource_url":3090,"first_page":3091,"last_page":3092,"pdf_url":3093,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3094,"paper_type":860,"authors":3095,"abstract":3102},"lrec2026-main-097","NegNLI-BR: A Brazilian Portuguese Benchmark for Negation in Natural Language Inference","10.63317\u002F3mrey394c68a","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-097","1226","1235","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.97.pdf","westhelle-etal-2026-negnli",[3096,3099],{"paper_id":3087,"author_seq":247,"given_name":3097,"surname":3098,"affiliation":63,"orcid":63},"Matheus","Westhelle",{"paper_id":3087,"author_seq":232,"given_name":3100,"surname":3101,"affiliation":63,"orcid":63},"Viviane","Moreira","Recent studies have questioned the ability of Large Language Models (LLMs) to handle logical negation. We revisit this issue within the Natural Language Inference (NLI) task, specifically investigating whether modern LLMs can distinguish negations that alter logical entailment (“important”) from those that do not (“unimportant”). For this purpose, we introduce NegNLI-BR, a new benchmark dataset in Portuguese designed to exercise this distinction. We evaluate a range of recent open-source LLMs, comparing the performance of their base and post-trained versions. Furthermore, we employ a causal probe to measure the Average Treatment Effect of negation interventions on the internal representations of LLMs. Our findings show that many recent LLMs, including smaller variants, effectively handle negation. The causal analysis reveals that important negations induce a stable and significant effect on model representations, distinct from unimportant negations or neutral filler words. 
We also observe that post-training generally enhances this representational sensitivity, suggesting it refines the models’ ability to encode the logical impact of negation.",{"paper_id":3104,"title":3105,"year":7,"month":188,"day":63,"doi":3106,"resource_url":3107,"first_page":3108,"last_page":3109,"pdf_url":3110,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3111,"paper_type":860,"authors":3112,"abstract":3117},"lrec2026-main-098","SWE-QA: A Dataset and Benchmark for Complex Code Understanding","10.63317\u002F3af6kzbod876","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-098","1236","1251","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.98.pdf","elkoussy-etal-2026-swe",[3113,3116],{"paper_id":3104,"author_seq":247,"given_name":3114,"surname":3115,"affiliation":63,"orcid":63},"Laila","Elkoussy",{"paper_id":3104,"author_seq":232,"given_name":1179,"surname":1180,"affiliation":63,"orcid":63},"In this paper, we introduce SWE-QA, a text and code corpus aimed at benchmarking multi-hop code comprehension, addressing the gap between simplified evaluation tasks and the complex reasoning required in real-world software development. While existing code understanding benchmarks focus on isolated snippets, developers must routinely connect information across multiple dispersed code segments. The dataset comprises 9,072 multiple-choice questions systematically generated from 12 Python repositories of SWE-bench, evaluating several recurrent reasoning patterns like Declaration-and-Call questions that link entity definitions to their usage, and Interacting-Entity questions that examine the dynamic relationships among multiple collaborating components. Generated through parsing-based entity extraction and Large Language Model assisted question construction with carefully validated distractors, the benchmark distinguishes genuine comprehension from superficial pattern matching. 
Evaluation of 15 language models (360M to 671B parameters) reveals significant challenges in multi-hop reasoning, with best performance reaching 74.41% accuracy. Dense architectures consistently outperform mixture-of-experts models by 10-14 percentage points, while reasoning-enhanced variants show inconsistent benefits.",{"paper_id":3119,"title":3120,"year":7,"month":188,"day":63,"doi":3121,"resource_url":3122,"first_page":3123,"last_page":3124,"pdf_url":3125,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3126,"paper_type":860,"authors":3127,"abstract":3152},"lrec2026-main-099","Augmenting LLM Reasoning with Dynamic Notes Writing for Complex MultiHop QA","10.63317\u002F4uyke9dmgz56","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-099","1252","1279","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.99.pdf","maheshwary-etal-2026-augmenting",[3128,3131,3134,3137,3140,3143,3146,3149],{"paper_id":3119,"author_seq":247,"given_name":3129,"surname":3130,"affiliation":63,"orcid":63},"Rishabh","Maheshwary",{"paper_id":3119,"author_seq":232,"given_name":3132,"surname":3133,"affiliation":63,"orcid":63},"Masoud","Hashemi",{"paper_id":3119,"author_seq":218,"given_name":3135,"surname":3136,"affiliation":63,"orcid":63},"Khyati","Mahajan",{"paper_id":3119,"author_seq":203,"given_name":3138,"surname":3139,"affiliation":63,"orcid":63},"Shiva Krishna Reddy","Malay",{"paper_id":3119,"author_seq":188,"given_name":3141,"surname":3142,"affiliation":63,"orcid":63},"sai rajeswar","mudumba",{"paper_id":3119,"author_seq":172,"given_name":3144,"surname":3145,"affiliation":63,"orcid":63},"Sathwik Tejaswi","Madhusudhan",{"paper_id":3119,"author_seq":155,"given_name":3147,"surname":3148,"affiliation":63,"orcid":63},"Spandana","Gella",{"paper_id":3119,"author_seq":138,"given_name":3150,"surname":3151,"affiliation":63,"orcid":63},"Vikas","Yadav","Iterative RAG for multi-hop question answering faces challenges with 
lengthy contexts and the buildup of irrelevant information. This hinders a model’s capacity to process and reason over retrieved content and limits performance. While recent methods focus on compressing retrieved information, they are either restricted to single-round RAG, require finetuning or lack scalability in iterative RAG. To address these, we propose NotesWriting, a method that generates concise and relevant notes from retrieved documents at each step, thereby reducing noise and retaining only essential information. This increases the effective context length of Large Language Models (LLMs), allowing them to reason and plan more effectively while processing larger volumes of input text due to the compression in the form of notes. NotesWriting is framework agnostic and can be integrated with different iterative RAG methods. We demonstrate its effectiveness with three iterative RAG methods, across two models and four evaluation datasets. NotesWriting yields an average improvement of 15.6 percentage points overall, by scaling the amount of ingested information.",{"paper_id":3154,"title":3155,"year":7,"month":188,"day":63,"doi":3156,"resource_url":3157,"first_page":3158,"last_page":3159,"pdf_url":3160,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3161,"paper_type":860,"authors":3162,"abstract":3177},"lrec2026-main-100","Information Asymmetry across Language Varieties: A Case Study on Cantonese-Mandarin and Bavarian-German 
QA","10.63317\u002F3m35a6d3v5w7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-100","1280","1302","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.100.pdf","pei-etal-2026-information",[3163,3166,3169,3171,3174],{"paper_id":3154,"author_seq":247,"given_name":3164,"surname":3165,"affiliation":63,"orcid":63},"Renhao","Pei",{"paper_id":3154,"author_seq":232,"given_name":3167,"surname":3168,"affiliation":63,"orcid":63},"Siyao","Peng",{"paper_id":3154,"author_seq":218,"given_name":2810,"surname":3170,"affiliation":63,"orcid":63},"Blaschke",{"paper_id":3154,"author_seq":203,"given_name":3172,"surname":3173,"affiliation":63,"orcid":63},"Robert","Litschko",{"paper_id":3154,"author_seq":188,"given_name":3175,"surname":3176,"affiliation":63,"orcid":63},"Barbara","Plank","Large Language Models (LLMs) are becoming a common way for humans to seek knowledge, yet their coverage and reliability vary widely. Especially for local language varieties, there are large asymmetries, e.g., information in local Wikipedia that is absent from the standard variant. However, little is known about how well LLMs perform under such information asymmetry, especially on closely related languages. We manually construct a novel challenge question-answering (QA) dataset that captures knowledge conveyed on a local Wikipedia page, which is absent from their higher-resource counterparts — covering Mandarin Chinese vs. Cantonese and German vs. Bavarian. Our experiments show that LLMs fail to answer questions about information only in local editions of Wikipedia. Providing context from lead sections substantially improves performance, with further gains possible via translation. Our topical, geographic annotations, and stratified evaluations reveal the usefulness of local Wikipedia editions as sources of both regional and global information. 
These findings raise critical questions about inclusivity and cultural coverage of LLMs.",{"paper_id":3179,"title":3180,"year":7,"month":188,"day":63,"doi":3181,"resource_url":3182,"first_page":3183,"last_page":3184,"pdf_url":3185,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3186,"paper_type":860,"authors":3187,"abstract":3193},"lrec2026-main-101","FRASE: Frame-based Structured Representations for Generalizable SPARQL Query Generation","10.63317\u002F52g4z7jtim8o","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-101","1303","1319","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.101.pdf","diallo-etal-2026-frase",[3188,3191],{"paper_id":3179,"author_seq":247,"given_name":3189,"surname":3190,"affiliation":63,"orcid":63},"Papa Abdou Karim Karou","Diallo",{"paper_id":3179,"author_seq":232,"given_name":1691,"surname":3192,"affiliation":63,"orcid":63},"Zouaq","Translating natural language questions into SPARQL queries enables Knowledge Base querying for factual and up-to-date responses. However, existing datasets for this task are predominantly template-based, leading models to learn superficial mappings between question and query templates rather than developing true generalization capabilities. As a result, models struggle when encountering naturally phrased, template-free questions. This paper introduces FRASE (FRAme-based Semantic Enhancement), a novel approach that leverages Frame Semantic Role Labeling (FSRL) to overcome this limitation. In addition, we present LCQ1-Frame, LCQ2-Frame, and QALD-10-Frame—a suite of new datasets derived from LC-QuAD 1.0, LC-QuAD 2.0, and QALD-10 where each question is enriched using FRASE through frame detection and the mapping of frame-elements to their corresponding arguments. We evaluate our approach for the Question-2-SPARQL task through extensive experiments using recent large language models (LLMs) under different fine-tuning configurations. 
Our results demonstrate that integrating frame-based structured representations consistently improves SPARQL generation performance, particularly in challenging generalization scenarios when test questions feature unseen templates (unknown template splits) and when they are all naturally phrased (reformulated questions).",{"paper_id":3195,"title":3196,"year":7,"month":188,"day":63,"doi":3197,"resource_url":3198,"first_page":3199,"last_page":3200,"pdf_url":3201,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3202,"paper_type":860,"authors":3203,"abstract":3207},"lrec2026-main-102","Representing Multimodality in Terminology Resources","10.63317\u002F2n5q4sh59xp9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-102","1320","1330","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.102.pdf","vezzani-2026-representing",[3204],{"paper_id":3195,"author_seq":247,"given_name":3205,"surname":3206,"affiliation":63,"orcid":63},"Federica","Vezzani","This paper addresses the lack of a multimodal approach to specialized knowledge representation in terminology work. In particular, we introduce a new Multimodal Terminological Metamodel (MTM) for the design of terminology resources which introduces an explicit modality layer, enabling uniform modelling of different language modalities within domain-specific and concept-oriented resources. The metamodel is formalised via an entity-relationship schema and a systematic contrast with the baseline framework – the Terminological Markup Framework (TMF; ISO-16642 (2017)) – to specify revised entities, relations, and cardinalities. As case study, we instantiate the MTM for the signed modality by defining a minimal data-category module with level-placement constraints, and we provide a lightweight, TBX-inspired XML serialisation that packages modality-specific terminological data in a consistent structure. 
Together, these components deliver a reproducible specification for designing and exchanging multimodal terminology resources.",{"paper_id":3209,"title":3210,"year":7,"month":188,"day":63,"doi":3211,"resource_url":3212,"first_page":3213,"last_page":3214,"pdf_url":3215,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3216,"paper_type":860,"authors":3217,"abstract":3236},"lrec2026-main-103","EPOP: A Benchmark Corpus for Assessing NLP Models on Structured Information Extraction in Plant Health","10.63317\u002F4in2fpefq4pz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-103","1331","1340","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.103.pdf","nedellec-etal-2026-epop",[3218,3221,3223,3226,3228,3231,3234],{"paper_id":3209,"author_seq":247,"given_name":3219,"surname":3220,"affiliation":63,"orcid":63},"Claire","Nedellec",{"paper_id":3209,"author_seq":232,"given_name":2599,"surname":3222,"affiliation":63,"orcid":63},"Courtin",{"paper_id":3209,"author_seq":218,"given_name":3224,"surname":3225,"affiliation":63,"orcid":63},"Xinzhi","Yao",{"paper_id":3209,"author_seq":203,"given_name":1938,"surname":3227,"affiliation":63,"orcid":63},"Grosdidier",{"paper_id":3209,"author_seq":188,"given_name":3229,"surname":3230,"affiliation":63,"orcid":63},"Isabelle","Pieretti",{"paper_id":3209,"author_seq":172,"given_name":3232,"surname":3233,"affiliation":63,"orcid":63},"Sandy","Duperier",{"paper_id":3209,"author_seq":155,"given_name":3172,"surname":3235,"affiliation":63,"orcid":63},"Bossy","We introduce the EPOP (Epidemiomonitoring of Plants) corpus, a new annotated resource for structured information extraction in the domain of plant health epidemiology. The corpus consists of translated news reports that reflect real-world phytosanitary monitoring scenarios. It includes annotations for named entities (e.g. 
Plant, Pest, Vector, Disease, Dissemination Pathway), identity coreferences, and both binary and complex n-ary relations that represent key events such as Transmits or Causes, along with their modalities. A distinctive feature of EPOP is its normalization layer where mentions of species and geographical locations are linked to canonical identifiers in the NCBI Taxonomy and GeoNames, enabling semantic disambiguation and integration with external knowledge bases. As the first publicly available corpus of its kind, EPOP presents a realistic and challenging benchmark, with high linguistic variability, entity role ambiguity, and long-distance relations. We report baseline results on core tasks (named entity recognition, normalization (entity-linking), and relation extraction) using both fine-tuned BERT-based models and hard-prompted large language models. These experiments demonstrate the utility of EPOP while also identifying areas for improvement, particularly in the extraction of complex relations. 
The corpus is released under an open license, to support research in environmental NLP, crop protection, and knowledge graph enrichment.",{"paper_id":3238,"title":3239,"year":7,"month":188,"day":63,"doi":3240,"resource_url":3241,"first_page":3242,"last_page":3243,"pdf_url":3244,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3245,"paper_type":860,"authors":3246,"abstract":3271},"lrec2026-main-104","ReTaT: A Unified Benchmark for Relation Extraction across Text and Table","10.63317\u002F4k8jqsbkjuir","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-104","1341","1351","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.104.pdf","ettaleb-etal-2026-retat",[3247,3250,3253,3256,3259,3262,3265,3268],{"paper_id":3238,"author_seq":247,"given_name":3248,"surname":3249,"affiliation":63,"orcid":63},"Mohamed","Ettaleb",{"paper_id":3238,"author_seq":232,"given_name":3251,"surname":3252,"affiliation":63,"orcid":63},"Thibault","Ehrhart",{"paper_id":3238,"author_seq":218,"given_name":3254,"surname":3255,"affiliation":63,"orcid":63},"Nathalie","Aussenac-Gilles",{"paper_id":3238,"author_seq":203,"given_name":3257,"surname":3258,"affiliation":63,"orcid":63},"Yoan","Chabot",{"paper_id":3238,"author_seq":188,"given_name":3260,"surname":3261,"affiliation":63,"orcid":63},"Mouna","Kamel",{"paper_id":3238,"author_seq":172,"given_name":3263,"surname":3264,"affiliation":63,"orcid":63},"Véronique","Moriceau",{"paper_id":3238,"author_seq":155,"given_name":3266,"surname":3267,"affiliation":63,"orcid":63},"Raphael","Troncy",{"paper_id":3238,"author_seq":138,"given_name":3269,"surname":3270,"affiliation":63,"orcid":63},"Fanfu","Wei","While prior work in Information Extraction (IE) has focused on extracting information from either textual content or tables in isolation, they miss critical information that emerges only from their interplay. 
Indeed, tables may summarize facts sparse in the text, while text can disambiguate or elaborate on table entries. This complementarity may take the form of relations which are expressed across text and tables. In this context, we are interested in the task of extracting such relations whose expression spans the two modalities. This task is an original one, for which no reference evaluation corpora exists. Thus we created ReTaT, a corpus that can be used to train and evaluate systems for extracting such relations. This corpus is composed of (table, surrounding text) pairs extracted from Wikipedia pages and has been manually annotated with relation triples. ReTaT is organized in three datasets with distinct characteristics: domain (business, telecommunication and female celebrities), size (from 50 to 255 pairs), language (English vs French), type of relations (data vs object properties), close vs open list of relation, size of the surrounding text (paragraph vs full page). We then assessed its quality and suitability for the joint table-text relation extraction task using Large Language Models (LLMs), at a time when LLMs have demonstrated their ability to extract relations from either text or tables in isolation.",{"paper_id":3273,"title":3274,"year":7,"month":188,"day":63,"doi":3275,"resource_url":3276,"first_page":3277,"last_page":3278,"pdf_url":3279,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3280,"paper_type":860,"authors":3281,"abstract":3297},"lrec2026-main-105","LitTx: A New Treatment Relation Extraction 
Dataset","10.63317\u002F5kshomz64z55","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-105","1352","1360","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.105.pdf","jiang-etal-2026-littx",[3282,3285,3288,3291,3294],{"paper_id":3273,"author_seq":247,"given_name":3283,"surname":3284,"affiliation":63,"orcid":63},"Yuhang","Jiang",{"paper_id":3273,"author_seq":232,"given_name":3286,"surname":3287,"affiliation":63,"orcid":63},"Md Sultan Al","Nahian",{"paper_id":3273,"author_seq":218,"given_name":3289,"surname":3290,"affiliation":63,"orcid":63},"Li Hao Richie","Xu",{"paper_id":3273,"author_seq":203,"given_name":3292,"surname":3293,"affiliation":63,"orcid":63},"Rani","Chikkanna",{"paper_id":3273,"author_seq":188,"given_name":3295,"surname":3296,"affiliation":63,"orcid":63},"Ramakanth","Kavuluru","The interest in biomedical relation extraction (RE) continues to persist even in the LLM era owing to RE being a prominent way to build knowledge graphs, which further ground LLM applications, especially in preventing hallucinations. Therapy-disease treatment relations from scientific literature are an important type in RE as they indicate emerging therapeutic hypotheses and off-label usages being explored in the community. An automatically extracted evolving knowledge-base of such relations will be of great utility to researchers because doing it manually is not viable with the exponential growth of biomedical articles. In this paper, toward this end, we introduce a new expert-annotated dataset LitTx for identifying treatment relationships discussed in literature given the lack of such datasets in the recent past. Besides confirmed or implied positive relations, we also introduce a new \"conditional treatment\" relation type where hedging or a potential relationship is indicated. Our baseline RE models with this new dataset demonstrate promising results, while also revealing clear areas for improvement. 
To foster innovation and ensure replicability in the biomedical RE community, we release our dataset, code, and annotation guidelines publicly: https:\u002F\u002Fgithub.com\u002Fbionlproc\u002FLitTx_dataset.",{"paper_id":3299,"title":3300,"year":7,"month":188,"day":63,"doi":3301,"resource_url":3302,"first_page":3303,"last_page":3304,"pdf_url":3305,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3306,"paper_type":860,"authors":3307,"abstract":3325},"lrec2026-main-106","LegitimNarrate: A Dataset for Analyzing Legitimation Mechanisms in Crowdfunding Narratives","10.63317\u002F3adcqvpnznxt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-106","1361","1371","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.106.pdf","lagrid-etal-2026-legitimnarrate",[3308,3311,3314,3317,3319,3322],{"paper_id":3299,"author_seq":247,"given_name":3309,"surname":3310,"affiliation":63,"orcid":63},"Asmaa","Lagrid",{"paper_id":3299,"author_seq":232,"given_name":3312,"surname":3313,"affiliation":63,"orcid":63},"Sebastien","Fournier",{"paper_id":3299,"author_seq":218,"given_name":3315,"surname":3316,"affiliation":63,"orcid":63},"Benedicte","Aldebert",{"paper_id":3299,"author_seq":203,"given_name":2207,"surname":3318,"affiliation":63,"orcid":63},"Ghods",{"paper_id":3299,"author_seq":188,"given_name":3320,"surname":3321,"affiliation":63,"orcid":63},"Daisy","Bertrand",{"paper_id":3299,"author_seq":172,"given_name":3323,"surname":3324,"affiliation":63,"orcid":63},"Gael","Leboeuf","New ventures face challenges due to their liability of newness and need to gain legitimacy within the context of crowdfunding to secure vital resources for growth and survival. Previous studies have primarily assessed crowdfunding success through structured metadata or social media analytics, often neglecting detailed examinations of campaign narratives. 
To fill this gap, we introduce LegitimNarrate, an expert-annotated dataset specifically designed to analyze legitimation mechanisms in crowdfunding narratives. This dataset comprises 97 Kickstarter campaign descriptions segmented into 4,954 sentences, each meticulously annotated by management experts according to theoretical legitimacy frameworks. We benchmark LegitimNarrate with various contextual sentence-classification methods. This resource facilitates comprehensive research on discursive legitimacy and the role of narrative in crowdfunding contexts.",{"paper_id":3327,"title":3328,"year":7,"month":188,"day":63,"doi":3329,"resource_url":3330,"first_page":3331,"last_page":3332,"pdf_url":3333,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3334,"paper_type":860,"authors":3335,"abstract":3339},"lrec2026-main-107","A Fine-tuned ASR Model for Historical American Dialect Recordings","10.63317\u002F5bjeqct6ozd3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-107","1372","1381","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.107.pdf","coats-2026-fine",[3336],{"paper_id":3327,"author_seq":247,"given_name":3337,"surname":3338,"affiliation":63,"orcid":63},"Steven","Coats","This paper introduces DASS2019_NLP, a newly cleaned and curated version of the Digital Archive of Southern Speech, a major historical resource for the study of Southern American English, together with six Whisper ASR models fine-tuned on the data. The 344 hours of conversational speech were recorded by fieldworkers between 1969 and 1983 across the Southern United States. Each Whisper model was fine-tuned on DASS2019_NLP, then evaluated on held-out DASS2019_NLP data, a subset of the Corpus of Regional African American Language (CORAAL), and a subset of Common Voice. The fine-tuned models show consistent learning trajectories and achieve an average 37% reduction in WER on in-domain data relative to baseline models. 
Notably, they also improve transcription accuracy on CORAAL, suggesting enhanced robustness to African American English. As expected under read vs. conversational style mismatch, accuracy on CV generally favors the OpenAI baselines. Both the DASS2019_NLP dataset and the best-performing fine-tuned model (whisper-large-v3-DASS-ct2) have been publicly released. These resources provide new tools for quantitative research in historical sociolinguistics, facilitating large-scale analyses of phonological, lexical, and grammatical change in Southern and African American English.",{"paper_id":3341,"title":3342,"year":7,"month":188,"day":63,"doi":3343,"resource_url":3344,"first_page":3345,"last_page":3346,"pdf_url":3347,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3348,"paper_type":860,"authors":3349,"abstract":3356},"lrec2026-main-108","A Comprehensive Full-Form Lexicon for Arabic NLP and Speech Technology","10.63317\u002F2gbvmmu4ix5e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-108","1382","1393","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.108.pdf","haralambous-etal-2026-comprehensive",[3350,3353],{"paper_id":3341,"author_seq":247,"given_name":3351,"surname":3352,"affiliation":63,"orcid":63},"Yannis","Haralambous",{"paper_id":3341,"author_seq":232,"given_name":3354,"surname":3355,"affiliation":63,"orcid":63},"Jack","Halpern","Natural Language Processing (NLP) applications require morphological data with precise grammatical attributes, while speech technology requires abundant phonemic and phonetic data. This presents a challenge for Arabic due to its abundant morphological, orthographic, and phonemic ambiguity in both MSA and its various dialects. Existing systems struggle with incomplete and unstructured web data, leading to suboptimal performance in both morphological analysis and speech applications. 
This paper presents ArabLEX, a full-form lexicon (includes all wordforms, i.e., fully inflected\u002Fcliticized members of a lexeme class) that addresses these issues by providing a large-scale database designed to enhance NLP accuracy. It comprises approximately 570 million entries with fully inflected forms and detailed morphological, phonetic, and orthographic attributes. ArabLEX serves as a foundational framework for developing comprehensive Arabic lexical resources for NLP, particularly for speech technology, as well as dialect databases.",{"paper_id":3358,"title":3359,"year":7,"month":188,"day":63,"doi":3360,"resource_url":3361,"first_page":3362,"last_page":3363,"pdf_url":3364,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3365,"paper_type":860,"authors":3366,"abstract":3390},"lrec2026-main-109","MzansiText and MzansiLM: An Open Corpus and Decoder-Only Language Model for South African Languages","10.63317\u002F4g7i3kqszquh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-109","1394","1408","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.109.pdf","lombard-etal-2026-mzansitext",[3367,3370,3373,3376,3379,3382,3385,3388],{"paper_id":3358,"author_seq":247,"given_name":3368,"surname":3369,"affiliation":63,"orcid":63},"Anri 
M.","Lombard",{"paper_id":3358,"author_seq":232,"given_name":3371,"surname":3372,"affiliation":63,"orcid":63},"Temi","Aina",{"paper_id":3358,"author_seq":218,"given_name":3374,"surname":3375,"affiliation":63,"orcid":63},"Ethan","Wolff",{"paper_id":3358,"author_seq":203,"given_name":3377,"surname":3378,"affiliation":63,"orcid":63},"Elan","Norvick",{"paper_id":3358,"author_seq":188,"given_name":3380,"surname":3381,"affiliation":63,"orcid":63},"Sbonelo","Gumede",{"paper_id":3358,"author_seq":172,"given_name":3383,"surname":3384,"affiliation":63,"orcid":63},"Simbarashe","Mawere",{"paper_id":3358,"author_seq":155,"given_name":3386,"surname":3387,"affiliation":63,"orcid":63},"Francois","Meyer",{"paper_id":3358,"author_seq":138,"given_name":1380,"surname":3389,"affiliation":63,"orcid":63},"Buys","Decoder-only language models can be adapted to diverse tasks through instruction finetuning, but the extent to which this generalizes at small scale for low-resource languages remains unclear. We focus on the languages of South Africa, where we are not aware of a publicly available decoder-only model that explicitly targets all eleven official written languages, nine of which are low-resource. We introduce MzansiText, a curated multilingual pretraining corpus with a reproducible filtering pipeline, and MzansiLM, a 125M-parameter language model trained from scratch. We evaluate MzansiLM on natural language understanding and generation using three adaptation regimes: monolingual task-specific finetuning, multilingual task-specific finetuning, and general multi-task instruction finetuning. Monolingual task-specific finetuning achieves strong performance on data-to-text generation, reaching 20.65 BLEU on isiXhosa and competing with encoder-decoder baselines over ten times larger. Multilingual task-specific finetuning benefits closely related languages on topic classification, achieving 78.5% macro-F1 on isiXhosa news classification. 
While MzansiLM adapts effectively to supervised NLU and NLG tasks, few-shot reasoning remains challenging at this model size, with performance near chance even for much larger decoder-only models. We release MzansiText and MzansiLM to provide a reproducible decoder-only baseline and clear guidance on adaptation strategies for South African languages at small scale.",{"paper_id":3392,"title":3393,"year":7,"month":188,"day":63,"doi":3394,"resource_url":3395,"first_page":3396,"last_page":3397,"pdf_url":3398,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3399,"paper_type":860,"authors":3400,"abstract":3489},"lrec2026-main-110","Very Large-Scale Multilingual Resources for LLMs and MT. Mono- and Bi-lingual Data, Multilingual Evaluation, and Pre-Trained Models","10.63317\u002F25xbdofco9od","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-110","1409","1434","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.110.pdf","oepen-etal-2026-very",[3401,3404,3407,3410,3413,3416,3419,3422,3424,3427,3430,3433,3435,3438,3441,3444,3447,3450,3453,3456,3459,3462,3465,3468,3471,3474,3477,3481,3485],{"paper_id":3392,"author_seq":247,"given_name":3402,"surname":3403,"affiliation":63,"orcid":63},"Stephan","Oepen",{"paper_id":3392,"author_seq":232,"given_name":3405,"surname":3406,"affiliation":63,"orcid":63},"Nikolay","Arefyev",{"paper_id":3392,"author_seq":218,"given_name":3408,"surname":3409,"affiliation":63,"orcid":63},"Mikko","Aulamo",{"paper_id":3392,"author_seq":203,"given_name":3411,"surname":3412,"affiliation":63,"orcid":63},"Marta","Bañón",{"paper_id":3392,"author_seq":188,"given_name":3414,"surname":3415,"affiliation":63,"orcid":63},"Maja","Buljan",{"paper_id":3392,"author_seq":172,"given_name":3417,"surname":3418,"affiliation":63,"orcid":63},"Laurie V.","Burchell",{"paper_id":3392,"author_seq":155,"given_name":3420,"surname":3421,"affiliation":63,"orcid":63},"Lucas Georges 
Gabriel","Charpentier",{"paper_id":3392,"author_seq":138,"given_name":3423,"surname":1840,"affiliation":63,"orcid":63},"Pinzhen",{"paper_id":3392,"author_seq":121,"given_name":3425,"surname":3426,"affiliation":63,"orcid":63},"Mariia","Fedorova",{"paper_id":3392,"author_seq":104,"given_name":3428,"surname":3429,"affiliation":63,"orcid":63},"Ona de","Gibert",{"paper_id":3392,"author_seq":87,"given_name":3431,"surname":3432,"affiliation":63,"orcid":63},"Barry","Haddow",{"paper_id":3392,"author_seq":73,"given_name":1380,"surname":3434,"affiliation":63,"orcid":63},"Hajič",{"paper_id":3392,"author_seq":55,"given_name":3436,"surname":3437,"affiliation":63,"orcid":63},"Jindrich","Helcl",{"paper_id":3392,"author_seq":38,"given_name":3439,"surname":3440,"affiliation":63,"orcid":63},"Andrey","Kutuzov",{"paper_id":3392,"author_seq":17,"given_name":3442,"surname":3443,"affiliation":63,"orcid":63},"Veronika","Laippala",{"paper_id":3392,"author_seq":2971,"given_name":3445,"surname":3446,"affiliation":63,"orcid":63},"Zihao","Li",{"paper_id":3392,"author_seq":2974,"given_name":3448,"surname":3449,"affiliation":63,"orcid":63},"Bhavitvya","Malik",{"paper_id":3392,"author_seq":857,"given_name":3451,"surname":3452,"affiliation":63,"orcid":63},"Vladislav","Mikhailov",{"paper_id":3392,"author_seq":877,"given_name":3454,"surname":3455,"affiliation":63,"orcid":63},"Amanda","Myntti",{"paper_id":3392,"author_seq":2984,"given_name":3457,"surname":3458,"affiliation":63,"orcid":63},"Dayyán","O'Brien",{"paper_id":3392,"author_seq":2988,"given_name":3460,"surname":3461,"affiliation":63,"orcid":63},"Lucie","Polakova",{"paper_id":3392,"author_seq":2992,"given_name":3463,"surname":3464,"affiliation":63,"orcid":63},"Gema","Ramírez-Sánchez",{"paper_id":3392,"author_seq":2996,"given_name":3466,"surname":3467,"affiliation":63,"orcid":63},"Janine","Siewert",{"paper_id":3392,"author_seq":3000,"given_name":3469,"surname":3470,"affiliation":63,"orcid":63},"Pavel","Stepachev",{"paper_id":3392,"author_seq":300
4,"given_name":3472,"surname":3473,"affiliation":63,"orcid":63},"Joerg","Tiedemann",{"paper_id":3392,"author_seq":3008,"given_name":3475,"surname":3476,"affiliation":63,"orcid":63},"Teemu","Vahtola",{"paper_id":3392,"author_seq":3478,"given_name":3479,"surname":3480,"affiliation":63,"orcid":63},"27","Dusan","Varis",{"paper_id":3392,"author_seq":3482,"given_name":3483,"surname":3484,"affiliation":63,"orcid":63},"28","Fedor","Vitiugin",{"paper_id":3392,"author_seq":3486,"given_name":3487,"surname":3488,"affiliation":63,"orcid":63},"29","Jaume","Zaragoza","We present an ongoing initiative to provide open, very large, high-quality, and richly annotated textual datasets for almost 200 languages. At 30 trillion tokens, this is likely the largest generally available multilingual collection of LLM pre-training data. These datasets are derived from web crawls from different sources and accompanied with a complete, open-source pipeline for document selection from web archives, text extraction from HTML, language identification for noisy texts, exact and near-deduplication, annotation with, among others, register labels, text quality estimates, and personally identifiable information; and final selection and filtering. We report on data quality probes through contrastive and analytical statistics, through manual inspection of samples for some 20 languages, and through end-to-end evaluation of various language model architectures trained on this data. For multilingual LLM evaluation, we provide a comprehensive collection of benchmarks for nine European languages, with special emphasis on natively created tasks, mechanisms to mitigate prompt sensitivity, and refined normalization and aggregation of scores. Additionally, we train and evaluate a family of 57 monolingual encoder–decoder models, as well as about 30 “smallish” monolingual GPT-like reference models. 
Besides the monolingual data and models, we also present a very large collection of parallel texts automatically mined from this data, together with a novel parallel corpus synthesized via machine translation.",{"paper_id":3491,"title":3492,"year":7,"month":188,"day":63,"doi":3493,"resource_url":3494,"first_page":3495,"last_page":3496,"pdf_url":3497,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3498,"paper_type":860,"authors":3499,"abstract":3508},"lrec2026-main-111","Generation of Instruction and Preference Dataset for Improving Japanese Instruction Following in LLMs","10.63317\u002F3w8ceszaj7m9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-111","1435","1454","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.111.pdf","moriyama-etal-2026-generation",[3500,3503,3505],{"paper_id":3491,"author_seq":247,"given_name":3501,"surname":3502,"affiliation":63,"orcid":63},"Kei","Moriyama",{"paper_id":3491,"author_seq":232,"given_name":2796,"surname":3504,"affiliation":63,"orcid":63},"Kodama",{"paper_id":3491,"author_seq":218,"given_name":3506,"surname":3507,"affiliation":63,"orcid":63},"Kouta","Nakayama","Instruction following, the ability to generate text that aligns with human intent, is a core capability of large language models (LLMs) for real-world applications. Instruction tuning is widely used to obtain this capability, but it requires large amounts of annotated data. To reduce the labor and cost of large-scale annotation, data augmentation using LLMs has been proposed as a promising approach. As this approach has primarily been applied to English datasets, its effectiveness in other languages, such as Japanese, remains unclear. In this paper, we propose an automatic pipeline for generating instruction and preference datasets in Japanese. The instruction dataset is created by expanding a manually annotated dataset using an LLM. 
The preference dataset is then constructed by adding LLM-generated negative examples to the instruction dataset. To ensure the quality of the datasets, instructions and responses are evaluated using LLM-as-a-Judge and ROUGE-L. Experimental results using supervised fine-tuning and direct preference optimization demonstrate that these synthetic datasets improve the instruction-following capability in Japanese.",{"paper_id":3510,"title":3511,"year":7,"month":188,"day":63,"doi":3512,"resource_url":3513,"first_page":3514,"last_page":3515,"pdf_url":3516,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3517,"paper_type":860,"authors":3518,"abstract":3528},"lrec2026-main-112","Adapting Pretrained Models to Endangered Languages in Japan: A Comparative Study on Ryukyuan and Ainu Speech Recognition","10.63317\u002F3iw5dymnwwbr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-112","1455","1463","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.112.pdf","matsuura-etal-2026-adapting",[3519,3522,3525],{"paper_id":3510,"author_seq":247,"given_name":3520,"surname":3521,"affiliation":63,"orcid":63},"Kohei","Matsuura",{"paper_id":3510,"author_seq":232,"given_name":3523,"surname":3524,"affiliation":63,"orcid":63},"Takanori","Ashihara",{"paper_id":3510,"author_seq":218,"given_name":3526,"surname":3527,"affiliation":63,"orcid":63},"Tatsuya","Kawahara","We investigate high-accuracy and speaker-robust automatic speech recognition (ASR) models by leveraging pretrained models for endangered languages in Japan — Ryukyuan (Shuri dialect) and Ainu (Saru dialect) — to support language and cultural preservation. In particular, this study presents the first experimental study on building and evaluating an ASR model for the Ryukyuan language. 
Specifically, we compare existing multilingual pretrained models, Whisper and XLS-R, with our in-house Japanese-focused model (JP-90k) pretrained solely on a large-scale weakly-supervised Japanese dataset. These models were fine-tuned on up to 10 and 32 hours of Ryukyuan and Ainu data, respectively. As a result, JP-90k consistently outperformed other models of similar size in both languages. In addition, it demonstrated a remarkable advantage when training data was very limited, i.e., an hour or less. These findings suggest that large-scale pretraining on a language closely related to the target ones can yield robust low-resource ASR, including for unseen speakers and out-of-domain conditions. Furthermore, we found that all pretrained models achieved convergence in ASR accuracy with as little as 3-5 hours of fine-tuning data for both languages.",{"paper_id":3530,"title":3531,"year":7,"month":188,"day":63,"doi":3532,"resource_url":3533,"first_page":3534,"last_page":3535,"pdf_url":3536,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3537,"paper_type":860,"authors":3538,"abstract":3554},"lrec2026-main-113","Prerequisites for Advancing Automatic Speech Recognition in 
Breton","10.63317\u002F5eh2inevm6e5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-113","1464","1473","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.113.pdf","grobol-etal-2026-prerequisites",[3539,3542,3545,3548,3551],{"paper_id":3530,"author_seq":247,"given_name":3540,"surname":3541,"affiliation":63,"orcid":63},"Morgan","Grobol",{"paper_id":3530,"author_seq":232,"given_name":3543,"surname":3544,"affiliation":63,"orcid":63},"Alice","Millour",{"paper_id":3530,"author_seq":218,"given_name":3546,"surname":3547,"affiliation":63,"orcid":63},"Wassim","Zemouri",{"paper_id":3530,"author_seq":203,"given_name":3549,"surname":3550,"affiliation":63,"orcid":63},"Yuna","Drapier",{"paper_id":3530,"author_seq":188,"given_name":3552,"surname":3553,"affiliation":63,"orcid":63},"Mélanie","Jouitteau","We report on the extensive preliminary work of a collaborative science project aimed at developing Automatic Speech Recognition (ASR) for a minoritized European language: Breton. Hoping to help similar initiatives for other languages and communities, we present the methodology we developed for this specific ecosystem, with an estimate of the material and immaterial resources we used. Our approach is grounded in the needs and resources of the community formed by the end-users of digital development. 
Our multidisciplinary scientific collaboration involves linguists and speakers embedded in the academic and linguistic community, and computer scientists.",{"paper_id":3556,"title":3557,"year":7,"month":188,"day":63,"doi":3558,"resource_url":3559,"first_page":3560,"last_page":3561,"pdf_url":3562,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3563,"paper_type":860,"authors":3564,"abstract":3580},"lrec2026-main-114","Integrating TEI, NER\u002FNEL, Textometry, and Linked Data for a Semantically Enriched Interview Corpus","10.63317\u002F4zjef9ycsupk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-114","1474","1484","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.114.pdf","stankovic-etal-2026-integrating",[3565,3568,3571,3574,3577],{"paper_id":3556,"author_seq":247,"given_name":3566,"surname":3567,"affiliation":63,"orcid":63},"Ranka","Stankovic",{"paper_id":3556,"author_seq":232,"given_name":3569,"surname":3570,"affiliation":63,"orcid":63},"Tamara","Vučenović",{"paper_id":3556,"author_seq":218,"given_name":3572,"surname":3573,"affiliation":63,"orcid":63},"Biljana","Rujević",{"paper_id":3556,"author_seq":203,"given_name":3575,"surname":3576,"affiliation":63,"orcid":63},"Milica Ikonić","Nešić",{"paper_id":3556,"author_seq":188,"given_name":3578,"surname":3579,"affiliation":63,"orcid":63},"Mihailo","Škorić","This paper presents a pipeline that converts unstructured interview transcripts into a semantically enriched, queryable knowledge resource. The texts from the Digitalne Ikone 20+ interview collection were first encoded in TEI XML (Text Encoding Initiative), marking interview boundaries, paragraph breaks, speaker turns with identifiers, dates, and topics. This structural encoding underpins downstream NLP and enables structured querying (e.g., by speaker). 
We then applied Named Entity Recognition to identify persons, places, organizations, and events, and embedded the results directly in TEI. In the third stage, Named Entity Linking mapped entity mentions to canonical Wikidata identifiers via context-aware disambiguation; missing entries were added to Wikidata when necessary. The resulting TEI+NER\u002FNEL corpus, serialized as linked data, follows the NIF (NLP Interchange Framework). The pipeline also supports retrieval-augmented summarization that retrieves evidence passages and prompts LLMs (implemented with DSPy) to produce faithful interview summaries. We discuss design choices (TXM for textometry with JeRTeh resources; TESLA models for NER\u002FNEL), report qualitative gains in interpretability through semantic links, and outline future work on domain-adapted NER\u002FNEL, graph-based completion, and more expressive RAG architectures. The approach is replicable for other oral-history or media corpora and advances practical, evidence-grounded access to cultural archives and beyond.",{"paper_id":3582,"title":3583,"year":7,"month":188,"day":63,"doi":3584,"resource_url":3585,"first_page":3586,"last_page":3587,"pdf_url":3588,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3589,"paper_type":860,"authors":3590,"abstract":3631},"lrec2026-main-115","Uhura: A Benchmark for Evaluating Scientific Question Answering and Truthfulness in Low-Resource African Languages","10.63317\u002F43x6rqwpycuo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-115","1485","1504","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.115.pdf","bayes-etal-2026-uhura",[3591,3594,3597,3600,3603,3606,3609,3611,3614,3617,3619,3622,3625,3628],{"paper_id":3582,"author_seq":247,"given_name":3592,"surname":3593,"affiliation":63,"orcid":63},"Edward Thomas","Bayes",{"paper_id":3582,"author_seq":232,"given_name":3595,"surname":3596,"affiliation":63,"orcid":63},"Israel 
Abebe","Azime",{"paper_id":3582,"author_seq":218,"given_name":3598,"surname":3599,"affiliation":63,"orcid":63},"Jesujoba","Alabi",{"paper_id":3582,"author_seq":203,"given_name":3601,"surname":3602,"affiliation":63,"orcid":63},"Jonas","Kgomo",{"paper_id":3582,"author_seq":188,"given_name":3604,"surname":3605,"affiliation":63,"orcid":63},"Tyna","Eloundou",{"paper_id":3582,"author_seq":172,"given_name":3607,"surname":3608,"affiliation":63,"orcid":63},"Elizabeth","Proehl",{"paper_id":3582,"author_seq":155,"given_name":3610,"surname":1840,"affiliation":63,"orcid":63},"Kai",{"paper_id":3582,"author_seq":138,"given_name":3612,"surname":3613,"affiliation":63,"orcid":63},"Imaan","Khadir",{"paper_id":3582,"author_seq":121,"given_name":3615,"surname":3616,"affiliation":63,"orcid":63},"Naome A.","Etori",{"paper_id":3582,"author_seq":104,"given_name":3618,"surname":2728,"affiliation":63,"orcid":63},"Shamsuddeen Hassan",{"paper_id":3582,"author_seq":87,"given_name":3620,"surname":3621,"affiliation":63,"orcid":63},"Choice","Mpanza",{"paper_id":3582,"author_seq":73,"given_name":3623,"surname":3624,"affiliation":63,"orcid":63},"Igneciah Pocia IP","Thete",{"paper_id":3582,"author_seq":55,"given_name":3626,"surname":3627,"affiliation":63,"orcid":63},"Dietrich","Klakow",{"paper_id":3582,"author_seq":38,"given_name":3629,"surname":3630,"affiliation":63,"orcid":63},"David Ifeoluwa","Adelani","Evaluations of Large Language Models (LLMs) on knowledge-intensive tasks and factual accuracy often focus on high-resource languages primarily because datasets for low-resource languages (LRLs) are scarce. In this paper, we present Uhura—a new benchmark that focuses on two tasks in six typologically-diverse African languages, created via human translation of existing English benchmarks. The first dataset, Uhura-ARC-Easy, is composed of multiple-choice science questions. 
The second, Uhura-TruthfulQA, is a safety benchmark testing the truthfulness of models on topics including health, law, finance, and politics. We highlight the challenges creating benchmarks with highly technical content for LRLs and outline mitigation strategies. Our evaluation reveals a significant performance gap between proprietary models such as GPT-4o and o1-preview, and Claude models, and open-source models like LLaMA and Gemma. Additionally, all models perform better in English than in African languages. These results indicate that LLMs struggle with answering scientific questions and are more prone to generating false claims in low-resource African languages. Our findings underscore the necessity for continuous improvement of multilingual LLM capabilities in LRL settings to ensure safe and reliable use in real-world contexts. We open-source the Uhura Benchmark and Uhura Platform to foster further research and development in NLP for LRLs.",{"paper_id":3633,"title":3634,"year":7,"month":188,"day":63,"doi":3635,"resource_url":3636,"first_page":3637,"last_page":3638,"pdf_url":3639,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3640,"paper_type":860,"authors":3641,"abstract":3661},"lrec2026-main-116","Dialectal Filtering: Synthesizing Kurdish Corpora for Low-Resource Varieties by Utilizing \"Noise\" in Large Textual 
Data","10.63317\u002F249h3r9tiw52","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-116","1505","1519","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.116.pdf","schuler-etal-2026-dialectal",[3642,3645,3648,3651,3654,3657,3658],{"paper_id":3633,"author_seq":247,"given_name":3643,"surname":3644,"affiliation":63,"orcid":63},"Christian","Schuler",{"paper_id":3633,"author_seq":232,"given_name":3646,"surname":3647,"affiliation":63,"orcid":63},"Raman","Ahmad",{"paper_id":3633,"author_seq":218,"given_name":3649,"surname":3650,"affiliation":63,"orcid":63},"Ānrán","Wáng",{"paper_id":3633,"author_seq":203,"given_name":3652,"surname":3653,"affiliation":63,"orcid":63},"Daniil","Gurgurov",{"paper_id":3633,"author_seq":188,"given_name":3655,"surname":3656,"affiliation":63,"orcid":63},"Timo","Baumann",{"paper_id":3633,"author_seq":172,"given_name":2467,"surname":2972,"affiliation":63,"orcid":63},{"paper_id":3633,"author_seq":155,"given_name":3659,"surname":3660,"affiliation":63,"orcid":63},"Josef van","Genabith","This work introduces a dialect-aware text filtering framework to pre-process, clean, and enhance large text corpora, creating variety-specific sub-corpora for neglected language varieties. We apply our framework to Kurdish, a language with rich dialectal diversity, which presents significant challenges for Natural Language Processing due to its low-resource status and the noisy nature of available text corpora. Leveraging lexicographic features, we assign multi-language-labels to text instances and synthesize over 130 dialect specific corpora from large \"noisy\" data sets containing unlabeled mixtures of Kurdish varieties, representing to our knowledge the largest collection of dialect-specific Kurdish NLP resources to date. This work contributes to the creation of low-resource language technology foundations, especially dialect-specific NLP applications. 
Specifically, we advance research on Kurdish languages by providing insights into the linguistic relationships among Kurdish varieties.",{"paper_id":3663,"title":3664,"year":7,"month":188,"day":63,"doi":3665,"resource_url":3666,"first_page":3667,"last_page":3668,"pdf_url":3669,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3670,"paper_type":860,"authors":3671,"abstract":3679},"lrec2026-main-117","HybridCodeAuthorship: A Benchmark Dataset for Line-Level Code Authorship Detection","10.63317\u002F4edsbxrqe8na","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-117","1520","1532","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.117.pdf","patterson-etal-2026-hybridcodeauthorship",[3672,3675,3677],{"paper_id":3663,"author_seq":247,"given_name":3673,"surname":3674,"affiliation":63,"orcid":63},"Luke S.","Patterson",{"paper_id":3663,"author_seq":232,"given_name":3446,"surname":3676,"affiliation":63,"orcid":63},"Wang",{"paper_id":3663,"author_seq":218,"given_name":2397,"surname":3678,"affiliation":63,"orcid":63},"Faulkner","Thanks to the rapid adoption of AI code assistants powered by large language models (LLMs), industry codebases are, increasingly, a hybrid of AI- and human-authored code. For risk management and productivity analysis purposes, it is crucial to enable fine-grained location detection of AI-generated code. To develop algorithms for this task, quality benchmarks are needed to assess performance. However, existing benchmarks tend to comprise academic, LeetCode-style problems and presume a code snippet is either completely human-authored or completely AI-authored, which is not reflective of the diverse intents and styles of industry codebases utilizing AI code assistants. To fill these gaps, we introduce HybridCodeAuthorship, a novel benchmark of Python code files with interleaved human- and AI-authored lines of code to simulate authentic utilization of AI code assistants. 
In this paper, we first present our dataset construction pipeline, which leverages CodeSearchNet, a massive collection of links to open sourced repositories on GitHub. We then benchmark the performance of two state-of-the-art AI-generated code detection algorithms at both the line- and chunk-level. Experimental results demonstrate that HybridCodeAuthorship is a challenging benchmark with a top-scoring algorithm, AIGCode Detector, obtaining a highest F1 score of 0.48 and 0.56 on line-level and chunk-level code detection tasks, respectively.",{"paper_id":3681,"title":3682,"year":7,"month":188,"day":63,"doi":3683,"resource_url":3684,"first_page":3685,"last_page":3686,"pdf_url":3687,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3688,"paper_type":860,"authors":3689,"abstract":3715},"lrec2026-main-118","CorEGe-PT: Compiling a Large Corpus of Academic Texts in Portuguese","10.63317\u002F3wm6ywh8gzxm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-118","1533","1543","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.118.pdf","kuhn-etal-2026-corege",[3690,3693,3695,3698,3700,3703,3706,3709,3712],{"paper_id":3681,"author_seq":247,"given_name":3691,"surname":3692,"affiliation":63,"orcid":63},"Tanara 
Zingano","Kuhn",{"paper_id":3681,"author_seq":232,"given_name":3694,"surname":2017,"affiliation":63,"orcid":63},"José",{"paper_id":3681,"author_seq":218,"given_name":3696,"surname":3697,"affiliation":63,"orcid":63},"Bruno","Neves",{"paper_id":3681,"author_seq":203,"given_name":3699,"surname":1581,"affiliation":63,"orcid":63},"Daniela",{"paper_id":3681,"author_seq":188,"given_name":3701,"surname":3702,"affiliation":63,"orcid":63},"Elisabete","Cação",{"paper_id":3681,"author_seq":172,"given_name":3704,"surname":3705,"affiliation":63,"orcid":63},"Ivo","Simões",{"paper_id":3681,"author_seq":155,"given_name":3707,"surname":3708,"affiliation":63,"orcid":63},"Jacinto","Estima",{"paper_id":3681,"author_seq":138,"given_name":3710,"surname":3711,"affiliation":63,"orcid":63},"Delfim","Leão",{"paper_id":3681,"author_seq":121,"given_name":3713,"surname":3714,"affiliation":63,"orcid":63},"Hugo Goncalo","Oliveira","This paper describes the creation of a large-scale corpus of academic texts in Portuguese, dubbed CorEGe-PT, extracted from the institutional repository of a Portuguese university. Its compilation methodology, which combined automatic and manual procedures, is detailed, together with challenges faced and proposed solutions. The process included a thorough analysis of the metadata, which will be publicly released together with the documents, extracted in a markdown format. 
CorEGe-PT covers five areas of knowledge and, with over 34,000 documents and 1B tokens, is the largest corpus of its kind in Portuguese, which will enable in-depth linguistic studies while providing data for adapting Large Language Models to academic Portuguese and related tasks.",{"paper_id":3717,"title":3718,"year":7,"month":188,"day":63,"doi":3719,"resource_url":3720,"first_page":3721,"last_page":3722,"pdf_url":3723,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3724,"paper_type":860,"authors":3725,"abstract":3738},"lrec2026-main-119","SLURP-TN : Resource for Tunisian Dialect Spoken Language Understanding","10.63317\u002F4m2ac973aco4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-119","1544","1552","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.119.pdf","elleuch-etal-2026-slurp",[3726,3729,3732,3735],{"paper_id":3717,"author_seq":247,"given_name":3727,"surname":3728,"affiliation":63,"orcid":63},"Haroun","Elleuch",{"paper_id":3717,"author_seq":232,"given_name":3730,"surname":3731,"affiliation":63,"orcid":63},"Salima","Mdhaffar",{"paper_id":3717,"author_seq":218,"given_name":3733,"surname":3734,"affiliation":63,"orcid":63},"Yannick","Estève",{"paper_id":3717,"author_seq":203,"given_name":3736,"surname":3737,"affiliation":63,"orcid":63},"Fethi","Bougares","Spoken Language Understanding (SLU) aims to extract the semantic information from the speech utterance of user queries. It is a core component in a task-oriented dialog system. With the spectacular progress of deep neural network models and the evolution of pre-trained language models, SLU has obtained significant breakthroughs. However, only a few high-resource languages have taken advantage of this progress due to the absence of SLU resources. In this paper, we seek to mitigate this obstacle by introducing SLURP-TN. 
This dataset was created by recording 55 native speakers uttering sentences in Tunisian dialect, manually translated from six SLURP domains. The result is an SLU Tunisian dialect dataset that comprises 4165 sentences recorded into around 5 hours of acoustic material. We also develop a number of Automatic Speech Recognition and SLU models exploiting SLURP-TN. The Dataset and baseline models are available at: https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FElyadata\u002FSLURP-TN.",{"paper_id":3740,"title":3741,"year":7,"month":188,"day":63,"doi":3742,"resource_url":3743,"first_page":3744,"last_page":3745,"pdf_url":3746,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3747,"paper_type":860,"authors":3748,"abstract":3757},"lrec2026-main-120","From Semi-Digital Edition to Historical NLP Resource:Constructing and Annotating Historical Multilingual Parallel Text Collections on the TEITOK Platform","10.63317\u002F2okpwwaemhsn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-120","1553","1561","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.120.pdf","janssen-etal-2026-semi",[3749,3752,3754],{"paper_id":3740,"author_seq":247,"given_name":3750,"surname":3751,"affiliation":63,"orcid":63},"Maarten","Janssen",{"paper_id":3740,"author_seq":232,"given_name":2742,"surname":3753,"affiliation":63,"orcid":63},"Jouravel",{"paper_id":3740,"author_seq":218,"given_name":3755,"surname":3756,"affiliation":63,"orcid":63},"Piroska","Lendvai","We construct a multilingual, parallelized digital collection comprising a reconstructed Old Greek text from the 4th century CE and its seven historical versions, modern editions, and translations. We describe the workflow and integrated tools on the TEITOK web-based platform for ingesting, aligning, parallelizing and morphosyntactically annotating these materials. 
Textual alignment is performed on both the sentence and word level, after which the data are annotated with dependency parses in the Universal Dependencies paradigm. The newly created and manually post-corrected collection can be explored via advanced parallel search functionalities and flexible visualization modes. This workflow is meant to provide support for digital humanities and historical NLP projects via transforming the input texts into parallel NLP resources, enabling cross-fertilization and new insights by multiple research communities.",{"paper_id":3759,"title":3760,"year":7,"month":188,"day":63,"doi":3761,"resource_url":3762,"first_page":3763,"last_page":3764,"pdf_url":3765,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3766,"paper_type":860,"authors":3767,"abstract":3784},"lrec2026-main-121","Toward Conversational Hungarian Speech Recognition: Introducing the BEA-Large and BEA-Dialogue Datasets","10.63317\u002F47jcmqefap7z","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-121","1562","1570","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.121.pdf","gedeon-etal-2026-conversational",[3768,3771,3774,3776,3779,3781],{"paper_id":3759,"author_seq":247,"given_name":3769,"surname":3770,"affiliation":63,"orcid":63},"Máté","Gedeon",{"paper_id":3759,"author_seq":232,"given_name":3772,"surname":3773,"affiliation":63,"orcid":63},"Piroska Zsófia","Barta",{"paper_id":3759,"author_seq":218,"given_name":1625,"surname":3775,"affiliation":63,"orcid":63},"Mihajlik",{"paper_id":3759,"author_seq":203,"given_name":3777,"surname":3778,"affiliation":63,"orcid":63},"Tekla Etelka","Graczi",{"paper_id":3759,"author_seq":188,"given_name":2742,"surname":3780,"affiliation":63,"orcid":63},"Kohári",{"paper_id":3759,"author_seq":172,"given_name":3782,"surname":3783,"affiliation":63,"orcid":63},"Katalin","Mády","The advancement of automatic speech recognition (ASR) has been largely enhanced by extensive 
datasets in high-resource languages, while languages such as Hungarian remain underrepresented due to limited spontaneous and conversational corpora. To address this gap, we introduce two new datasets – BEA-Large and BEA-Dialogue – constructed from the previously unprocessed portions of the Hungarian speech corpus named BEA. BEA-Large extends BEA-Base with 255 hours of spontaneous speech from 433 speakers, enriched with detailed segment-level metadata. BEA-Dialogue, comprising 85 hours of spontaneous conversations, is a Hungarian speech corpus featuring natural dialogues partitioned into speaker-independent subsets, supporting research in conversational ASR and speaker diarization. We establish reproducible baselines on these datasets using publicly available ASR models, with the fine-tuned Fast Conformer model achieving word error rates as low as 14.18% on spontaneous and 4.8% on repeated speech. Diarization experiments yield diarization error rates between 12.46% and 17.40%, providing reference points for future improvements. The results highlight the persistent difficulty of conversational ASR, particularly due to disfluencies, overlaps, and informal speech patterns. 
By releasing these datasets and baselines, we aim to advance Hungarian speech technology and offer a methodological framework for developing spontaneous and conversational benchmarks in other languages.",{"paper_id":3786,"title":3787,"year":7,"month":188,"day":63,"doi":3788,"resource_url":3789,"first_page":3790,"last_page":3791,"pdf_url":3792,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3793,"paper_type":860,"authors":3794,"abstract":3845},"lrec2026-main-122","Developing the German Medical Text Corpus (GeMTeX): Legal Compliance and Semantic Enrichment","10.63317\u002F4eqiegnqbu96","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-122","1571","1584","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.122.pdf","hofenbitzer-etal-2026-developing",[3795,3798,3801,3803,3806,3809,3812,3814,3817,3820,3823,3826,3828,3831,3834,3837,3840,3842],{"paper_id":3786,"author_seq":247,"given_name":3796,"surname":3797,"affiliation":63,"orcid":63},"Justin","Hofenbitzer",{"paper_id":3786,"author_seq":232,"given_name":3799,"surname":3800,"affiliation":63,"orcid":63},"Christina","Lohr",{"paper_id":3786,"author_seq":218,"given_name":1104,"surname":3802,"affiliation":63,"orcid":63},"Riedel",{"paper_id":3786,"author_seq":203,"given_name":3804,"surname":3805,"affiliation":63,"orcid":63},"Rebekka","Kiser",{"paper_id":3786,"author_seq":188,"given_name":3807,"surname":3808,"affiliation":63,"orcid":63},"Aliaksandra","Shutsko",{"paper_id":3786,"author_seq":172,"given_name":3810,"surname":3811,"affiliation":63,"orcid":63},"Abanoub","Abdelmalak",{"paper_id":3786,"author_seq":155,"given_name":1625,"surname":3813,"affiliation":63,"orcid":63},"Klügl",{"paper_id":3786,"author_seq":138,"given_name":3815,"surname":3816,"affiliation":63,"orcid":63},"Jutta","Romberg",{"paper_id":3786,"author_seq":121,"given_name":3818,"surname":3819,"affiliation":63,"orcid":63},"Sarah","Riepenhausen",{"paper_id":3786,"author_seq":104,"given_name
":3821,"surname":3822,"affiliation":63,"orcid":63},"Miriam","Schechner",{"paper_id":3786,"author_seq":87,"given_name":3824,"surname":3825,"affiliation":63,"orcid":63},"Jakob","Faller",{"paper_id":3786,"author_seq":73,"given_name":2510,"surname":3827,"affiliation":63,"orcid":63},"Meineke",{"paper_id":3786,"author_seq":55,"given_name":3829,"surname":3830,"affiliation":63,"orcid":63},"Luise","Modersohn",{"paper_id":3786,"author_seq":38,"given_name":3832,"surname":3833,"affiliation":63,"orcid":63},"Markus","Löffler",{"paper_id":3786,"author_seq":17,"given_name":3835,"surname":3836,"affiliation":63,"orcid":63},"Juliane","Fluck",{"paper_id":3786,"author_seq":2971,"given_name":3838,"surname":3839,"affiliation":63,"orcid":63},"Udo","Hahn",{"paper_id":3786,"author_seq":2974,"given_name":2813,"surname":3841,"affiliation":63,"orcid":63},"Schulz",{"paper_id":3786,"author_seq":857,"given_name":3843,"surname":3844,"affiliation":63,"orcid":63},"Martin","Boeker","GeMTeX is a large-scale German Medical Text Corpus project with the goal to publish a clinical national reference corpus. The resource is currently under construction and comprises, as of February 2026, more than 15k clinical documents (20M tokens) from six German university hospitals. When building GeMTeX, attention was paid to comply with European regulatory requirements. In phase I, patients were asked to allow reuse of their clinical documents based on the legal foundation of an \"informed consent\". In phase II, consented documents from six major clinical sites in Germany underwent a thorough de-identification process. In phase III, we currently enrich this unlocked dataset with semantic information from the clinical domain. This annotation process is guided by Snomed CT, which supports to directly ground expressions within clinical documents in a worldwide shared medical documentation and ontology standard. 
The resource is currently under active development and is accessible upon request under controlled access conditions. We refer interested researchers to visit https:\u002F\u002Fkiinformatik.mri.tum.de\u002Fen\u002Fgemtex or reach out via gemtex.mi@mh.tum.de.",{"paper_id":3847,"title":3848,"year":7,"month":188,"day":63,"doi":3849,"resource_url":3850,"first_page":3851,"last_page":3852,"pdf_url":3853,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3854,"paper_type":860,"authors":3855,"abstract":3864},"lrec2026-main-123","MaiChat: A Text-based Dialogue Corpus Rich in Conversational Features","10.63317\u002F3kpp3zj47d6d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-123","1585","1594","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.123.pdf","dao-etal-2026-maichat",[3856,3859,3862],{"paper_id":3847,"author_seq":247,"given_name":3857,"surname":3858,"affiliation":63,"orcid":63},"Mai Hoang","Dao",{"paper_id":3847,"author_seq":232,"given_name":3860,"surname":3861,"affiliation":63,"orcid":63},"Catherine","Lai",{"paper_id":3847,"author_seq":218,"given_name":1625,"surname":3863,"affiliation":63,"orcid":63},"Bell","We present a new English corpus of typed instant-messaging dialogues that includes detailed timing information. Messages are collected from interactions between pairs who know each other well; the corpus is rich in typed features that augment the purely lexical, including hesitations, self-corrections, expressive respellings, and other markers of spontaneous interaction. Messages are collected using a custom-built chat platform that logs not only message content but also keystroke dynamics, screen activity, and demographic metadata. Designed with a transparent and reproducible protocol, the corpus enables scalable data collection while ensuring privacy and consent. 
We intend that the rich collection of features collected will facilitate future research in areas such as cognitive modelling, human–computer interaction, and conversational AI.",{"paper_id":3866,"title":3867,"year":7,"month":188,"day":63,"doi":3868,"resource_url":3869,"first_page":3870,"last_page":3871,"pdf_url":3872,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3873,"paper_type":860,"authors":3874,"abstract":3903},"lrec2026-main-124","Saudi ASWAT: A Large-Scale Corpus of Spontaneous Saudi Arabic Speech","10.63317\u002F5dmcn5u57hg6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-124","1595","1602","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.124.pdf","alharbi-etal-2026-saudi",[3875,3878,3881,3884,3886,3888,3891,3894,3897,3900],{"paper_id":3866,"author_seq":247,"given_name":3876,"surname":3877,"affiliation":63,"orcid":63},"Abdullah I.","Alharbi",{"paper_id":3866,"author_seq":232,"given_name":3879,"surname":3880,"affiliation":63,"orcid":63},"Afrah A.","Altamimi",{"paper_id":3866,"author_seq":218,"given_name":3882,"surname":3883,"affiliation":63,"orcid":63},"Muneera","Alhoshan",{"paper_id":3866,"author_seq":203,"given_name":1691,"surname":3885,"affiliation":63,"orcid":63},"Almazrua",{"paper_id":3866,"author_seq":188,"given_name":3887,"surname":3877,"affiliation":63,"orcid":63},"Halah Munif",{"paper_id":3866,"author_seq":172,"given_name":3889,"surname":3890,"affiliation":63,"orcid":63},"Bayan M.","Almuqhim",{"paper_id":3866,"author_seq":155,"given_name":3892,"surname":3893,"affiliation":63,"orcid":63},"Hawra","Aljasim",{"paper_id":3866,"author_seq":138,"given_name":3895,"surname":3896,"affiliation":63,"orcid":63},"Abdulrahman","Alosaimy",{"paper_id":3866,"author_seq":121,"given_name":3898,"surname":3899,"affiliation":63,"orcid":63},"Yahya A.","Asiri",{"paper_id":3866,"author_seq":104,"given_name":3901,"surname":3902,"affiliation":63,"orcid":63},"Abdullah","Alfaifi","Spontaneous 
Arabic speech is scarce in current corpora, and it is not well represented. This poses a limitation in the visibility of spontaneous Arabic to automatic speech recognition (ASR), speaker diarization, and sociolinguistic research. The Saudi ASWAT project fills a major gap by creating the first nationwide corpus of natural Saudi speech, where data has been recorded and transcribed under a systematic methodology and ecologically valid conditions. The corpus aims to collect 2,500 hours of natural conversations from a diverse range of participants. These have been selected from five major Saudi regional varieties, Najdi (Central), Eastern, Hijazi (Western), Northern, and Southern, covering more than fifty-five local varieties. Speech has been recorded by trained fieldworkers using participants' own devices to reflect real-life variation. The annotated data incorporate a variety of speaker demographics, regional vocabularies which differ from the standard lexicon, and structured metadata. TF–IDF profiling shows regional differences in a range of performing words. Data also represent balanced age and gender sampling to support studies of intergenerational and sociophonetic variation. Saudi ASWAT provides the most linguistically diverse resources of Saudi Arabia to date. 
Additionally, it establishes an ethically governed framework for Arabic speech data creation to enable advances in both computational modeling and linguistic research.",{"paper_id":3905,"title":3906,"year":7,"month":188,"day":63,"doi":3907,"resource_url":3908,"first_page":3909,"last_page":3910,"pdf_url":3911,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3912,"paper_type":860,"authors":3913,"abstract":3921},"lrec2026-main-125","SciCiteVal: A Multi-Domain Dataset for Scientific Citation Verification","10.63317\u002F4m84m2k77g97","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-125","1603","1611","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.125.pdf","liu-etal-2026-sciciteval",[3914,3917,3918],{"paper_id":3905,"author_seq":247,"given_name":3915,"surname":3916,"affiliation":63,"orcid":63},"Qinyue","Liu",{"paper_id":3905,"author_seq":232,"given_name":1038,"surname":1039,"affiliation":63,"orcid":63},{"paper_id":3905,"author_seq":218,"given_name":3919,"surname":3920,"affiliation":63,"orcid":63},"Cyril","Labbe","Citations are an integral and important part of scientific papers. However, there exist erroneous citations ranging from careless mistakes to deliberate misconduct, and there are currently few studies or benchmark datasets dedicated to automated citation verification. To bridge this gap, we introduce SciCiteVal, a novel, manually annotated dataset for citation verification. Each instance in SciCiteVal pairs a citation context from a citing paper with the corresponding evidence passage extracted from the full text of the cited source. The dataset features a comprehensive taxonomy, where each citation is annotated as \"Correct”, \"Incorrect”, or \"Unrelated”, with the \"Incorrect” category further divided into five fine-grained sub-categories. 
The completed dataset comprises over 1,000 annotated citations, distributed as 302 \"Correct”, 302 \"Incorrect”, and 430 \"Unrelated” instances. We establish a benchmark by evaluating different Large Language Models (LLMs), providing baseline performance and a detailed analysis. We release SciCiteVal as a resource to support the development of citation verification systems and to facilitate research on evidence-based tasks.",{"paper_id":3923,"title":3924,"year":7,"month":188,"day":63,"doi":3925,"resource_url":3926,"first_page":3927,"last_page":3928,"pdf_url":3929,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3930,"paper_type":860,"authors":3931,"abstract":3941},"lrec2026-main-126","RuznamceNER: A Named Entity Recognition Dataset for Ottoman Turkish","10.63317\u002F24exq9vqk5qx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-126","1612","1621","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.126.pdf","tasdemir-etal-2026-ruznamcener",[3932,3935,3938],{"paper_id":3923,"author_seq":247,"given_name":3933,"surname":3934,"affiliation":63,"orcid":63},"Esma Fatıma Bilgin","Tasdemir",{"paper_id":3923,"author_seq":232,"given_name":3936,"surname":3937,"affiliation":63,"orcid":63},"Dilara Zeynep","Gürer",{"paper_id":3923,"author_seq":218,"given_name":3939,"surname":3940,"affiliation":63,"orcid":63},"Saziye Betul","Ozates","Named Entity Recognition (NER) in historical texts poses distinct challenges. Language change reflected in spelling variations, archaic vocabulary, and inconsistent orthography, diminish the efficacy of models trained on contemporary corpora. The limited availability of annotated historical datasets constrains the development and evaluation of accurate, domain-specific NER systems, underscoring the necessity for specialized approaches and domain adaptation. 
In this work, we introduce the ruznamçe registers as a valuable digital historical resource with broad potential for diverse NLP applications. Our primary contribution is RuznamceNER, a manually annotated NER dataset derived from ruznamçe documents spanning two centuries. The dataset contains 2,138 sentences and a total of 8,730 annotated entities of types PERSON, LOCATION and ORGANIZATION. We further report evaluation results using a BERT-CRF baseline model pre-trained with modern Turkish, highlighting the pivotal importance of in-domain training data for effective NER in historical contexts. Experimental results on the RuznamceNER test set under various training configurations show that even a small amount of supervised in-domain data can yield robust performance for well-structured texts, despite significant lexical and orthographic differences between historical and modern language forms",{"paper_id":3943,"title":3944,"year":7,"month":188,"day":63,"doi":3945,"resource_url":3946,"first_page":3947,"last_page":3948,"pdf_url":3949,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3950,"paper_type":860,"authors":3951,"abstract":3972},"lrec2026-main-127","Scripting History: A Diachronic Urdu Text and Image Corpus from the 18Th to 19Th 
Centuries","10.63317\u002F48tdw3hsxp47","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-127","1622","1632","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.127.pdf","shams-etal-2026-scripting",[3952,3955,3958,3961,3964,3967,3970],{"paper_id":3943,"author_seq":247,"given_name":3953,"surname":3954,"affiliation":63,"orcid":63},"Sana","Shams",{"paper_id":3943,"author_seq":232,"given_name":3956,"surname":3957,"affiliation":63,"orcid":63},"Sahar","Rauf",{"paper_id":3943,"author_seq":218,"given_name":3959,"surname":3960,"affiliation":63,"orcid":63},"Asad","Mustafa",{"paper_id":3943,"author_seq":203,"given_name":3962,"surname":3963,"affiliation":63,"orcid":63},"Muhammad Zeeshan","Javed",{"paper_id":3943,"author_seq":188,"given_name":3965,"surname":3966,"affiliation":63,"orcid":63},"Qurat-ul-Ain","Akram",{"paper_id":3943,"author_seq":172,"given_name":3968,"surname":3969,"affiliation":63,"orcid":63},"Sarmad","Hussain",{"paper_id":3943,"author_seq":155,"given_name":3821,"surname":3971,"affiliation":63,"orcid":63},"Butt","This paper presents the Diachronic Urdu Text and Image Corpus, a one-million-word resource covering Urdu’s development across the 18th and 19th centuries. The corpus is compiled from 328 printed books published between 1800 and 1950, representing a diverse range of genres, authors, and publishers. A 140,000-word sub-corpus has been manually annotated with Urdu part-of-speech tags to facilitate linguistic and computational analysis. The dataset enables systematic investigation of historical changes in Urdu orthography, morphology, and syntax, providing new insights into the language’s history and standardization. To preserve the original printed form, each text is paired with its corresponding page image, creating the first multimodal diachronic corpus for Urdu. 
The paper outlines the corpus compilation pipeline, digitization workflow, text-image alignment, and annotation strategy designed to ensure accuracy, consistency, and authenticity. This multimodal Urdu diachronic corpus establishes a benchmark for research in computational linguistics, digital humanities, and South Asian language technology, supporting corpus-based exploration of Urdu’s linguistic history and cultural heritage.",{"paper_id":3974,"title":3975,"year":7,"month":188,"day":63,"doi":3976,"resource_url":3977,"first_page":3978,"last_page":3979,"pdf_url":3980,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3981,"paper_type":860,"authors":3982,"abstract":3987},"lrec2026-main-128","IREKIER: An Easy Read Corpus for Basque and Spanish","10.63317\u002F2e96m595cmfg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-128","1633","1649","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.128.pdf","calleja-etal-2026-irekier",[3983,3986],{"paper_id":3974,"author_seq":247,"given_name":3984,"surname":3985,"affiliation":63,"orcid":63},"Jesús","Calleja",{"paper_id":3974,"author_seq":232,"given_name":1270,"surname":1271,"affiliation":63,"orcid":63},"Easy Read (ER) text adaptation is one of the main means to provide accessible content for people with reading difficulties. ER text features aspects of text simplification, along with specific characteristics such as the need for short sentences, clearly structured content, and explanations for complex concepts. Support for ER text generation is still lacking overall, with few available resources to build automated systems upon. In this work, we describe the IREKIER corpus, based on ER news in Basque and Spanish from the Irekia transparency portal of the Basque Government. 
This corpus is currently one of the largest publicly shared resources to support training and evaluation of ER text adaptation models in these two languages, and the first of its kind for Basque. We describe our methodology to create the resource, along with the specific challenges raised by ER text. We also provide both intrinsic and extrinsic evaluations of the corpus, which is shared with the scientific community under a CC-BY-NC-ND 4.0 license.",{"paper_id":3989,"title":3990,"year":7,"month":188,"day":63,"doi":3991,"resource_url":3992,"first_page":3993,"last_page":3994,"pdf_url":3995,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3996,"paper_type":860,"authors":3997,"abstract":4005},"lrec2026-main-129","MekongPhon: A Large-Scale Parallel IPA Corpus for Lao and Khmer","10.63317\u002F4bb9rvdshu4z","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-129","1650","1658","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.129.pdf","shurtz-etal-2026-mekongphon",[3998,4001,4003],{"paper_id":3989,"author_seq":247,"given_name":3999,"surname":4000,"affiliation":63,"orcid":63},"Ammon","Shurtz",{"paper_id":3989,"author_seq":232,"given_name":3643,"surname":4002,"affiliation":63,"orcid":63},"Richardson",{"paper_id":3989,"author_seq":218,"given_name":4004,"surname":4002,"affiliation":63,"orcid":63},"Stephen D.","High-quality International Phonetic Alphabet (IPA) transcriptions are a foundational resource for speech and language technologies, yet existing tools for many low-resource languages remain limited in accuracy and scope. In this work, we present MekongPhon, a large-scale, high-quality parallel IPA corpus for Lao and Khmer. The corpus contains 1.3 million Khmer and 367 thousand Lao orthographic–IPA pairs, meticulously aligned and verified. 
When used to train Transformer-based sequence-to-sequence models, MekongPhon enables exceptionally accurate IPA generation, achieving under 2% Character Error Rate (CER) on held-out test sets. We further introduce linguistically informed Lao and Khmer transliteration tools that offer high-speed IPA conversion, outperforming Epitran by 6-71 CER points despite trading some accuracy for efficiency. All data, code, and pretrained models are publicly released to support future research and development in low-resource language technologies.",{"paper_id":4007,"title":4008,"year":7,"month":188,"day":63,"doi":4009,"resource_url":4010,"first_page":4011,"last_page":4012,"pdf_url":4013,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4014,"paper_type":860,"authors":4015,"abstract":4046},"lrec2026-main-130","CorSpell: Introducing a Semiautomatic Tool for Spelling Normalization in Brazilian Portuguese","10.63317\u002F22uey2oj2w49","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-130","1659","1667","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.130.pdf","schoffen-etal-2026-corspell",[4016,4019,4022,4025,4028,4029,4032,4035,4038,4041,4044],{"paper_id":4007,"author_seq":247,"given_name":4017,"surname":4018,"affiliation":63,"orcid":63},"Juliana","Schoffen",{"paper_id":4007,"author_seq":232,"given_name":4020,"surname":4021,"affiliation":63,"orcid":63},"Dennis Giovani","Balreira",{"paper_id":4007,"author_seq":218,"given_name":4023,"surname":4024,"affiliation":63,"orcid":63},"Elisa Marchioro","Stumpf",{"paper_id":4007,"author_seq":203,"given_name":4026,"surname":4027,"affiliation":63,"orcid":63},"Larissa","Goulart",{"paper_id":4007,"author_seq":188,"given_name":3691,"surname":3692,"affiliation":63,"orcid":63},{"paper_id":4007,"author_seq":172,"given_name":4030,"surname":4031,"affiliation":63,"orcid":63},"Rafael 
Oleques","Nunes",{"paper_id":4007,"author_seq":155,"given_name":4033,"surname":4034,"affiliation":63,"orcid":63},"Gabriel Ricci","Pazzinato",{"paper_id":4007,"author_seq":138,"given_name":4036,"surname":4037,"affiliation":63,"orcid":63},"Isadora Dahmer","Hanauer",{"paper_id":4007,"author_seq":121,"given_name":4039,"surname":4040,"affiliation":63,"orcid":63},"José Henrique de Souza","Silva",{"paper_id":4007,"author_seq":104,"given_name":4042,"surname":4043,"affiliation":63,"orcid":63},"Luiza Sarmento","Divino",{"paper_id":4007,"author_seq":87,"given_name":2599,"surname":4045,"affiliation":63,"orcid":63},"Matte","With the growing availability of large text collections, efficient tools for corpus annotation and normalization have become increasingly important in linguistic and computational research. This paper presents CorSpell, a semiautomatic tool developed to support the spelling normalization of Brazilian Portuguese texts within the CorCel project—a corpus comprising over 15,000 handwritten exam responses from the Celpe-Bras proficiency test. Given the corpus scale, manual normalization is impractical; CorSpell streamlines this process by enabling users to visualize, select, and replace tokens directly through an intuitive web interface. The tool integrates automatic suggestions from PT-BR dictionaries with human validation, providing an interface for users to access and manipulate the texts. 
CorSpell significantly reduces annotation time, minimizes errors, and facilitates collaborative work, providing a practical and scalable solution for corpus normalization and a foundation for LLM-based modeling of Portuguese proficiency.",{"paper_id":4048,"title":4049,"year":7,"month":188,"day":63,"doi":4050,"resource_url":4051,"first_page":4052,"last_page":4053,"pdf_url":4054,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4055,"paper_type":860,"authors":4056,"abstract":4076},"lrec2026-main-131","Meta4XNLI-ptBR: Brazilian Portuguese Extension of Meta4XNLI Corpus","10.63317\u002F45566xcgz65x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-131","1668","1676","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.131.pdf","johansson-etal-2026-meta4xnli",[4057,4060,4063,4065,4067,4070,4073],{"paper_id":4048,"author_seq":247,"given_name":4058,"surname":4059,"affiliation":63,"orcid":63},"Karina","Johansson",{"paper_id":4048,"author_seq":232,"given_name":4061,"surname":4062,"affiliation":63,"orcid":63},"Fernanda","Assi",{"paper_id":4048,"author_seq":218,"given_name":4064,"surname":4040,"affiliation":63,"orcid":63},"Isabella da",{"paper_id":4048,"author_seq":203,"given_name":1586,"surname":4066,"affiliation":63,"orcid":63},"Passador",{"paper_id":4048,"author_seq":188,"given_name":4068,"surname":4069,"affiliation":63,"orcid":63},"Isabela","Rodrigues",{"paper_id":4048,"author_seq":172,"given_name":4071,"surname":4072,"affiliation":63,"orcid":63},"Aline","Paes",{"paper_id":4048,"author_seq":155,"given_name":4074,"surname":4075,"affiliation":63,"orcid":63},"Helena","Caseli","Metaphor is a pervasive phenomenon in language that shapes how people conceptualize and communicate complex ideas. Detecting and interpreting metaphor is not only relevant for linguistic theory but also for many Natural Language Processing (NLP) applications, from machine translation to sentiment analysis, to mention a few. 
Despite its relevance, no open-source annotated corpus of metaphors exists for one of the world’s most widely spoken languages: Brazilian Portuguese. This paper addresses this gap by presenting an extension of Meta4XNLI, Meta4XNLI-ptBR, with token-level metaphor annotation in Brazilian Portuguese. To achieve this, we propose a pipeline that combines automatic translation via language models with human annotation, following guidelines adapted from MIPVU and Meta4XNLI. The final corpus contains 1,784 human-annotated sentences, of which 42.26% contain at least one metaphorical token. To our knowledge, this is the first open corpus of its kind for Brazilian Portuguese, and it is already freely available.",{"paper_id":4078,"title":4079,"year":7,"month":188,"day":63,"doi":4080,"resource_url":4081,"first_page":4082,"last_page":4083,"pdf_url":4084,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4085,"paper_type":860,"authors":4086,"abstract":4093},"lrec2026-main-132","More than \"Oh\": Grounding Observable Events with Grunts in Multimodal Dialogue","10.63317\u002F43opy9tsucf3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-132","1677","1687","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.132.pdf","brutti-etal-2026-more",[4087,4090],{"paper_id":4078,"author_seq":247,"given_name":4088,"surname":4089,"affiliation":63,"orcid":63},"Richard A.","Brutti",{"paper_id":4078,"author_seq":232,"given_name":4091,"surname":4092,"affiliation":63,"orcid":63},"James","Pustejovsky","Conversational grunts (minimal vocalizations like oh, mm-hm, and uh-huh) ground information and coordinate understanding in human dialogue, yet computational systems typically treat them as noise rather than meaningful communicative acts. 
We present a systematic annotation and analysis of 497 grunts across 3 hours of multimodal collaborative tasks, introducing an annotation scheme that captures grunts, their antecedents, and dialogue act functions. Our analysis reveals that grunts respond to speech and observable events at nearly equal rates, demonstrating that non-verbal events function as conversational contributions requiring acknowledgment. Tokens exhibit functional specialization: mm-hm predominantly acknowledges speech, while oh preferentially acknowledges events. Prosodic analysis shows speakers systematically modulate duration and pitch based on antecedent type, with event responses typically longer and having greater range. These findings have implications for dialogue state tracking, multimodal grounding, and turn-taking in conversational AI systems.",{"paper_id":4095,"title":4096,"year":7,"month":188,"day":63,"doi":4097,"resource_url":4098,"first_page":4099,"last_page":4100,"pdf_url":4101,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4102,"paper_type":860,"authors":4103,"abstract":4118},"lrec2026-main-133","COME-ALPs: Coreference Annotation with MErging Heuristics Using ALignment-based Projection in Parallel Corpora","10.63317\u002F2ohkaq9ps5hd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-133","1688","1695","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.133.pdf","saez-etal-2026-come",[4104,4107,4110,4113,4116],{"paper_id":4095,"author_seq":247,"given_name":4105,"surname":4106,"affiliation":63,"orcid":63},"Gabriela Nicole 
Gonzalez","Saez",{"paper_id":4095,"author_seq":232,"given_name":4108,"surname":4109,"affiliation":63,"orcid":63},"Mariam","Nakhle",{"paper_id":4095,"author_seq":218,"given_name":4111,"surname":4112,"affiliation":63,"orcid":63},"Illia","Kholosha",{"paper_id":4095,"author_seq":203,"given_name":4114,"surname":4115,"affiliation":63,"orcid":63},"Rachel","Atherly",{"paper_id":4095,"author_seq":188,"given_name":2146,"surname":4117,"affiliation":63,"orcid":63},"Dinarelli","Multi-lingual, parallel datasets annotated with discourse phenomena like coreferences are a rare resource. These datasets are useful and informative to evaluate models for NLP tasks taking long contextual information into account, as proved by the large literature published in the last couple of years on e.g. Context-Aware Neural Machine Translation (CA-NMT). Inspired by resources published in previous work, in this paper we propose an automated procedure to annotate multi-lingual, parallel data with coreferences. Through the use of accurate alignment and coreference annotation tools, we project the annotation from English data, where tools are most often more accurate, to one or more target languages. We apply some consistency constraints to obtain more accurate annotations on both source and target side. Using our procedure we generated two new resources that can be used for evaluating CA-NMT models. One starting from the well-known TED Talk’s data released for the IWSLT17 shared task, where we project the annotation from English to target languages as diverse as French, German and Chinese. The second resource is derived from the WMT24 shared task, consisting of news domain data in the same set of target languages. 
We release these resources, as well as the code framework for applying our annotation procedure, to the community.",{"paper_id":4120,"title":4121,"year":7,"month":188,"day":63,"doi":4122,"resource_url":4123,"first_page":4124,"last_page":4125,"pdf_url":4126,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4127,"paper_type":860,"authors":4128,"abstract":4163},"lrec2026-main-134","MEUR: A Benchmark for Evaluating Vision-Language Models on Multimodal Event Understanding and Reasoning","10.63317\u002F4ftadqtyt374","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-134","1696","1709","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.134.pdf","wang-etal-2026-meur",[4129,4131,4133,4135,4138,4141,4144,4147,4149,4151,4153,4155,4157,4160,4162],{"paper_id":4120,"author_seq":247,"given_name":4130,"surname":3676,"affiliation":63,"orcid":63},"Zimu",{"paper_id":4120,"author_seq":232,"given_name":4132,"surname":3676,"affiliation":63,"orcid":63},"Yuqi",{"paper_id":4120,"author_seq":218,"given_name":4134,"surname":1840,"affiliation":63,"orcid":63},"Tong",{"paper_id":4120,"author_seq":203,"given_name":4136,"surname":4137,"affiliation":63,"orcid":63},"Changyu","Zeng",{"paper_id":4120,"author_seq":188,"given_name":4139,"surname":4140,"affiliation":63,"orcid":63},"Hongbin","Na",{"paper_id":4120,"author_seq":172,"given_name":4142,"surname":4143,"affiliation":63,"orcid":63},"Nijia","Han",{"paper_id":4120,"author_seq":155,"given_name":4145,"surname":4146,"affiliation":63,"orcid":63},"Fuyu","Xing",{"paper_id":4120,"author_seq":138,"given_name":4148,"surname":1840,"affiliation":63,"orcid":63},"Qi",{"paper_id":4120,"author_seq":121,"given_name":4150,"surname":3676,"affiliation":63,"orcid":63},"Qiufeng",{"paper_id":4120,"author_seq":104,"given_name":4152,"surname":2395,"affiliation":63,"orcid":63},"Anh",{"paper_id":4120,"author_seq":87,"given_name":4154,"surname":3676,"affiliation":63,"orcid":63},"Shuihua",{"paper_id":
4120,"author_seq":73,"given_name":4156,"surname":1840,"affiliation":63,"orcid":63},"Ling",{"paper_id":4120,"author_seq":55,"given_name":4158,"surname":4159,"affiliation":63,"orcid":63},"Jionglong","Su",{"paper_id":4120,"author_seq":38,"given_name":4161,"surname":1519,"affiliation":63,"orcid":63},"Haiyang",{"paper_id":4120,"author_seq":17,"given_name":3270,"surname":3676,"affiliation":63,"orcid":63},"Event understanding and reasoning play critical roles in thoroughly evaluating the capabilities of Vision-Language Models (VLMs); however, existing Visual Question Answering (VQA) datasets predominantly focus on entity-centric questions, while event- or action-related questions are limited in scale and suffer from significant shortcut issues. We introduce MEUR, the first Multimodal Event Understanding and Reasoning dataset consisting of 1,200 images and 4,217 questions, necessitating VLMs with a diverse range of multimodal understanding and reasoning capabilities to answer, ranging from basic event recognition to more complex tasks such as counting and comparison. To streamline the annotation process, we propose a novel semi-automated pipeline that combines advanced VLMs with human annotators, achieving high quality and efficiency. We conduct extensive experiments on state-of-the-art non-thinking and thinking VLMs to demonstrate their capabilities and limitations in multimodal event understanding and reasoning. 
Furthermore, we provide a detailed error analysis that points out promising directions for future research.",{"paper_id":4165,"title":4166,"year":7,"month":188,"day":63,"doi":4167,"resource_url":4168,"first_page":4169,"last_page":4170,"pdf_url":4171,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4172,"paper_type":860,"authors":4173,"abstract":4183},"lrec2026-main-135","Building Collaborative Speech Corpora for Low-Resource Languages: The Galician Dataset in Mozilla Common Voice","10.63317\u002F4gd79cq3cump","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-135","1710","1720","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.135.pdf","vladu-etal-2026-building",[4174,4177,4180],{"paper_id":4165,"author_seq":247,"given_name":4175,"surname":4176,"affiliation":63,"orcid":63},"Adina Ioana","Vladu",{"paper_id":4165,"author_seq":232,"given_name":4178,"surname":4179,"affiliation":63,"orcid":63},"Elisa Fernández","Rei",{"paper_id":4165,"author_seq":218,"given_name":4181,"surname":4182,"affiliation":63,"orcid":63},"María Pérez","Lago","This paper presents the methodology and outcomes of building collaborative speech corpora in Mozilla Common Voice (MCV), focusing on the Galician case within Proxecto Nós. We describe the organization of voice collection campaigns –on-site events, student participation, Validatón marathons, and corporate collaboration– and analyze the results in MCV v22.0. While the dataset has achieved a modest scale, major gaps remain in metadata completeness and dialectal tagging, with implications for ASR performance. Drawing on our experience, we highlight effective strategies for engagement, such as transparent communication, cultural identification, and user-friendly tools. We conclude with lessons learnt for improving data representativeness, participant retention, and ethical governance. 
The observations are specific to the Galician case study but may inform similar efforts in other lesser-resourced languages.",{"paper_id":4185,"title":4186,"year":7,"month":188,"day":63,"doi":4187,"resource_url":4188,"first_page":4189,"last_page":4190,"pdf_url":4191,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4192,"paper_type":860,"authors":4193,"abstract":4206},"lrec2026-main-136","Frame-Guided Synthetic Claim Generation for Automatic Fact-Checking Using High-Volume Tabular Data","10.63317\u002F23pcoy44ykq9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-136","1721","1731","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.136.pdf","devasier-etal-2026-frame",[4194,4196,4199,4201,4204],{"paper_id":4185,"author_seq":247,"given_name":1606,"surname":4195,"affiliation":63,"orcid":63},"Devasier",{"paper_id":4185,"author_seq":232,"given_name":4197,"surname":4198,"affiliation":63,"orcid":63},"Akshith","Putta",{"paper_id":4185,"author_seq":218,"given_name":4200,"surname":3676,"affiliation":63,"orcid":63},"Qing",{"paper_id":4185,"author_seq":203,"given_name":4202,"surname":4203,"affiliation":63,"orcid":63},"Alankrit","Moses",{"paper_id":4185,"author_seq":188,"given_name":4205,"surname":3446,"affiliation":63,"orcid":63},"Chengkai","Automated fact-checking benchmarks have largely ignored the challenge of verifying claims against real-world, high-volume structured data, instead focusing on small, curated tables. We introduce a new large-scale, multilingual dataset to address this critical gap. It contains 78,503 synthetic claims grounded in 434 complex OECD tables, which average over 500K rows each. We propose a novel, frame-guided methodology where algorithms programmatically select significant data points based on six semantic frames to generate realistic claims in English, Chinese, Spanish, and Hindi. 
Crucially, we demonstrate through knowledge-probing experiments that LLMs have not memorized these facts, forcing systems to perform genuine retrieval and reasoning rather than relying on parameterized knowledge. We provide a baseline SQL-generation system and show that our benchmark is highly challenging. Our analysis identifies evidence retrieval as the primary bottleneck, with models struggling to find the correct data in massive tables. This dataset provides a critical new resource for advancing research on this unsolved, real-world problem.",{"paper_id":4208,"title":4209,"year":7,"month":188,"day":63,"doi":4210,"resource_url":4211,"first_page":4212,"last_page":4213,"pdf_url":4214,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4215,"paper_type":860,"authors":4216,"abstract":4231},"lrec2026-main-137","A Bilingual Bimodal Benchmark for Arabic-English NLP across Grammatical Correction, Essay Scoring, Morphological Tagging, and Speech Recognition","10.63317\u002F489vftd6umyh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-137","1732","1749","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.137.pdf","alhafni-etal-2026-bilingual",[4217,4220,4223,4226,4228],{"paper_id":4208,"author_seq":247,"given_name":4218,"surname":4219,"affiliation":63,"orcid":63},"Bashar","Alhafni",{"paper_id":4208,"author_seq":232,"given_name":4221,"surname":4222,"affiliation":63,"orcid":63},"Injy","Hamed",{"paper_id":4208,"author_seq":218,"given_name":4224,"surname":4225,"affiliation":63,"orcid":63},"Fadhl","Eryani",{"paper_id":4208,"author_seq":203,"given_name":1061,"surname":4227,"affiliation":63,"orcid":63},"Palfreyman",{"paper_id":4208,"author_seq":188,"given_name":4229,"surname":4230,"affiliation":63,"orcid":63},"Nizar","Habash","Building comprehensive datasets that support a variety of NLP tasks and cover a diversity of languages and domains is vital for NLP evaluation purposes. 
In this paper, we present ZAEBUC*, a dataset that builds upon and enriches prior corpora with new annotations and benchmarking experiments. ZAEBUC* serves as a benchmark for a range of NLP tasks, including grammatical error correction, automated essay scoring, automatic speech recognition, and morphological tagging, which includes tokenization, part-of-speech tagging, and lemmatization. The dataset covers Arabic and English in both written and spoken forms, offering a bilingual and bimodal resource. Furthermore, the corpus brings together a collection of resources gathered from a similar population, enabling cross-linguistic and cross-modal comparisons. We provide benchmarking results, demonstrating the performance of NLP models, including LLMs, across various tasks, languages, and modalities.",{"paper_id":4233,"title":4234,"year":7,"month":188,"day":63,"doi":4235,"resource_url":4236,"first_page":4237,"last_page":4238,"pdf_url":4239,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4240,"paper_type":860,"authors":4241,"abstract":4266},"lrec2026-main-138","Developing a Guideline for the Labovian-Structural Analysis of Oral Narratives in 
Japanese","10.63317\u002F3bfu9oz7fze4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-138","1750","1760","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.138.pdf","watahiki-etal-2026-developing",[4242,4245,4248,4251,4254,4257,4260,4263],{"paper_id":4233,"author_seq":247,"given_name":4243,"surname":4244,"affiliation":63,"orcid":63},"Amane","Watahiki",{"paper_id":4233,"author_seq":232,"given_name":4246,"surname":4247,"affiliation":63,"orcid":63},"Tomoki","Doi",{"paper_id":4233,"author_seq":218,"given_name":4249,"surname":4250,"affiliation":63,"orcid":63},"Akari","Kikuchi",{"paper_id":4233,"author_seq":203,"given_name":4252,"surname":4253,"affiliation":63,"orcid":63},"Hiroshi","Ohata",{"paper_id":4233,"author_seq":188,"given_name":4255,"surname":4256,"affiliation":63,"orcid":63},"Yuki I.","Nakata",{"paper_id":4233,"author_seq":172,"given_name":4258,"surname":4259,"affiliation":63,"orcid":63},"Takuya","Niikawa",{"paper_id":4233,"author_seq":155,"given_name":4261,"surname":4262,"affiliation":63,"orcid":63},"Taiga","Shinozaki",{"paper_id":4233,"author_seq":138,"given_name":4264,"surname":4265,"affiliation":63,"orcid":63},"Hitomi","Yanaka","Narrative analysis is a cornerstone of qualitative research. One leading approach is the Labovian model, but its application is labor-intensive, requiring a holistic, recursive interpretive process that moves back and forth between individual parts of the transcript and the transcript as a whole. Existing Labovian datasets are available only in English, which differs markedly from Japanese in terms of grammar and discourse conventions. To address this gap, we introduce the first systematic guidelines for Labovian narrative analysis of Japanese narrative data. Our guidelines retain all six Labovian categories and extend the framework by providing explicit rules for clause segmentation tailored to Japanese constructions. 
In addition, our guidelines cover a broader range of clause types and narrative types. Using these guidelines, annotators achieved high agreement in clause segmentation (Fleiss’ kappa = 0.80) and moderate agreement in two structural classification tasks (Krippendorff’s alpha = 0.41 and 0.45, respectively), one of which is slightly higher than that found in prior work despite the use of finer-grained distinctions. This paper describes the Labovian model, the proposed guidelines, the annotation process, and their utility. It concludes by discussing the challenges encountered during the annotation process and the prospects for developing a larger dataset for structural narrative analysis in Japanese qualitative research.",{"paper_id":4268,"title":4269,"year":7,"month":188,"day":63,"doi":4270,"resource_url":4271,"first_page":4272,"last_page":4273,"pdf_url":4274,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4275,"paper_type":860,"authors":4276,"abstract":4287},"lrec2026-main-139","German General Social Survey Personas: A Survey-Derived Persona Prompt Collection for Population-Aligned LLM Studies","10.63317\u002F2sod6uekicbg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-139","1761","1780","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.139.pdf","rupprecht-etal-2026-german",[4277,4280,4282,4285],{"paper_id":4268,"author_seq":247,"given_name":4278,"surname":4279,"affiliation":63,"orcid":63},"Jens","Rupprecht",{"paper_id":4268,"author_seq":232,"given_name":2954,"surname":4281,"affiliation":63,"orcid":63},"Froehling",{"paper_id":4268,"author_seq":218,"given_name":4283,"surname":4284,"affiliation":63,"orcid":63},"Claudia","Wagner",{"paper_id":4268,"author_seq":203,"given_name":3832,"surname":4286,"affiliation":63,"orcid":63},"Strohmaier","The use of Large Language Models (LLMs) for simulating human perspectives via persona prompting is gaining traction in computational social science. 
However, well-curated, empirically grounded persona collections remain scarce, limiting the accuracy and representativeness of such simulations. Here, we introduce the German General Social Survey Personas (GGSS Personas) collection, a comprehensive and representative persona prompt collection built from the German General Social Survey (ALLBUS). The GGSS Personas and their persona prompts are designed to be easily plugged into prompts for all types of LLMs and tasks, steering models to generate responses aligned with the underlying German population. We evaluate GGSS Personas by prompting various LLMs to simulate survey response distributions across diverse topics, demonstrating that GGSS Personas-guided LLMs outperform state-of-the-art classifiers, particularly under data scarcity. Furthermore, we analyze how representativity and attribute selection within persona prompts affect alignment with population responses. Our findings suggest that GGSS Personas provide a potentially valuable resource for research on LLM-based social simulations that enables more systematic explorations of population-aligned persona prompting in NLP and social science research.",{"paper_id":4289,"title":4290,"year":7,"month":188,"day":63,"doi":4291,"resource_url":4292,"first_page":4293,"last_page":4294,"pdf_url":4295,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4296,"paper_type":860,"authors":4297,"abstract":4309},"lrec2026-main-140","Slovene Morphological and Word Formation Segmentation: A Novel Dataset and 
Evaluation","10.63317\u002F4f6rruft238c","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-140","1781","1793","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.140.pdf","pranji-etal-2026-slovene",[4298,4301,4304,4306],{"paper_id":4289,"author_seq":247,"given_name":4299,"surname":4300,"affiliation":63,"orcid":63},"Marko","Pranjić",{"paper_id":4289,"author_seq":232,"given_name":4302,"surname":4303,"affiliation":63,"orcid":63},"Boris","Kern",{"paper_id":4289,"author_seq":218,"given_name":1290,"surname":4305,"affiliation":63,"orcid":63},"Voršič",{"paper_id":4289,"author_seq":203,"given_name":4307,"surname":4308,"affiliation":63,"orcid":63},"Senja","Pollak","We introduce the first publicly available manually annotated dataset for morphological segmentation and word-formation analysis for Slovene, containing 1,935 words annotated by two domain experts. The dataset provides three types of linguistic information: morphological and word-formation segments with zero-morpheme and simplex annotations. We present a four-stage annotation approach achieving inter-annotator agreement of 86.80% Krippendorff’s Alpha for morphological segmentation and 85.16% for word-formation segments. Computational validation using a morphological segmentation model achieves 87.78% BPR F1 on morphological segmentation and 83.05% on word-formation segments. 
Despite being smaller than previous datasets derived from non-public resources, our dataset enables high performance and supports reproducible research for morphological analysis tools for Slovene.",{"paper_id":4311,"title":4312,"year":7,"month":188,"day":63,"doi":4313,"resource_url":4314,"first_page":4315,"last_page":4316,"pdf_url":4317,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4318,"paper_type":860,"authors":4319,"abstract":4326},"lrec2026-main-141","GePaDeU - a Multi-layer Corpus of German Parliamentary Debates with Rich Semantic and Pragmatic Annotations","10.63317\u002F5d28ibqd2nzk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-141","1794","1810","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.141.pdf","rehbein-etal-2026-gepadeu",[4320,4321,4322,4325],{"paper_id":4311,"author_seq":247,"given_name":1290,"surname":1291,"affiliation":63,"orcid":63},{"paper_id":4311,"author_seq":232,"given_name":1296,"surname":1297,"affiliation":63,"orcid":63},{"paper_id":4311,"author_seq":218,"given_name":4323,"surname":4324,"affiliation":63,"orcid":63},"Lars","Ostertag",{"paper_id":4311,"author_seq":203,"given_name":1299,"surname":1300,"affiliation":63,"orcid":63},"This paper presents GePaDeU, a new manually annotated corpus of German Parliamentary Debates with Unified layers of semantic and pragmatic information. The data includes parliamentary speeches from the German Bundestag, ranging over a time period from 2017–2021, with 267 speeches given by 197 members of parliament. The final release of our corpus unifies multiple annotation layers, including entity-level annotations, the annotation of speech events and their corresponding speakers, functional speech acts, clause-level aspect, and moral framing. 
We provide an overview of the various annotation layers and illustrate how the semantic and pragmatic annotations can be combined for corpus-linguistic studies and discourse analyses, and to answer research questions in the field of political science. The new resource will be made freely available for the research community.",{"paper_id":4328,"title":4329,"year":7,"month":188,"day":63,"doi":4330,"resource_url":4331,"first_page":4332,"last_page":4333,"pdf_url":4334,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4335,"paper_type":860,"authors":4336,"abstract":4343},"lrec2026-main-142","What Are LLMs Doing to Scientific Communication? Measuring Changes in Writing Practices and Reading Experience","10.63317\u002F3ai7wig4fhd8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-142","1811","1830","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.142.pdf","mileti-etal-2026-what",[4337,4340],{"paper_id":4328,"author_seq":247,"given_name":4338,"surname":4339,"affiliation":63,"orcid":63},"Filip","Miletić",{"paper_id":4328,"author_seq":232,"given_name":4341,"surname":4342,"affiliation":63,"orcid":63},"Neele","Falk","Has the style of scientific communication changed due to the growing use of large language models in the writing process? We address this question in the domain of Natural Language Processing by leveraging two data resources we create: a naturalistic corpus of over 37,000 papers from the ACL Anthology (2020–2024); and a synthetic dataset of 3,000 human-written passages and their LLM-generated improvements. We first implement a series of diachronic lexical analyses, showing that both word frequency and usage contexts have changed significantly over time, indicating semantic specialization in some cases and generalization in others. 
Broadening our perspective, we then model a range of more complex stylistic features and find that LLM-modified texts more frequently contain certain syntactic constructions, more complex and longer words and a lower lexical diversity. Finally, we connect these changes in writing practices to subjective reading experience through a pilot annotation study with 20 domain experts. They overall rate LLM-improved texts as more understandable and exciting, but also express negative qualitative attitudes towards LLMs, highlighting the strongly subjective effect of AI-assisted writing on reading experience.",{"paper_id":4345,"title":4346,"year":7,"month":188,"day":63,"doi":4347,"resource_url":4348,"first_page":4349,"last_page":4350,"pdf_url":4351,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4352,"paper_type":860,"authors":4353,"abstract":4365},"lrec2026-main-143","GeneFRDebate: Generated French Debates from News Articles with Industrial-Expert Summaries","10.63317\u002F4zuibqqim37u","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-143","1831","1841","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.143.pdf","abrougui-etal-2026-genefrdebate",[4354,4357,4359,4362],{"paper_id":4345,"author_seq":247,"given_name":4355,"surname":4356,"affiliation":63,"orcid":63},"Rim","Abrougui",{"paper_id":4345,"author_seq":232,"given_name":1150,"surname":4358,"affiliation":63,"orcid":63},"Lechien",{"paper_id":4345,"author_seq":218,"given_name":4360,"surname":4361,"affiliation":63,"orcid":63},"Elisabeth","Savatier",{"paper_id":4345,"author_seq":203,"given_name":4363,"surname":4364,"affiliation":63,"orcid":63},"Benoît","Laurent","Summarizing domain-specific conversations, such as political debates, remains challenging despite advances in large language models (LLMs), and resources for French debates are particularly limited. 
We present GeneFRDebate, a new dataset of synthetic French political debates generated from real-world news articles using an LLM, while keeping expert-written summaries unchanged. Our pipeline combines prompt engineering, human curation, and quality evaluation using both automatic metrics and expert assessment. We also provide baseline experiments with small-scale LLMs (≤8B parameters), demonstrating the dataset’s usefulness for training and evaluation. This work shows that carefully generated synthetic data with human oversight can complement existing corpora, supporting research in multilingual and domain-specific dialogue summarization.",{"paper_id":4367,"title":4368,"year":7,"month":188,"day":63,"doi":4369,"resource_url":4370,"first_page":4371,"last_page":4372,"pdf_url":4373,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4374,"paper_type":860,"authors":4375,"abstract":4394},"lrec2026-main-144","AmbiCoRefVis: A Tool for Visualizing Coreferential Ambiguity","10.63317\u002F2h3c7hgu4tyd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-144","1842","1855","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.144.pdf","paetzold-etal-2026-ambicorefvis",[4376,4379,4382,4385,4388,4389,4391],{"paper_id":4367,"author_seq":247,"given_name":4377,"surname":4378,"affiliation":63,"orcid":63},"Patrick","Paetzold",{"paper_id":4367,"author_seq":232,"given_name":4380,"surname":4381,"affiliation":63,"orcid":63},"Lukas","Beiske",{"paper_id":4367,"author_seq":218,"given_name":4383,"surname":4384,"affiliation":63,"orcid":63},"Mark-Matthias","Zymla",{"paper_id":4367,"author_seq":203,"given_name":4386,"surname":4387,"affiliation":63,"orcid":63},"Massimo","Poesio",{"paper_id":4367,"author_seq":188,"given_name":3821,"surname":3971,"affiliation":63,"orcid":63},{"paper_id":4367,"author_seq":172,"given_name":1668,"surname":4390,"affiliation":63,"orcid":63},"Weiskopf",{"paper_id":4367,"author_seq":155,"given_name":4
392,"surname":4393,"affiliation":63,"orcid":63},"Oliver","Deussen","Situations of ambiguity and uncertainty in the annotation of discourse interpretation tasks, such as anaphoric reference, are common, but existing annotation tools typically only support visualization at the local level (i.e., visualizing more than one mention of a possible antecedent) rather than globally (i.e., visualizing multiple coreference chains), as the latter is a complex problem. In this paper, we introduce the interactive visual analysis tool AmbiCoRefVis, developed to display multiple global interpretations of a referring expression. We evaluate it with the Phrase Detectives corpus.",{"paper_id":4396,"title":4397,"year":7,"month":188,"day":63,"doi":4398,"resource_url":4399,"first_page":4400,"last_page":4401,"pdf_url":4402,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4403,"paper_type":860,"authors":4404,"abstract":4433},"lrec2026-main-145","Fables-DTR: A Corpus of Fables Annotated for Discourse and Temporal 
Relations","10.63317\u002F5buqgd55dy3e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-145","1856","1868","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.145.pdf","silvano-etal-2026-fables",[4405,4408,4411,4414,4416,4419,4422,4425,4428,4430],{"paper_id":4396,"author_seq":247,"given_name":4406,"surname":4407,"affiliation":63,"orcid":63},"Purificação","Silvano",{"paper_id":4396,"author_seq":232,"given_name":4409,"surname":4410,"affiliation":63,"orcid":63},"António","Leal",{"paper_id":4396,"author_seq":218,"given_name":4412,"surname":4413,"affiliation":63,"orcid":63},"Maciej","Ogrodniczuk",{"paper_id":4396,"author_seq":203,"given_name":1313,"surname":4415,"affiliation":63,"orcid":63},"Tomaszewska",{"paper_id":4396,"author_seq":188,"given_name":4417,"surname":4418,"affiliation":63,"orcid":63},"Joana","Gomes",{"paper_id":4396,"author_seq":172,"given_name":4420,"surname":4421,"affiliation":63,"orcid":63},"Luís Filipe","Cunha",{"paper_id":4396,"author_seq":155,"given_name":4423,"surname":4424,"affiliation":63,"orcid":63},"Evelin","Amorim",{"paper_id":4396,"author_seq":138,"given_name":4426,"surname":4427,"affiliation":63,"orcid":63},"Martyna","Lewandowska",{"paper_id":4396,"author_seq":121,"given_name":2742,"surname":4429,"affiliation":63,"orcid":63},"Śliwicka",{"paper_id":4396,"author_seq":104,"given_name":4431,"surname":4432,"affiliation":63,"orcid":63},"Alípio","Jorge","This paper presents Fables-DTR, a corpus of Aesop’s fables annotated for discourse and temporal relations, designed to explore how event sequencing and aspectual features and discourse relations interact. Building on the ISO 24617 Semantic Annotation Framework, integrating Part 1 (Time and Events) and Part 8 (Discourse Relations), the resource provides a unified representation of discourse structure and temporal and aspectual features. 
The corpus comprises 15 fables in English, automatically translated into European Portuguese and Polish (45 texts in total), with all translations manually validated by native linguists to preserve semantic and discourse features. Each fable is annotated in two layers: (i) for discourse relations, argument roles, and signals; (ii) for temporal relations, and event attributes, such as Tense, Aspect, Polarity. The resulting dataset provides relevant information about the association between discourse relations and their temporal and aspectual features. Fables-DTR contributes both a valuable resource for cross-linguistic and narrative discourse analysis and empirical evidence for integrating ISO standards in multilayer annotation. It also provides a foundation for computational applications in discourse parsing, event ordering, and implicit relation detection.",{"paper_id":4435,"title":4436,"year":7,"month":188,"day":63,"doi":4437,"resource_url":4438,"first_page":4439,"last_page":4440,"pdf_url":4441,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4442,"paper_type":860,"authors":4443,"abstract":4451},"lrec2026-main-146","A Benchmark Corpus for the Diagnostic Assessment of Content in L2 English Speech","10.63317\u002F56kmiu3fnmbt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-146","1869","1877","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.146.pdf","doi-etal-2026-benchmark",[4444,4446,4448],{"paper_id":4435,"author_seq":247,"given_name":4445,"surname":4247,"affiliation":63,"orcid":63},"Kosuke",{"paper_id":4435,"author_seq":232,"given_name":3796,"surname":4447,"affiliation":63,"orcid":63},"Vasselli",{"paper_id":4435,"author_seq":218,"given_name":4449,"surname":4450,"affiliation":63,"orcid":63},"Taro","Watanabe","When evaluating second language (L2) learners’ speech, human raters pay significant attention to its content, and diagnostic feedback on content helps improve learners’ 
speaking ability. Since human scoring and feedback are time-consuming and costly, automatic models aiming to provide such feedback have been developed, specifically models that detect whether certain content, i.e., key points, is included in learner’s speech. However, previous studies target only integrated test items where learners speak based on listened or read materials, and the data used are not publicly available. In this study, we construct a speech corpus for key point detection. We extend the target to test items where learners speak based on their own experiences and opinions, which show greater content diversity than integrated test items, using an approach that annotates content along with its connections. Analysis of the constructed data demonstrated that the annotated elements are associated with the speech content scores. We also found that large language models are generally successful at locating content element spans, although their predicted spans are often broader than human-annotated ones. 
The corpus and annotation guidelines are available at https:\u002F\u002Flanguage.sakura.ne.jp\u002Ficnale\u002Fdownload.html.",{"paper_id":4453,"title":4454,"year":7,"month":188,"day":63,"doi":4455,"resource_url":4456,"first_page":4457,"last_page":4458,"pdf_url":4459,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4460,"paper_type":860,"authors":4461,"abstract":4469},"lrec2026-main-147","Insights from Romanized Manipuri Social Media Text: A Transliteration Corpus and Variation Analysis","10.63317\u002F3uqwwrf7jvvo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-147","1878","1888","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.147.pdf","salice-etal-2026-insights",[4462,4465,4466],{"paper_id":4453,"author_seq":247,"given_name":4463,"surname":4464,"affiliation":63,"orcid":63},"Maisang Kamei","Salice",{"paper_id":4453,"author_seq":232,"given_name":2252,"surname":2253,"affiliation":63,"orcid":63},{"paper_id":4453,"author_seq":218,"given_name":4467,"surname":4468,"affiliation":63,"orcid":63},"Priyankoo","Sarmah","This paper presents the first large-scale study of Romanized Manipuri, a low-resource Indic language widely used by native speakers on social media. Social media text is highly informal and often noisy, posing challenges for natural language processing tasks; therefore, normalization through back-transliteration is essential. We construct a Romanized Manipuri to Manipuri–Bengali script back-transliteration corpus from YouTube comments, capturing diverse informal writing styles and orthographic variations. The dataset is analyzed to examine variation patterns at two levels: character-level inconsistencies and pragmatic stylistic variations influenced by user writing behavior. We also compare social media romanization with formal transliteration conventions, including standardized romanization schemes and textbook-based systems. 
Furthermore, we evaluate a Transformer model at both character and subword levels and conduct a detailed error analysis to identify key challenges affecting back-transliteration performance.",{"paper_id":4471,"title":4472,"year":7,"month":188,"day":63,"doi":4473,"resource_url":4474,"first_page":4475,"last_page":4476,"pdf_url":4477,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4478,"paper_type":860,"authors":4479,"abstract":4485},"lrec2026-main-148","MELD: Melding Diverse Multilingual and Multi-Domain Datasets for Named Entity Recognition Evaluation","10.63317\u002F32qrd24xac2e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-148","1889","1903","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.148.pdf","glocker-etal-2026-meld",[4480,4483],{"paper_id":4471,"author_seq":247,"given_name":4481,"surname":4482,"affiliation":63,"orcid":63},"Kevin","Glocker",{"paper_id":4471,"author_seq":232,"given_name":2146,"surname":4484,"affiliation":63,"orcid":63},"Kuhlmann","Zero-shot Named Entity Recognition (NER) has gained prominence for information extraction across diverse domains without being limited to a single, fixed tag set. However, existing NER resources vary widely in data format, licensing terms, annotation schemes, and availability, making it difficult to systematically evaluate the generalization capabilities of zero-shot NER models. Prior attempts to aggregate datasets with broad coverage across domains have largely focused on a small subset of languages, and it is often not transparent how datasets were processed from their sources. This paper introduces MELD, a comprehensive multilingual and multi-domain data collection designed to address these gaps. MELD integrates 60 NER datasets spanning 194 languages, 14 domains, and 601 normalized entity types. 
While previously introduced multilingual NER datasets are mainly silver-standard, MELD contains gold-standard annotations for 60 languages. All data processing steps are fully open-source and reproducible, facilitating future extensions and ensuring long-term accessibility. While MELD is primarily designed for zero-shot evaluation, it also provides training and development splits in a single, consistent format to support future research in few-shot and supervised NER settings.",{"paper_id":4487,"title":4488,"year":7,"month":188,"day":63,"doi":4489,"resource_url":4490,"first_page":4491,"last_page":4492,"pdf_url":4493,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4494,"paper_type":860,"authors":4495,"abstract":4511},"lrec2026-main-149","FinER-ABSA: A Benchmark for Implicit and Explicit Entity Recognition and Aspect-Based Sentiment Analysis in Financial News","10.63317\u002F2puxqv6kunjn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-149","1904","1913","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.149.pdf","akkanwanich-etal-2026-finer",[4496,4499,4502,4505,4508],{"paper_id":4487,"author_seq":247,"given_name":4497,"surname":4498,"affiliation":63,"orcid":63},"Pachara","Akkanwanich",{"paper_id":4487,"author_seq":232,"given_name":4500,"surname":4501,"affiliation":63,"orcid":63},"Pavorn","Thongyoo",{"paper_id":4487,"author_seq":218,"given_name":4503,"surname":4504,"affiliation":63,"orcid":63},"Mahannop","Thabua",{"paper_id":4487,"author_seq":203,"given_name":4506,"surname":4507,"affiliation":63,"orcid":63},"Konlakorn","Wongpatikaseree",{"paper_id":4487,"author_seq":188,"given_name":4509,"surname":4510,"affiliation":63,"orcid":63},"Natthawut","Kertkeidkachorn","Many approaches to English financial text analysis still rely on keyword or rule-based extraction, with limited trust in sentiment models despite advances in contextual understanding. 
Past studies have explored concepts such as aspect-based sentiment analysis and named entity recognition, yet none address how entities appear implicitly through context rather than direct mentions, or provide a dataset that brings these elements together. This gap limits how well models capture the links between entities, aspect, and sentiment. We introduce FinER-ABSA, a benchmark that integrates implicit and explicit entity recognition with aspect-based sentiment in financial text. Experiments on seven open-source large language models under zero- and few-shot settings show that even the best systems still miss key aspects of implicit reasoning. In the few-shot case (K= 3), Llama-3.3-70B reached an F1 of 0.7623 for implicit entities, suggesting that while models can detect signals, their consistency remains far from the level of reliability required for financial analysis or decision-making. These insights emerge only through FinER-ABSA, which makes such gaps measurable and advances financial Natural Language Processing (NLP) toward deeper contextual understanding and enables systems that better extract comprehensive insights from market-moving information in an industry where such precision is critical.",{"paper_id":4513,"title":4514,"year":7,"month":188,"day":63,"doi":4515,"resource_url":4516,"first_page":4517,"last_page":4518,"pdf_url":4519,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4520,"paper_type":860,"authors":4521,"abstract":4534},"lrec2026-main-150","MUSIA: Multilingual Story Illustration Corpus for Cross-Cultural Alignment and 
Generation","10.63317\u002F3pk9sab3nfuo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-150","1914","1924","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.150.pdf","tewari-etal-2026-musia",[4522,4525,4528,4531],{"paper_id":4513,"author_seq":247,"given_name":4523,"surname":4524,"affiliation":63,"orcid":63},"Krishna","Tewari",{"paper_id":4513,"author_seq":232,"given_name":4526,"surname":4527,"affiliation":63,"orcid":63},"Supriya","Chanda",{"paper_id":4513,"author_seq":218,"given_name":4529,"surname":4530,"affiliation":63,"orcid":63},"Nirmit","Patil",{"paper_id":4513,"author_seq":203,"given_name":4532,"surname":4533,"affiliation":63,"orcid":63},"Sukomal","Pal","Recent advances in text-to-image generation have enabled automated visual storytelling, yet most existing datasets remain monolingual and culturally narrow. We introduce MUSIA, a Multilingual Story Illustration Corpus designed to advance research in cross-lingual and culturally grounded narrative illustration. MUSIA comprises bilingual (English-Hindi) story-image pairs drawn from open literary and folk sources, curated to reflect diverse cultural themes, artistic styles, and linguistic structures. Each story includes multiple illustrations aligned at the scene level, accompanied by quality-verified mappings for narrative-visual coherence. To establish a reproducible benchmark, we propose a two-stage baseline combining transformer-based semantic summarization with diffusion-based image generation, achieving strong performance in relevance, visual quality, and consistency. 
MUSIA represents the first step toward a scalable, culturally inclusive benchmark for multilingual visual storytelling, enabling fair and reproducible research across low-resource and underrepresented languages.",{"paper_id":4536,"title":4537,"year":7,"month":188,"day":63,"doi":4538,"resource_url":4539,"first_page":4540,"last_page":4541,"pdf_url":4542,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4543,"paper_type":860,"authors":4544,"abstract":4556},"lrec2026-main-151","MUDiC: A Dataset for Multi-User Dialogue and Collaboration in Chatbot Interaction","10.63317\u002F36zqrpocuvn9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-151","1925","1933","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.151.pdf","wagner-etal-2026-mudic",[4545,4546,4549,4551,4554],{"paper_id":4536,"author_seq":247,"given_name":2331,"surname":4284,"affiliation":63,"orcid":63},{"paper_id":4536,"author_seq":232,"given_name":4547,"surname":4548,"affiliation":63,"orcid":63},"Cristina Luna","Jimenez",{"paper_id":4536,"author_seq":218,"given_name":4360,"surname":4550,"affiliation":63,"orcid":63},"Andre",{"paper_id":4536,"author_seq":203,"given_name":4552,"surname":4553,"affiliation":63,"orcid":63},"Wolfgang","Minker",{"paper_id":4536,"author_seq":188,"given_name":2813,"surname":4555,"affiliation":63,"orcid":63},"Ultes","We introduce MUDiC, a novel dataset on task-based multi-user interactions in chatbots. Unlike most traditional dialogue corpora that focus on one-to-one human–chatbot exchanges, this dataset captures conversations involving two human participants engaging with a single system. The data include diverse conversational contexts such as shared group task, user intents, and mechanisms to deal with off-topic talk. MUDiC consists of 1,689 dialogue exchanges between 20 groups and the chatbot. 
Each session is annotated with user id, interaction turns, intents, and dialogue acts, enabling an analysis of group conversational dynamics. Consequently, the dataset aims to support tasks such as multi-user dialogue modelling, intent disambiguation, and moderation behaviour, which are relevant factors for the design of socially aware chatbots.",{"paper_id":4558,"title":4559,"year":7,"month":188,"day":63,"doi":4560,"resource_url":4561,"first_page":4562,"last_page":4563,"pdf_url":4564,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4565,"paper_type":860,"authors":4566,"abstract":4573},"lrec2026-main-152","StoryCCDial: Collecting and Analyzing Human-Human Co-Creation Dialogues for Personalized Creative Support","10.63317\u002F4k28tvh2vihn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-152","1934","1946","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.152.pdf","ezure-etal-2026-storyccdial",[4567,4570],{"paper_id":4558,"author_seq":247,"given_name":4568,"surname":4569,"affiliation":63,"orcid":63},"Natsumi","Ezure",{"paper_id":4558,"author_seq":232,"given_name":4571,"surname":4572,"affiliation":63,"orcid":63},"Michimasa","Inaba","With the development of generative models, research on human-AI co-creation has been actively conducted. However, in the field of co-creation, research on system personalization according to individual characteristics is insufficient, and little focus has been placed on individual differences in creation. Therefore, in this study, we constructed StoryCCDial, a co-creation dialogue dataset aimed at the personalization of co-creative dialogue systems. First, we collected human-human story co-creation dialogue data involving 120 workers and constructed a dataset that includes dialogues, dialogue acts, the workers’ personality traits, postsurveys, and edit histories from the interface. 
Next, using the constructed dataset, we conducted analyses focusing on the workers’ personality traits, the number of utterances, and edit histories. The analysis revealed differences in dialogue content based on workers’ personality traits, individual differences in the number of utterances during the co-creation process, and variations in creative workflows on the interface. Our dataset will be available at https:\u002F\u002Fgithub.com\u002FUEC-InabaLab\u002FStoryCCDial .",{"paper_id":4575,"title":4576,"year":7,"month":188,"day":63,"doi":4577,"resource_url":4578,"first_page":4579,"last_page":4580,"pdf_url":4581,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4582,"paper_type":860,"authors":4583,"abstract":4590},"lrec2026-main-153","DATASHI: A Parallel English–Tashlhiyt Corpus for Orthography Normalization and Low-Resource Language Processing.","10.63317\u002F2zekkx242a7h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-153","1947","1956","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.153.pdf","monir-etal-2026-datashi",[4584,4587],{"paper_id":4575,"author_seq":247,"given_name":4585,"surname":4586,"affiliation":63,"orcid":63},"Nasser-Eddine","Monir",{"paper_id":4575,"author_seq":232,"given_name":4588,"surname":4589,"affiliation":63,"orcid":63},"Zakaria","Baou","DATASHI is a new parallel English–Tashlhiyt corpus that fills a critical gap in computational resources for Amazigh languages. It contains 5,000 sentence pairs, including a 1,500-sentence subset with expert-standardized and non-standard user-generated versions, enabling systematic study of orthographic diversity and normalization. This dual design supports text-based NLP tasks—such as tokenization, translation, and normalization—and also serves as a foundation for read-speech data collection and multimodal alignment. 
Comprehensive evaluations with state-of-the-art Large Language Models (GPT-5, Claude Sonnet 4.5, Gemini 2.5 Pro, Mistral, Qwen3-Max) show clear improvements from zero-shot to few-shot prompting, with Gemini 2.5 Pro achieving the lowest word and character-level error rates and exhibiting robust cross-lingual generalization. A fine-grained analysis of edit operations—deletions, substitutions, and insertions—across phonological classes (geminates, emphatics, uvulars, and pharyngeals) further highlights model-specific sensitivities to marked Tashlhiyt features and provides new diagnostic insights for low-resource Amazigh orthography normalization.",{"paper_id":4592,"title":4593,"year":7,"month":188,"day":63,"doi":4594,"resource_url":4595,"first_page":4596,"last_page":4597,"pdf_url":4598,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4599,"paper_type":860,"authors":4600,"abstract":4614},"lrec2026-main-154","Evaluating Social Intelligence in LLMs via Japanese Honorifics in Email Generation: A Social Semiotic System Perspective","10.63317\u002F54wnt2fwhk8j","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-154","1957","1976","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.154.pdf","liu-etal-2026-evaluating",[4601,4603,4605,4608,4611],{"paper_id":4592,"author_seq":247,"given_name":4602,"surname":3916,"affiliation":63,"orcid":63},"Muxuan",{"paper_id":4592,"author_seq":232,"given_name":3526,"surname":4604,"affiliation":63,"orcid":63},"Ishigaki",{"paper_id":4592,"author_seq":218,"given_name":4606,"surname":4607,"affiliation":63,"orcid":63},"Yusuke","Miyao",{"paper_id":4592,"author_seq":203,"given_name":4609,"surname":4610,"affiliation":63,"orcid":63},"Hiroya","Takamura",{"paper_id":4592,"author_seq":188,"given_name":4612,"surname":4613,"affiliation":63,"orcid":63},"Ichiro","Kobayashi","We propose JaSocial, a novel evaluation framework that leverages Japanese emails to comprehensively evaluate 
large language models’ (LLMs) social intelligence across varied social‑status relationships. The framework integrates three core components. First, we construct and publicly release a meticulously human‑annotated Japanese email dataset covering six distinct social‑status contexts, thereby capturing nuanced shifts in social hierarchy and politeness. Second, we adopt Systemic Functional Linguistics (SFL)—a social-semiotic linguistic theory that explicitly models how linguistic choices realize interpersonal relations and hierarchical distinctions—to classify email content in terms of three perspectives: social relationships, speech functions, and honorific expressions. Based on these perspectives, we design an automated evaluation method that assigns each LLM-generated email a contextual appropriateness score, quantifying how well it reflects socially intelligent behavior. Third, we release the full evaluation code to ensure reproducibility and enable fair cross-model comparisons. JaSocial exposes current LLMs’ limitations in capturing cultural nuance, while providing an open benchmark for future research.",{"paper_id":4616,"title":4617,"year":7,"month":188,"day":63,"doi":4618,"resource_url":4619,"first_page":4620,"last_page":4621,"pdf_url":4622,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4623,"paper_type":860,"authors":4624,"abstract":4635},"lrec2026-main-155","Do Language Models Know Theo Has a Wife? 
Investigating the Proviso Problem","10.63317\u002F547c6dmqaxgx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-155","1977","1988","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.155.pdf","azin-etal-2026-do",[4625,4628,4630,4633],{"paper_id":4616,"author_seq":247,"given_name":4626,"surname":4627,"affiliation":63,"orcid":63},"Tara","Azin",{"paper_id":4616,"author_seq":232,"given_name":1668,"surname":4629,"affiliation":63,"orcid":63},"Dumitrescu",{"paper_id":4616,"author_seq":218,"given_name":4631,"surname":4632,"affiliation":63,"orcid":63},"Diana","Inkpen",{"paper_id":4616,"author_seq":203,"given_name":4634,"surname":2253,"affiliation":63,"orcid":63},"Raj","We investigate how language models handle the proviso problem, an unresolved issue in pragmatics where presuppositions in conditional sentences diverge between theoretical and human interpretations. We reformulate this phenomenon as a Natural Language Inference task and introduce a diagnostic dataset designed to probe presupposition projection in conditionals. We evaluate RoBERTa, DeBERTa, LLaMA, and Gemma using explainability analyses. The results show that models broadly align with human judgments but rely on shallow pattern matching rather than semantic or pragmatic reasoning. 
Our work provides the first computational evaluation framework for the proviso problem and highlights the need for diagnostic, multi-method approaches to assess pragmatic competence and context-dependent meaning in language models.",{"paper_id":4637,"title":4638,"year":7,"month":188,"day":63,"doi":4639,"resource_url":4640,"first_page":4641,"last_page":4642,"pdf_url":4643,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4644,"paper_type":860,"authors":4645,"abstract":4666},"lrec2026-main-156","Cross-Lingual and Cross-Cultural Transfer of Talk Move Classification to German Science Classrooms","10.63317\u002F3nynifnswvtk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-156","1989","1997","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.156.pdf","wartena-etal-2026-cross",[4646,4648,4650,4653,4656,4659,4661,4663],{"paper_id":4637,"author_seq":247,"given_name":3643,"surname":4647,"affiliation":63,"orcid":63},"Wartena",{"paper_id":4637,"author_seq":232,"given_name":3643,"surname":4649,"affiliation":63,"orcid":63},"Schumburg",{"paper_id":4637,"author_seq":218,"given_name":4651,"surname":4652,"affiliation":63,"orcid":63},"Andreas","Nehring",{"paper_id":4637,"author_seq":203,"given_name":4654,"surname":4655,"affiliation":63,"orcid":63},"Marcel","Ebert",{"paper_id":4637,"author_seq":188,"given_name":4657,"surname":4658,"affiliation":63,"orcid":63},"Friederike","Korneck",{"paper_id":4637,"author_seq":172,"given_name":1061,"surname":4660,"affiliation":63,"orcid":63},"Schmitt",{"paper_id":4637,"author_seq":155,"given_name":1938,"surname":4662,"affiliation":63,"orcid":63},"Irmer",{"paper_id":4637,"author_seq":138,"given_name":4664,"surname":4665,"affiliation":63,"orcid":63},"Birgit","Neuhaus","Talk moves are discourse categories used to analyse classroom interactions. 
They provide insights into the types of exchanges between teachers and students and can serve as indicators of teaching quality, supporting feedback and reflection. The automatic classification of talk moves is therefore valuable for educational research and teacher development. While previous studies have explored this task, almost all have focused on English data. We constructed a small corpus of German science classroom transcripts and investigated whether multilingual language models can classify talk moves effectively under data-scarce conditions. Specifically, we examined (1) training with a very limited amount of German data and (2) cross-lingual transfer from English training data, which also entails cross-cultural adaptation. Our results show that multilingual large language models are capable of cross-lingual and cross-cultural transfer, but models trained directly on even a small amount of German data achieve better performance. Combining English and German data yields the best results overall, though the additional benefit of including English data is small.",{"paper_id":4668,"title":4669,"year":7,"month":188,"day":63,"doi":4670,"resource_url":4671,"first_page":242,"last_page":4672,"pdf_url":4673,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4674,"paper_type":860,"authors":4675,"abstract":4690},"lrec2026-main-157","IHPP: A Paragraph-Level Dataset for Investigating the Pragmatics of Hyperpartisan Italian News","10.63317\u002F2sabeivqu9dt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-157","2011","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.157.pdf","maggini-etal-2026-ihpp",[4676,4679,4681,4684,4687],{"paper_id":4668,"author_seq":247,"given_name":4677,"surname":4678,"affiliation":63,"orcid":63},"Michele 
Joshua","Maggini",{"paper_id":4668,"author_seq":232,"given_name":1709,"surname":4680,"affiliation":63,"orcid":63},"Bassi",{"paper_id":4668,"author_seq":218,"given_name":4682,"surname":4683,"affiliation":63,"orcid":63},"Angelo","Valente",{"paper_id":4668,"author_seq":203,"given_name":4685,"surname":4686,"affiliation":63,"orcid":63},"Gaël","Dias",{"paper_id":4668,"author_seq":188,"given_name":4688,"surname":4689,"affiliation":63,"orcid":63},"Pablo","Gamallo","This study investigates the linguistic composition of hyperpartisan paragraphs in Italian news on climate change, Ukraine war, and immigration by publicly disclosing the dataset to ensure reproducibility. We introduce a new corpus, IHPP, of 356 articles, for a total of 4,861 paragraphs annotated for hyperpartisan news detection at the paragraph level and enriched with span-level annotations of six semantic-pragmatic linguistic traits: figurative speech, irony\u002Fsarcasm, epithet, as well as hyperbolic and loaded language. We hypothesized that these traits, while violating Gricean maxims, are key mechanisms of hyperpartisan rhetoric. To test this, we fine-tuned a set of mono- and multilingual BERT models for hyperpartisan detection and evaluated their incorporation in the embedding space. Then, we applied explainable techniques, e.g. Integrated Gradients and SHAP, to analyze how models allocate attribution to normal and linguistic-trait tokens. Our results show that loaded language is the most discriminative trait. 
The dataset is released: https:\u002F\u002Fgithub.com\u002FMichJoM\u002FIHPP-Climate.",{"paper_id":4692,"title":4693,"year":7,"month":188,"day":63,"doi":4694,"resource_url":4695,"first_page":132,"last_page":4696,"pdf_url":4697,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4698,"paper_type":860,"authors":4699,"abstract":4709},"lrec2026-main-158","Detecting Potentially Under-annotated Explicit Discourse Connectives in the Penn Discourse Treebank (PDTB-3) with LLMs","10.63317\u002F5392qqgkyzs5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-158","2023","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.158.pdf","chuang-etal-2026-detecting",[4700,4703,4706],{"paper_id":4692,"author_seq":247,"given_name":4701,"surname":4702,"affiliation":63,"orcid":63},"Yueh-Ting","Chuang",{"paper_id":4692,"author_seq":232,"given_name":4704,"surname":4705,"affiliation":63,"orcid":63},"Xixian","Liao",{"paper_id":4692,"author_seq":218,"given_name":4707,"surname":4708,"affiliation":63,"orcid":63},"Bonnie","Webber","Accurate identification of explicit discourse connectives is crucial for analysing discourse relations, which supports NLP tasks such as summarisation and question answering. However, annotation inconsistencies remain a challenge, particularly for ambiguous prepositions with both discourse and non-discourse usages. This paper presents a pipeline that leverages large language model (LLM) prompting, cross-model agreement, and syntactic pattern analysis to detect likely under-annotated connectives. Evaluated on four prepositions (by, with, without and for), the approach effectively identifies likely under-annotations for some, but not all prepositions. Results show that while the method is promising, its generalisability depends on improved prompt design, model choice, and syntactic analysis tools. 
The findings highlight both the potential and limitations of LLM-based approaches for corpus error detection and demonstrate how improved discourse annotation can contribute to more reliable data for downstream NLP tasks.",{"paper_id":4711,"title":4712,"year":7,"month":188,"day":63,"doi":4713,"resource_url":4714,"first_page":29,"last_page":4715,"pdf_url":4716,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4717,"paper_type":860,"authors":4718,"abstract":4725},"lrec2026-main-159","Can LLMs Understand Punchlines? LLMs' Narrative Understanding Evaluation with Short-shorts","10.63317\u002F4n2p36736i24","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-159","2034","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.159.pdf","cheng-etal-2026-can",[4719,4722],{"paper_id":4711,"author_seq":247,"given_name":4720,"surname":4721,"affiliation":63,"orcid":63},"Jiashi","Cheng",{"paper_id":4711,"author_seq":232,"given_name":4723,"surname":4724,"affiliation":63,"orcid":63},"Takehito","Utsuro","In this study, we constructed a narrative comprehension benchmark using the works of Shinichi Hoshi to examine the extent to which Large Language Models (LLMs) can understand twist endings, or punchlines, in short-short stories. Specifically, story endings were categorized into six types—such as Revelation, Apocalypse, and Sarcasm—and a classification task was designed in which LLMs were prompted with the story text and asked to select the appropriate ending category. We collected human annotations from eight native Japanese speakers to establish a reference benchmark. Experimental comparisons were conducted across multiple LLMs (GPT-4, Claude, Gemini, and Grok), assessing their performance both at the metric level and at the discourse level against human judgments. 
The results revealed that although certain models approached human performance in specific categories, overall accuracy remained notably lower than the human baseline. Through quantitative and qualitative analyses, this study highlights the challenges LLMs face in capturing narrative subtleties such as irony, implication, and emotional reversal. The proposed benchmark provides a novel framework for evaluating narrative understanding and the deeper semantic reasoning capabilities of LLMs.",{"paper_id":4727,"title":4728,"year":7,"month":188,"day":63,"doi":4729,"resource_url":4730,"first_page":4731,"last_page":4732,"pdf_url":4733,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4734,"paper_type":860,"authors":4735,"abstract":4746},"lrec2026-main-160","Building the AURIS Corpus of Reference and Information Structure","10.63317\u002F4gixthhaqsbs","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-160","2035","2047","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.160.pdf","chiarcos-etal-2026-building",[4736,4738,4740,4743],{"paper_id":4727,"author_seq":247,"given_name":3643,"surname":4737,"affiliation":63,"orcid":63},"Chiarcos",{"paper_id":4727,"author_seq":232,"given_name":3643,"surname":4739,"affiliation":63,"orcid":63},"Fäth",{"paper_id":4727,"author_seq":218,"given_name":4741,"surname":4742,"affiliation":63,"orcid":63},"Tabea","Gröger",{"paper_id":4727,"author_seq":203,"given_name":4744,"surname":4745,"affiliation":63,"orcid":63},"Quentin Alastair","Frey","We present AURIS, the Augsburg corpus for Reference and Information Structure, a multilingual corpus annotated for reference, discourse relations, and aspects of information structure. AURIS introduces an innovative use of off-the-shelf spreadsheet software for complex annotation tasks, reducing technical barriers and dependencies common in discourse annotation. 
Designed for classroom use, it enables linguistics and philology students to explore diverse theoretical frameworks while working in their language of choice. The paper focuses on technical design and workflows that integrate and generate pre-annotations from heterogeneous sources. Despite its low-tech approach, AURIS aligns with established standards and remains interoperable with existing projects. Preprocessing scripts support multiple languages, with an initial annotation round on German texts evaluated against TED-MDB and ParCorFull data converted into AURIS formats. This approach demonstrates that accessible tools can yield high-quality, replicable annotations for discourse and information-structure research.",{"paper_id":4748,"title":4749,"year":7,"month":188,"day":63,"doi":4750,"resource_url":4751,"first_page":4752,"last_page":4753,"pdf_url":4754,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4755,"paper_type":860,"authors":4756,"abstract":4765},"lrec2026-main-161","There Is No Spoon: Existential Presupposition in Large Language Models","10.63317\u002F5d6abb9evr8w","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-161","2048","2061","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.161.pdf","wrgtter-etal-2026-there",[4757,4760,4762],{"paper_id":4748,"author_seq":247,"given_name":4758,"surname":4759,"affiliation":63,"orcid":63},"Marie-Léontine","Wörgötter",{"paper_id":4748,"author_seq":232,"given_name":4761,"surname":3861,"affiliation":63,"orcid":63},"Shikai",{"paper_id":4748,"author_seq":218,"given_name":4763,"surname":4764,"affiliation":63,"orcid":63},"Sebastian","Schuster","Existential presupposition is a foundational component of meaning: it reflects implicit assumptions of existence that underlie interpretation, even when not explicitly stated. 
Sentences such as Neo bends the spoon presuppose that the entities referred to exist, independent of the truth-value of the sentence itself. Because this type of meaning is implied rather than explicitly asserted, it provides a diagnostic test of whether large language models (LLMs) display sensitivity to more abstract and less surface-driven layers of meaning. We adapt a natural language inference (NLI)–based probing setup, using a fine-tuned version of DeBERTa-v3-large as a baseline model and compare its behaviour to that of LLaMA-3.1-8B-Instruct and Gemma-3-12B-it under zero- and few-shot prompting, as well as to their fine-tuned base-variants. We find that while all models show sensitivity to existential presupposition across syntactic embeddings, determiner types and contextual cues, their behaviour differs markedly in strength and systematicity, with NLI-fine-tuned autoregressive models exhibiting the most coherent and stable projection patterns. They showed graded and theoretically aligned projection patterns, whereas instruction-tuned models remain largely prone to surface heuristics and prompt susceptibility. 
These results suggest that pre-trained LLMs exhibit sensitivity to existential presupposition but this behaviour surfaces only systematically when the models have learned the intricacies of the NLI task.",{"paper_id":4767,"title":4768,"year":7,"month":188,"day":63,"doi":4769,"resource_url":4770,"first_page":4771,"last_page":4772,"pdf_url":4773,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4774,"paper_type":860,"authors":4775,"abstract":4783},"lrec2026-main-162","DiscoRAG: A Discourse-Aware Agent for Query-Based Summarization of Long Documents","10.63317\u002F2u9sjkc357ro","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-162","2062","2075","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.162.pdf","chernyavskiy-etal-2026-discorag",[4776,4778,4781],{"paper_id":4767,"author_seq":247,"given_name":869,"surname":4777,"affiliation":63,"orcid":63},"Chernyavskiy",{"paper_id":4767,"author_seq":232,"given_name":4779,"surname":4780,"affiliation":63,"orcid":63},"Lidiia","Ostyakova",{"paper_id":4767,"author_seq":218,"given_name":2748,"surname":4782,"affiliation":63,"orcid":63},"Ilvovsky","Query-based summarization of long documents is often tackled with retrieval-augmented generation (RAG). However, conventional RAG models exhibit limitations when applied to narrative texts, where crucial evidence is often implicit and distributed. This exposes a distinct class of “discourse-aware” queries that require specialized, structure-aware models. To address this, we introduce DiscoRAG, a framework that leverages Rhetorical Structure Theory (RST). By modeling the document as a discourse tree, DiscoRAG navigates its structure, explicitly using rhetorical relations to focus on and aggregate evidence from globally related segments. Furthermore, our pipeline integrates a classifier that assesses query complexity to dynamically select the most efficient retrieval strategy. 
We evaluate our DiscoRAG against standard and extended-context RAG pipelines on the SQuALITY dataset, which we release augmented with questions requiring deep discourse reasoning and integration of the global narrative. Our results demonstrate that this method sizeably outperforms these baselines, demonstrating its superior ability to assemble a coherent, contextually rich evidence base by interpreting the global narrative structure rather than relying on local semantic similarity.",{"paper_id":4785,"title":4786,"year":7,"month":188,"day":63,"doi":4787,"resource_url":4788,"first_page":4789,"last_page":4790,"pdf_url":4791,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4792,"paper_type":860,"authors":4793,"abstract":4808},"lrec2026-main-163","In-Distribution Steering: Balancing Control and Coherence in Language Model Generation","10.63317\u002F4629fxavjicu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-163","2076","2089","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.163.pdf","vogels-etal-2026-distribution",[4794,4796,4799,4802,4805],{"paper_id":4785,"author_seq":247,"given_name":1995,"surname":4795,"affiliation":63,"orcid":63},"Vogels",{"paper_id":4785,"author_seq":232,"given_name":4797,"surname":4798,"affiliation":63,"orcid":63},"Benjamin","Wong",{"paper_id":4785,"author_seq":218,"given_name":4800,"surname":4801,"affiliation":63,"orcid":63},"Yann","Choho",{"paper_id":4785,"author_seq":203,"given_name":4803,"surname":4804,"affiliation":63,"orcid":63},"Annabelle","Blangero",{"paper_id":4785,"author_seq":188,"given_name":4806,"surname":4807,"affiliation":63,"orcid":63},"Milan","Bhan","Activation steering methods control large language model (LLM) behavior by modifying internal activations at inference time. 
However, most existing activation steering methods rely on a fixed steering strength, leading to either insufficient control or unadapted intervention that degrades text plausibility and coherence. We introduce In-Distribution Steering (IDS), a novel method that adapts steering strength based on the input data distribution in representation space. IDS dynamically adjusts interventions according to how far a given input lies within the distribution, enabling adaptive intervention and generation stability during text generation. Experiments demonstrate that IDS achieves strong accuracy on classification tasks while producing coherent text without collapse, making IDS particularly well suited for real-world applications.",{"paper_id":4810,"title":4811,"year":7,"month":188,"day":63,"doi":4812,"resource_url":4813,"first_page":4814,"last_page":4815,"pdf_url":4816,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4817,"paper_type":860,"authors":4818,"abstract":4831},"lrec2026-main-164","Improving Multilingual Language Models by Aligning Representations through Steering","10.63317\u002F244dsoue8zu2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-164","2090","2103","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.164.pdf","mahmoud-etal-2026-improving",[4819,4822,4825,4828],{"paper_id":4810,"author_seq":247,"given_name":4820,"surname":4821,"affiliation":63,"orcid":63},"Omar Mohamed","Mahmoud",{"paper_id":4810,"author_seq":232,"given_name":4823,"surname":4824,"affiliation":63,"orcid":63},"Buddhika Laknath","Semage",{"paper_id":4810,"author_seq":218,"given_name":4826,"surname":4827,"affiliation":63,"orcid":63},"Thommen George","Karimpanal",{"paper_id":4810,"author_seq":203,"given_name":4829,"surname":4830,"affiliation":63,"orcid":63},"Santu","Rana","This paper investigates how Large Language Models (LLMs) represent non-English tokens—a question that remains underexplored despite recent progress. 
We propose a lightweight intervention method using representation steering, where a learned vector is added to the residual stream at a single model layer to enhance multilingual performance. Through extensive experiments across seven competitive baselines—including prompt optimization, supervised fine-tuning (SFT), in-context learning, cross-lingual transfer, projection mapping techniques, and translation-based methods—we show that our approach consistently outperforms most alternatives. In particular, it achieves performance on par with production-grade translation systems while requiring far fewer resources. We further explore the complementarity between our method and SFT, demonstrating that steering offers a direct, efficient way to realign internal representations. These findings underscore the potential of activation-level interventions as a powerful tool for improving the multilingual capabilities of LLMs.",{"paper_id":4833,"title":4834,"year":7,"month":188,"day":63,"doi":4835,"resource_url":4836,"first_page":4837,"last_page":4838,"pdf_url":4839,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4840,"paper_type":860,"authors":4841,"abstract":4858},"lrec2026-main-165","Explainable AI for Ethical Counter Speech Generation in Hate Speech Mitigation","10.63317\u002F3je3mug9bbb3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-165","2104","2114","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.165.pdf","ridoy-etal-2026-explainable",[4842,4845,4848,4850,4853,4856],{"paper_id":4833,"author_seq":247,"given_name":4843,"surname":4844,"affiliation":63,"orcid":63},"Ashiful 
Islam","Ridoy",{"paper_id":4833,"author_seq":232,"given_name":4846,"surname":4847,"affiliation":63,"orcid":63},"Mohammed","Faisal",{"paper_id":4833,"author_seq":218,"given_name":4849,"surname":2247,"affiliation":63,"orcid":63},"Yogesh",{"paper_id":4833,"author_seq":203,"given_name":4851,"surname":4852,"affiliation":63,"orcid":63},"Md Mamun-Ur","Rashid",{"paper_id":4833,"author_seq":188,"given_name":4854,"surname":4855,"affiliation":63,"orcid":63},"Marina","Ernst",{"paper_id":4833,"author_seq":172,"given_name":2510,"surname":4857,"affiliation":63,"orcid":63},"Hopfgartner","The proliferation of hate speech in digital communication platforms poses significant challenges to online safety and social cohesion. While automated hate speech detection systems have shown promise, their black-box nature limits user trust and understanding of AI-driven content moderation decisions. This paper presents a framework that integrates explainable AI (XAI) techniques with counter-speech generation to create transparent, ethical solutions for hate speech mitigation. Our approach combines a fine-tuned HateBERT model, with a specialized Llama 3.1-8B-Instruct model for generating empathetic counter-narratives. The system employs five distinct XAI methods: Integrated Gradients, Attention Visualization, LIME, Counterfactual Analysis, and Natural Language Explanations to provide interpretable reasoning behind both detection and response generation decisions. 
The integration of explainability mechanisms with counter-speech generation represents a novel contribution to ethical AI systems, fostering transparency and trust in automated hate speech mitigation while maintaining high performance standards for real-world deployment.",{"paper_id":4860,"title":4861,"year":7,"month":188,"day":63,"doi":4862,"resource_url":4863,"first_page":4864,"last_page":4865,"pdf_url":4866,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4867,"paper_type":860,"authors":4868,"abstract":4875},"lrec2026-main-166","Do Language Models Encode Semantic Relations? Probing and Sparse Feature Analysis","10.63317\u002F52znrwwkdog7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-166","2115","2126","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.166.pdf","diera-etal-2026-do",[4869,4872],{"paper_id":4860,"author_seq":247,"given_name":4870,"surname":4871,"affiliation":63,"orcid":63},"Andor","Diera",{"paper_id":4860,"author_seq":232,"given_name":4873,"surname":4874,"affiliation":63,"orcid":63},"Ansgar","Scherp","Understanding whether large language models (LLMs) capture structured meaning requires examining how they represent concept relationships. In this work, we study three models of increasing scale: Pythia-70M, GPT-2, and Llama 3.1 8B, focusing on four semantic relations: synonymy, antonymy, hypernymy, and hyponymy. We combine linear probing with mechanistic interpretability techniques, including sparse autoencoders (SAE) and activation patching, to identify where these relations are encoded and how specific features contribute to their representation. Our results reveal a directional asymmetry in hierarchical relations: hypernymy is encoded redundantly and resists suppression, while hyponymy relies on compact features that are more easily disrupted by ablation. 
More broadly, relation signals are diffuse but exhibit stable profiles: they peak in the mid-layers and are stronger in post-residual\u002FMLP pathways than in attention. Difficulty is consistent across models (antonymy easiest, synonymy hardest). Probe-level causality is capacity-dependent: on Llama 3.1, SAE-guided patching reliably shifts these signals, whereas on smaller models the shifts are weak or unstable. Our results clarify where and how reliably semantic relations are represented inside LLMs, and provide a reproducible framework for relating sparse features to probe-level causal evidence.",{"paper_id":4877,"title":4878,"year":7,"month":188,"day":63,"doi":4879,"resource_url":4880,"first_page":4881,"last_page":4882,"pdf_url":4883,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4884,"paper_type":860,"authors":4885,"abstract":4889},"lrec2026-main-167","The Sufficiency-Conciseness Trade-off in LLM Self-Explanation from an Information Bottleneck Perspective","10.63317\u002F4y3jedgirr4b","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-167","2127","2144","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.167.pdf","zahedzadeh-etal-2026-sufficiency",[4886,4888],{"paper_id":4877,"author_seq":247,"given_name":2207,"surname":4887,"affiliation":63,"orcid":63},"Zahedzadeh",{"paper_id":4877,"author_seq":232,"given_name":1755,"surname":1756,"affiliation":63,"orcid":63},"Large Language Models increasingly rely on self-explanations, such as chain of thought reasoning, to improve performance on multi step question answering. While these explanations enhance accuracy, they are often verbose and costly to generate, raising the question of how much explanation is truly necessary. In this paper, we examine the trade-off between sufficiency, defined as the ability of an explanation to justify the correct answer, and conciseness, defined as the reduction in explanation length. 
Building on the information bottleneck principle, we conceptualize explanations as compressed representations that retain only the information essential for producing correct answers. To operationalize this view, we introduce an evaluation pipeline that constrains explanation length and assesses sufficiency using multiple language models on the ARC Challenge dataset. To broaden the scope, we conduct experiments in both English, using the original dataset, and Persian, as a resource-limited language through translation. Our experiments show that more concise explanations often remain sufficient, preserving accuracy while substantially reducing explanation length, whereas excessive compression leads to performance degradation.",{"paper_id":4891,"title":4892,"year":7,"month":188,"day":63,"doi":4893,"resource_url":4894,"first_page":4895,"last_page":4896,"pdf_url":4897,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4898,"paper_type":860,"authors":4899,"abstract":4906},"lrec2026-main-168","Node-Level Uncertainty Estimation in LLM-Generated SQL","10.63317\u002F3i2i8xy5e7hs","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-168","2145","2153","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.168.pdf","hasson-etal-2026-node",[4900,4903],{"paper_id":4891,"author_seq":247,"given_name":4901,"surname":4902,"affiliation":63,"orcid":63},"Hilaf","Hasson",{"paper_id":4891,"author_seq":232,"given_name":4904,"surname":4905,"affiliation":63,"orcid":63},"Ruocheng","Guo","We present a practical framework for detecting errors in LLM-generated SQL by estimating uncertainty at the level of individual nodes in the query’s abstract syntax tree (AST). Our approach proceeds in two stages. First, we introduce a semantically aware labeling algorithm that, given a generated SQL and a gold reference, assigns node-level correctness without over-penalizing structural containers or alias variation. 
Second, we represent each node with a rich set of schema-aware and lexical features - capturing identifier validity, alias resolution, type compatibility, ambiguity in scope, and typo signals - and train a supervised classifier to predict per-node error probabilities. We interpret these probabilities as calibrated uncertainty, enabling fine-grained diagnostics that pinpoint exactly where a query is likely to be wrong. Across multiple databases and datasets, our method substantially outperforms token log-probabilities: average AUC improves by +27.44% while maintaining robustness under cross-database evaluation. Beyond serving as an accuracy signal, node-level uncertainty supports targeted repair, human-in-the-loop review, and downstream selective execution. Together, these results establish node-centric, semantically grounded uncertainty estimation as a strong and interpretable alternative to aggregate sequence-level confidence measures.",{"paper_id":4908,"title":4909,"year":7,"month":188,"day":63,"doi":4910,"resource_url":4911,"first_page":4912,"last_page":4913,"pdf_url":4914,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4915,"paper_type":860,"authors":4916,"abstract":4924},"lrec2026-main-169","A Typologically Grounded Evaluation Framework for Word Order and Morphology Sensitivity in Multilingual Masked LMs","10.63317\u002F34sp5kjstcea","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-169","2154","2165","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.169.pdf","feldman-etal-2026-typologically",[4917,4919,4922],{"paper_id":4908,"author_seq":247,"given_name":2742,"surname":4918,"affiliation":63,"orcid":63},"Feldman",{"paper_id":4908,"author_seq":232,"given_name":4920,"surname":4921,"affiliation":63,"orcid":63},"Libby","Barak",{"paper_id":4908,"author_seq":218,"given_name":4923,"surname":3168,"affiliation":63,"orcid":63},"Jing","We introduce a typology-aware diagnostic for multilingual 
masked language models that tests reliance on word order versus inflectional form. Using Universal Dependencies, we apply inference-time perturbations: full token scrambling, content-word scrambling with function words fixed, dependency-based head–dependent swaps, and sentence-level lemma substitution (+L), which lemmatizes both the context and the masked target label. We evaluate mBERT and XLM-R on English, Chinese, German, Spanish, and Russian. Full scrambling drives word-level reconstruction accuracy near zero in all languages; partial and head–dependent perturbations cause smaller but still large drops. +L has little effect in Chinese but substantially lowers accuracy in German\u002FSpanish\u002FRussian, and it does not mitigate the impact of scrambling. Top-5 word accuracy shows the same pattern: under full scrambling, the gold word rarely appears among the five highest-ranked reconstructions. We release code, sampling scripts, and balanced evaluation subsets; Turkish results under strict reconstruction are reported in the appendix.",{"paper_id":4926,"title":4927,"year":7,"month":188,"day":63,"doi":4928,"resource_url":4929,"first_page":4930,"last_page":4931,"pdf_url":4932,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4933,"paper_type":860,"authors":4934,"abstract":4948},"lrec2026-main-170","From Generation to Evaluation: A Resource for Error-Categorized Question Generation from Video 
Transcripts","10.63317\u002F5c7pqn99wt3a","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-170","2166","2177","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.170.pdf","berger-etal-2026-generation",[4935,4938,4941,4944,4947],{"paper_id":4926,"author_seq":247,"given_name":4936,"surname":4937,"affiliation":63,"orcid":63},"Joshua","Berger",{"paper_id":4926,"author_seq":232,"given_name":4939,"surname":4940,"affiliation":63,"orcid":63},"Markos","Stamatakis",{"paper_id":4926,"author_seq":218,"given_name":4942,"surname":4943,"affiliation":63,"orcid":63},"Anett","Hoppe",{"paper_id":4926,"author_seq":203,"given_name":4945,"surname":4946,"affiliation":63,"orcid":63},"Ralph","Ewerth",{"paper_id":4926,"author_seq":188,"given_name":3643,"surname":4647,"affiliation":63,"orcid":63},"A key challenge in automated question generation is producing grammatically correct, error-free, and contextually relevant questions. While large language models already handle this well, smaller models that can run on consumer-grade hardware face greater difficulties. Another obstacle is the lack of large, high-quality datasets, particularly for education video transcripts, which limits the diversity and applicability of training data. On top of this, current evaluation methods either rely on strict comparison to a \"ground truth,\" undervaluing valid but unmatched questions, or on expert judgments, which do not scale. They do not provide insights into the nature of errors. In this paper, we introduce a dataset of real-life educational video transcripts and investigate the question-generating capabilities of small language models by assessing their output with pre-defined error categories. We also present a novel approach to automatic quality assessment by classifying questions into predefined error categories. We show that questions generated by small language models are still prone to error. 
Our proposed classification approach outperforms baseline approaches and matches GPT-5 performance by reaching an accuracy of 72%.",{"paper_id":4950,"title":4951,"year":7,"month":188,"day":63,"doi":4952,"resource_url":4953,"first_page":4954,"last_page":4955,"pdf_url":4956,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4957,"paper_type":860,"authors":4958,"abstract":4965},"lrec2026-main-171","From Behavior to Geometry: A Causal and Geometric Analysis of LoRA-Based Domain Adaptation","10.63317\u002F4tpdsheoivgs","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-171","2178","2189","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.171.pdf","wang-etal-2026-behavior",[4959,4961,4963],{"paper_id":4950,"author_seq":247,"given_name":4960,"surname":3676,"affiliation":63,"orcid":63},"Yizhe",{"paper_id":4950,"author_seq":232,"given_name":3916,"surname":4962,"affiliation":63,"orcid":63},"He",{"paper_id":4950,"author_seq":218,"given_name":4964,"surname":4156,"affiliation":63,"orcid":63},"Zhenhua","Parameter-efficient fine-tuning with Low-Rank Adaptation (LoRA) often improves a large language model’s in-domain performance at the cost of cross-domain generalization. We investigate the mechanistic basis for this trade-off, asking whether LoRA creates new discriminative directions in representation space (emergence) or merely reshapes pre-existing ones. Using a Word Sense Disambiguation testbed, we couple controlled behavioral evaluation with causal localization and geometric diagnostics. We find LoRA learns new, spatially localized discriminative directions in the middle layers of the network, focused at token positions critical for the task. This \"subspace extension\" account explains why LoRA-tuned models excel on in-domain data but struggle to transfer. 
As a proof of concept, we introduce a mechanistically informed LoRA configuration that concentrates capacity in the identified layers, promotes rank diversity, and applies light answer-token calibration. Without increasing training budget, it yields consistent improvements in both in- and cross-domain settings, demonstrating that mechanistic insight can guide more efficient adaptation.",{"paper_id":4967,"title":4968,"year":7,"month":188,"day":63,"doi":4969,"resource_url":4970,"first_page":4971,"last_page":4972,"pdf_url":4973,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":4974,"bibkey":4975,"paper_type":860,"authors":4976,"abstract":4982},"lrec2026-main-172","Explainable Semantic Textual Similarity via Dissimilar Span Detection","10.63317\u002F4cewrdkiy6ph","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-172","2190","2210","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.172.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.172_OptionalSupplementaryMaterial.zip","lozano-etal-2026-explainable",[4977,4980,4981],{"paper_id":4967,"author_seq":247,"given_name":4978,"surname":4979,"affiliation":63,"orcid":63},"Diego Miguel","Lozano",{"paper_id":4967,"author_seq":232,"given_name":866,"surname":867,"affiliation":63,"orcid":63},{"paper_id":4967,"author_seq":218,"given_name":869,"surname":870,"affiliation":63,"orcid":63},"Semantic Textual Similarity (STS) is a crucial component of many Natural Language Processing (NLP) applications. However, existing approaches typically reduce semantic nuances to a single score, limiting interpretability. To address this, we introduce the task of Dissimilar Span Detection (DSD), which aims to identify semantically differing spans between pairs of texts. 
This can help users understand which particular words or tokens negatively affect the similarity score, or be used to improve performance in STS-dependent downstream tasks. Furthermore, we release a new dataset suitable for the task, the Span Similarity Dataset (SSD), developed through a semi-automated pipeline combining large language models (LLMs) with human verification. We propose and evaluate different baseline methods for DSD, both unsupervised—based on LIME, SHAP, LLMs, and our own method—as well as an additional supervised approach. While LLMs and supervised models achieve the highest performance, overall results remain low, highlighting the complexity of the task. Finally, we set up an additional experiment that shows how DSD can lead to increased performance in the specific task of paraphrase detection.",{"paper_id":4984,"title":4985,"year":7,"month":188,"day":63,"doi":4986,"resource_url":4987,"first_page":4988,"last_page":4989,"pdf_url":4990,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4991,"paper_type":860,"authors":4992,"abstract":5011},"lrec2026-main-173","BIS Reasoning 1.0: The First Large-Scale Japanese Benchmark for Belief-Inconsistent Syllogistic Reasoning","10.63317\u002F4p65gcdbomon","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-173","2211","2219","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.173.pdf","nguyen-etal-2026-bis",[4993,4995,4998,5000,5002,5005,5008],{"paper_id":4984,"author_seq":247,"given_name":4994,"surname":2395,"affiliation":63,"orcid":63},"Ha 
Thanh",{"paper_id":4984,"author_seq":232,"given_name":4996,"surname":4997,"affiliation":63,"orcid":63},"Hideyuki","Tachibana",{"paper_id":4984,"author_seq":218,"given_name":4999,"surname":3916,"affiliation":63,"orcid":63},"Chaoran",{"paper_id":4984,"author_seq":203,"given_name":5001,"surname":3916,"affiliation":63,"orcid":63},"Qianying",{"paper_id":4984,"author_seq":188,"given_name":5003,"surname":5004,"affiliation":63,"orcid":63},"Su Myat","Noe",{"paper_id":4984,"author_seq":172,"given_name":5006,"surname":5007,"affiliation":63,"orcid":63},"Koichi","Takeda",{"paper_id":4984,"author_seq":155,"given_name":5009,"surname":5010,"affiliation":63,"orcid":63},"Sadao","Kurohashi","We present BIS Reasoning 1.0, the first large-scale Japanese dataset of syllogistic reasoning problems explicitly designed to evaluate belief-inconsistent reasoning in large language models (LLMs). Unlike prior resources such as NeuBAROCO and JFLD, which emphasize general or belief-aligned logic, BIS Reasoning 1.0 systematically introduces logically valid yet belief-inconsistent syllogisms to expose belief bias—the tendency to accept believable conclusions irrespective of validity. We benchmark a representative suite of cutting-edge models—including OpenAI GPT-5 variants, GPT-4o, Qwen, and prominent Japanese LLMs—under a uniform, zero-shot protocol. Reasoning-centric models achieve near-perfect accuracy on BIS Reasoning 1.0 (e.g., Qwen3-32B ≈99% and GPT-5-mini up to ≈99.7%), while GPT-4o attains around 80%. Earlier Japanese-specialized models underperform, often well below 60%, whereas the latest llm-jp-3.1-13b-instruct4 markedly improves to the mid-80% range. These results indicate that robustness to belief-inconsistent inputs is driven more by explicit reasoning optimization than by language specialization or scale alone. 
Our analysis further shows that even top-tier systems falter when logical validity conflicts with intuitive or factual beliefs, and that performance is sensitive to prompt design and inference-time reasoning effort. We discuss implications for safety-critical domains—law, healthcare, and scientific literature—where strict logical fidelity must override intuitive belief to ensure reliability.",{"paper_id":5013,"title":5014,"year":7,"month":188,"day":63,"doi":5015,"resource_url":5016,"first_page":5017,"last_page":5018,"pdf_url":5019,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5020,"paper_type":860,"authors":5021,"abstract":5025},"lrec2026-main-174","A Discourse-based Tool Series for Logical Validation of LLMs","10.63317\u002F5fg7umsbz7m5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-174","2220","2231","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.174.pdf","galitsky-etal-2026-discourse",[5022,5024],{"paper_id":5013,"author_seq":247,"given_name":4302,"surname":5023,"affiliation":63,"orcid":63},"Galitsky",{"paper_id":5013,"author_seq":232,"given_name":2748,"surname":4782,"affiliation":63,"orcid":63},"Large Language Models (LLMs) frequently produce fluent but unverifiable reasoning, resulting in potential hallucinations and faulty inferences. This study proposes a logic programming - based verification framework ValidLogic4LLM in which the reasoning expressed by an LLM is transformed into a logic program (LP), probabilistic LP, defeasible LP and abductive LP representing world knowledge and a given problem description—such as a patient health complaint. The LP formed by an LLM is executed within a symbolic reasoning engine, and the resulting inferences are compared to the LLM’s natural-language conclusions. The strength or probability of facts, clauses and arguments is computed based on discourse structure of text expressing these facts or arguments. 
Divergence between symbolic and neural reasoning outcomes indicates possible hallucination or inconsistency in the model’s internal logic.",{"paper_id":5027,"title":5028,"year":7,"month":188,"day":63,"doi":5029,"resource_url":5030,"first_page":5031,"last_page":5032,"pdf_url":5033,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5034,"paper_type":860,"authors":5035,"abstract":5052},"lrec2026-main-175","Voice, Bias, and Coreference: An Interpretability Study of Gender in Speech Translation","10.63317\u002F3knqp5mifai3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-175","2232","2248","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.175.pdf","conti-etal-2026-voice",[5036,5039,5042,5044,5047,5049],{"paper_id":5027,"author_seq":247,"given_name":5037,"surname":5038,"affiliation":63,"orcid":63},"Lina","Conti",{"paper_id":5027,"author_seq":232,"given_name":5040,"surname":5041,"affiliation":63,"orcid":63},"Dennis","Fucci",{"paper_id":5027,"author_seq":218,"given_name":2146,"surname":5043,"affiliation":63,"orcid":63},"Gaido",{"paper_id":5027,"author_seq":203,"given_name":5045,"surname":5046,"affiliation":63,"orcid":63},"Matteo","Negri",{"paper_id":5027,"author_seq":188,"given_name":1150,"surname":5048,"affiliation":63,"orcid":63},"Wisniewski",{"paper_id":5027,"author_seq":172,"given_name":5050,"surname":5051,"affiliation":63,"orcid":63},"Luisa","Bentivogli","Unlike text, speech conveys information about the speaker, such as gender, through acoustic cues like pitch. This gives rise to modality-specific bias concerns. For example, in speech translation (ST), when translating from languages with notional gender, such as English, into languages where gender-ambiguous terms referring to the speaker are assigned grammatical gender, the speaker’s vocal characteristics may play a role in gender assignment. 
This risks misgendering speakers—whether through masculine defaults or vocal-based assumptions—yet how ST models make these decisions remains poorly understood. We investigate the mechanisms ST models use to assign gender to speaker-referring terms across three language pairs (en→es\u002Ffr\u002Fit). To do so, we examine how training data patterns, internal language model (ILM) biases, and acoustic information interact. We find that models do not simply replicate term-specific gender associations from training data, but learn broader patterns of masculine prevalence. While the ILM exhibits strong masculine bias, models can override these preferences based on acoustic input. Using contrastive feature attribution on spectrograms, we reveal that the model with higher gender accuracy relies on a previously unknown mechanism: using first-person pronouns to link gendered terms back to the speaker, accessing gender information distributed across the frequency spectrum rather than concentrated in pitch.",{"paper_id":5054,"title":5055,"year":7,"month":188,"day":63,"doi":5056,"resource_url":5057,"first_page":5058,"last_page":5059,"pdf_url":5060,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5061,"paper_type":860,"authors":5062,"abstract":5076},"lrec2026-main-176","MUCH: A Multilingual Claim Hallucination Benchmark","10.63317\u002F4zrfhra4azck","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-176","2249","2267","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.176.pdf","dentan-etal-2026-much",[5063,5065,5068,5070,5073],{"paper_id":5054,"author_seq":247,"given_name":1015,"surname":5064,"affiliation":63,"orcid":63},"Dentan",{"paper_id":5054,"author_seq":232,"given_name":5066,"surname":5067,"affiliation":63,"orcid":63},"Alexi 
Stanislas","Canesse",{"paper_id":5054,"author_seq":218,"given_name":1709,"surname":5069,"affiliation":63,"orcid":63},"Buscaldi",{"paper_id":5054,"author_seq":203,"given_name":5071,"surname":5072,"affiliation":63,"orcid":63},"Aymen","Shabou",{"paper_id":5054,"author_seq":188,"given_name":5074,"surname":5075,"affiliation":63,"orcid":63},"Sonia","Vanier","Claim-level Uncertainty Quantification (UQ) is a promising approach to mitigate the lack of reliability in Large Language Models (LLMs). We introduce MUCH, the first claim-level UQ benchmark designed for fair and reproducible evaluation of future methods under realistic conditions. It includes 4,876 samples across four European languages (English, French, Spanish, and German) and four instruction-tuned open-weight LLMs. Unlike prior claim-level benchmarks, we release 24 generation logits per token, facilitating the development of future white-box methods without re-generating data. Moreover, in contrast to previous benchmarks that rely on manual or LLM-based segmentation, we propose a new deterministic algorithm capable of segmenting claims using as little as 0.1% of the LLM generation time. This makes our segmentation approach suitable for real-time monitoring of LLM outputs, ensuring that MUCH evaluates UQ methods under realistic deployment constraints. 
Finally, our evaluations show that current methods still have substantial room for improvement in both performance and efficiency.",{"paper_id":5078,"title":5079,"year":7,"month":188,"day":63,"doi":5080,"resource_url":5081,"first_page":5082,"last_page":5083,"pdf_url":5084,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5085,"paper_type":860,"authors":5086,"abstract":5095},"lrec2026-main-177","AgriChain: Visually-Grounded Expert-Verified Reasoning for Interpretable Agricultural Vision–Language Models","10.63317\u002F2bv5k9hnduop","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-177","2268","2276","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.177.pdf","mahmood-etal-2026-agrichain",[5087,5090,5092],{"paper_id":5078,"author_seq":247,"given_name":5088,"surname":5089,"affiliation":63,"orcid":63},"Hazza","Mahmood",{"paper_id":5078,"author_seq":232,"given_name":5091,"surname":2998,"affiliation":63,"orcid":63},"Yongqiang",{"paper_id":5078,"author_seq":218,"given_name":5093,"surname":5094,"affiliation":63,"orcid":63},"Rao","Anwer","Accurate and interpretable plant disease diagnosis remains a key challenge for vision–language models in real agricultural settings. We present AgriChain, a new dataset of around 11,000 expert-curated leaf images covering a wide range of crops and diseases. Each image is paired with a disease label, a calibrated confidence score, and an expert-verified chain-of-thought explanation. Draft rationales were first generated by GPT-4o and then refined by a professional agricultural engineer using standard descriptors such as lesion color, margin, and distribution. Using these data, we fine-tune the open vision–language model Qwen-2.5-VL-3B to jointly identify diseases and explain its reasoning in a way that mirrors expert thinking. On a 1,000-image test set, our model reaches 73.1% accuracy and produces explanations that align closely with human expertise. 
These results show that expert-verified reasoning supervision enhances both performance and interpretability, bringing us closer to transparent and trustworthy AI tools for sustainable agriculture.To support reproducibility and further research, the dataset and code are publicly available at https:\u002F\u002Fgithub.com\u002Fhazzanabeel12-netizen\u002Fagrichain.",{"paper_id":5097,"title":5098,"year":7,"month":188,"day":63,"doi":5099,"resource_url":5100,"first_page":5101,"last_page":5102,"pdf_url":5103,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":5104,"bibkey":5105,"paper_type":860,"authors":5106,"abstract":5116},"lrec2026-main-178","SyntaxGym for French: Resource, Annotation, and Evaluation of French and Multilingual LLMs","10.63317\u002F23h32g4nap9i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-178","2277","2287","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.178.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.178_OptionalSupplementaryMaterial.zip","bladier-etal-2026-syntaxgym",[5107,5110,5113],{"paper_id":5097,"author_seq":247,"given_name":5108,"surname":5109,"affiliation":63,"orcid":63},"Tatiana","Bladier",{"paper_id":5097,"author_seq":232,"given_name":5111,"surname":5112,"affiliation":63,"orcid":63},"Henri-José","Deulofeu",{"paper_id":5097,"author_seq":218,"given_name":5114,"surname":5115,"affiliation":63,"orcid":63},"Alexis","Nasr","Despite recent advances in large language models (LLMs), their syntactic competence remains insufficiently characterized, especially for languages other than English. While benchmarks such as BLiMP and SyntaxGym have enabled systematic syntactic evaluation in English and Spanish, no comparable resource exists for French. To address this gap, we present SyntaxGymFR, a manually curated evaluation suite for evaluating the syntactic abilities of French and multilingual LLMs. 
SyntaxGymFR consists of manually validated minimal sentence pairs targeting key syntactic phenomena in French. We describe the annotation methodology, the selection of linguistic constructions, and the validation procedures used to ensure the coverage of syntactic phenomena. Furthermore, we report experimental results obtained with several French and multilingual LLMs, analyzing their sensitivity to grammatical contrasts and cross-linguistic transfer effects. Our results provide new insights into the syntactic generalization capabilities of French LLMs and establish SyntaxGymFR as a benchmark for future research on language-specific evaluation of syntactic competence.",{"paper_id":5118,"title":5119,"year":7,"month":188,"day":63,"doi":5120,"resource_url":5121,"first_page":5122,"last_page":5123,"pdf_url":5124,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5125,"paper_type":860,"authors":5126,"abstract":5134},"lrec2026-main-179","Modeling the Human Lexicon under Temperature Variations: Linguistic Factors, Diversity and Typicality in LLM Word Associations","10.63317\u002F5fozb5xnyvio","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-179","2288","2298","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.179.pdf","rodriguez-etal-2026-modeling",[5127,5130,5131],{"paper_id":5118,"author_seq":247,"given_name":5128,"surname":5129,"affiliation":63,"orcid":63},"Maria A.","Rodriguez",{"paper_id":5118,"author_seq":232,"given_name":1938,"surname":2329,"affiliation":63,"orcid":63},{"paper_id":5118,"author_seq":218,"given_name":5132,"surname":5133,"affiliation":63,"orcid":63},"Richard","Huyghe","Large language models (LLMs) achieve impressive results in terms of fluency in text generation, yet the nature of their linguistic knowledge – in particular the human-likeness of their internal lexicon – remains uncertain. 
This study compares human and LLM-generated word associations to evaluate how accurately models capture human lexical patterns. Using English cue-response pairs from the SWOW-EN dataset and newly generated associations from three LLMs (Mistral-7B, Llama-3.1-8B, and Qwen-2.5-32B) across multiple temperature settings, we examine (i) the influence of lexical factors such as word frequency and concreteness on cue-response pairs, and (ii) the variability and typicality of LLM responses relative to humans. Results show that all models mirror human trends for frequency and concreteness but differ in response variability and typicality. Larger models such as Qwen tend to emulate a single “prototypical” human participant, generating highly typical but minimally variable responses, while smaller models such as Mistral and Llama produce more variable yet less typical responses. Temperature settings further influence this trade-off, with higher values increasing variability but decreasing typicality. 
These findings highlight both the similarities and differences between human and LLM lexicons, emphasizing the need to account for model size and temperature when probing LLM lexical representations.",{"paper_id":5136,"title":5137,"year":7,"month":188,"day":63,"doi":5138,"resource_url":5139,"first_page":5140,"last_page":5141,"pdf_url":5142,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5143,"paper_type":860,"authors":5144,"abstract":5151},"lrec2026-main-180","Object Realisation in Spoken Guadeloupan French: Evaluating NLP Models for an Under-Resourced Variety","10.63317\u002F29g57awqis6v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-180","2299","2308","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.180.pdf","npoles-etal-2026-object",[5145,5148],{"paper_id":5136,"author_seq":247,"given_name":5146,"surname":5147,"affiliation":63,"orcid":63},"Amalia Canes","Nápoles",{"paper_id":5136,"author_seq":232,"given_name":5149,"surname":5150,"affiliation":63,"orcid":63},"Sophie","Repp","This paper contributes to the evaluation of natural language parsing models applied to colloquial speech in lesser studied varieties of a language. We are reporting on the performance of speech recognition and of universal dependency (UD) parsing models in a radio corpus of colloquial French spoken in Guadaloupe (GuaFr), which is in contact with a typologically distant language, French-based Guadaloupean Creole (GuaCr). The corpus poses specific challenges due to phonetic and syntactic specifics of GuaFr, as well as the occurrence of code switching to GuaCr. We show weakening the ASR decoder’s language-model (LM) in various parameters avoids hallucination of null objects, which have been described as typical for spoken GuaFr, but not of non-standard object clitic positioning. 
For UD parsing, we investigate utterance segmentation as the primary lever to affect model performance and compare different segmentation sources (ASR punctuation, manual chunking, UD parser tokenization) and their combination. We highlight both strengths and pitfalls of the models, again focussing on the expression of syntactic objects.",{"paper_id":5153,"title":5154,"year":7,"month":188,"day":63,"doi":5155,"resource_url":5156,"first_page":5157,"last_page":5158,"pdf_url":5159,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5160,"paper_type":860,"authors":5161,"abstract":5177},"lrec2026-main-181","Reason2Decide: Rationale-Driven Multi-Task Learning","10.63317\u002F3uo797cnah93","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-181","2309","2322","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.181.pdf","hasan-etal-2026-reason2decide",[5162,5165,5168,5171,5174],{"paper_id":5153,"author_seq":247,"given_name":5163,"surname":5164,"affiliation":63,"orcid":63},"H M Quamran","Hasan",{"paper_id":5153,"author_seq":232,"given_name":5166,"surname":5167,"affiliation":63,"orcid":63},"Housam Khalifa","Bashier",{"paper_id":5153,"author_seq":218,"given_name":5169,"surname":5170,"affiliation":63,"orcid":63},"Jiayi","Dai",{"paper_id":5153,"author_seq":203,"given_name":5172,"surname":5173,"affiliation":63,"orcid":63},"Mi-Young","Kim",{"paper_id":5153,"author_seq":188,"given_name":5175,"surname":5176,"affiliation":63,"orcid":63},"Randy","Goebel","Despite the wide adoption of Large Language Models (LLM)s, clinical decision support systems face a critical challenge: achieving high predictive accuracy while generating explanations aligned with those predictions. Current approaches suffer from exposure bias, leading to misaligned explanations. We propose Reason2Decide, a two-stage training framework that addresses key challenges in self-rationalization, including exposure bias and task separation. 
In Stage-1, our model is trained on rationale generation, while in Stage-2, we jointly train on label prediction and rationale generation, applying scheduled sampling to gradually transition from conditioning on gold labels to model predictions. We evaluate Reason2Decide on three medical datasets, including a proprietary triage dataset and public biomedical QA datasets. Across model sizes, Reason2Decide outperforms other fine-tuned baselines and some zero-shot LLMs in prediction (F1) and rationale fidelity (BERTScore, BLEU, LLM-as-a-Judge). In triage, Reason2Decide is rationale source-robust across LLM-generated, nurse-authored, and nurse-post-processed rationales. In our experiments, while using only LLM-generated rationales in Stage-1, Reason2Decide outperforms other fine-tuned variants. This indicates that LLM-generated rationales are suitable for pretraining models, reducing reliance on human annotations. Remarkably, Reason2Decide achieves these gains with models 40x smaller than contemporary foundation models, making clinical reasoning more accessible for resource-constrained deployments while still providing explainable decision support.",{"paper_id":5179,"title":5180,"year":7,"month":188,"day":63,"doi":5181,"resource_url":5182,"first_page":5183,"last_page":5184,"pdf_url":5185,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5186,"paper_type":860,"authors":5187,"abstract":5197},"lrec2026-main-182","Ragability Benchmark: A Dataset and Library to Test LLMs on Inter-context 
Conflicts","10.63317\u002F2ty3hnn3bgb9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-182","2323","2333","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.182.pdf","gross-etal-2026-ragability",[5188,5191,5194],{"paper_id":5179,"author_seq":247,"given_name":5189,"surname":5190,"affiliation":63,"orcid":63},"Stephanie","Gross",{"paper_id":5179,"author_seq":232,"given_name":5192,"surname":5193,"affiliation":63,"orcid":63},"Johann","Petrak",{"paper_id":5179,"author_seq":218,"given_name":5195,"surname":5196,"affiliation":63,"orcid":63},"Brigitte","Krenn","Knowledge conflicts are a challenging issue when applying retrieval augmented generation (RAG) systems. In this paper, we propose a benchmark to test LLMs on how they deal with inter-context knowledge conflicts where implicit reasoning is required to solve the conflict. Based on actual empirical examples, real entities are replaced by fantasy entities to make sure the model’s internal knowledge does not influence how the model deals with external conflicting information. The proposed benchmark can be used to assess current up-to-date LLMs, but it can also flexibly be adapted for in-depth evaluation of a specific RAG system on selected aspects of conflict identification. We also present an experiment where we apply the benchmark to test 7 current LLMs from different model families. The results show that LLMs are able to identify conflicting contexts (’Is there a contradiction, yes or no?’), while they struggle with answering content related queries. 
Adding a hint that there might be a contradiction in the provided contexts increases the performance of conflict identification for contradictory context, while it significantly decreases the performance for non-contradictory contexts.",{"paper_id":5199,"title":5200,"year":7,"month":188,"day":63,"doi":5201,"resource_url":5202,"first_page":5203,"last_page":5204,"pdf_url":5205,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5206,"paper_type":860,"authors":5207,"abstract":5218},"lrec2026-main-183","Evaluating the Adaptability of Large Language Models to Linguistic Variation","10.63317\u002F57bpwacmcpr2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-183","2334","2343","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.183.pdf","xu-etal-2026-evaluating",[5208,5210,5212,5213,5216],{"paper_id":5199,"author_seq":247,"given_name":5209,"surname":3290,"affiliation":63,"orcid":63},"Ziyan",{"paper_id":5199,"author_seq":232,"given_name":4854,"surname":5211,"affiliation":63,"orcid":63},"Seghier",{"paper_id":5199,"author_seq":218,"given_name":3543,"surname":3544,"affiliation":63,"orcid":63},{"paper_id":5199,"author_seq":203,"given_name":5214,"surname":5215,"affiliation":63,"orcid":63},"Carlos-Emiliano","Gonzalez-Gallardo",{"paper_id":5199,"author_seq":188,"given_name":5217,"surname":2409,"affiliation":63,"orcid":63},"Jean-Yves","Large language models (LLMs) are often assumed to generalize easily across linguistic contexts, yet their ability to adapt to genre variation remains underexplored. This study examines that question through a French Named Entity Recognition (NER) task conducted on NEM.fr, a multi-genre corpus annotated with gold named entities (NEs) spanning 11 text types, from juridical and encyclopedic prose to poetry, political speech, and online discourse. 
We evaluate the reasoning-oriented model DeepSeek R1 across six prompting configurations (zero-, one-, and few-shot, with and without chain-of-thought reasoning), while keeping the annotation scheme, prompting format, and evaluation pipeline constant to isolate the role of genre. Performance is measured using both strict and fuzzy F1-based metrics. The results show that prompting choices have little effect once the model has learned the task format, but that genre differences strongly influence outcomes: fuzzy F1 scores range from about 0.85 in formal genres to below 0.20 in informal ones. Even under tightly controlled conditions, LLM behaviour proves highly sensitive to textual regularity and stylistic variation, highlighting genre as a key factor in assessing model robustness.",{"paper_id":5220,"title":5221,"year":7,"month":188,"day":63,"doi":5222,"resource_url":5223,"first_page":5224,"last_page":5225,"pdf_url":5226,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5227,"paper_type":860,"authors":5228,"abstract":5246},"lrec2026-main-184","Probing Discrete Speech Tokens of Spoken Language 
Models","10.63317\u002F4d4kttgdydde","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-184","2344","2354","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.184.pdf","naber-etal-2026-probing",[5229,5232,5235,5237,5240,5243],{"paper_id":5220,"author_seq":247,"given_name":5230,"surname":5231,"affiliation":63,"orcid":63},"Sven","Naber",{"paper_id":5220,"author_seq":232,"given_name":5233,"surname":5234,"affiliation":63,"orcid":63},"Julia","Koch",{"paper_id":5220,"author_seq":218,"given_name":5236,"surname":2253,"affiliation":63,"orcid":63},"Pranav",{"paper_id":5220,"author_seq":203,"given_name":5238,"surname":5239,"affiliation":63,"orcid":63},"Alberto","Saponaro",{"paper_id":5220,"author_seq":188,"given_name":5241,"surname":5242,"affiliation":63,"orcid":63},"Ioanna","Karagianni",{"paper_id":5220,"author_seq":172,"given_name":5244,"surname":5245,"affiliation":63,"orcid":63},"Ngoc Thang","Vu","This paper presents a framework for systematic probing of discrete speech token representations in spoken language models (SLMs). We propose three complementary components: a distributional divergence analysis testing whether an attribute is reflected in token usage, token-based classifiers to quantify recoverability and an attribute-conditioned representation analysis revealing phonetic attribute realizations. As a demonstration we apply these probes to tokenizer outputs and model generations from CosyVoice2 and SparkTTS on LibriTTS-R and VCTK. We find that gender is encoded in their respective tokens but in different forms - the signal is more stable across stages and datasets in CosyVoice2, whereas SparkTTS shows weaker cross-stage consistency and stronger pause\u002Fprosody-related effects. Exploratory probes of valence, arousal, and dominance are weaker and less consistent. 
These results show that discrete speech tokens retain speaker-related information in different ways across architectures and that the proposed framework provides an interpretable basis for comparing token representations across spoken language modeling pipelines.",{"paper_id":5248,"title":5249,"year":7,"month":188,"day":63,"doi":5250,"resource_url":5251,"first_page":5252,"last_page":5253,"pdf_url":5254,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5255,"paper_type":860,"authors":5256,"abstract":5277},"lrec2026-main-185","When Consistency Becomes Bias: Interviewer Effects in Semi-Structured Clinical Interviews","10.63317\u002F34hw23mzd8c7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-185","2355","2361","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.185.pdf","watawana-etal-2026-when",[5257,5260,5263,5266,5268,5271,5274],{"paper_id":5248,"author_seq":247,"given_name":5258,"surname":5259,"affiliation":63,"orcid":63},"Hasindri Sankalpana","Watawana",{"paper_id":5248,"author_seq":232,"given_name":5261,"surname":5262,"affiliation":63,"orcid":63},"Sergio Gastón","Burdisso",{"paper_id":5248,"author_seq":218,"given_name":5264,"surname":5265,"affiliation":63,"orcid":63},"Diego Aaron","Moreno-Galvan",{"paper_id":5248,"author_seq":203,"given_name":2846,"surname":5267,"affiliation":63,"orcid":63},"Sanchez-Vega",{"paper_id":5248,"author_seq":188,"given_name":5269,"surname":5270,"affiliation":63,"orcid":63},"Adrian Pastor Lopez","Monroy",{"paper_id":5248,"author_seq":172,"given_name":5272,"surname":5273,"affiliation":63,"orcid":63},"Petr","Motlicek",{"paper_id":5248,"author_seq":155,"given_name":5275,"surname":5276,"affiliation":63,"orcid":63},"Esau","Villatoro-Tello","Automatic depression detection from doctor–patient conversations has gained momentum thanks to the availability of public corpora and advances in language modeling. 
However, interpretability remains limited: strong performance is often reported without revealing what drives predictions. We analyze three datasets—ANDROIDS, DAIC-WOZ, and E-DAIC—and identify a systematic bias from interviewer prompts in semi-structured interviews. Models trained on interviewer turns exploit fixed prompts and positions to distinguish depressed from control subjects, often achieving high classification scores without using participant language. Restricting models to participant utterances distributes decision evidence more broadly and reflects genuine linguistic cues. While semi-structured protocols ensure consistency, including interviewer prompts inflates performance by leveraging script artifacts. Our results highlight a cross-dataset, architecture-agnostic bias and emphasize the need for analyses that localize decision evidence by time and speaker to ensure models learn from participants’ language.",{"paper_id":5279,"title":5280,"year":7,"month":188,"day":63,"doi":5281,"resource_url":5282,"first_page":5283,"last_page":5284,"pdf_url":5285,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5286,"paper_type":860,"authors":5287,"abstract":5306},"lrec2026-main-186","Constructing a Japanese Claim Decomposition Dataset for Fact-Checking of LLM-Generated 
Texts","10.63317\u002F5nsactrpnuu6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-186","2362","2375","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.186.pdf","masano-etal-2026-constructing",[5288,5291,5294,5295,5297,5300,5303,5304,5305],{"paper_id":5279,"author_seq":247,"given_name":5289,"surname":5290,"affiliation":63,"orcid":63},"Miwa","Masano",{"paper_id":5279,"author_seq":232,"given_name":5292,"surname":5293,"affiliation":63,"orcid":63},"Ribeka","Keyaki",{"paper_id":5279,"author_seq":218,"given_name":886,"surname":5293,"affiliation":63,"orcid":63},{"paper_id":5279,"author_seq":203,"given_name":4179,"surname":5296,"affiliation":63,"orcid":63},"Minamoto",{"paper_id":5279,"author_seq":188,"given_name":5298,"surname":5299,"affiliation":63,"orcid":63},"Kaito","Horio",{"paper_id":5279,"author_seq":172,"given_name":5301,"surname":5302,"affiliation":63,"orcid":63},"Hirokazu","Kiyomaru",{"paper_id":5279,"author_seq":155,"given_name":3506,"surname":3507,"affiliation":63,"orcid":63},{"paper_id":5279,"author_seq":138,"given_name":4996,"surname":4997,"affiliation":63,"orcid":63},{"paper_id":5279,"author_seq":121,"given_name":2790,"surname":3527,"affiliation":63,"orcid":63},"Since texts generated by large language models (LLMs) may contain misinformation (hallucinations), develop- ing fact-checking systems capable of assessing their veracity has become increasingly important. One of the mainstream approaches to fact-checking is the claim-based one, which first decomposes a generated text into claims, i.e., independent and atomic units of information. Each claim is then used as a query to retrieve supporting evidence, and a verdict is predicted for each claim-evidence pair. Conducting fact-checking at the claim level enhances the explainability of verification results. However, achieving highly accurate verification requires that the text be decomposed into claims at an appropriate level of granularity. 
To address this, we constructed a dataset for Japanese claim decomposition. As part of this dataset construction, we design detailed guidelines for claim decomposition, ensuring that the extracted claims are in a form useful for fact-checking and that the decomposition rules mitigate annotator variability. Quantitative evaluation confirmed that the constructed dataset is of high quality. Additionally, experiments on prompt-based claim decomposition using the constructed dataset demonstrated that adding high-quality few-shot examples and guidelines to prompts improved performance.",{"paper_id":5308,"title":5309,"year":7,"month":188,"day":63,"doi":5310,"resource_url":5311,"first_page":5312,"last_page":5313,"pdf_url":5314,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5315,"paper_type":860,"authors":5316,"abstract":5328},"lrec2026-main-187","Using LLMs for Automatic Discipline Annotation in a Diachronic Corpus of English Scientific Papers","10.63317\u002F3j9wvu86v48t","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-187","2376","2386","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.187.pdf","bagdasarov-etal-2026-llms",[5317,5320,5323,5325],{"paper_id":5308,"author_seq":247,"given_name":5318,"surname":5319,"affiliation":63,"orcid":63},"Sergei","Bagdasarov",{"paper_id":5308,"author_seq":232,"given_name":5321,"surname":5322,"affiliation":63,"orcid":63},"Diego","Alves",{"paper_id":5308,"author_seq":218,"given_name":2813,"surname":5324,"affiliation":63,"orcid":63},"Fischer",{"paper_id":5308,"author_seq":203,"given_name":5326,"surname":5327,"affiliation":63,"orcid":63},"Elke","Teich","This study investigates the potential of generative large language models (LLMs) to automatically identify the disciplines of scientific papers in the Royal Society Corpus (RSC) – an extensive collection of English scientific publications spanning more than three centuries. 
We evaluated eight open-source, state-of-the-art LLMs from four model families on a manually annotated subset and further validated the three best-performing models on a corpus of modern scientific texts. These models were subsequently used for large-scale annotation of the RSC. The models exhibited robust and consistent performance, with at least two LLMs agreeing on the same label for 98.3% of the documents. We then conducted an error analysis of papers assigned divergent labels and a diachronic case study of disciplinary trends within the corpus. The error analysis revealed that most discrepancies occurred in twentieth-century texts, reflecting the growing interdisciplinarity of research. The diachronic analysis showed a gradual decline in disciplinary diversity over time as well as fluctuations corresponding to major paradigm shifts such as the Chemical Revolution and key twentieth-century developments in Physics. The discipline labels generated by the three models will be made publicly available.",{"paper_id":5330,"title":5331,"year":7,"month":188,"day":63,"doi":5332,"resource_url":5333,"first_page":5334,"last_page":5335,"pdf_url":5336,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5337,"paper_type":860,"authors":5338,"abstract":5351},"lrec2026-main-188","COCOA: Creation and Exploratory Investigation of a COrpus of Claims frOm NLP 
Articles","10.63317\u002F38hiuxwcq4bc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-188","2387","2399","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.188.pdf","bleuze-etal-2026-cocoa",[5339,5342,5345,5348],{"paper_id":5330,"author_seq":247,"given_name":5340,"surname":5341,"affiliation":63,"orcid":63},"Clémentine","Bleuze",{"paper_id":5330,"author_seq":232,"given_name":5343,"surname":5344,"affiliation":63,"orcid":63},"Fanny","Ducel",{"paper_id":5330,"author_seq":218,"given_name":5346,"surname":5347,"affiliation":63,"orcid":63},"Maxime","Amblard",{"paper_id":5330,"author_seq":203,"given_name":5349,"surname":5350,"affiliation":63,"orcid":63},"Karen","Fort","Research articles are an essential pillar of scientific knowledge, but they are subject to multiple constraints. On the one hand, their scientific reliability is essential and relies in particular on the peer review process. On the other hand, they fulfill a rhetorical function of persuasion for authors who defend claims in a more and more competitive environment. In a context of massively increasing publication growth and quickly evolving practices, it is essential that the scientific community remains alert and critical of its own biases. In this paper, we call for a \"NLP for NLP\" framing of theseissues. We created COCOA, a corpus of sentences from NLP papers and pre-prints published in English between 1952 and 2024, a sample of which we manually annotated with claim category labels reflecting their rhetorical function. We fine-tuned a SciBERT model to predict remaining labels, and made both the corpus and the model available to the community. We illustrate the interest of the corpus with exploratory analyses, and outline directions for further research. 
We hope that this work can stimulate discussions on the issues of research standardization and scientific overclaiming.",{"paper_id":5353,"title":5354,"year":7,"month":188,"day":63,"doi":5355,"resource_url":5356,"first_page":5357,"last_page":5358,"pdf_url":5359,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5360,"paper_type":860,"authors":5361,"abstract":5374},"lrec2026-main-189","SPOT: An Annotated French Corpus and Benchmark for Detecting Critical Interventions in Online Conversations","10.63317\u002F3fombx55bdqy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-189","2400","2418","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.189.pdf","berriche-etal-2026-spot",[5362,5365,5368,5371],{"paper_id":5353,"author_seq":247,"given_name":5363,"surname":5364,"affiliation":63,"orcid":63},"Manon","Berriche",{"paper_id":5353,"author_seq":232,"given_name":5366,"surname":5367,"affiliation":63,"orcid":63},"Célia","Nouri",{"paper_id":5353,"author_seq":218,"given_name":5369,"surname":5370,"affiliation":63,"orcid":63},"Chloé","Clavel",{"paper_id":5353,"author_seq":203,"given_name":5372,"surname":5373,"affiliation":63,"orcid":63},"Jean-Philippe","Cointet","We introduce SPOT (Stopping Points in Online Threads), the first annotated corpus translating the sociological concept of stopping point into a reproducible NLP task. Stopping points are ordinary critical interventions that pause or redirect online discussions through a range of forms — irony, subtle doubt or fragmentary arguments— that frameworks like counterspeech or social correction often overlook. We operationalize this concept as a binary classification task and provide reliable annotation guidelines. The corpus contains 43,305 manually annotated French Facebook comments linked to URLs flagged as false information by social media users, enriched with contextual metadata (article, post, parent comment, page or group, and source). 
We benchmark fine-tuned encoder models (CamemBERT) and instruction-tuned LLMs under various prompting strategies. Results show that fine-tuned encoders outperform prompted LLMs in F1 score by more than 10 percentage points, confirming the importance of supervised learning for emerging non-English social media tasks. Incorporating contextual metadata further improves encoder models F1 scores from 0.75 to 0.78. We release the anonymized dataset, along with the annotation guidelines and code in our code repository, to foster transparency and reproducible research.",{"paper_id":5376,"title":5377,"year":7,"month":188,"day":63,"doi":5378,"resource_url":5379,"first_page":5380,"last_page":5381,"pdf_url":5382,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5383,"paper_type":860,"authors":5384,"abstract":5403},"lrec2026-main-190","MedPT: A Massive Medical Question Answering Dataset for Brazilian-Portuguese Speakers","10.63317\u002F5mrjjtbsf6rv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-190","2419","2429","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.190.pdf","farber-etal-2026-medpt",[5385,5388,5391,5394,5397,5400],{"paper_id":5376,"author_seq":247,"given_name":5386,"surname":5387,"affiliation":63,"orcid":63},"Fernanda Bufon","Farber",{"paper_id":5376,"author_seq":232,"given_name":5389,"surname":5390,"affiliation":63,"orcid":63},"Iago Alves","Brito",{"paper_id":5376,"author_seq":218,"given_name":5392,"surname":5393,"affiliation":63,"orcid":63},"Julia Soares","Dollis",{"paper_id":5376,"author_seq":203,"given_name":5395,"surname":5396,"affiliation":63,"orcid":63},"Pedro Schindler Freire Brasil","Ribeiro",{"paper_id":5376,"author_seq":188,"given_name":5398,"surname":5399,"affiliation":63,"orcid":63},"Rafael Teixeira","Sousa",{"paper_id":5376,"author_seq":172,"given_name":5401,"surname":5402,"affiliation":63,"orcid":63},"Arlindo R. 
Galvão","Filho","While large language models (LLMs) show transformative potential in healthcare, their development remains focused on high-resource languages. This creates a critical barrier for other languages, as simple translation fails to capture unique clinical and cultural nuances, such as endemic diseases. To address this, we introduce MedPT, the first large-scale, real-world corpus of patient-doctor interactions for the Brazilian Portuguese medical domain. Comprising 384,095 authentic question-answer pairs and covering over 3,200 distinct health-related conditions, the dataset was refined through a rigorous multi-stage curation protocol that employed a hybrid quantitative-qualitative analysis to filter noise and contextually enrich thousands of ambiguous queries, resulting in a corpus of approximately 57 million tokens. We further utilize of LLM-driven annotation to classify queries into seven semantic types to capture user intent. To validate MedPT’s utility, we benchmark it in a medical specialty classification task: fine-tuning a 1.7B parameter model achieves an outstanding 94% F1-score on a 20-class setup. Furthermore, our qualitative error analysis shows misclassifications are not random but reflect genuine clinical ambiguities (e.g., between comorbid conditions), proving the dataset’s deep semantic richness. 
We publicly release MedPT on Hugging Face to support the development of more equitable, accurate, and culturally-aware medical technologies for the Portuguese-speaking world.",{"paper_id":5405,"title":5406,"year":7,"month":188,"day":63,"doi":5407,"resource_url":5408,"first_page":5409,"last_page":5410,"pdf_url":5411,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5412,"paper_type":860,"authors":5413,"abstract":5424},"lrec2026-main-191","Large Language Models for Citation Function Classification","10.63317\u002F4sb25z5kxz3q","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-191","2430","2439","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.191.pdf","vodika-etal-2026-large",[5414,5416,5418,5421],{"paper_id":5405,"author_seq":247,"given_name":1668,"surname":5415,"affiliation":63,"orcid":63},"Vodička",{"paper_id":5405,"author_seq":232,"given_name":3469,"surname":5417,"affiliation":63,"orcid":63},"Kral",{"paper_id":5405,"author_seq":218,"given_name":5419,"surname":5420,"affiliation":63,"orcid":63},"Christophe","Cerisara",{"paper_id":5405,"author_seq":203,"given_name":5422,"surname":5423,"affiliation":63,"orcid":63},"Jakub","Šmíd","Citation function classification plays a crucial role in understanding the relationships between scientific publications and advancing bibliometric analysis. This study presents one of the first comprehensive evaluations of multiple state-of-the-art (SOTA) large language models (LLMs) for citation function classification, achieving new SOTA results on the ACL-ARC dataset. We systematically compare five models (Mistral 7B, Orca 2-7B, LLaMA 3.1-8B, Falcon 7B, and SciBERT) across zero-shot, few-shot, and fine-tuning approaches. Our fine-tuned Falcon 7B model achieves a 73,3% macro F1 score on ACL-ARC, representing a significant improvement over previous methods. 
Additionally, we introduce AC3, a novel dataset featuring a seven-category annotation scheme that distinguishes between neutral acknowledgments and explicit evaluative stances (more opinion-oriented citations – criticizing, complimenting, contradicting). The dataset is implemented across four context extraction variants to systematically evaluate the impact of contextual scope on classification performance. We also provide detailed analysis of model performance, experimental configurations, and limitations to guide future research in this domain. To our knowledge, this is one of the first studies dedicated to comprehensive model comparison for citation function classification, addressing a gap identified in recent surveys.",{"paper_id":5426,"title":5427,"year":7,"month":188,"day":63,"doi":5428,"resource_url":5429,"first_page":5430,"last_page":5431,"pdf_url":5432,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5433,"paper_type":860,"authors":5434,"abstract":5444},"lrec2026-main-192","Small LLMs for Medical NLP: A Systematic Analysis of Few-Shot, Constraint Decoding, Fine-Tuning and Continual Pre-Training in Italian","10.63317\u002F2c6cyoc8xohu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-192","2440","2457","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.192.pdf","ferrazzi-etal-2026-small",[5435,5438,5441,5443],{"paper_id":5426,"author_seq":247,"given_name":5436,"surname":5437,"affiliation":63,"orcid":63},"Pietro","Ferrazzi",{"paper_id":5426,"author_seq":232,"given_name":5439,"surname":5440,"affiliation":63,"orcid":63},"Mattia","Franzin",{"paper_id":5426,"author_seq":218,"given_name":5238,"surname":5442,"affiliation":63,"orcid":63},"Lavelli",{"paper_id":5426,"author_seq":203,"given_name":964,"surname":965,"affiliation":63,"orcid":63},"Large Language Models (LLMs) consistently excel in diverse medical Natural Language Processing (NLP) tasks, yet their substantial computational 
requirements often limit deployment in real-world healthcare settings. In this work, we investigate whether \"small\" LLMs (around one billion parameters) can effectively perform medical tasks while maintaining competitive accuracy. We evaluate models from three major families—Llama-3, Gemma-3, and Qwen3—across 20 clinical NLP tasks among Named Entity Recognition, Relation Extraction, Case Report Form Filling, Question Answering, and Argument Mining. We systematically compare a range of adaptation strategies, both at inference time (few-shot prompting, constraint decoding) and at training time (supervised fine-tuning, continual pretraining). Fine-tuning emerges as the most effective approach, while the combination of few-shot prompting and constraint decoding offers strong lower-resource alternatives. Our results show that small LLMs can match or even surpass larger baselines, with our best configuration based on Qwen3-1.7B achieving an average score +9.2 points higher than Qwen3-32B. We release a comprehensive collection of all the publicly available Italian medical datasets for NLP tasks, together with our top-performing models. 
Furthermore, we release an Italian dataset of 126M words from the Emergency Department of an Italian Hospital, and 175M words from various sources that we used for continual pre-training.",{"paper_id":5446,"title":5447,"year":7,"month":188,"day":63,"doi":5448,"resource_url":5449,"first_page":5450,"last_page":5451,"pdf_url":5452,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5453,"paper_type":860,"authors":5454,"abstract":5463},"lrec2026-main-193","Analysing Lightweight Large Language Models for Biomedical Named Entity Recognition on Diverse Ouput Formats","10.63317\u002F3kdyx7mu3dib","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-193","2458","2470","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.193.pdf","epron-etal-2026-analysing",[5455,5457,5460],{"paper_id":5446,"author_seq":247,"given_name":1159,"surname":5456,"affiliation":63,"orcid":63},"Epron",{"paper_id":5446,"author_seq":232,"given_name":5458,"surname":5459,"affiliation":63,"orcid":63},"Adrien","Coulet",{"paper_id":5446,"author_seq":218,"given_name":5461,"surname":5462,"affiliation":63,"orcid":63},"Mehwish","Alam","Despite their strong linguistic capabilities, Large Language Models (LLMs) are computationally demanding and require substantial resources for fine-tuning, which is unadapted to privacy and budget constraints of many healthcare settings. To address this, we present an experimental analysis focused on Biomedical Named Entity Recognition using lightweight LLMs, we evaluate the impact of different output formats on model performance. The results reveal that lightweight LLMs can achieve competitive performance compared to the larger models, highlighting their potential as lightweight yet effective alternatives for biomedical information extraction. 
Our analysis shows that instruction tuning over many distinct formats does not improve performance, but identifies several formats consistently associated with better performance.
By combining multi-head attention with pair-conditioned top-K pooling, the model isolates the most informative contextual tokens for each pair. We conduct extensive experiments on TimeBank-Dense, MATRES, TDDMan, and TDDAuto, including linguistic analyses of top-K tokens. Results show that WISTERIA achieves competitive accuracy and reveals pair-level rationales aligned with temporal linguistic cues, offering a localized and interpretable view of temporal reasoning.",{"paper_id":5485,"title":5486,"year":7,"month":188,"day":63,"doi":5487,"resource_url":5488,"first_page":5489,"last_page":5490,"pdf_url":5491,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5492,"paper_type":860,"authors":5493,"abstract":5505},"lrec2026-main-195","Dynamic Model Switching to Mitigate Outdated Knowledge in Large Language Models","10.63317\u002F2n9poorzn8s3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-195","2490","2500","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.195.pdf","pinninti-etal-2026-dynamic",[5494,5497,5500,5503],{"paper_id":5485,"author_seq":247,"given_name":5495,"surname":5496,"affiliation":63,"orcid":63},"Ramakrishna","Pinninti",{"paper_id":5485,"author_seq":232,"given_name":5498,"surname":5499,"affiliation":63,"orcid":63},"Sabyasachi","Kamila",{"paper_id":5485,"author_seq":218,"given_name":5501,"surname":5502,"affiliation":63,"orcid":63},"Ayan","Mazumder",{"paper_id":5485,"author_seq":203,"given_name":4846,"surname":5504,"affiliation":63,"orcid":63},"Hasanuzzaman","Generating timely and accurate content is a significant challenge for Large Language Models (LLMs). Obsolete information reduces their reliability and user trust. To overcome the limitations of single models in adapting to evolving information, we propose a dynamic switching model. 
A multitask trained switch model objective adaptively picks between a large model that does not have recent information and a smaller model fine-tuned on recent information using contextual and temporal indicators.
However, their high latency continues to pose challenges for real-time applications that require fast inference, and the need to train and deploy distinct models for different hardware constraints increases both financial and computational costs. To address this, we propose Nested Matrix Learning (NML), a method that trains a single, flexible model capable of generating multiple high-performing student models of varying sizes. This is achieved by simultaneously optimizing a pre-trained teacher model and its nested sub-models in a single training process, without sacrificing the teacher’s performance. NML provides a flexible and scalable solution, allowing models to adapt to different computational budgets. Our extensive experiments show that student models produced by NML, which can be up to 10x smaller than the full-size model, can be directly deployed for efficient inference or serve as superior initialization points for further fine-tuning in downstream tasks. By preserving the performance of the teacher model while delivering compact and efficient student models of various sizes, NML enhances the usability and adaptability of LLMs in real-world scenarios.",{"paper_id":5529,"title":5530,"year":7,"month":188,"day":63,"doi":5531,"resource_url":5532,"first_page":5533,"last_page":5534,"pdf_url":5535,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5536,"paper_type":860,"authors":5537,"abstract":5579},"lrec2026-main-197","Confabulations from ACL Publications (CAP): A Dataset for Scientific Hallucination 
Detection","10.63317\u002F2vbyt7ey6nrp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-197","2512","2524","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.197.pdf","gamba-etal-2026-confabulations",[5538,5540,5543,5546,5549,5552,5555,5558,5561,5564,5567,5570,5573,5575,5578],{"paper_id":5529,"author_seq":247,"given_name":3205,"surname":5539,"affiliation":63,"orcid":63},"Gamba",{"paper_id":5529,"author_seq":232,"given_name":5541,"surname":5542,"affiliation":63,"orcid":63},"Aman","Sinha",{"paper_id":5529,"author_seq":218,"given_name":5544,"surname":5545,"affiliation":63,"orcid":63},"Timothee","Mickus",{"paper_id":5529,"author_seq":203,"given_name":5547,"surname":5548,"affiliation":63,"orcid":63},"Raul","Vazquez",{"paper_id":5529,"author_seq":188,"given_name":5550,"surname":5551,"affiliation":63,"orcid":63},"Patanjali","Bhamidipati",{"paper_id":5529,"author_seq":172,"given_name":5553,"surname":5554,"affiliation":63,"orcid":63},"Claudio","Savelli",{"paper_id":5529,"author_seq":155,"given_name":5556,"surname":5557,"affiliation":63,"orcid":63},"Ahana","Chattopadhyay",{"paper_id":5529,"author_seq":138,"given_name":5559,"surname":5560,"affiliation":63,"orcid":63},"Laura A.","Zanella",{"paper_id":5529,"author_seq":121,"given_name":5562,"surname":5563,"affiliation":63,"orcid":63},"Yash","Kankanampati",{"paper_id":5529,"author_seq":104,"given_name":5565,"surname":5566,"affiliation":63,"orcid":63},"Binesh Arakkal","Remesh",{"paper_id":5529,"author_seq":87,"given_name":5568,"surname":5569,"affiliation":63,"orcid":63},"Aryan 
Ashok","Chandramania",{"paper_id":5529,"author_seq":73,"given_name":5571,"surname":5572,"affiliation":63,"orcid":63},"Rohit","Agarwal",{"paper_id":5529,"author_seq":55,"given_name":5574,"surname":3446,"affiliation":63,"orcid":63},"Chuyuan",{"paper_id":5529,"author_seq":38,"given_name":5576,"surname":5577,"affiliation":63,"orcid":63},"Ioana","Buhnila",{"paper_id":5529,"author_seq":17,"given_name":3080,"surname":3081,"affiliation":63,"orcid":63},"We introduce the CAP (Confabulations from ACL Publications) dataset, a multilingual resource for studying hallucinations in large language models (LLMs) within scientific text generation. CAP focuses on the scientific domain, where hallucinations can distort factual knowledge, as they frequently do. In this domain, however, the presence of specialized terminology, statistical reasoning, and context-dependent interpretations further exacerbates these distortions, particularly given LLMs’ lack of true comprehension, limited contextual understanding, and bias toward surface-level generalization. CAP operates in a cross-lingual setting covering five high-resource languages (English, French, Hindi, Italian, and Spanish) and four low-resource languages (Bengali, Gujarati, Malayalam, and Telugu). The dataset comprises 900 curated scientific questions and over 7,000 LLM-generated answers from 16 publicly available models, provided as question–answer pairs along with token sequences and corresponding logits. Each instance is annotated with a binary label indicating the presence of a scientific hallucination, denoted as a factuality error, and a fluency label, capturing issues in the linguistic quality or naturalness of the text. 
CAP is publicly released to facilitate advanced research on hallucination detection, multilingual evaluation of LLMs, and the development of more reliable scientific NLP systems.",{"paper_id":5581,"title":5582,"year":7,"month":188,"day":63,"doi":5583,"resource_url":5584,"first_page":5585,"last_page":5586,"pdf_url":5587,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5588,"paper_type":860,"authors":5589,"abstract":5603},"lrec2026-main-198","MedInjection-FR: Exploring the Role of Native, Synthetic, and Translated Data in Biomedical Instruction Tuning","10.63317\u002F5kmc9ovfzvyo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-198","2525","2544","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.198.pdf","belmadani-etal-2026-medinjection",[5590,5593,5595,5598,5601],{"paper_id":5581,"author_seq":247,"given_name":5591,"surname":5592,"affiliation":63,"orcid":63},"Ikram","Belmadani",{"paper_id":5581,"author_seq":232,"given_name":5594,"surname":1145,"affiliation":63,"orcid":63},"Oumaima el",{"paper_id":5581,"author_seq":218,"given_name":5596,"surname":5597,"affiliation":63,"orcid":63},"Pacome Constant Dit","Beaufils",{"paper_id":5581,"author_seq":203,"given_name":5599,"surname":5600,"affiliation":63,"orcid":63},"Benoit","Favre",{"paper_id":5581,"author_seq":188,"given_name":5132,"surname":5602,"affiliation":63,"orcid":63},"Dufour","Instruction tuning has become essential for adapting large language models (LLMs) to follow domain-specific prompts. Yet, in specialized fields such as medicine, the scarcity of high-quality French instruction data limits effective supervision. To address this gap, we introduce MedInjection-FR, a large-scale French biomedical instruction dataset comprising 571K instruction–response pairs drawn from three complementary sources: native, synthetic, and translated data. 
We design a controlled experimental framework to systematically assess how data provenance affects instruction tuning, using Qwen-4B-Instruct fine-tuned across seven configurations combining these sources. Results show that native data yield the strongest performance, while mixed setups, particularly native and translated, provide complementary benefits. Synthetic data alone remains less effective but contributes positively when balanced with native supervision. Evaluation on open-ended QA combines automatic metrics, LLM-as-a-judge assessment, and human expert review; although LLM-based judgments correlate best with human ratings, they show sensitivity to verbosity. These findings highlight that data authenticity and diversity jointly shape downstream adaptation and that heterogeneous supervision can mitigate the scarcity of native French medical instructions.",{"paper_id":5605,"title":5606,"year":7,"month":188,"day":63,"doi":5607,"resource_url":5608,"first_page":5609,"last_page":5610,"pdf_url":5611,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5612,"paper_type":860,"authors":5613,"abstract":5626},"lrec2026-main-199","The Impact of Tokenization Algorithms on Hungarian Language Model Performance","10.63317\u002F29hx92kq2dxe","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-199","2545","2556","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.199.pdf","osvth-etal-2026-impact",[5614,5617,5620,5623],{"paper_id":5605,"author_seq":247,"given_name":5615,"surname":5616,"affiliation":63,"orcid":63},"Mátyás","Osváth",{"paper_id":5605,"author_seq":232,"given_name":5618,"surname":5619,"affiliation":63,"orcid":63},"Máté Norbert","Molnár",{"paper_id":5605,"author_seq":218,"given_name":5621,"surname":5622,"affiliation":63,"orcid":63},"Roland","Gunics",{"paper_id":5605,"author_seq":203,"given_name":5624,"surname":5625,"affiliation":63,"orcid":63},"Noémi","Ligeti-Nagy","Tokenization is a crucial text 
processing step for preparing input for language models and can contribute to model performance, especially in morphologically rich languages. Currently, Byte Pair Encoding (BPE), WordPiece, and Unigram LM algorithms are predominantly used in language models, but their effects can vary in agglutinative languages. This work compares these tokenization algorithms across varying vocabulary sizes, as well as a modified Unigram LM variant with morphologically informed initialization, on the Hungarian subset of the OSCAR dataset. The evaluation is based on several metrics describing the inferred quality of the tokenizers and on the downstream performance of multiple BERT models on the HuLU benchmark. Results show that BPE produces the most compact and morphologically aligned subword representations, while the modified Unigram LM achieved the best overall downstream performance across tasks. However, differences between methods and vocabulary sizes were generally small and not statistically significant, with the exception of HuCoPA (a task within the HuLU benchmark), which showed sensitivity to both factors. 
These findings underscore that tokenizer choice and vocabulary design are critical determinants of language model efficiency and performance in morphologically rich languages.",{"paper_id":5628,"title":5629,"year":7,"month":188,"day":63,"doi":5630,"resource_url":5631,"first_page":5632,"last_page":5633,"pdf_url":5634,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5635,"paper_type":860,"authors":5636,"abstract":5647},"lrec2026-main-200","FAME: Fictional Actors for Multilingual Erasure","10.63317\u002F3npbbsyj2dd7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-200","2557","2566","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.200.pdf","savelli-etal-2026-fame",[5637,5638,5641,5644],{"paper_id":5628,"author_seq":247,"given_name":5553,"surname":5554,"affiliation":63,"orcid":63},{"paper_id":5628,"author_seq":232,"given_name":5639,"surname":5640,"affiliation":63,"orcid":63},"Moreno La","Quatra",{"paper_id":5628,"author_seq":218,"given_name":5642,"surname":5643,"affiliation":63,"orcid":63},"Alkis","Koudounas",{"paper_id":5628,"author_seq":203,"given_name":5645,"surname":5646,"affiliation":63,"orcid":63},"Flavio","Giobergia","Large Language Models trained on web-scale data raise concerns about privacy and the right to be forgotten. To address these issues, Machine Unlearning provides techniques to remove specific information from trained models without retraining from scratch. However, existing benchmarks for evaluating unlearning in LLMs face two major limitations: they focus only on English and support only entity-level forgetting (removing all information about a person). We introduce FAME (Fictional Actors for Multilingual Erasure), a synthetic benchmark for evaluating Machine Unlearning across five languages: English, French, German, Italian, and Spanish. FAME contains 1,000 fictional actor biographies and 20,000 question-answer pairs. 
Each biography includes information on 20 topics organized into structured categories (biography, career, achievements, personal information). This design enables both entity-level unlearning (i.e., forgetting entire identities) and instance-level unlearning (i.e., forgetting specific facts while retaining others). We provide two dataset splits to support these two different unlearning scenarios and enable systematic comparison of unlearning techniques across languages. Since FAME uses entirely fictional data, it ensures that the information was never encountered during model pretraining, allowing for a controlled evaluation of unlearning methods.",{"paper_id":5649,"title":5650,"year":7,"month":188,"day":63,"doi":5651,"resource_url":5652,"first_page":5653,"last_page":5654,"pdf_url":5655,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5656,"paper_type":860,"authors":5657,"abstract":5669},"lrec2026-main-201","Detecting Risky Behavior Related to Alcohol and Drug Use within Adolescents' Private Messenger Conversations","10.63317\u002F2rjd9g5nbkxv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-201","2567","2580","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.201.pdf","plhk-etal-2026-detecting",[5658,5661,5664,5667],{"paper_id":5649,"author_seq":247,"given_name":5659,"surname":5660,"affiliation":63,"orcid":63},"Jaromír","Plhák",{"paper_id":5649,"author_seq":232,"given_name":5662,"surname":5663,"affiliation":63,"orcid":63},"Michaela","Lebedíková",{"paper_id":5649,"author_seq":218,"given_name":5665,"surname":5666,"affiliation":63,"orcid":63},"Ondrej","Sotolar",{"paper_id":5649,"author_seq":203,"given_name":1061,"surname":5668,"affiliation":63,"orcid":63},"Smahel","Alcohol and drug use negatively impact adolescents’ health, making early detection and prevention essential. One promising approach involves analyzing adolescents’ online conversations for signs of substance use. 
However, current machine learning models for online detection often rely on public data sources that fail to capture the private experiences of adolescents. In this study, we developed a BERT-based machine learning model to automatically identify discussions about alcohol and drug use with high accuracy, leveraging private messenger conversations from adolescents. Our novel dataset comprises 272,465 annotated utterances from a corpus of 1,260,492 utterances in 2,807 chats authored by 2,165 individuals, primarily in Czech. Our best BERT-based machine learning model achieved a solid F₁ score of 0.817, demonstrating the feasibility of addressing this social science task even in low-resource languages like Czech. Additionally, we verified that state-of-the-art generative open-source large language models are equally effective for this task and can be successfully adapted for other languages, including English. We also analyzed misclassified utterances to identify problematic patterns and improve model performance. The resulting models have significant practical implications for parental mediation software and parental control applications. 
By automating substance use detection and enabling appropriate real-time interventions, these tools can contribute to safeguarding adolescents’ health.",{"paper_id":5671,"title":5672,"year":7,"month":188,"day":63,"doi":5673,"resource_url":5674,"first_page":5675,"last_page":5676,"pdf_url":5677,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5678,"paper_type":860,"authors":5679,"abstract":5689},"lrec2026-main-202","Voices and Echoes in Fictional Dialogue: A Study of Linguistic Coordination in Literary Texts","10.63317\u002F5ejsrqugxt2v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-202","2581","2593","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.202.pdf","boriceanu-etal-2026-voices",[5680,5683,5686],{"paper_id":5671,"author_seq":247,"given_name":5681,"surname":5682,"affiliation":63,"orcid":63},"Ioana-Roxana","Boriceanu",{"paper_id":5671,"author_seq":232,"given_name":5684,"surname":5685,"affiliation":63,"orcid":63},"Alina","Iacob",{"paper_id":5671,"author_seq":218,"given_name":5687,"surname":5688,"affiliation":63,"orcid":63},"Liviu P.","Dinu","This study investigates linguistic coordination in fictional dialogue, examining whether the phenomenon typically observed in natural conversation also appears in imagined exchanges created by authors. We analyse dialogues from ten English novels by Jane Austen and E. M. Forster using the Project Dialogism Novel Corpus (PDNC) to measure linguistic convergence across nine function word categories from the Linguistic Inquiry and Word Count (LIWC) lexicon, complemented by network based measures that capture how linguistic adaptation shapes interactions among characters. The results provide evidence of convergence in both authors, confirming that linguistic coordination extends to literary dialogue. 
The network analysis supports these findings, revealing that alignment is generally reciprocal, unevenly distributed but widespread, and often crosses social and narrative boundaries. Taken together, these results suggest that linguistic coordination in fiction does not depend on deliberate stylistic planning, but reflects underlying cognitive mechanisms involved in language processing and social interaction.",{"paper_id":5691,"title":5692,"year":7,"month":188,"day":63,"doi":5693,"resource_url":5694,"first_page":5695,"last_page":5696,"pdf_url":5697,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5698,"paper_type":860,"authors":5699,"abstract":5708},"lrec2026-main-203","Bridging the Domain Divide: Supervised vs. Zero-Shot Clinical Section Segmentation from MIMIC-III to Obstetrics","10.63317\u002F4ktoypuohtci","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-203","2594","2607","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.203.pdf","karacan-etal-2026-bridging",[5700,5703,5706],{"paper_id":5691,"author_seq":247,"given_name":5701,"surname":5702,"affiliation":63,"orcid":63},"Baris","Karacan",{"paper_id":5691,"author_seq":232,"given_name":5704,"surname":5705,"affiliation":63,"orcid":63},"Barbara Di","Eugenio",{"paper_id":5691,"author_seq":218,"given_name":4377,"surname":5707,"affiliation":63,"orcid":63},"Thornton","Clinical free-text notes contain vital patient information. They are structured into labelled sections; recognizing these sections has been shown to support clinical decision-making and downstream NLP tasks. In this paper, we advance clinical section segmentation through three key contributions. First, we curate a new de-identified, section-labeled obstetrics notes dataset, to supplement the medical domains covered in public corpora such as MIMIC-III, on which most existing segmentation approaches are trained. 
Second, we systematically evaluate transformer-based supervised models for section segmentation on a curated subset of MIMIC-III (in-domain), and on the new obstetrics dataset (out-of-domain). Third, we conduct the first head-to-head comparison of supervised models for medical section segmentation with zero-shot large language models. Our results show that while supervised models perform strongly in-domain, their performance drops substantially out-of-domain. In contrast, zero-shot models demonstrate robust out-of-domain adaptability once hallucinated section headers are corrected. These findings underscore the importance of developing domain-specific clinical resources and highlight zero-shot segmentation as a promising direction for applying healthcare NLP beyond well-studied corpora, as long as hallucinations are appropriately managed.",{"paper_id":5710,"title":5711,"year":7,"month":188,"day":63,"doi":5712,"resource_url":5713,"first_page":5714,"last_page":5715,"pdf_url":5716,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5717,"paper_type":860,"authors":5718,"abstract":5749},"lrec2026-main-204","Reading Dynamics and Comprehension in Cognitive Aging: A Multimodal Language 
Resource","10.63317\u002F3wjy3a8cwnw8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-204","2608","2618","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.204.pdf","marzi-etal-2026-reading",[5719,5721,5724,5726,5728,5731,5734,5737,5740,5743,5746],{"paper_id":5710,"author_seq":247,"given_name":4283,"surname":5720,"affiliation":63,"orcid":63},"Marzi",{"paper_id":5710,"author_seq":232,"given_name":5722,"surname":5723,"affiliation":63,"orcid":63},"Noemi","Boni",{"paper_id":5710,"author_seq":218,"given_name":3543,"surname":5725,"affiliation":63,"orcid":63},"Todesco",{"paper_id":5710,"author_seq":203,"given_name":1104,"surname":5727,"affiliation":63,"orcid":63},"Nadalini",{"paper_id":5710,"author_seq":188,"given_name":5729,"surname":5730,"affiliation":63,"orcid":63},"Giorgia","Albertin",{"paper_id":5710,"author_seq":172,"given_name":5732,"surname":5733,"affiliation":63,"orcid":63},"Cristina","Dolciotti",{"paper_id":5710,"author_seq":155,"given_name":5735,"surname":5736,"affiliation":63,"orcid":63},"Paolo","Bongioanni",{"paper_id":5710,"author_seq":138,"given_name":5738,"surname":5739,"affiliation":63,"orcid":63},"Marcello","Ferro",{"paper_id":5710,"author_seq":121,"given_name":5741,"surname":5742,"affiliation":63,"orcid":63},"Fabio","Tamburini",{"paper_id":5710,"author_seq":104,"given_name":5744,"surname":5745,"affiliation":63,"orcid":63},"Gloria","Gagliardi",{"paper_id":5710,"author_seq":87,"given_name":5747,"surname":5748,"affiliation":63,"orcid":63},"Vito","Pirrelli","We introduce a novel Italian language resource for the study of reading and comprehension in aging populations, combining behavioural and linguistic data from healthy controls (HC), individuals with subjective cognitive decline (SCI), participants with Mild Cognitive Impairment (MCI), and patients with mild dementia (CDR1). 
Reading performance was recorded through a finger-tracking based application during both silent and oral reading, enabling fine-grained temporal analyses at the text, token and character level. Comprehension was assessed via multiple question types (wh-, inferential, referential, and lexical). Descriptive and non-linear regression analyses informed a feature selection process, yielding temporal and comprehension-based measures that capture individual reading dynamics. These features were explored through unsupervised clustering and supervised classification to investigate their discriminative and predictive potential across cognitive profiles. The resource supports research on reading and cognitive decline, offers a reproducible protocol for large-scale data collection, and provides a foundation for developing early cognitive screening and monitoring tools for aging populations.",{"paper_id":5751,"title":5752,"year":7,"month":188,"day":63,"doi":5753,"resource_url":5754,"first_page":5755,"last_page":5756,"pdf_url":5757,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5758,"paper_type":860,"authors":5759,"abstract":5767},"lrec2026-main-205","Evaluating Style Embeddings for Machine-Generated Text Detection","10.63317\u002F5hb2q2wfzabd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-205","2619","2628","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.205.pdf","durandard-etal-2026-evaluating",[5760,5763,5765],{"paper_id":5751,"author_seq":247,"given_name":5761,"surname":5762,"affiliation":63,"orcid":63},"Noé","Durandard",{"paper_id":5751,"author_seq":232,"given_name":2246,"surname":5764,"affiliation":63,"orcid":63},"Dhawan",{"paper_id":5751,"author_seq":218,"given_name":1270,"surname":5766,"affiliation":63,"orcid":63},"Poibeau","In this paper, we evaluate the use of style embeddings for distinguishing machine-generated from human-written text. 
Style embeddings are particularly suited for this task as compared to semantic embeddings, they offer higher content-independence, and compared to feature-engineering approaches, they offer a richer and more holistic representation of writing style. We use a detection module in which texts are first embedded in high-dimensional stylistic spaces using a style encoder, and the resulting vector representations are classified using supervised methods. To optimize this detector, we evaluate the performance of a range of pre-trained public-domain style encoders paired with different supervised methods. When evaluated on MGTBench, a widely adopted benchmark, our approach matches or exceeds state-of-the-art performance metrics. It also generalizes well across various text domains and LLMs. Our findings highlight the potential, and would facilitate the use, of style embeddings as lightweight and effective components of machine-generated text detection systems.",{"paper_id":5769,"title":5770,"year":7,"month":188,"day":63,"doi":5771,"resource_url":5772,"first_page":5773,"last_page":5774,"pdf_url":5775,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5776,"paper_type":860,"authors":5777,"abstract":5786},"lrec2026-main-206","The Speech-LLM Takes It All: A Truly Fully End-to-End Spoken Dialog State Tracking Approach","10.63317\u002F5dwmfoqycu6w","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-206","2629","2637","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.206.pdf","ghazal-etal-2026-speech",[5778,5781,5783],{"paper_id":5769,"author_seq":247,"given_name":5779,"surname":5780,"affiliation":63,"orcid":63},"Nizar El","Ghazal",{"paper_id":5769,"author_seq":232,"given_name":2409,"surname":5782,"affiliation":63,"orcid":63},"Caubrière",{"paper_id":5769,"author_seq":218,"given_name":5784,"surname":5785,"affiliation":63,"orcid":63},"Valentin","Vielzeuf","This paper presents a comparative study of context 
management strategies for end-to-end Spoken Dialog State Tracking using Speech-LLMs. We systematically evaluate traditional multimodal context (combining text history and spoken current turn), full spoken history, and compressed spoken history approaches. Our experiments on the SpokenWOZ corpus demonstrate that providing the full spoken conversation as input yields the highest performance among models of similar size, significantly surpassing prior methods. Furthermore, we show that attention-pooling-based compression of the spoken history offers a strong trade-off, maintaining competitive accuracy with reduced context size. Detailed analysis confirms that improvements stem from more effective context utilization.",{"paper_id":5788,"title":5789,"year":7,"month":188,"day":63,"doi":5790,"resource_url":5791,"first_page":5792,"last_page":5793,"pdf_url":5794,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5795,"paper_type":860,"authors":5796,"abstract":5804},"lrec2026-main-207","Off the Hamster Wheel: Rethinking Dialogue Research through a Meta-Analysis of the ACL Anthology 2024","10.63317\u002F2e2c4k8jvbb3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-207","2638","2652","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.207.pdf","decker-etal-2026-off",[5797,5800,5801],{"paper_id":5788,"author_seq":247,"given_name":5798,"surname":5799,"affiliation":63,"orcid":63},"Amandine","Decker",{"paper_id":5788,"author_seq":232,"given_name":5346,"surname":5347,"affiliation":63,"orcid":63},{"paper_id":5788,"author_seq":218,"given_name":5802,"surname":5803,"affiliation":63,"orcid":63},"Ellen","Breitholtz","In this paper, we take a meta-review approach to investigate how conversation is currently studied in the field by analysing papers from the ACL Anthology 2024. 
We retrieved 407 papers, which represents about 6.1% of the papers published in the selected venues, and manually reviewed them to determine the conversational task addressed, the corpora used, and the evaluation methods employed. Our analysis leads to several observations. First, dialogue systems represent about half of the papers of the ACL Anthology 2024 while more formal and analytical approaches cover only 12%. Second, many papers provide lacking corpus descriptions, which shows a detachment from the data which becomes a simple tool instead of one of the pillars NLP\u002FCL applications should be based on. Third, the evaluation methods, in particular when it comes to dialogue systems, often do not assess the interactional aspects of these systems or rely on assumptions not backed up from evidence of the dialogue research community. We argue that the field would benefit from a renewed focus on analysis and formal representation of conversation, a richer evaluation culture that includes interactional quality, and more systematic practices regarding the data presentation in papers.",{"paper_id":5806,"title":5807,"year":7,"month":188,"day":63,"doi":5808,"resource_url":5809,"first_page":5810,"last_page":5811,"pdf_url":5812,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5813,"paper_type":860,"authors":5814,"abstract":5829},"lrec2026-main-208","VDAct 2.0: Scaling Video-Grounded Dialogue for Event-driven Activity Understanding with LLM-Assisted 
Filtering","10.63317\u002F4vcv4ncvs6xx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-208","2653","2666","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.208.pdf","imrattanatrai-etal-2026-vdact",[5815,5818,5820,5823,5826],{"paper_id":5806,"author_seq":247,"given_name":5816,"surname":5817,"affiliation":63,"orcid":63},"Wiradee","Imrattanatrai",{"paper_id":5806,"author_seq":232,"given_name":2044,"surname":5819,"affiliation":63,"orcid":63},"Asada",{"paper_id":5806,"author_seq":218,"given_name":5821,"surname":5822,"affiliation":63,"orcid":63},"Kimihiro","Hasegawa",{"paper_id":5806,"author_seq":203,"given_name":5824,"surname":5825,"affiliation":63,"orcid":63},"Ken","Fukuda",{"paper_id":5806,"author_seq":188,"given_name":5827,"surname":5828,"affiliation":63,"orcid":63},"Teruko","Mitamura","We present VDAct 2.0, an enhanced benchmark for video-grounded dialogue that builds upon the original VDAct by expanding dialogue coverage and introducing a scalable LLM-assisted filtering pipeline to ensure high-quality, grounded QA pairs. VDAct 2.0 comprises 6,356 human-annotated dialogues with a total of 63,958 turns, grounded in 2,975 household activity videos, with undesirable dialogue turns systematically identified and removed. To achieve this, we design a trigger-based quality framework and calibrate a panel of high-agreement LLMs through human-in-the-loop calibration, allowing scalable QA-turn-level filtering. We benchmark a wide range of pretrained and fine-tuned models, both open-source and proprietary, across standard text generation metrics and LLM-based evaluations. 
The results highlight both recent advances and remaining challenges in video-grounded dialogue modeling, positioning VDAct 2.0 as a high-fidelity testbed for evaluating and advancing multimodal reasoning in interactive settings.",{"paper_id":5831,"title":5832,"year":7,"month":188,"day":63,"doi":5833,"resource_url":5834,"first_page":5835,"last_page":5836,"pdf_url":5837,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5838,"paper_type":860,"authors":5839,"abstract":5856},"lrec2026-main-209","Multi-dimensional Evaluation of Character-Authentic Dialogue Models Learned from Question-Answer Data","10.63317\u002F4ixiaqyfsd93","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-209","2667","2681","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.209.pdf","otsuka-etal-2026-multi",[5840,5841,5842,5845,5848,5851,5854,5855],{"paper_id":5831,"author_seq":247,"given_name":886,"surname":887,"affiliation":63,"orcid":63},{"paper_id":5831,"author_seq":232,"given_name":883,"surname":884,"affiliation":63,"orcid":63},{"paper_id":5831,"author_seq":218,"given_name":5843,"surname":5844,"affiliation":63,"orcid":63},"Kenta","Hama",{"paper_id":5831,"author_seq":203,"given_name":5846,"surname":5847,"affiliation":63,"orcid":63},"Masahiro","Mizukami",{"paper_id":5831,"author_seq":188,"given_name":5849,"surname":5850,"affiliation":63,"orcid":63},"Tsunehiro","Arimoto",{"paper_id":5831,"author_seq":172,"given_name":5852,"surname":5853,"affiliation":63,"orcid":63},"Hiroaki","Sugiyama",{"paper_id":5831,"author_seq":155,"given_name":892,"surname":893,"affiliation":63,"orcid":63},{"paper_id":5831,"author_seq":138,"given_name":889,"surname":890,"affiliation":63,"orcid":63},"Character-authentic dialogue remains challenging for large language models (LLMs) due to limited character-specific data, generic-style collapse, and hallucinations regarding persona facts. 
Our work presents a comparative evaluation of several learning strategies for character dialogue grounded in question–answer (QA) data, comparing zero\u002Ffew-shot prompting, supervised fine-tuning (SFT), direct preference optimization (DPO), and a hybrid approach that integrates retrieval-augmented character profiles and knowledge with policy optimization. Using both single-turn and multi-turn settings, we assess multiple dimensions central to character dialogue quality: reproducibility, diversity, hallucination, and character authenticity. Results show that SFT excels in reproducibility and hallucination reduction but tends to shorten and simplify outputs, thereby reducing diversity and authenticity. DPO improves stylistic fidelity and authenticity but depends strongly on externalized character knowledge to limit hallucinations. The hybrid variant that combines character-knowledge retrieval with DPO achieves the best overall balance, delivering strong authenticity while maintaining factual consistency and competitive reproducibility in both single- and multi-turn dialogues. 
We further analyze the model’s sensitivity to knowledge retrieval and response-length effects and discuss trade-offs among optimization targets that inform practical design choices for developing faithful and engaging character agents trained from scalable QA resources.",{"paper_id":5858,"title":5859,"year":7,"month":188,"day":63,"doi":5860,"resource_url":5861,"first_page":5862,"last_page":5863,"pdf_url":5864,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5865,"paper_type":860,"authors":5866,"abstract":5873},"lrec2026-main-210","Empathy in Greek Exam-Related Support Conversations: A Comparative Evaluation of LLM Responses","10.63317\u002F3ckrvscmebs9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-210","2682","2697","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.210.pdf","kyriazi-etal-2026-empathy",[5867,5870],{"paper_id":5858,"author_seq":247,"given_name":5868,"surname":5869,"affiliation":63,"orcid":63},"Panagiota","Kyriazi",{"paper_id":5858,"author_seq":232,"given_name":5871,"surname":5872,"affiliation":63,"orcid":63},"Prokopis","Prokopidis","Recent advancements in Large Language Models (LLMs) have significantly enhanced Natural Language Processing (NLP), particularly in generating human-like responses and engaging in social interactions. Research in natural language generation involves assessing AI-generated text across multiple dimensions, including accuracy, relevance, and robustness. This paper focuses on evaluating an LLM that puts emphasis on the Greek language and comparing it to two multilingual LLMs across four key dimensions: Understanding, Empathy, Harm, and Reasoning. 
We analyze the models’ responses to expressions of stress and anxiety from teenagers preparing for the Greek State’s Panhellenic exams for university entrance, assessing not only their ability to comprehend, reason, and respond empathetically but also possible unintended harm that they may cause, such as reinforcing stress or offering inappropriate advice. We, thus, introduce the GEAR (Greek Empathy Assessment Resource) dataset of student issues and exam-related forum posts along with LLM-generated empathetic responses. By prompting each model with contextual cues about its role as a recipient of these messages, this research aims to provide insights into the models’ conversational capabilities, emotional intelligence, and ethical implications in sensitive interactions.",{"paper_id":5875,"title":5876,"year":7,"month":188,"day":63,"doi":5877,"resource_url":5878,"first_page":5879,"last_page":5880,"pdf_url":5881,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5882,"paper_type":860,"authors":5883,"abstract":5890},"lrec2026-main-211","Evaluation of Two Leading Polish Language Models in a Real-world RAG Scenario","10.63317\u002F36igcwtic7tn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-211","2698","2704","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.211.pdf","bartanowicz-etal-2026-evaluation",[5884,5887],{"paper_id":5875,"author_seq":247,"given_name":5885,"surname":5886,"affiliation":63,"orcid":63},"Szymon","Bartanowicz",{"paper_id":5875,"author_seq":232,"given_name":5888,"surname":5889,"affiliation":63,"orcid":63},"Krzysztof","Jassem","This paper presents a comparative evaluation of two leading Polish instruction-tuned language models, Bielik-11B-v2.3-Instruct and PLLuM-12B-nc-chat, within a real-world Retrieval-Augmented Generation (RAG) system designed for the technical documentation of a low-code platform. 
The study aims to identify the optimal configuration of retrieval and generation components for Polish-language applications. The evaluation was conducted in two stages. First, several embedding models and retrieval methods were tested using standard information retrieval metrics, including NDCG. The OrlikB\u002FKartonBERT-USE-base-v1 model combined with vector-based retrieval achieved the highest performance and was adopted for the second stage. In the generation phase, both models were evaluated using quantitative scoring and pairwise A\u002FB testing with multiple evaluators to ensure robustness. Results show that Bielik-11B-v2.3-Instruct consistently outperformed PLLuM-12B-nc-chat in producing accurate and contextually relevant answers. The study highlights the importance of constructing a reliable golden set, employing a two-phase evaluation pipeline, and selecting appropriate metrics to ensure objective and reproducible assessment of RAG systems in real-world Polish-language contexts.",{"paper_id":5892,"title":5893,"year":7,"month":188,"day":63,"doi":5894,"resource_url":5895,"first_page":5896,"last_page":5897,"pdf_url":5898,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5899,"paper_type":860,"authors":5900,"abstract":5905},"lrec2026-main-212","A Mental State Extraction Dataset for Theory-of-Mind-based Reasoning in Emotional Support Conversations","10.63317\u002F4tu3bpftvd9b","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-212","2705","2723","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.212.pdf","kim-etal-2026-mental",[5901,5903],{"paper_id":5892,"author_seq":247,"given_name":5902,"surname":5173,"affiliation":63,"orcid":63},"Seulgi",{"paper_id":5892,"author_seq":232,"given_name":5904,"surname":5173,"affiliation":63,"orcid":63},"Harksoo","Emotional Support Conversations (ESC) aim to both reduce users’ emotional distress and facilitate problem-solving. 
Recent approaches in ESC have explored incorporating commonsense knowledge into large language models (LLMs) to improve response generation. However, existing commonsense reasoning models often rely solely on the final utterance, fail to anticipate future turns, overlook emotional cues, or treat knowledge types independently, resulting in incoherent or emotionally misaligned responses. To address these limitations, we propose an approach grounded in Theory of Mind (ToM). Specifically, we introduce MENTOS, a dataset that provides turn-level annotations of the assistant’s mental states (Belief, Emotion, and Intent), organized in a causal structure reflecting psychological principles. A commonsense reasoning model trained on MENTOS predicts these mental states as intermediate reasoning signals that guide response generation. Experiments on the ESConv and ExTES datasets show that incorporating the inferred mental states can enhance supportive and goal-directed response generation across multiple reasoning backbones and response generators. Ablation studies further confirm that Belief, Emotion, and Intent provide complementary benefits for ESC tasks. 
These findings highlight the effectiveness of ToM-grounded intermediate reasoning in generating empathetic and contextually appropriate responses.",{"paper_id":5907,"title":5908,"year":7,"month":188,"day":63,"doi":5909,"resource_url":5910,"first_page":5911,"last_page":5912,"pdf_url":5913,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5914,"paper_type":860,"authors":5915,"abstract":5926},"lrec2026-main-213","Construction and Analysis of Japanese Parent-Child Dialogic Reading Corpus for Conversational Agents","10.63317\u002F3khbbpw33gyp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-213","2724","2730","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.213.pdf","nakagi-etal-2026-construction",[5916,5919,5922,5924],{"paper_id":5907,"author_seq":247,"given_name":5917,"surname":5918,"affiliation":63,"orcid":63},"Yuko","Nakagi",{"paper_id":5907,"author_seq":232,"given_name":5920,"surname":5921,"affiliation":63,"orcid":63},"Yuya","Chiba",{"paper_id":5907,"author_seq":218,"given_name":2046,"surname":5923,"affiliation":63,"orcid":63},"Fujita",{"paper_id":5907,"author_seq":203,"given_name":1460,"surname":5925,"affiliation":63,"orcid":63},"Araki","Dialogic reading, which involves interactive exchanges between a parent and a child during picture book reading, has been shown to effectively promote children’s language development. While many support systems for picture book reading have been developed to reduce the burden on parents, existing systems are not yet capable of handling dialogic reading, which requires dynamic parent-child interaction. To develop conversational agents capable of dialogic reading, we constructed a multimodal corpus of parent-child picture-book reading dialogues. The corpus comprises recordings from 36 Japanese parent-child pairs taken during actual picture book reading sessions. 
In this study, we annotated the corpus with dialogue acts relevant to parent-child communication and categorized the types of quizzes and questions used in the sessions, analyzing the linguistic aspects of parent-child interaction during dialogic reading. After dividing the dialogues into two groups based on the proportion of the child’s utterances, our analyses revealed that dialogue systems should adapt their interaction strategies according to individual child characteristics.",{"paper_id":5928,"title":5929,"year":7,"month":188,"day":63,"doi":5930,"resource_url":5931,"first_page":5932,"last_page":5933,"pdf_url":5934,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5935,"paper_type":860,"authors":5936,"abstract":5944},"lrec2026-main-214","ACLBot: A Knowledge Graph-Driven Assistant for ACL Anthology Research","10.63317\u002F33kmjwr3vv44","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-214","2731","2741","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.214.pdf","buchmann-etal-2026-aclbot",[5937,5939,5941],{"paper_id":5928,"author_seq":247,"given_name":1380,"surname":5938,"affiliation":63,"orcid":63},"Buchmann",{"paper_id":5928,"author_seq":232,"given_name":3337,"surname":5940,"affiliation":63,"orcid":63},"Lynden",{"paper_id":5928,"author_seq":218,"given_name":5942,"surname":5943,"affiliation":63,"orcid":63},"Kristiina","Jokinen","We present ACLBot, an interactive chatbot designed to support literature exploration in the ACL Anthology by combining structured knowledge graph querying with large language model (LLM) generative AI. ACLBot integrates a Neo4j-based knowledge graph constructed by extracting data on publications, authors, topics, and research trends from the ACL Anthology, and automatically generates knowledge graph queries to retrieve relevant information in response to user questions. 
Retrieved results are re-injected into the LLM to produce concise, contextually grounded summaries. We describe the system’s architecture, including its query generation pipeline, knowledge graph integration, and visualization components for highlighting temporal trends in research. To assess usability and effectiveness, we conducted a user evaluation with researchers, collecting qualitative and quantitative feedback on response accuracy, informativeness, and utility for literature discovery. Results indicate that ACLBot effectively supports exploratory search, helps identify relevant works and trends, and offers a promising framework for integrating structured information with generative AI for scientific information retrieval.",{"paper_id":5946,"title":5947,"year":7,"month":188,"day":63,"doi":5948,"resource_url":5949,"first_page":5950,"last_page":5951,"pdf_url":5952,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5953,"paper_type":860,"authors":5954,"abstract":5968},"lrec2026-main-215","This House Debates AI: Evaluating a Language Model in Oxford-Style Debates against Human Experts","10.63317\u002F3ep9kdi62wpr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-215","2742","2759","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.215.pdf","belluzzo-etal-2026-this",[5955,5958,5961,5964,5966],{"paper_id":5946,"author_seq":247,"given_name":5956,"surname":5957,"affiliation":63,"orcid":63},"Umberto","Belluzzo",{"paper_id":5946,"author_seq":232,"given_name":5959,"surname":5960,"affiliation":63,"orcid":63},"Kobi","Hackenburg",{"paper_id":5946,"author_seq":218,"given_name":5962,"surname":5963,"affiliation":63,"orcid":63},"Hannah Rose","Kirk",{"paper_id":5946,"author_seq":203,"given_name":1395,"surname":5965,"affiliation":63,"orcid":63},"Hale",{"paper_id":5946,"author_seq":188,"given_name":1216,"surname":5967,"affiliation":63,"orcid":63},"Röttger","Recent work shows that large language models 
(LLMs) are increasingly capable of generating persuasive arguments and messages, creating concerns over undue influence on human beliefs. Most evidence so far, however, evaluates LLM argumentation and persuasion in single-turn interactions and\u002For compares to weak human baselines. To address this gap, we benchmark a state-of-the-art LLM, Llama 3.1 Instruct 405B, in 100 six-turn Oxford-style debates against 20 experienced human debaters. Each anonymised debate is rated by 5 independent raters, who provide win\u002Floss judgments as well as 0–100 scores across 11 dimensions of quality. Based on these ratings, the LLM is competitive overall, with a win rate of 51.2%, ranking 6th out of 21 debaters on mean performance score. Compared to humans, the LLM generally scores higher on presentational dimensions (e.g., clarity, confidence, formality) but equal on most substantive dimensions (convincingness, evidence, originality). We also find that pre\u002Fpost rater stance tends to shift towards the position raters chose as the winning side, regardless of whether this side was the LLM or a human. 
Overall, our results provide new evidence on the qualities of LLM argumentation and its drivers, suggesting strong argumentative competence even in competitive multi-turn settings.",{"paper_id":5970,"title":5971,"year":7,"month":188,"day":63,"doi":5972,"resource_url":5973,"first_page":5974,"last_page":5975,"pdf_url":5976,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5977,"paper_type":860,"authors":5978,"abstract":5990},"lrec2026-main-216","PAIR: A Pilot Dataset for Dual Perspective-based Video-Grounded Dialogue and Reconciliation","10.63317\u002F5gun3w98ovtb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-216","2760","2771","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.216.pdf","watson-etal-2026-pair",[5979,5982,5985,5988],{"paper_id":5970,"author_seq":247,"given_name":5980,"surname":5981,"affiliation":63,"orcid":63},"Lewis N.","Watson",{"paper_id":5970,"author_seq":232,"given_name":5983,"surname":5984,"affiliation":63,"orcid":63},"Carl","Strathearn",{"paper_id":5970,"author_seq":218,"given_name":5986,"surname":5987,"affiliation":63,"orcid":63},"Kenny","Mitchell",{"paper_id":5970,"author_seq":203,"given_name":5989,"surname":2998,"affiliation":63,"orcid":63},"Yanchao","Collaborative dialogue in multi-agent settings often requires interlocutors to integrate partially overlapping perceptual information in order to construct a shared representation of a dynamic environment. We introduce PAIR, a pilot conversational corpus designed to examine how humans coordinate under systematic perceptual asymmetry. The dataset comprises 15 dialogues in which participants observed the same activity from complementary egocentric and exocentric video perspectives and engaged in open-ended discussion to produce a joint account. All transcripts were manually verified and annotated with 42 dialogue act categories, enabling fine-grained analysis of interactional structure. 
Beyond descriptive statistics, PAIR supports examination of measurable conversational configurations, including turn distribution, participation symmetry, and dialogue act composition, which together provide structural indicators of how perspective integration unfolds in dialogue. Although intentionally lightweight, PAIR is positioned as a controlled benchmark for analysing collaborative dialogue mechanisms rather than a large-scale training resource. The corpus supports dialogue act classification, video-grounded dialogue modelling, and investigation of multi-agent reasoning under distributed perceptual access. By coupling dual-perspective grounding with explicit interactional annotation, PAIR offers a compact testbed for studying reconciliation dynamics in task-oriented dialogue.",{"paper_id":5992,"title":5993,"year":7,"month":188,"day":63,"doi":5994,"resource_url":5995,"first_page":5996,"last_page":5997,"pdf_url":5998,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5999,"paper_type":860,"authors":6000,"abstract":6017},"lrec2026-main-217","I Am Not Them: Persistent Outgroup Bias in Large Language Models Arising from Social Identity Persona 
Setting","10.63317\u002F2hn5gs6yh5m2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-217","2772","2786","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.217.pdf","dong-etal-2026-am",[6001,6003,6006,6009,6012,6014],{"paper_id":5992,"author_seq":247,"given_name":6002,"surname":5518,"affiliation":63,"orcid":63},"Wenchao",{"paper_id":5992,"author_seq":232,"given_name":6004,"surname":6005,"affiliation":63,"orcid":63},"Assem","Zhunis",{"paper_id":5992,"author_seq":218,"given_name":6007,"surname":6008,"affiliation":63,"orcid":63},"Dongyoung","Jeong",{"paper_id":5992,"author_seq":203,"given_name":6010,"surname":6011,"affiliation":63,"orcid":63},"Hyojin","Chin",{"paper_id":5992,"author_seq":188,"given_name":6013,"surname":4143,"affiliation":63,"orcid":63},"Jiyoung",{"paper_id":5992,"author_seq":172,"given_name":6015,"surname":6016,"affiliation":63,"orcid":63},"Meeyoung","Cha","This research examines how large language models internalize social identities assigned through targeted prompts. Guided by social identity theory, we investigate whether and how these identity assignments cause AI systems to differentiate between \"we\" (the ingroup) and \"they\" (the outgroup). We demonstrate that self-categorization of social identity leads to both ingroup favoritism and outgroup bias, with the latter manifesting as strongly as the former. This finding is significant given the fundamental role of outgroup bias in driving intergroup prejudice and discrimination as documented in social psychology. We further propose a strategic intervention to mitigate such bias by guiding language models to adopt the identity of the initially disfavored group. This method, validated across both political and gender domains, exposes a critical dual function of group alignment: adopting one social identity inherently alters the model’s stance toward outgroups, effectively neutralizing pre-existing biases. 
Our work shows that understanding human-like AI behaviors is a critical prerequisite to building more balanced and socially responsible technology.",{"paper_id":6019,"title":6020,"year":7,"month":188,"day":63,"doi":6021,"resource_url":6022,"first_page":6023,"last_page":6024,"pdf_url":6025,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6026,"paper_type":860,"authors":6027,"abstract":6038},"lrec2026-main-218","CONVERSE: Annotation Scheme and Dataset for Multimodal Conversational Engagement Analysis in Human-Human and Human-Robot Interaction","10.63317\u002F2ceqaut47as6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-218","2787","2797","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.218.pdf","torubarova-etal-2026-converse",[6028,6031,6034,6036],{"paper_id":6019,"author_seq":247,"given_name":6029,"surname":6030,"affiliation":63,"orcid":63},"Ekaterina","Torubarova",{"paper_id":6019,"author_seq":232,"given_name":6032,"surname":6033,"affiliation":63,"orcid":63},"Oskar","Ljung",{"paper_id":6019,"author_seq":218,"given_name":5233,"surname":6035,"affiliation":63,"orcid":63},"Uddén",{"paper_id":6019,"author_seq":203,"given_name":6037,"surname":1581,"affiliation":63,"orcid":63},"André","Creating conversational agents that can both understand and respond appropriately to users’ engagement remains a major challenge, as conversation is one of the most universal yet complex human behaviors. Modeling conversational engagement requires a fine-grained understanding of how engagement unfolds dynamically in interaction. This paper introduces a novel turn-based annotation scheme for conversational engagement, together with the CONVERSE dataset that contains annotations of 25 hours of unscripted human–human and human–robot conversations with 48 native Swedish speakers. 
This dataset uniquely utilizes such an annotation scheme for both human and robot agents within the same study, allowing for direct comparison. Notably, this dataset builds upon our previous multimodal corpus, which includes brain imaging (fMRI), eye-tracking, and speech data, as well as personality and stance measures. This dataset opens a new perspective on conversational engagement through these behavioral annotations and the existing neural data at the intersection of multimodal machine learning, human-robot interaction, and cognitive neuroscience.",{"paper_id":6040,"title":6041,"year":7,"month":188,"day":63,"doi":6042,"resource_url":6043,"first_page":6044,"last_page":6045,"pdf_url":6046,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6047,"paper_type":860,"authors":6048,"abstract":6061},"lrec2026-main-219","FineDialFact: A Benchmark for Fine-Grained Dialogue Fact Verification","10.63317\u002F3y7cf5ctmi4c","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-219","2798","2811","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.219.pdf","chen-etal-2026-finedialfact",[6049,6051,6053,6056,6058],{"paper_id":6040,"author_seq":247,"given_name":6050,"surname":1840,"affiliation":63,"orcid":63},"Xiangyan",{"paper_id":6040,"author_seq":232,"given_name":6052,"surname":3446,"affiliation":63,"orcid":63},"Yufeng",{"paper_id":6040,"author_seq":218,"given_name":6054,"surname":6055,"affiliation":63,"orcid":63},"Yujian","Gan",{"paper_id":6040,"author_seq":203,"given_name":6057,"surname":1274,"affiliation":63,"orcid":63},"Arkaitz",{"paper_id":6040,"author_seq":188,"given_name":6059,"surname":6060,"affiliation":63,"orcid":63},"Matthew","Purver","Large language models are known to produce hallucinations - factually incorrect or fabricated information - which poses significant challenges for many natural language processing applications, such as dialogue systems. 
As a result, detecting hallucinations has become a critical area of research. Current approaches to hallucination detection in dialogue systems primarily focus on verifying the factual consistency of generated responses. However, these responses often contain a mix of accurate, inaccurate or non-verifiable facts, making the use of a single factual label overly simplistic and coarse-grained. In this paper, we introduce a benchmark, FineDialFact, for fine-grained dialogue fact verification, which involves verifying atomic facts extracted from dialogue responses. To support this, we construct a dataset based on publicly available dialogue datasets and evaluate it using various baseline methods. Experimental results demonstrate that methods incorporating Chain-of-Thought reasoning can enhance performance in dialogue fact verification. Despite this, the best F1-score achieved on the HybriDialogue, an open-domain dialogue dataset, is only 0.74, indicating that the benchmark remains a challenging task for future research. 
We release our dataset and code at https:\u002F\u002Fgithub.com\u002FXiangyanChen\u002FFineDialFact.",{"paper_id":6063,"title":6064,"year":7,"month":188,"day":63,"doi":6065,"resource_url":6066,"first_page":6067,"last_page":6068,"pdf_url":6069,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6070,"paper_type":860,"authors":6071,"abstract":6081},"lrec2026-main-220","Meta-Prompting Follow-Ups for Unsupervised Dialogue Evaluation Using Open-Source Large Language Models","10.63317\u002F4i8vxn9qi57r","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-220","2812","2824","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.220.pdf","cimino-etal-2026-meta",[6072,6075,6076,6078],{"paper_id":6063,"author_seq":247,"given_name":6073,"surname":6074,"affiliation":63,"orcid":63},"Gaetano","Cimino",{"paper_id":6063,"author_seq":232,"given_name":5574,"surname":3446,"affiliation":63,"orcid":63},{"paper_id":6063,"author_seq":218,"given_name":2709,"surname":6077,"affiliation":63,"orcid":63},"Carenini",{"paper_id":6063,"author_seq":203,"given_name":6079,"surname":6080,"affiliation":63,"orcid":63},"Vincenzo","Deufemia","Automatically evaluating dialogue quality remains a major challenge due to the complexity and contextual variability of human interactions. This paper introduces DIET, a novel unsupervised, reference-free metric that uses follow-up utterances to assess dialogue quality. Unlike existing reference-free metrics, which rely on follow-ups derived from annotated data and apply a uniform set of utterances across all dialogues, DIET generates follow-ups using open-source Large Language Models (LLMs) and refines them through a selection process. 
Two strategies are explored: SELFMAP, where generation and evaluation are performed by the same model to ensure internal coherence, and CRAFT, where multiple models collaborate to generate diverse and complementary follow-ups, enhancing robustness and reducing model bias. Dialogue quality is measured via the likelihood of an LLM continuing the dialogue from selected follow-ups. Experiments show DIET better correlates with human judgments than existing reference-free metrics across multiple meta-evaluation datasets.",{"paper_id":6083,"title":6084,"year":7,"month":188,"day":63,"doi":6085,"resource_url":6086,"first_page":6087,"last_page":6088,"pdf_url":6089,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6090,"paper_type":860,"authors":6091,"abstract":6103},"lrec2026-main-221","HumaniCA: A Benchmark Resource for the Detection of Users' Ascription of Humanness to Conversational Agents","10.63317\u002F3uujwof4yj3o","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-221","2825","2835","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.221.pdf","villata-etal-2026-humanica",[6092,6095,6098,6101],{"paper_id":6083,"author_seq":247,"given_name":6093,"surname":6094,"affiliation":63,"orcid":63},"Sabrina","Villata",{"paper_id":6083,"author_seq":232,"given_name":6096,"surname":6097,"affiliation":63,"orcid":63},"Amon","Rapp",{"paper_id":6083,"author_seq":218,"given_name":6099,"surname":6100,"affiliation":63,"orcid":63},"Luigi Di","Caro",{"paper_id":6083,"author_seq":203,"given_name":3205,"surname":6102,"affiliation":63,"orcid":63},"Cena","Anthropomorphizing, which involves attributing human-like characteristics to non-human entities, is common in users’ conversations with text-based conversational agents and can lead to a misalignment between the users’ expectations and the agent’s actual capabilities. 
Detecting users’ ascriptions of humanness automatically may enable systems to identify when users adopt a human-like style when conversing with an agent and to adapt its responses accordingly to tune their expectations. In this paper, we introduce HumaniCA, a benchmark resource comprising three annotated datasets of user turns from real dialogues with three different types of conversational agents (task-oriented, Q&A, and LLM-based) aimed at indicating whether the user is ascribing humanness to the conversational agent. We also identified a set of linguistic indicators of user ascription of humanness to conversational agents and validated their utility with benchmark experiments. We then compared performance of our linguistic features and other well-known textual features (TF-IDF weights and SentenceBERT word embeddings), as well as their combinations. The evaluation highlights the central role of our linguistic features: whether used individually or in combination, they consistently achieve higher accuracy across all agent types.",{"paper_id":6105,"title":6106,"year":7,"month":188,"day":63,"doi":6107,"resource_url":6108,"first_page":6109,"last_page":6110,"pdf_url":6111,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6112,"paper_type":860,"authors":6113,"abstract":6119},"lrec2026-main-222","Towards Reliable Evaluation of Emotional Text Generation in LLMs: Human vs. 
Automatic Metrics","10.63317\u002F554t7yighn5u","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-222","2836","2847","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.222.pdf","jafari-etal-2026-reliable",[6114,6117,6118],{"paper_id":6105,"author_seq":247,"given_name":6115,"surname":6116,"affiliation":63,"orcid":63},"Sadegh","Jafari",{"paper_id":6105,"author_seq":232,"given_name":2294,"surname":2295,"affiliation":63,"orcid":63},{"paper_id":6105,"author_seq":218,"given_name":2182,"surname":2183,"affiliation":63,"orcid":63},"Evaluating emotion generation in large language models (LLMs) remains a challenging problem due to the subjective nature of emotions and the lack of reliable automatic evaluation metrics. In this paper, we introduce a robust and extensible benchmark for systematically assessing automatic metrics in emotion generation tasks. The benchmark currently includes 13 automatic evaluation metrics and five state-of-the-art LLMs, and can be easily extended without requiring additional human annotations. Through a correlation analysis with human evaluations on a carefully curated annotated subset, we identify the emotion recognition score (ERS) metric, computed with gpt-5-nano in a one-shot setting, as the most reliable automatic evaluator, achieving a correlation exceeding 0.99. Interestingly, despite relying on the same underlying LLM, the emotion absolute score (EAS) metric shows a negative correlation, demonstrating that LLM strength alone does not guarantee automatic metric alignment with human judgment. We also provide lightweight, non-LLM-based alternatives, R2_m and R3_m, in the emotion analogy score (EAnS) metric family, suitable for low-resource settings where large models are not accessible. A comprehensive per-class emotion analysis further highlights the strengths and weaknesses of the evaluated models. 
Overall, our results offer a practical and scalable framework for benchmarking emotion generation evaluation metrics and pave the way for more reliable, fair, and interpretable emotional language evaluation.",{"paper_id":6121,"title":6122,"year":7,"month":188,"day":63,"doi":6123,"resource_url":6124,"first_page":6125,"last_page":6126,"pdf_url":6127,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6128,"paper_type":860,"authors":6129,"abstract":6145},"lrec2026-main-223","Question and Response Dynamics in Public Service Encounters","10.63317\u002F44ysuy55vyoi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-223","2848","2855","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.223.pdf","siskou-etal-2026-question",[6130,6133,6136,6139,6142],{"paper_id":6121,"author_seq":247,"given_name":6131,"surname":6132,"affiliation":63,"orcid":63},"Wassiliki","Siskou",{"paper_id":6121,"author_seq":232,"given_name":6134,"surname":6135,"affiliation":63,"orcid":63},"Ingrid","Espinoza",{"paper_id":6121,"author_seq":218,"given_name":6137,"surname":6138,"affiliation":63,"orcid":63},"Laurin","Friedrich",{"paper_id":6121,"author_seq":203,"given_name":6140,"surname":6141,"affiliation":63,"orcid":63},"Steffen","Eckhard",{"paper_id":6121,"author_seq":188,"given_name":6143,"surname":6144,"affiliation":63,"orcid":63},"Annette","Hautli-Janisz","When deciding on social welfare benefits, street-level bureaucrats wield significant discretionary power over citizens. One of the key instruments of this power lies in the questioning patterns that control the conversational agenda in face-to-face encounters. In turn, the citizens’ responses show how they navigate these conversational constraints, for instance by answering directly or through more evasive strategies. 
To shed light on the power dynamics inherent in these encounters, we provide over 200 verbatim transcripts of authentic conversations in German between street-level bureaucrats and citizens, as well as a fully annotated dataset of all question-response pairs extracted from these conversations. We also present PSE v2.0, which is double the size of the only previously available corpus of spoken interactions in street-level bureaucracy. Keywords: Public Service Encounters, verbatim transcripts, question-response pairs",{"paper_id":6147,"title":6148,"year":7,"month":188,"day":63,"doi":6149,"resource_url":6150,"first_page":6151,"last_page":6152,"pdf_url":6153,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6154,"paper_type":860,"authors":6155,"abstract":6160},"lrec2026-main-224","Reasoning over Object Descriptions Improves Coreference Resolution in Task-Based Dialogue Systems","10.63317\u002F34nq5xojbifz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-224","2856","2873","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.224.pdf","ijurco-etal-2026-reasoning",[6156,6159],{"paper_id":6147,"author_seq":247,"given_name":6157,"surname":6158,"affiliation":63,"orcid":63},"Oier","Ijurco",{"paper_id":6147,"author_seq":232,"given_name":2095,"surname":2096,"affiliation":63,"orcid":63},"Task-based dialogue systems assist users in achieving specific goals, such as executing actions or retrieving information, through natural language interactions. Accurate coreference resolution is essential, as it involves identifying object references within the dialogue—a task that becomes increasingly challenging in visually grounded environments characterized by complex scenes and diverse object metadata. However, coreference resolution in task-based dialogue remains limited by poor generalization across domains and heavy reliance on supervised models that often overfit to dataset-specific artifacts. 
In this work, we propose a unimodal test-time reasoning approach that enables large language models (LLMs) to reason over detailed object metadata and dialogue history to improve coreference resolution. Empirical results on the SIMMC 2.1 dataset demonstrate that LLMs can generate step-by-step reasoning processes that effectively align dialogue context with objects present in the scene. Extensive experiments highlight the models’ ability to link conversations and objects accurately. Moreover, we show that test-time reasoning under few-shot settings generalizes effectively to unseen scenarios and novel objects, outperforming encoder-based supervised methods in cross-domain evaluations. These findings underscore the critical role of structured metadata and careful prompt engineering in enhancing the robustness and generalization of task-oriented dialogue systems.",{"paper_id":6162,"title":6163,"year":7,"month":188,"day":63,"doi":6164,"resource_url":6165,"first_page":6166,"last_page":6167,"pdf_url":6168,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6169,"paper_type":860,"authors":6170,"abstract":6183},"lrec2026-main-225","Evaluating the Effect of Question Wording Variations on Answer Consistency in Large Language 
Models","10.63317\u002F4k8j56pzchi7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-225","2874","2886","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.225.pdf","takayama-etal-2026-evaluating",[6171,6174,6177,6180],{"paper_id":6162,"author_seq":247,"given_name":6172,"surname":6173,"affiliation":63,"orcid":63},"Junya","Takayama",{"paper_id":6162,"author_seq":232,"given_name":6175,"surname":6176,"affiliation":63,"orcid":63},"Masaya","Ohagi",{"paper_id":6162,"author_seq":218,"given_name":6178,"surname":6179,"affiliation":63,"orcid":63},"Tomoya","Mizumoto",{"paper_id":6162,"author_seq":203,"given_name":6181,"surname":6182,"affiliation":63,"orcid":63},"Katsumasa","Yoshikawa","Large Language Models (LLMs) sometimes generate inconsistent answers when asked semantically equivalent questions expressed with different wordings. Such inconsistency may lead to decreased task performance or excessive agreement with users. This study investigates how question wording influences the answer consistencies of LLMs, focusing on binary Yes\u002FNo questions. We design four types of paraphrasing patterns, namely synonym substitution, antonym substitution, addition of agreement-seeking expressions, and strengthened agreement-seeking expressions, and evaluate their impact on model outputs. Experiments with multiple open-source and commercial LLMs show that many models become more likely to answer \"Yes\" when agreement-seeking expressions are included, and they are particularly vulnerable to antonym substitutions. Our analysis further suggests that some of these tendencies are already present in pretrained models and are not fully removed by post-training. We also provide insights into which factors are likely (or unlikely) to contribute to improving consistency. 
By providing a systematic evaluation framework, this work highlights the necessity of accounting for wording-induced biases in the development and deployment of LLMs.",{"paper_id":6185,"title":6186,"year":7,"month":188,"day":63,"doi":6187,"resource_url":6188,"first_page":6189,"last_page":6190,"pdf_url":6191,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6192,"paper_type":860,"authors":6193,"abstract":6206},"lrec2026-main-226","Knowledge-Infused Hierarchy-Aware Emotion Recognition in Code-mixed Mental Health Counseling Conversations","10.63317\u002F3xwvyj2py7r9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-226","2887","2898","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.226.pdf","srivastava-etal-2026-knowledge",[6194,6197,6200,6203],{"paper_id":6185,"author_seq":247,"given_name":6195,"surname":6196,"affiliation":63,"orcid":63},"Aseem","Srivastava",{"paper_id":6185,"author_seq":232,"given_name":6198,"surname":6199,"affiliation":63,"orcid":63},"Kushagra","Mittal",{"paper_id":6185,"author_seq":218,"given_name":6201,"surname":6202,"affiliation":63,"orcid":63},"Anusha","Tiwari",{"paper_id":6185,"author_seq":203,"given_name":6204,"surname":6205,"affiliation":63,"orcid":63},"Md. Shad","Akhtar","Effective counseling is often best achieved in a client’s preferred language, allowing better emotional resonance. Despite this, most existing research in emotion recognition in counseling focuses predominantly on English, overlooking the rich emotional and linguistic complexities of other widely spoken languages. Hinglish, a code-mixed blend of Hindi and English, is one such underexplored linguistic medium that millions use to express their emotions authentically. To address this gap, our research lays a foundational step in developing a mental-health conversation dataset in code-mixed Hinglish language, aka. IndieMH. 
We manually translate counseling conversations from publicly available sources into Hinglish. Moreover, we employ the dataset for emotion classification task for counseling patients. We prepare an exhaustive annotation guideline to annotate IndieMH with 13 emotional states under 3 broad emotion categories. Our rigorous sanity check ensures that the quality of IndieMH adheres to research standards. Furthermore, we propose a novel knowledge-cum-hierarchy aware method named Healer for counseling emotion classification in the Hinglish language. To evaluate the model’s performance, we benchmark Healer against 11 potential baseline methods and report standard classification metrics, including accuracy, weighted-F1, and weighted-precision.",{"paper_id":6208,"title":6209,"year":7,"month":188,"day":63,"doi":6210,"resource_url":6211,"first_page":6212,"last_page":6213,"pdf_url":6214,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6215,"paper_type":860,"authors":6216,"abstract":6225},"lrec2026-main-227","A Corpus for Personalized Dialogue Breakdown Repair in Japanese Open-Domain Conversations","10.63317\u002F5pz8k8pxwdug","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-227","2899","2912","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.227.pdf","tsubokura-etal-2026-corpus",[6217,6219,6222],{"paper_id":6208,"author_seq":247,"given_name":883,"surname":6218,"affiliation":63,"orcid":63},"Tsubokura",{"paper_id":6208,"author_seq":232,"given_name":6220,"surname":6221,"affiliation":63,"orcid":63},"Yurie","Iribe",{"paper_id":6208,"author_seq":218,"given_name":6223,"surname":6224,"affiliation":63,"orcid":63},"Norihide","Kitaoka","Recent advances in dialogue systems have been remarkable; however, conversational breakdowns still occur, making it essential to develop appropriate repair strategies. 
Nevertheless, when a system breakdown actually occurs, it remains unclear how the system should perform the repair, and no corpus has been available to investigate this issue. To address this gap, we presented typical examples of system-induced dialogue breakdowns to crowd workers and collected their expected repair utterances toward the broken system. Each repair utterance was annotated with dialogue act tags, and we constructed a breakdown-repair corpus consisting of 3,990 utterances covering ten representative types of breakdowns. This corpus includes breakdown cases across diverse situations, allowing for the examination of various repair patterns. Furthermore, we also conducted a questionnaire on participants’ personal traits, creating a dataset that enables the investigation of repair strategies tailored to individual user characteristics. In this paper, we report an overview of the dataset and preliminary analysis results.",{"paper_id":6227,"title":6228,"year":7,"month":188,"day":63,"doi":6229,"resource_url":6230,"first_page":6231,"last_page":6232,"pdf_url":6233,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6234,"paper_type":860,"authors":6235,"abstract":6258},"lrec2026-main-228","Conversational Assistants to Support Patients with Heart Failure: Comparing a Neurosymbolic Architecture with 
GPT","10.63317\u002F4tng9rshvsna","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-228","2913","2926","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.228.pdf","tayal-etal-2026-conversational",[6236,6239,6242,6243,6246,6249,6252,6255],{"paper_id":6227,"author_seq":247,"given_name":6237,"surname":6238,"affiliation":63,"orcid":63},"Anuja","Tayal",{"paper_id":6227,"author_seq":232,"given_name":6240,"surname":6241,"affiliation":63,"orcid":63},"Devika","Salunke",{"paper_id":6227,"author_seq":218,"given_name":5704,"surname":5705,"affiliation":63,"orcid":63},{"paper_id":6227,"author_seq":203,"given_name":6244,"surname":6245,"affiliation":63,"orcid":63},"Paula G.","Allen-Meares",{"paper_id":6227,"author_seq":188,"given_name":6247,"surname":6248,"affiliation":63,"orcid":63},"Eulalia P.","Abril",{"paper_id":6227,"author_seq":172,"given_name":6250,"surname":6251,"affiliation":63,"orcid":63},"Olga","Garcia-Bedoya",{"paper_id":6227,"author_seq":155,"given_name":6253,"surname":6254,"affiliation":63,"orcid":63},"Carolyn A.","Dickens",{"paper_id":6227,"author_seq":138,"given_name":6256,"surname":6257,"affiliation":63,"orcid":63},"Andrew D.","Boyd","Conversational assistants are becoming increasingly popular, including in healthcare, partly due to the availability and capabilities of Large Language Models. There is a need for controlled, probing evaluations with real stakeholders, which can highlight the advantages and disadvantages of more traditional architectures and those based on generative AI. We present a within-group user study to compare two versions of a conversational assistant that allows patients with heart failure to ask about the salt content in food. One version of the system was developed with a neurosymbolic architecture, and another is based on GPT. Our objective in evaluating the two dialogue systems was not only to compare task performance but also to gain insights from real stakeholders. 
Results indicate that the two systems complement each other, highlighting the promise of a hybrid approach that leverages the strengths of both systems.",{"paper_id":6260,"title":6261,"year":7,"month":188,"day":63,"doi":6262,"resource_url":6263,"first_page":6264,"last_page":6265,"pdf_url":6266,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6267,"paper_type":860,"authors":6268,"abstract":6282},"lrec2026-main-229","Disentangling Approaches to Conversation Disentanglement: Fine-Tune or Learn from Scratch?","10.63317\u002F3frwendckp7g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-229","2927","2941","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.229.pdf","pal-etal-2026-disentangling",[6269,6271,6274,6277,6279],{"paper_id":6260,"author_seq":247,"given_name":6270,"surname":4533,"affiliation":63,"orcid":63},"Debaditya",{"paper_id":6260,"author_seq":232,"given_name":6272,"surname":6273,"affiliation":63,"orcid":63},"Anton","Leuski",{"paper_id":6260,"author_seq":218,"given_name":6275,"surname":6276,"affiliation":63,"orcid":63},"Ron","Artstein",{"paper_id":6260,"author_seq":203,"given_name":1061,"surname":6278,"affiliation":63,"orcid":63},"Traum",{"paper_id":6260,"author_seq":188,"given_name":6280,"surname":6281,"affiliation":63,"orcid":63},"Kallirroi","Georgila","Conversation disentanglement is the process of segmenting a stream of messages or utterances into separate conversations or \"threads\" that can be more easily understood and processed. We compare the performance of GPT-4o and GPT-4o Mini with deep learning models built from scratch for this task. 
We show that, using the same amount of training data, out-of-the-box GPT-4o performs poorly, and fine-tuning GPT-4o Mini results in performance comparable to learning small-size models from scratch (based on standard hand-crafted features for this task), with performance reaching 74.4% F1-score for prediction of links between messages and 45.3% F1-score for prediction of perfectly matching conversations. However, the fine-tuned GPT-4o Mini model underperforms when compared to models that utilize complex structural information. We also provide a new method for detailed analysis of the successes and failures of our models, and a new visualization method.",{"paper_id":6284,"title":6285,"year":7,"month":188,"day":63,"doi":6286,"resource_url":6287,"first_page":6288,"last_page":6289,"pdf_url":6290,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6291,"paper_type":860,"authors":6292,"abstract":6301},"lrec2026-main-230","Evaluation of Failure Communication Strategies for Trust Repair in Human-AI Collaboration","10.63317\u002F3vst92w73bdf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-230","2942","2951","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.230.pdf","klein-etal-2026-evaluation",[6293,6296,6299,6300],{"paper_id":6284,"author_seq":247,"given_name":6294,"surname":6295,"affiliation":63,"orcid":63},"Stina","Klein",{"paper_id":6284,"author_seq":232,"given_name":6297,"surname":6298,"affiliation":63,"orcid":63},"Alexandru","Wurm",{"paper_id":6284,"author_seq":218,"given_name":4360,"surname":4550,"affiliation":63,"orcid":63},{"paper_id":6284,"author_seq":203,"given_name":961,"surname":962,"affiliation":63,"orcid":63},"The increasing application of Large Language Models (LLMs) in everyday tasks and at work highlights the crucial importance of trust in human-AI collaboration, particularly when an AI system fails. 
This paper investigates the effectiveness of failure communication strategies for trust repair in collaborative physical tasks involving a chat-based AI assistant. A controlled experiment in which participants built LEGO cars guided by an LLM-based AI Assistant was used to evaluate whether findings from trust repair in a virtual environment, such as chatbots, translate to an environment comprising tangible tasks, and whether the timing of trust repair influences the outcome. Results indicate that actively communicating mistakes significantly improves trust compared to a no repair strategy, and that early repair tends to be more effective, indicating that failure communication, independent of the timing, is important for an appropriate calibration of trust.",{"paper_id":6303,"title":6304,"year":7,"month":188,"day":63,"doi":6305,"resource_url":6306,"first_page":6307,"last_page":6308,"pdf_url":6309,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6310,"paper_type":860,"authors":6311,"abstract":6319},"lrec2026-main-231","Multi-Session Client-Centered Treatment Outcome Evaluation in Psychotherapy","10.63317\u002F3am66kas7b32","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-231","2952","2968","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.231.pdf","na-etal-2026-multi",[6312,6313,6316,6318],{"paper_id":6303,"author_seq":247,"given_name":4139,"surname":4140,"affiliation":63,"orcid":63},{"paper_id":6303,"author_seq":232,"given_name":6314,"surname":6315,"affiliation":63,"orcid":63},"Tao","Shen",{"paper_id":6303,"author_seq":218,"given_name":6317,"surname":2998,"affiliation":63,"orcid":63},"Shumao",{"paper_id":6303,"author_seq":203,"given_name":4156,"surname":1840,"affiliation":63,"orcid":63},"In psychotherapy, therapeutic outcome assessment, or treatment outcome evaluation, is essential to mental health care by systematically evaluating therapeutic processes and outcomes. 
Existing large language model approaches often focus on therapist-centered, single-session evaluations, neglecting the client’s subjective experience and longitudinal progress across multiple sessions. To address these limitations, we propose IPAEval, a client-Informed Psychological Assessment-based Evaluation framework, which automates treatment outcome evaluations from the client’s perspective using clinical interviews. It integrates cross-session client-contextual assessment and session-focused client-dynamics assessment for a comprehensive understanding of therapeutic progress. Specifically, IPAEval employs a two-stage prompt scheme that maps client information onto psychometric test items, enabling interpretable and structured psychological assessments. Experiments on our new TheraPhase dataset, comprising 400 paired initial and completion stage client records, demonstrate that IPAEval effectively tracks symptom severity and treatment outcomes over multiple sessions, outperforming baseline approaches across both closed-source and open-source models, and validating the benefits of items-aware reasoning mechanisms.",{"paper_id":6321,"title":6322,"year":7,"month":188,"day":63,"doi":6323,"resource_url":6324,"first_page":6325,"last_page":6326,"pdf_url":6327,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6328,"paper_type":860,"authors":6329,"abstract":6335},"lrec2026-main-232","Towards Reward Modeling for AI Tutors in Math Mistake Remediation","10.63317\u002F5i2e38498j4m","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-232","2969","2986","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.232.pdf","petukhova-etal-2026-reward",[6330,6333],{"paper_id":6321,"author_seq":247,"given_name":6331,"surname":6332,"affiliation":63,"orcid":63},"Kseniia","Petukhova",{"paper_id":6321,"author_seq":232,"given_name":6029,"surname":6334,"affiliation":63,"orcid":63},"Kochmar","Evaluating the pedagogical 
quality of AI tutors remains challenging: standard NLG metrics do not determine whether responses identify mistakes, scaffold reasoning, or avoid revealing the answers. For the task of mistake remediation, we derive a hierarchy of pedagogical aspects from human pairwise preferences on MRBench, and synthesize minimally contrastive response pairs that differ along key aspects (e.g., mistake identification and location, targetedness, scaffolding, actionability, clarity, and coherence). We develop and release Bradley-Terry preference models trained on weighted-sum rankings that we automatically create from MRBench, synthetic pairs, and data combinations. Using only synthetic data, our best model reaches 0.69 pairwise accuracy on a human preference test, and combining weighted-sum data with targeted synthetic groups improves accuracy to 0.74, outperforming larger general-purpose reward models while using only a 0.5B-parameter backbone.",{"paper_id":6337,"title":6338,"year":7,"month":188,"day":63,"doi":6339,"resource_url":6340,"first_page":6341,"last_page":6342,"pdf_url":6343,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6344,"paper_type":860,"authors":6345,"abstract":6361},"lrec2026-main-233","HOTATE: A Japanese Dialogue Corpus Annotated with Responses of Private Thoughts and Public 
Statements","10.63317\u002F3q6pji5z9zn7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-233","2987","2995","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.233.pdf","toda-etal-2026-hotate",[6346,6348,6349,6352,6355,6358,6360],{"paper_id":6337,"author_seq":247,"given_name":5917,"surname":6347,"affiliation":63,"orcid":63},"Toda",{"paper_id":6337,"author_seq":232,"given_name":2790,"surname":2791,"affiliation":63,"orcid":63},{"paper_id":6337,"author_seq":218,"given_name":6350,"surname":6351,"affiliation":63,"orcid":63},"Kota","Manabe",{"paper_id":6337,"author_seq":203,"given_name":6353,"surname":6354,"affiliation":63,"orcid":63},"Eito","Yoneyama",{"paper_id":6337,"author_seq":188,"given_name":6356,"surname":6357,"affiliation":63,"orcid":63},"Kanade","Nonomura",{"paper_id":6337,"author_seq":172,"given_name":1463,"surname":6359,"affiliation":63,"orcid":63},"Fujiwara",{"paper_id":6337,"author_seq":155,"given_name":2793,"surname":2794,"affiliation":63,"orcid":63},"This study aims to reveal how accurately Large Language Models (LLMs) can deal with a speaker’s actual utterances and their true feelings behind them in Japanese dialogue. Speakers use not only private thoughts which express one’s true feelings and intentions, but also public statements which convey their intentions while considering the interlocutor’s feelings and social status. While public statements help to maintain interpersonal relationships, they can obscure the speaker’s true intention, potentially leading to misunderstandings. We extended existing Japanese dialogue corpora by annotating public statements and private thoughts responses for each dialogue in the corpora, and then evaluated LLMs’ ability to classify and generate between these two types of expressions. 
The results of the classification task revealed that the current LLMs do not understand those expressions at all, and that training with our corpus can significantly improve the recognition performance. Furthermore, the results of the generation task demonstrated that generating private thoughts is more difficult than generating public statements, according to both automatic and human evaluations. We release our corpus, which contains 7,964 human-annotated dialogues.",{"paper_id":6363,"title":6364,"year":7,"month":188,"day":63,"doi":6365,"resource_url":6366,"first_page":6367,"last_page":6368,"pdf_url":6369,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6370,"paper_type":860,"authors":6371,"abstract":6383},"lrec2026-main-234","Mining Naturally Romanized Seed Corpora without Romanizations","10.63317\u002F3fhfr77pugrj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-234","2996","3012","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.234.pdf","benton-etal-2026-mining",[6372,6375,6377,6380],{"paper_id":6363,"author_seq":247,"given_name":6373,"surname":6374,"affiliation":63,"orcid":63},"Adrian","Benton",{"paper_id":6363,"author_seq":232,"given_name":869,"surname":6376,"affiliation":63,"orcid":63},"Gutkin",{"paper_id":6363,"author_seq":218,"given_name":6378,"surname":6379,"affiliation":63,"orcid":63},"Christo","Kirov",{"paper_id":6363,"author_seq":203,"given_name":6381,"surname":6382,"affiliation":63,"orcid":63},"Brian","Roark","While the Latin script is used informally by speakers of many languages with different native scripts, high quality Latin script corpora for such languages that reflect actual natural romanizations are scarce and often difficult to collect. In this work, we propose a method for mining romanized language corpora in languages for which we do not have any pre-existing samples of naturally romanized text, focusing on Tigrinya as a test case. 
First we examine the efficacy of learning romanizations for a language based on observed romanizations in other languages that use the same native script. We then extrinsically assess such methods by using a romanization model trained on Amharic data to bootstrap coverage of romanized Tigrinya in a language identification system. Manual evaluation by two L1 and one L2 Tigrinya speakers suggests our method extracts romanized Tigrinya text with acceptably high precision.",{"paper_id":6385,"title":6386,"year":7,"month":188,"day":63,"doi":6387,"resource_url":6388,"first_page":6389,"last_page":6390,"pdf_url":6391,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6392,"paper_type":860,"authors":6393,"abstract":6400},"lrec2026-main-235","From Press to Pixels: Evolving Urdu Text Recognition","10.63317\u002F4drrpn75kzpm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-235","3013","3021","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.235.pdf","arif-etal-2026-press",[6394,6397],{"paper_id":6385,"author_seq":247,"given_name":6395,"surname":6396,"affiliation":63,"orcid":63},"Samee","Arif",{"paper_id":6385,"author_seq":232,"given_name":6398,"surname":6399,"affiliation":63,"orcid":63},"Sualeha","Farid","This paper presents a comparative analysis of Large Language Models (LLMs) and traditional Optical Character Recognition (OCR) systems on Urdu newspapers, addressing challenges posed by complex multi-column layouts, low-resolution scans, and the stylistic variability of the Nastaliq script. To handle these challenges, we fine-tune YOLOv11x models for article- and column-level text block extraction and train a SwinIR-based super-resolution module that enhances image quality for downstream text recognition, improving accuracy by an average of 50%. 
We further introduce the Urdu Newspaper Benchmark (UNB), a manually annotated dataset for Urdu OCR comprising 829 paragraph images with a total of 9,982 sentences. Using UNB and the OpenITI corpus, we conduct a systematic comparison between traditional CNN+RNN-based OCR systems and modern LLMs, presenting detailed insertion, deletion, and substitution error analyses alongside character-level confusion patterns. We find that Gemini-2.5-Pro achieves the best performance on UNB (WER 0.133), while fine-tuning GPT-4o on just 500 in-domain samples yields a 6.13% absolute WER improvement, demonstrating the adaptability of LLMs to low-resource, morphologically complex scripts like Urdu. The UNB dataset and fine-tuned models are publicly available at https:\u002F\u002Fgithub.com\u002Fpaper-seven\u002FUrduOCR.",{"paper_id":6402,"title":6403,"year":7,"month":188,"day":63,"doi":6404,"resource_url":6405,"first_page":6406,"last_page":6407,"pdf_url":6408,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6409,"paper_type":860,"authors":6410,"abstract":6413},"lrec2026-main-236","HalleluBERT: Let Every Token That Has Meaning Bear Its Weight","10.63317\u002F3qdqexx4e9i2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-236","3022","3030","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.236.pdf","scheibleschmitt-2026-hallelubert",[6411],{"paper_id":6402,"author_seq":247,"given_name":3266,"surname":6412,"affiliation":63,"orcid":63},"Scheible-Schmitt","Transformer-based models have advanced NLP, yet Hebrew still lacks a RoBERTa encoder that is trained at scale and released in both base and large variants. We present HalleluBERT, a RoBERTa-based encoder family trained from scratch on 49.1 GB of deduplicated Hebrew web text and Wikipedia using a Hebrew-specific byte-level BPE vocabulary. 
On native Hebrew benchmarks for named entity recognition (BMC, NEMO) and sentiment classification (SMCD), HalleluBERT outperforms monolingual and multilingual baselines, and yields the highest unweighted mean score across the three benchmarks. We release model weights and tokenizer under the MIT license to support reproducible Hebrew NLP research.",{"paper_id":6415,"title":6416,"year":7,"month":188,"day":63,"doi":6417,"resource_url":6418,"first_page":6419,"last_page":6420,"pdf_url":6421,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6422,"paper_type":860,"authors":6423,"abstract":6427},"lrec2026-main-237","Sentiment Analysis and Language Models for Kwanyama","10.63317\u002F4whctbu5acfp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-237","3031","3043","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.237.pdf","nakashole-2026-sentiment",[6424],{"paper_id":6415,"author_seq":247,"given_name":6425,"surname":6426,"affiliation":63,"orcid":63},"Ndapa","Nakashole","Kwanyama is related to Swahili, Zulu, and the more than 300 other languages in the Bantu family. Yet, unlike its better-known relatives, it remains almost entirely absent from modern Natural Language Processing (NLP). We bring Kwanyama into the LLM era of NLP through two key contributions. First, we introduce OkaSentiment, the first sentiment-labeled dataset for Kwanyama. Unlike prior African sentiment corpora that rely primarily on social media, OkaSentiment is grounded in an offline, culturally relevant domain: reviews of domestic labor relationships. The dataset is annotated by over 40 native speakers under expert supervision, with careful quality control. Second, we present OkaLM, the first language models for Kwanyama (1B, 3B, and 8B parameters), obtained by continued pretraining of LLaMA-3 checkpoints on a curated Kwanyama corpus. 
Together, OkaSentiment and OkaLM bring a left-behind language into the landscape of modern NLP, providing its first benchmark and language models.",{"paper_id":6429,"title":6430,"year":7,"month":188,"day":63,"doi":6431,"resource_url":6432,"first_page":6433,"last_page":6434,"pdf_url":6435,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6436,"paper_type":860,"authors":6437,"abstract":6447},"lrec2026-main-238","TigerCoder: A Novel Suite of LLMs for Code Generation in Bangla","10.63317\u002F5nampb63np3m","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-238","3044","3054","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.238.pdf","raihan-etal-2026-tigercoder",[6438,6441,6444],{"paper_id":6429,"author_seq":247,"given_name":6439,"surname":6440,"affiliation":63,"orcid":63},"Nishat","Raihan",{"paper_id":6429,"author_seq":232,"given_name":6442,"surname":6443,"affiliation":63,"orcid":63},"Antonios","Anastasopoulos",{"paper_id":6429,"author_seq":218,"given_name":6445,"surname":6446,"affiliation":63,"orcid":63},"Marcos","Zampieri","Despite being the 5th most spoken language, Bangla remains underrepresented in Large Language Models (LLMs), particularly for code generation. This primarily stems from the scarcity of high-quality data to pre-train and\u002For finetune such models. Hence, we introduce the first dedicated family of Code LLMs for Bangla (1B & 9B). We offer three major contributions: (1) a comprehensive Bangla code instruction datasets for programming domain adaptation; (2) MBPP-Bangla, an evaluation benchmark for Bangla code generation; and (3) the TigerCoder-family of Code LLMs, achieving significant  11-18% performance gains at Pass@1 over existing multilingual and general-purpose Bangla LLMs. 
Our findings show that curated, high-quality datasets can overcome limitations of smaller models for low-resource languages.",{"paper_id":6449,"title":6450,"year":7,"month":188,"day":63,"doi":6451,"resource_url":6452,"first_page":6453,"last_page":6454,"pdf_url":6455,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6456,"paper_type":860,"authors":6457,"abstract":6477},"lrec2026-main-239","ViX-Ray: A Vietnamese Chest X-Ray Dataset for Vision-Language Models","10.63317\u002F3do8tpzockx8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-239","3055","3074","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.239.pdf","nguyen-etal-2026-vix",[6458,6460,6463,6466,6469,6472,6475],{"paper_id":6449,"author_seq":247,"given_name":6459,"surname":2395,"affiliation":63,"orcid":63},"Duy Vu Minh",{"paper_id":6449,"author_seq":232,"given_name":6461,"surname":6462,"affiliation":63,"orcid":63},"Chinh Thanh","Truong",{"paper_id":6449,"author_seq":218,"given_name":6464,"surname":6465,"affiliation":63,"orcid":63},"Trần Hoàng","Phúc",{"paper_id":6449,"author_seq":203,"given_name":6467,"surname":6468,"affiliation":63,"orcid":63},"Hung Tuan","Le",{"paper_id":6449,"author_seq":188,"given_name":6470,"surname":6471,"affiliation":63,"orcid":63},"Nguyen Van-Thanh","Dat",{"paper_id":6449,"author_seq":172,"given_name":6473,"surname":6474,"affiliation":63,"orcid":63},"Trung Hieu","Pham",{"paper_id":6449,"author_seq":155,"given_name":6476,"surname":2395,"affiliation":63,"orcid":63},"Kiet Van","Vietnamese medical research has become an increasingly vital domain, particularly with the rise of intelligent technologies aimed at reducing time and resource burdens in clinical diagnosis. Recent advances in vision-language models (VLMs), such as Gemini and GPT-4V, have sparked a growing interest in applying AI to healthcare. 
However, most existing VLMs lack exposure to Vietnamese medical data, limiting their ability to generate accurate and contextually appropriate diagnostic outputs for Vietnamese patients. To address this challenge, we introduce ViX-Ray, a novel dataset comprising 5,400 Vietnamese chest X-ray images annotated with expert-written findings and impressions from physicians at a major Vietnamese hospital. We analyze linguistic patterns within the dataset, including the frequency of mentioned body parts and diagnoses, to identify domain-specific linguistic characteristics of Vietnamese radiology reports. Furthermore, we fine-tune five state-of-the-art open-source VLMs on ViX-Ray and compare their performance to leading proprietary models, GPT-4V and Gemini. Our results show that while several models generate outputs partially aligned with clinical ground truths, they often suffer from low precision and excessive hallucination, especially in impression generation. These findings not only demonstrate the complexity and challenge of our dataset but also establish ViX-Ray as a valuable benchmark for evaluating and advancing vision-language models in the Vietnamese clinical domain.",{"paper_id":6479,"title":6480,"year":7,"month":188,"day":63,"doi":6481,"resource_url":6482,"first_page":6483,"last_page":6484,"pdf_url":6485,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6486,"paper_type":860,"authors":6487,"abstract":6500},"lrec2026-main-240","Creating Task-Specific Speech Recognition Datasets from Scratch for Low-Resource Languages: Assessing the Impact of Token Sequence Overlap","10.63317\u002F3myb33sgskfb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-240","3075","3082","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.240.pdf","bremang-etal-2026-creating",[6488,6491,6494,6497],{"paper_id":6479,"author_seq":247,"given_name":6489,"surname":6490,"affiliation":63,"orcid":63},"Adwoa 
Asantewaa","Bremang",{"paper_id":6479,"author_seq":232,"given_name":6492,"surname":6493,"affiliation":63,"orcid":63},"Dennis Asamoah","Owusu",{"paper_id":6479,"author_seq":218,"given_name":6495,"surname":6496,"affiliation":63,"orcid":63},"Victor","Quagraine",{"paper_id":6479,"author_seq":203,"given_name":6498,"surname":6499,"affiliation":63,"orcid":63},"Leanne M.M.","Annor-Adjaye","Creating a task-specific speech recognition dataset is essential for developing speech recognition applications in low-resource languages. Such applications have uses in agriculture, finance, healthcare, and others, and benefit individuals with low literacy. However, a significant challenge is the high cost of data creation. While there is some work around cost-effective dataset selection, there is little to no work on building a cost-effective dataset for a task from scratch. Our work contributes to the latter. We created a speech recognition dataset from scratch and conducted two major sets of experiments. The first aimed to observe the effect of different datasets of the same size on model performance. Our results confirmed that the same amount spent collecting data can have vastly different results. The second experiment analyzed the effect of token sequence overlap between target and training data since a natural and intuitive approach to building a dataset from scratch for a task would be having the task tokens occur in the training data. Our experiments showed that token sequence overlap was not the primary factor influencing model performance. 
Our work provides a counter-intuitive insight into building speech recognition datasets from scratch in low-resource settings and shows the need for further investigation.",{"paper_id":6502,"title":6503,"year":7,"month":188,"day":63,"doi":6504,"resource_url":6505,"first_page":6506,"last_page":6507,"pdf_url":6508,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6509,"paper_type":860,"authors":6510,"abstract":6527},"lrec2026-main-241","Radio Haiti-Inter: A Large-Scale Annotated Corpus of Spoken Haitian Creole","10.63317\u002F5kk3h4p3mp5d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-241","3083","3093","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.241.pdf","havard-etal-2026-radio",[6511,6514,6517,6520,6523,6525],{"paper_id":6502,"author_seq":247,"given_name":6512,"surname":6513,"affiliation":63,"orcid":63},"William N.","Havard",{"paper_id":6502,"author_seq":232,"given_name":6515,"surname":6516,"affiliation":63,"orcid":63},"Rayan","Ziane",{"paper_id":6502,"author_seq":218,"given_name":6518,"surname":6519,"affiliation":63,"orcid":63},"Mélissa","Menclé",{"paper_id":6502,"author_seq":203,"given_name":6521,"surname":6522,"affiliation":63,"orcid":63},"Maximin","Coavoux",{"paper_id":6502,"author_seq":188,"given_name":4797,"surname":6524,"affiliation":63,"orcid":63},"Lecouteux",{"paper_id":6502,"author_seq":172,"given_name":1156,"surname":6526,"affiliation":63,"orcid":63},"Schang","We present the first large-scale corpus of spoken Haitian Creole (Kreyòl), namely Radio Haiti-Inter. The corpus was constructed using automatic speech recognition (ASR) with a state-of-the-art model specifically dedicated to Kreyòl. In addition to transcriptions, we provide part-of-speech (POS) tags, as well as time-aligned transcripts and confidence scores, enabling users to select the most reliable segments for their research. 
We conduct a manual evaluation of both the transcription quality and POS tagging accuracy to assess the reliability of the resource we present. To enable high-quality research with the resource we introduce, we are releasing 50 hours, comprising both the audios and attached annotations, drawn from the highest-quality segments. This corpus represents an invaluable resource for advancing the study of Kreyòl, with potential applications in phonetics, phonology, morphology, syntax, as well as the study of code-switching and code-mixing. As the recordings cover a large span of years, the corpus we introduce is also suited to micro-diachronic studies of Kreyòl.",{"paper_id":6529,"title":6530,"year":7,"month":188,"day":63,"doi":6531,"resource_url":6532,"first_page":6533,"last_page":6534,"pdf_url":6535,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6536,"paper_type":860,"authors":6537,"abstract":6552},"lrec2026-main-242","Synthetic Function Demonstrations Improve Generation in Low-Resource Programming Languages","10.63317\u002F4tutdq38ch4b","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-242","3094","3106","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.242.pdf","mckenna-etal-2026-synthetic",[6538,6541,6543,6544,6547,6550],{"paper_id":6529,"author_seq":247,"given_name":6539,"surname":6540,"affiliation":63,"orcid":63},"Nick","McKenna",{"paper_id":6529,"author_seq":232,"given_name":6542,"surname":3290,"affiliation":63,"orcid":63},"Xinnuo",{"paper_id":6529,"author_seq":218,"given_name":3354,"surname":2841,"affiliation":63,"orcid":63},{"paper_id":6529,"author_seq":203,"given_name":6545,"surname":6546,"affiliation":63,"orcid":63},"Nicholas C.","Wilson",{"paper_id":6529,"author_seq":188,"given_name":6548,"surname":6549,"affiliation":63,"orcid":63},"Benjamin Van","Durme",{"paper_id":6529,"author_seq":172,"given_name":3643,"surname":6551,"affiliation":63,"orcid":63},"Poelitz","A key consideration 
when training an LLM is whether the target language is more or less resourced, for example English compared to Welsh, or Python compared to Excel. Typical training data for programming languages consists of real program demonstrations coupled with explanatory human-written comments. In this work we present a novel approach to the creation of such data for low resource programming languages, which lack naturally occurring data. Our process generates synthetic, textbook-quality demonstrations of how to use library functions, which we show makes for good model finetuning data. We demonstrate in an example domain of Excel Formulas. First, we collate language documentation, then we use this to augment a powerful teacher model which generates synthetic training data, and finally finetune student models on the demonstrations. Our technique improves student performance on 2 question-answering datasets: WikiTQ and TAT-QA. We also show advantages of finetuning over standard RAG approaches, which can offer only modest improvement due to the unfamiliarity of the target domain to student models.",{"paper_id":6554,"title":6555,"year":7,"month":188,"day":63,"doi":6556,"resource_url":6557,"first_page":6558,"last_page":6559,"pdf_url":6560,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6561,"paper_type":860,"authors":6562,"abstract":6576},"lrec2026-main-243","PerHalluEval: Persian Hallucination Evaluation Benchmark for Large Language 
Models","10.63317\u002F3xhrjewfcyrm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-243","3107","3127","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.243.pdf","hosseini-etal-2026-perhallueval",[6563,6565,6567,6570,6573],{"paper_id":6554,"author_seq":247,"given_name":1932,"surname":6564,"affiliation":63,"orcid":63},"Hosseini",{"paper_id":6554,"author_seq":232,"given_name":6566,"surname":6564,"affiliation":63,"orcid":63},"Kimia",{"paper_id":6554,"author_seq":218,"given_name":6568,"surname":6569,"affiliation":63,"orcid":63},"Shayan","Bali",{"paper_id":6554,"author_seq":203,"given_name":6571,"surname":6572,"affiliation":63,"orcid":63},"Zahra","Zanjani",{"paper_id":6554,"author_seq":188,"given_name":6574,"surname":6575,"affiliation":63,"orcid":63},"Saeedeh","Momtazi","Hallucination is a persistent issue affecting all large language Models (LLMs), particularly within low-resource languages such as Persian. PerHalluEval (Persian Hallucination Evaluation) is the first dynamic hallucination evaluation benchmark tailored for the Persian language. Our benchmark leverages a three-stage LLM-driven pipeline, augmented with human validation, to generate plausible answers and summaries regarding QA and summarization tasks, focusing on detecting extrinsic and intrinsic hallucinations. Moreover, we used the log probabilities of generated tokens to select the most believable hallucinated instances. In addition, we engaged human annotators to highlight Persian-specific contexts in the QA dataset in order to evaluate LLMs’ performance on content specifically related to Persian culture. Our evaluation of 12 LLMs, including open- and closed-source models using PerHalluEval, revealed that the models generally struggle in detecting hallucinated Persian text. We showed that providing external knowledge, i.e., the original document for the summarization task, could mitigate hallucination partially. 
Furthermore, there was no significant difference in terms of hallucination when comparing LLMs specifically trained for Persian with others.",{"paper_id":6578,"title":6579,"year":7,"month":188,"day":63,"doi":6580,"resource_url":6581,"first_page":6582,"last_page":6583,"pdf_url":6584,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6585,"paper_type":860,"authors":6586,"abstract":6610},"lrec2026-main-244","ADAB: Arabic Dataset for Automated Politeness Benchmarking - a Large-Scale Resource for Computational Sociopragmatics","10.63317\u002F559a7pqqchr2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-244","3128","3137","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.244.pdf","alkhalifa-etal-2026-adab",[6587,6590,6593,6595,6598,6601,6604,6607],{"paper_id":6578,"author_seq":247,"given_name":6588,"surname":6589,"affiliation":63,"orcid":63},"Hend","Al-Khalifa",{"paper_id":6578,"author_seq":232,"given_name":6591,"surname":6592,"affiliation":63,"orcid":63},"Nadia","Ghezaiel",{"paper_id":6578,"author_seq":218,"given_name":2960,"surname":6594,"affiliation":63,"orcid":63},"Bounnit",{"paper_id":6578,"author_seq":203,"given_name":6596,"surname":6597,"affiliation":63,"orcid":63},"Hend Hamed","Alhazmi",{"paper_id":6578,"author_seq":188,"given_name":6599,"surname":6600,"affiliation":63,"orcid":63},"Noof Abdullah","Alfear",{"paper_id":6578,"author_seq":172,"given_name":6602,"surname":6603,"affiliation":63,"orcid":63},"Reem Fahad","Alqifari",{"paper_id":6578,"author_seq":155,"given_name":6605,"surname":6606,"affiliation":63,"orcid":63},"Ameera Masoud","Almasoud",{"paper_id":6578,"author_seq":138,"given_name":6608,"surname":6609,"affiliation":63,"orcid":63},"Sharefah Ahmed","Al-Ghamdi","The growing importance of culturally-aware natural language processing systems has led to an increasing demand for resources that capture sociopragmatic phenomena across diverse languages. 
Nevertheless, Arabic-language resources for politeness detection remain severely under-explored, despite the rich and complex politeness expressions deeply embedded in Arabic communication. In this paper, a new annotated Arabic dataset, called ADAB\u002Fأدب (Arabic Politeness Dataset), was generated and carefully collected from four diverse online platforms including social media, e-commerce, and customer service domains, encompassing both Modern Standard Arabic (MSA) and multiple dialectal varieties (Gulf, Egyptian, Levantine, and Maghrebi). This dataset has undergone a thorough annotation process guided by Arabic linguistic traditions and contemporary pragmatic theory, resulting in three-way politeness classifications: polite, impolite, and neutral. The generated dataset contains 10,000 samples with detailed linguistic feature annotations across 16 politeness categories, achieving substantial inter-annotator agreement (κ = 0.703). A comprehensive benchmarking of this dataset was conducted utilizing 40 model configurations spanning traditional machine learning (12 models), transformer-based architecture (10 models), and large language models (18 configurations), thereby effectively demonstrating its practical utility and inherent challenges. 
This generated resource aims to bridge the gap in Arabic sociopragmatic NLP and encourage further research into politeness-aware applications for the Arabic language.",{"paper_id":6612,"title":6613,"year":7,"month":188,"day":63,"doi":6614,"resource_url":6615,"first_page":6616,"last_page":6617,"pdf_url":6618,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6619,"paper_type":860,"authors":6620,"abstract":6633},"lrec2026-main-245","GRDD+: An Extended Greek Dialectal Dataset with Cross-Architecture Fine-tuning Evaluation","10.63317\u002F3ijtb8uijcjh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-245","3138","3146","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.245.pdf","chatzikyriakidis-etal-2026-grdd",[6621,6624,6627,6630],{"paper_id":6612,"author_seq":247,"given_name":6622,"surname":6623,"affiliation":63,"orcid":63},"Stergios","Chatzikyriakidis",{"paper_id":6612,"author_seq":232,"given_name":6625,"surname":6626,"affiliation":63,"orcid":63},"Dimitriοs","Papadakis",{"paper_id":6612,"author_seq":218,"given_name":6628,"surname":6629,"affiliation":63,"orcid":63},"Sevasti Ioanna","Papaioannou",{"paper_id":6612,"author_seq":203,"given_name":6631,"surname":6632,"affiliation":63,"orcid":63},"Erofili","Psaltaki","We present an extended Greek Dialectal Dataset (GRDD+) that complements the existing GRDD dataset with more data from Cretan, Cypriot, Pontic and Northern Greek, while we add six new varieties: Greco-Corsican, Griko (Southern Italian Greek), Maniot, Heptanesian, Tsakonian, and Katharevusa Greek. The result is a dataset with total size 6,374,939 words and 10 varieties. This is the first dataset with such variation and size to date. We conduct a number of fine-tuning experiments to see the effect of good quality dialectal data on a number of LLMs. 
We fine-tune three model architectures (Llama-3-8B, Llama-3.1-8B, Krikri-8B) and compare the results to frontier models (Claude-3.7-Sonnet, Gemini-2.5, ChatGPT-5).",{"paper_id":6635,"title":6636,"year":7,"month":188,"day":63,"doi":6637,"resource_url":6638,"first_page":6639,"last_page":6640,"pdf_url":6641,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6642,"paper_type":860,"authors":6643,"abstract":6659},"lrec2026-main-246","Same-Language Subtitles for Low-resource Languages: A Case of Bundelkhandi","10.63317\u002F4938abev4keh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-246","3147","3153","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.246.pdf","pradhan-etal-2026-same",[6644,6647,6650,6653,6656],{"paper_id":6635,"author_seq":247,"given_name":6645,"surname":6646,"affiliation":63,"orcid":63},"Anirudh","Pradhan",{"paper_id":6635,"author_seq":232,"given_name":6648,"surname":6649,"affiliation":63,"orcid":63},"Ayushi","Pandey",{"paper_id":6635,"author_seq":218,"given_name":6651,"surname":6652,"affiliation":63,"orcid":63},"Divyansh","Kushwaha",{"paper_id":6635,"author_seq":203,"given_name":6654,"surname":6655,"affiliation":63,"orcid":63},"Akshita","Tiwary",{"paper_id":6635,"author_seq":188,"given_name":6657,"surname":6658,"affiliation":63,"orcid":63},"Vivek","Seshadri","Same-language subtitles enhance consumers’ experience for audiovisual content for both hearing impaired population. However, while high-resource languages can benefit from automatic subtitling, subtitles are seldom available for content creators in regional languages. This limits audience engagement on their content, which often is independently produced. This paper presents Project Saurakhi, a platform for generating same-language subtitles in regional languages. To achieve this, we first extract community-generated YouTube videos serve as the primary data source for this project. 
The current dataset comprises 63 hours of Bundelkhandi speech sourced from 207 YouTube videos across 19 content creators. And second, the technical workflow integrates automated stages with manual refinement via a mobile annotation platform. As regional language content grows both in independent productions, and in over-the-top platforms, Project Saurakhi aims to train women participants in rural India to become proficient in providing subtitles in their native languages.",{"paper_id":6661,"title":6662,"year":7,"month":188,"day":63,"doi":6663,"resource_url":6664,"first_page":6665,"last_page":6666,"pdf_url":6667,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6668,"paper_type":860,"authors":6669,"abstract":6681},"lrec2026-main-247","The Chulalongkorn Corpus of Spoken Thai (CCOST)","10.63317\u002F3ds35jijmgrx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-247","3154","3160","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.247.pdf","pittayaporn-etal-2026-chulalongkorn",[6670,6673,6676,6679],{"paper_id":6661,"author_seq":247,"given_name":6671,"surname":6672,"affiliation":63,"orcid":63},"Pittayawat","Pittayaporn",{"paper_id":6661,"author_seq":232,"given_name":6674,"surname":6675,"affiliation":63,"orcid":63},"Cathryn","Yang",{"paper_id":6661,"author_seq":218,"given_name":6677,"surname":6678,"affiliation":63,"orcid":63},"Sujinat","Jitwiriyanont",{"paper_id":6661,"author_seq":203,"given_name":4091,"surname":6680,"affiliation":63,"orcid":63},"Kirby","The Chulalongkorn Corpus of Spoken Thai (CCOST) is a phonetically annotated corpus of Standard Thai. The corpus comprises approximately 7 hours of interview-style spontaneous speech from 49 speakers (19 male, 30 female) ranging in age from 18 to 83 years old. Speakers represent diverse regional backgrounds across Thailand but were instructed to speak in Standard Thai. 
Each speaker also read a 206-item monosyllabic word list twice and a set of 25 sentences three times. The annotation pipeline combines automatic speech recognition (ASR) and forced alignment using CLARIN-D’s OCTRA and Munich Automatic Segmentation System (MAUS) tools with manual correction by phonetically trained native Thai speakers. Transcriptions include orthographic, word-level, syllable-level, and phone-level annotations including toneme labels. The corpus serves as a resource in the sociophonetic investigation of segmental and tonal variation in spontaneous and controlled speech, enabling examination of individual characteristics as well as group differences across age groups, genders, and regional backgrounds. Hand-corrected annotations will additionally serve to improve forced alignment accuracy for Standard Thai.",{"paper_id":6683,"title":6684,"year":7,"month":188,"day":63,"doi":6685,"resource_url":6686,"first_page":6687,"last_page":6688,"pdf_url":6689,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6690,"paper_type":860,"authors":6691,"abstract":6710},"lrec2026-main-248","Nepal Script Text Recognition from Ancient Artifacts: Challenges and Opportunities","10.63317\u002F427yvm8biop7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-248","3161","3170","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.248.pdf","nakarmi-etal-2026-nepal",[6692,6695,6698,6701,6704,6707],{"paper_id":6683,"author_seq":247,"given_name":6693,"surname":6694,"affiliation":63,"orcid":63},"Swornim","Nakarmi",{"paper_id":6683,"author_seq":232,"given_name":6696,"surname":6697,"affiliation":63,"orcid":63},"Sarin","Sthapit",{"paper_id":6683,"author_seq":218,"given_name":6699,"surname":6700,"affiliation":63,"orcid":63},"Sahil 
Ratna","Tuladhar",{"paper_id":6683,"author_seq":203,"given_name":6702,"surname":6703,"affiliation":63,"orcid":63},"Arya","Shakya",{"paper_id":6683,"author_seq":188,"given_name":6705,"surname":6706,"affiliation":63,"orcid":63},"Bal Krishna","Bal",{"paper_id":6683,"author_seq":172,"given_name":6708,"surname":6709,"affiliation":63,"orcid":63},"Rajani","Chulyadyo","Nepal Script, a script of significant linguistic, historical, and cultural importance, can be found in ancient artifacts in Nepal. As this script has faced a decline in use, it is considered among endangered scripts at present. For its revival and preservation, it is important to digitize ancient artifacts written in Nepal Script and create an accessible digital dataset. Among such artifacts are stone inscriptions, and manuscripts, from which we attempt to recognize texts using Artificial Intelligence techniques. This paper presents our approach of preparing a dataset through an extensive data acquisition method, and developing a system that recognizes Nepal Script texts from images. Our system combines the YOLOv8 algorithm with Convolutional Recurrent Neural Network architecture and Connectionist Temporal Classification loss. Our dataset consists of 5,219 text line images from ancient stone inscriptions, manuscripts, and modern handwritten and typed documents. Utilizing an augmented dataset of 41,752 samples, our system achieved 12.61% Character Error Rate. Despite the small training dataset, our model successfully predicted texts in not only new stone inscriptions and manuscripts but also wooden and copper plate inscriptions. 
We expect our contributions will encourage further research on Nepal Script and other Nepalese scripts.",{"paper_id":6712,"title":6713,"year":7,"month":188,"day":63,"doi":6714,"resource_url":6715,"first_page":6716,"last_page":6717,"pdf_url":6718,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6719,"paper_type":860,"authors":6720,"abstract":6727},"lrec2026-main-249","LuxBorrow: From Pompier to Pompjee, Tracing Borrowing in Luxembourgish","10.63317\u002F38pbv3g6swmm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-249","3171","3183","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.249.pdf","hosseinikivanani-etal-2026-luxborrow",[6721,6724],{"paper_id":6712,"author_seq":247,"given_name":6722,"surname":6723,"affiliation":63,"orcid":63},"Nina","Hosseini-Kivanani",{"paper_id":6712,"author_seq":232,"given_name":6725,"surname":6726,"affiliation":63,"orcid":63},"Fred","Philippy","We present LuxBorrow, a borrowing-first analysis of Luxembourgish (LU) news spanning 27 years (1999–2025): 259,305 RTL articles and 43.7M tokens. Our pipeline combines sentence-level language identification (LU\u002FDE\u002FFR\u002FEN) with a token-level borrowing resolver restricted to LU sentences, using lemmatization, a collected loanword registry, and compiled morphological\u002Forthographic rules. Empirically, LU remains the matrix language across all documents, while multilingual practice is pervasive: 77.1% of articles include at least one donor language and 65.4% use three or four. Breadth does not imply intensity: median code-mixing index (CMI) increases from 3.90 (LU+1) to only 7.00 (LU+3), indicating localized insertions rather than balanced bilingual text. Domain\u002Fperiod summaries show moderate but persistent mixing, with CMI rising from 6.1 (1999–2007) to a peak of 8.4 (2020). 
Token-level adaptations total 25,444 instances and exhibit a mixed profile: morphological 63.8%, orthographic 35.9%, lexical 0.3%; the most frequent single rules are orthographic (on→oun, eur→er), while morphology is collectively dominant. Diachronically, code-switching intensifies, and morphologically adapted borrowings grow from a small base; French overwhelmingly supplies adapted items, with modest growth for German and negligible English. We advocate borrowing-centric evaluation, borrowed token\u002Ftype rates, donor entropy over borrowed items, and assimilation ratios over headline document-level mixing indices.",{"paper_id":6729,"title":6730,"year":7,"month":188,"day":63,"doi":6731,"resource_url":6732,"first_page":6733,"last_page":6734,"pdf_url":6735,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6736,"paper_type":860,"authors":6737,"abstract":6741},"lrec2026-main-250","Ramsa: A Large Sociolinguistically Rich Emirati Arabic Speech Corpus for ASR and TTS","10.63317\u002F3fzxkpjoserh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-250","3184","3198","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.250.pdf","alsabbagh-2026-ramsa",[6738],{"paper_id":6729,"author_seq":247,"given_name":6739,"surname":6740,"affiliation":63,"orcid":63},"Rania","Al-Sabbagh","Ramsa is a developing 41-hour speech corpus of Emirati Arabic designed to support sociolinguistic research and low-resource language technologies. It contains recordings from structured interviews with native speakers and episodes from national television shows. The corpus features 157 speakers (59 female, 98 male), spans subdialects such as Urban, Bedouin, and Mountain\u002FShihhi, and covers topics such as cultural heritage, agriculture and sustainability, daily life, professional trajectories, and architecture. It consists of 91 monologic and 79 dialogic recordings, varying in length and recording conditions. 
A 10% subset was used to evaluate commercial and open-source models for automatic speech recognition (ASR) and text-to-speech (TTS) in a zero-shot setting to establish initial baselines. Whisper-large-v3-turbo achieved the best ASR performance, with average word and character error rates of 0.268 and 0.144, respectively. MMS-TTS-Ara reported the best mean word and character rates of 0.285 and 0.081, respectively, for TTS. These baselines are competitive but leave substantial room for improvement. The paper highlights the challenges encountered and provides directions for future work.",{"paper_id":6743,"title":6744,"year":7,"month":188,"day":63,"doi":6745,"resource_url":6746,"first_page":6747,"last_page":6748,"pdf_url":6749,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6750,"paper_type":860,"authors":6751,"abstract":6778},"lrec2026-main-251","DialectalArabicMMLU: Benchmarking Dialectal Capabilities in Arabic and Multilingual Language Models","10.63317\u002F3cy68duew55b","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-251","3199","3219","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.251.pdf","altakrori-etal-2026-dialectalarabicmmlu",[6752,6755,6756,6758,6761,6764,6767,6770,6772,6775],{"paper_id":6743,"author_seq":247,"given_name":6753,"surname":6754,"affiliation":63,"orcid":63},"Malik H.","Altakrori",{"paper_id":6743,"author_seq":232,"given_name":4229,"surname":4230,"affiliation":63,"orcid":63},{"paper_id":6743,"author_seq":218,"given_name":2579,"surname":6757,"affiliation":63,"orcid":63},"Lynn",{"paper_id":6743,"author_seq":203,"given_name":6759,"surname":6760,"affiliation":63,"orcid":63},"Younes","Samih",{"paper_id":6743,"author_seq":188,"given_name":6762,"surname":6763,"affiliation":63,"orcid":63},"Abed 
Alhakim","Freihat",{"paper_id":6743,"author_seq":172,"given_name":6765,"surname":6766,"affiliation":63,"orcid":63},"Kirill","Chirkunov",{"paper_id":6743,"author_seq":155,"given_name":6768,"surname":6769,"affiliation":63,"orcid":63},"Muhammed","AbuOdeh",{"paper_id":6743,"author_seq":138,"given_name":6771,"surname":2175,"affiliation":63,"orcid":63},"Radu",{"paper_id":6743,"author_seq":121,"given_name":6773,"surname":6774,"affiliation":63,"orcid":63},"Preslav","Nakov",{"paper_id":6743,"author_seq":104,"given_name":6776,"surname":6777,"affiliation":63,"orcid":63},"Alham Fikri","Aji","We present DialectalArabicMMLU, a new benchmark for evaluating the performance of large language models (LLMs) across Arabic dialects. While recently developed Arabic and multilingual benchmarks have advanced LLM evaluation for Modern Standard Arabic (MSA), dialectal varieties remain underrepresented despite their prevalence in everyday communication. DialectalArabicMMLU extends the MMLU-Redux framework through manual translation and adaptation of 3K multiple-choice question–answer pairs into five major dialects (Syrian, Egyptian, Emirati, Saudi, and Moroccan), yielding a total of 15K QA pairs across 32 academic and professional domains (22K QA pairs when also including English and MSA). The benchmark enables systematic assessment of LLM reasoning and comprehension beyond MSA, supporting both task-based and linguistic analysis. We evaluate 19 open-weight Arabic and multilingual LLMs (1B–13B parameters) and report substantial performance variation across dialects, revealing persistent gaps in dialectal generalization. 
DialectalArabicMMLU provides the first unified, human-curated resource for measuring dialectal understanding in Arabic, thus promoting more inclusive evaluation and future model development.",{"paper_id":6780,"title":6781,"year":7,"month":188,"day":63,"doi":6782,"resource_url":6783,"first_page":6784,"last_page":6785,"pdf_url":6786,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6787,"paper_type":860,"authors":6788,"abstract":6801},"lrec2026-main-252","ForumOccitania: A Corpus of User-Generated Content for Multiple Occitan Varieties","10.63317\u002F3h3c8smpj9p2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-252","3220","3233","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.252.pdf","ndey-etal-2026-forumoccitania",[6789,6792,6795,6797,6799],{"paper_id":6780,"author_seq":247,"given_name":6790,"surname":6791,"affiliation":63,"orcid":63},"Oriane","Nédey",{"paper_id":6780,"author_seq":232,"given_name":6793,"surname":6794,"affiliation":63,"orcid":63},"Juliette","Janès",{"paper_id":6780,"author_seq":218,"given_name":4114,"surname":6796,"affiliation":63,"orcid":63},"Bawden",{"paper_id":6780,"author_seq":203,"given_name":3251,"surname":6798,"affiliation":63,"orcid":63},"Clérice",{"paper_id":6780,"author_seq":188,"given_name":4363,"surname":6800,"affiliation":63,"orcid":63},"Sagot","We introduce ForumOccitania, a new Occitan corpus of posts from an online forum, covering a range of topics and dialects. While some existing datasets for this low-resource language include labels of varieties within the dialect continuum, we go one step further by providing metadata pertaining to sociolinguistic factors of language variation (dialect, geographical location, age, proficiency), extracted from self-declared user profiles. We carry out statistical and qualitative analyses, as well as preliminary experiments on unsupervised dialect identification. 
Our results show that (i) most of the contents is written in Occitan, with the classical spelling conventions, and by young speakers, (ii) posts display a strong presence of dialectal features from four major Occitan varieties (Lemosin, Lengadocian, Gascon, Provençau), and (iii) a simple topic modelling approach introduced by Kuparinen and Scherrer (2024) effectively detects salient features of these four varieties, but also reveals finer-grained diatopical variation tendencies.",{"paper_id":6803,"title":6804,"year":7,"month":188,"day":63,"doi":6805,"resource_url":6806,"first_page":6807,"last_page":6808,"pdf_url":6809,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6810,"paper_type":860,"authors":6811,"abstract":6826},"lrec2026-main-253","A Dataset of Wolof Ajami Manuscripts for HTR and OCR","10.63317\u002F4pz98ojeeqpw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-253","3234","3239","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.253.pdf","yousuf-etal-2026-dataset",[6812,6815,6818,6820,6823],{"paper_id":6803,"author_seq":247,"given_name":6813,"surname":6814,"affiliation":63,"orcid":63},"Oreen","Yousuf",{"paper_id":6803,"author_seq":232,"given_name":6816,"surname":6817,"affiliation":63,"orcid":63},"Elhadji Djibril","Diagne",{"paper_id":6803,"author_seq":218,"given_name":3643,"surname":6819,"affiliation":63,"orcid":63},"Høgel",{"paper_id":6803,"author_seq":203,"given_name":6821,"surname":6822,"affiliation":63,"orcid":63},"Beata","Megyesi",{"paper_id":6803,"author_seq":188,"given_name":6824,"surname":6825,"affiliation":63,"orcid":63},"Joakim","Nivre","We present the first ever dataset of manually segmented and transcribed Ajami manuscripts written in Wolof. The term Ajami refers to modified Arabic-script orthographies used to transcribe African languages. 
Handwritten text recognition (HTR) and optical character recognition (OCR) models for Arabic-script languages perform poorly on African languages written in Ajami orthographies because these languages are not represented in the pre-training data of the models. This leads to recognition models being unable to extract unique Arabic-script letters and ubiquitous diacritics used in African languages, and struggling to adapt to various calligraphy styles used across Africa. We release the following as an open-source dataset: an ALTO formatting of high-quality images of handwritten and printed, 20th–century Wolof manuscripts; manual segmentation (region and line); and manual transcriptions. We extend our contribution by evaluating several Arabic-script recognition models intended for historical manuscripts and find they produce character error rates (CER) of 61–81%. Transcriptions produced by the evaluated recognition models, as well as a keyboard to transcribe Wolof Ajami manuscripts, are released as well. 
The digitally transcribed text in the dataset can also be utilized for various natural language processing (NLP) and historical linguistic tasks.",{"paper_id":6828,"title":6829,"year":7,"month":188,"day":63,"doi":6830,"resource_url":6831,"first_page":6832,"last_page":6833,"pdf_url":6834,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6835,"paper_type":860,"authors":6836,"abstract":6843},"lrec2026-main-254","TDMulti: A Tunisian Dialect-Modern Standard Arabic Multitask Corpus with a Context-Aware Cross-Attention BERT Model","10.63317\u002F5aev569ryz8n","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-254","3240","3249","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.254.pdf","torjmen-etal-2026-tdmulti",[6837,6840],{"paper_id":6828,"author_seq":247,"given_name":6838,"surname":6839,"affiliation":63,"orcid":63},"Roua","Torjmen",{"paper_id":6828,"author_seq":232,"given_name":6841,"surname":6842,"affiliation":63,"orcid":63},"Kais","Haddar","The Tunisian dialect dominates online communication in Tunisia but remains severely under-resourced in natural language processing. We introduce the first multitask corpus of Tunisian dialect manually aligned with its equivalents in modern standard Arabic. The TDMulti corpus consists of 3,100 social media comments annotated with 12,400 labels for four interrelated tasks: hate speech detection, sentiment polarity classification, sarcasm identification, and topic category classification. The TDMulti corpus provides a new benchmark for studying pragmatic and social aspects of Tunisian dialect in relation to modern standard Arabic. To exploit this resource, we propose a deep learning model based on transformer architectures. We design three variants: a baseline multitask classifier, a cross-attention model aligning Tunisian dialect and modern standard Arabic representations, and a context-aware cross-attention mechanism with task-specific masking. 
We evaluate the approach using large pre-trained Arabic language models under different configurations. Results show that the context-aware cross-attention model achieves the best performance, particularly for sarcasm and hate speech detection. TDMulti is released under an open license, contributing a novel resource to advance research on Arabic dialect processing.",{"paper_id":6845,"title":6846,"year":7,"month":188,"day":63,"doi":6847,"resource_url":6848,"first_page":6849,"last_page":6850,"pdf_url":6851,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6852,"paper_type":860,"authors":6853,"abstract":6863},"lrec2026-main-255","The Megrelian Language Corpus (MLC): Creation, Annotation, and Initial Steps toward a UD Treebank","10.63317\u002F3tctzeeznuxb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-255","3250","3256","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.255.pdf","lobzhanidze-etal-2026-megrelian",[6854,6857,6860],{"paper_id":6845,"author_seq":247,"given_name":6855,"surname":6856,"affiliation":63,"orcid":63},"Irina","Lobzhanidze",{"paper_id":6845,"author_seq":232,"given_name":6858,"surname":6859,"affiliation":63,"orcid":63},"Rusudan","Gersamia",{"paper_id":6845,"author_seq":218,"given_name":6861,"surname":6862,"affiliation":63,"orcid":63},"Tamar","Gogia","This paper presents the development of the Megrelian Language Corpus (MLC), a new language resource for the documentation and computational analysis of Megrelian, an endangered Kartvelian language. The corpus is based on fieldwork conducted in Samegrelo, Georgia (2022–2024) and currently contains 97,691 tokens and 60,959 types. The data were transcribed using the International Phonetic Alphabet (IPA) and annotated in Fieldworks Language Explorer (FLEx) with segmentation, morphological analysis and bilingual Georgian-English translations. 
Each text is accessible through a specially designed web interface, providing multiple tiers of annotation and integrated search functions. The paper describes the corpus design, annotation methodology and challenges encountered in representing Megrelian’s complex agglutinative morphology. It also outlines initial steps toward converting existing data into the Universal Dependencies (UD) framework, building on experience from related Kartvelian languages such as Georgian. The MLC corpus represents the first publicly available linguistic resource for Megrelian and provides a foundation for future UD treebank development.",{"paper_id":6865,"title":6866,"year":7,"month":188,"day":63,"doi":6867,"resource_url":6868,"first_page":6869,"last_page":6870,"pdf_url":6871,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6872,"paper_type":860,"authors":6873,"abstract":6881},"lrec2026-main-256","Steering LLMs toward Korean Local Speech: Iterative Refinement Framework for Faithful Dialect Translation","10.63317\u002F2g2idiqbj4xj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-256","3257","3269","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.256.pdf","park-etal-2026-steering",[6874,6877,6879],{"paper_id":6865,"author_seq":247,"given_name":6875,"surname":6876,"affiliation":63,"orcid":63},"Keunhyeung","Park",{"paper_id":6865,"author_seq":232,"given_name":6878,"surname":2998,"affiliation":63,"orcid":63},"Seunguk",{"paper_id":6865,"author_seq":218,"given_name":6880,"surname":5173,"affiliation":63,"orcid":63},"Youngbin","Standard-to-dialect machine translation remains challenging due to a persistent dialect gap in large language models and evaluation distortions inherent in n-gram metrics, which favor source copying over authentic dialect translation. 
In this paper, we propose the dialect refinement (DIA-REFINE) framework, which guides LLMs toward faithful target dialect outputs through an iterative loop of translation, verification, and feedback using external dialect classifiers. To address the limitations of n-gram-based metrics, we introduce the dialect fidelity score (DFS) to quantify linguistic shift and the target dialect ratio (TDR) to measure the success of dialect translation. Experiments on Korean dialects across zero-shot and in-context learning baselines demonstrate that DIA-REFINE consistently enhances dialect fidelity. The proposed metrics distinguish between False Success cases, where high n-gram scores obscure failures in dialectal translation, and True Attempt cases, where genuine attempts at dialectal translation yield low n-gram scores. We also observed that models exhibit varying degrees of responsiveness to the framework, and that integrating in-context examples further improves the translation of dialectal expressions. 
Our work establishes a robust framework for goal-directed, inclusive dialect translation, providing both rigorous evaluation and critical insights into model performance.",{"paper_id":6883,"title":6884,"year":7,"month":188,"day":63,"doi":6885,"resource_url":6886,"first_page":6887,"last_page":6888,"pdf_url":6889,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6890,"paper_type":860,"authors":6891,"abstract":6897},"lrec2026-main-257","LombardoGraphia: Automatic Classification of Lombard Orthography Variants","10.63317\u002F2vm9gj2ap9wq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-257","3270","3280","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.257.pdf","signoroni-etal-2026-lombardographia",[6892,6895],{"paper_id":6883,"author_seq":247,"given_name":6893,"surname":6894,"affiliation":63,"orcid":63},"Edoardo","Signoroni",{"paper_id":6883,"author_seq":232,"given_name":3469,"surname":6896,"affiliation":63,"orcid":63},"Rychly","Lombard, an underresourced language variety spoken by approximately 3.8 million people in Northern Italy and Southern Switzerland, lacks a unified orthographic standard. Multiple orthographic systems exist, creating challenges for NLP resource development and model training. This paper presents the first study of automatic Lombard orthography classification and LombardoGraphia, a curated corpus of 11,186 Lombard Wikipedia samples tagged across 9 orthographic variants, and models for automatic orthography classification. We curate the dataset, processing and filtering raw Wikipedia content to ensure text suitable for orthographic analysis. We train 24 traditional and neural classification models with various features and encoding levels. Our best models achieve 96.06% and 85.78% overall and average class accuracy, though performance on minority classes remains challenging due to data imbalance. 
Our work provides crucial infrastructure for building variety-aware NLP resources for Lombard.",{"paper_id":6899,"title":6900,"year":7,"month":188,"day":63,"doi":6901,"resource_url":6902,"first_page":6903,"last_page":6904,"pdf_url":6905,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6906,"paper_type":860,"authors":6907,"abstract":6919},"lrec2026-main-258","Meenz bleibt Meenz, but Large Language Models Do Not Speak Its Dialect","10.63317\u002F4foh8f7kygj8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-258","3281","3294","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.258.pdf","bui-etal-2026-meenz",[6908,6911,6913,6916],{"paper_id":6899,"author_seq":247,"given_name":6909,"surname":6910,"affiliation":63,"orcid":63},"Minh Duc","Bui",{"paper_id":6899,"author_seq":232,"given_name":2152,"surname":6912,"affiliation":63,"orcid":63},"Mager",{"paper_id":6899,"author_seq":218,"given_name":6914,"surname":6915,"affiliation":63,"orcid":63},"Peter Herbert","Kann",{"paper_id":6899,"author_seq":203,"given_name":6917,"surname":6918,"affiliation":63,"orcid":63},"Katharina von der","Wense","Meenzerisch, the dialect spoken in the German city of Mainz, is also the traditional language of the Mainz carnival, a yearly celebration well known throughout Germany. However, Meenzerisch is on the verge of dying out—a fate it shares with many other German dialects. Natural language processing (NLP) has the potential to help with the preservation and revival efforts of languages and dialects. However, so far no NLP research has looked at Meenzerisch. This work presents the first research in the field of NLP that is explicitly focused on the dialect of Mainz. We introduce a digital dictionary—an NLP-ready dataset derived from an existing resource—to support researchers in modeling and benchmarking the language. It contains 2,351 words in the dialect paired with their meanings described in Standard German. 
We then use this dataset to answer the following research questions: (1) Can state-of-the-art large language models (LLMs) generate definitions for dialect words? (2) Can LLMs generate words in Meenzerisch, given their definitions? Our experiments show that LLMs can do neither: the best model for definitions reaches only 6.27% accuracy and the best word generation model’s accuracy is 1.51%. We then conduct two additional experiments in order to see if accuracy is improved by few-shot learning and by extracting rules from the training set, which are then passed to the LLM. While those approaches are able to improve the results, accuracy remains below 10%. This highlights that additional resources and an intensification of research efforts focused on German dialects are desperately needed.",{"paper_id":6921,"title":6922,"year":7,"month":188,"day":63,"doi":6923,"resource_url":6924,"first_page":6925,"last_page":6926,"pdf_url":6927,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6928,"paper_type":860,"authors":6929,"abstract":6938},"lrec2026-main-259","Bootstrapping NLP for Sakha: Named Entity Recognition and Sentiment Analysis in an Extremely Low-Resource Setting","10.63317\u002F5gybmguss48p","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-259","3295","3303","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.259.pdf","everstova-etal-2026-bootstrapping",[6930,6932,6935],{"paper_id":6921,"author_seq":247,"given_name":3425,"surname":6931,"affiliation":63,"orcid":63},"Everstova",{"paper_id":6921,"author_seq":232,"given_name":6933,"surname":6934,"affiliation":63,"orcid":63},"Nikolai","Efimov",{"paper_id":6921,"author_seq":218,"given_name":6936,"surname":6937,"affiliation":63,"orcid":63},"Valerio","Basile","We present the first systematic study of core NLP tasks for Sakha (Yakut), a low-resource Turkic language with approximately 450,000 speakers in northeastern Siberia. 
We introduce two manually annotated datasets: a 690-sentence NER corpus (921 entities: PER, LOC, ORG) and an 798-sentence sentiment corpus (positive, negative, neutral). Using mBERT and RuBERT in controlled 2×2 experiments, we report a twofold effect: on the one hand, it improves performance when base unknown-token rates exceed approximately 10% (RuBERT: +9.4 F1); on the other hand, it leads to worse performance otherwise (mBERT: −6.1 F1), despite improving tokenization in both cases. Cross-domain transfer (news vs forums) reveals severe asymmetry: formal-to-informal training achieves 47% accuracy while the reverse yields only 26%—a 21-point gap demonstrating that domain composition dominates model architecture choice in low-resource settings. Neutral-boundary detection is the primary bottleneck, with 89% of disagreements clustering around subjective\u002Fobjective distinctions rather than polarity confusions. With fewer than 1,000 samples per task, we establish first benchmarks for Sakha NER (53.5 F1) and sentiment analysis (54% accuracy).",{"paper_id":6940,"title":6941,"year":7,"month":188,"day":63,"doi":6942,"resource_url":6943,"first_page":6944,"last_page":6945,"pdf_url":6946,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6947,"paper_type":860,"authors":6948,"abstract":6957},"lrec2026-main-260","Lightweight Cross-Lingual Federated Prompt Tuning for Low-Resource 
Languages","10.63317\u002F3qfz4ob3zbo8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-260","3304","3316","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.260.pdf","azam-etal-2026-lightweight",[6949,6952,6955],{"paper_id":6940,"author_seq":247,"given_name":6950,"surname":6951,"affiliation":63,"orcid":63},"Ubaid","Azam",{"paper_id":6940,"author_seq":232,"given_name":6953,"surname":6954,"affiliation":63,"orcid":63},"Imran","Razzak",{"paper_id":6940,"author_seq":218,"given_name":2729,"surname":6956,"affiliation":63,"orcid":63},"Jameel","Multilingual NLP faces challenges of data heterogeneity, privacy, and limited computational resources, especially for low-resource languages. Centralised methods risk privacy breaches, while federated learning struggles with communication overhead and poor cross-lingual generalisation. We propose FLiP (Federated Lightweight Prompt-tuning), a privacy-preserving, resource-efficient, generalizable framework integrating prompt-based learning with federated optimisation. FLiP eliminates communication overhead, reduces trainable parameters to 16%, and cuts GPU memory use by 90%. 
Experiments show superior generalisation and efficiency under both IID and Non-IID settings, establishing FLiP as a scalable, privacy-aware solution for multilingual NLP, particularly in low-resource and indigenous language contexts.",{"paper_id":6959,"title":6960,"year":7,"month":188,"day":63,"doi":6961,"resource_url":6962,"first_page":6963,"last_page":6964,"pdf_url":6965,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6966,"paper_type":860,"authors":6967,"abstract":6973},"lrec2026-main-261","A Parallel Corpus of the Parable of the Prodigal Son: Building a Resource for Documenting Language Varieties in Mainland France","10.63317\u002F55c9dwgbmy43","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-261","3317","3324","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.261.pdf","ing-etal-2026-parallel",[6968,6969,6970,6972],{"paper_id":6959,"author_seq":247,"given_name":2481,"surname":2482,"affiliation":63,"orcid":63},{"paper_id":6959,"author_seq":232,"given_name":6793,"surname":6794,"affiliation":63,"orcid":63},{"paper_id":6959,"author_seq":218,"given_name":5230,"surname":6971,"affiliation":63,"orcid":63},"Ködel",{"paper_id":6959,"author_seq":203,"given_name":4363,"surname":6800,"affiliation":63,"orcid":63},"This paper presents a historical parallel corpus of languages spoken in metropolitan France. It consists of a collection of versions of the Parable of the Prodigal Son, collected during the 19th century. 
The paper aims to present the interest of such a corpus, its constitution—through XML\u002FTEI encoding, semi-automatic alignment and projection on linguistic maps—and its potential uses for the study of these low-resource languages.",{"paper_id":6975,"title":6976,"year":7,"month":188,"day":63,"doi":6977,"resource_url":6978,"first_page":6979,"last_page":6980,"pdf_url":6981,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6982,"paper_type":860,"authors":6983,"abstract":7010},"lrec2026-main-262","Developing Zila: A Spoken Language Resource for the Endangered Slovenian Gail Valley Dialect","10.63317\u002F3fhhk948chhm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-262","3325","3332","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.262.pdf","zgank-etal-2026-developing",[6984,6987,6990,6993,6996,6998,7001,7004,7007],{"paper_id":6975,"author_seq":247,"given_name":6985,"surname":6986,"affiliation":63,"orcid":63},"Andrej","Zgank",{"paper_id":6975,"author_seq":232,"given_name":6988,"surname":6989,"affiliation":63,"orcid":63},"Gregor","Donaj",{"paper_id":6975,"author_seq":218,"given_name":6991,"surname":6992,"affiliation":63,"orcid":63},"Urh","Kolaric",{"paper_id":6975,"author_seq":203,"given_name":6994,"surname":6995,"affiliation":63,"orcid":63},"Usi","Sereinig",{"paper_id":6975,"author_seq":188,"given_name":1559,"surname":6997,"affiliation":63,"orcid":63},"Koren-Zwitter",{"paper_id":6975,"author_seq":172,"given_name":6999,"surname":7000,"affiliation":63,"orcid":63},"Sanja","Boto",{"paper_id":6975,"author_seq":155,"given_name":7002,"surname":7003,"affiliation":63,"orcid":63},"Sabina","Zwitter-Grilc",{"paper_id":6975,"author_seq":138,"given_name":7005,"surname":7006,"affiliation":63,"orcid":63},"Jasna","Vidinic",{"paper_id":6975,"author_seq":121,"given_name":7008,"surname":7009,"affiliation":63,"orcid":63},"Darinka","Verdonik","Slovenian is a less-resourced South Slavic language. 
Existing Slovenian spoken language resources mainly cover the standard language in everyday communication. However, Slovenian encompasses a wide range of dialects, most of which are not represented in available spoken language resources. This paper presents the development of Zila, a Slovenian spoken language resource for the Gail Valley dialect. This dialect is one of the most endangered varieties of Slovenian and is spoken in the extreme north-western periphery of the Slovenian language area. The goal of the project was to build a language resource comprising 100 hours of speech with manually produced transcriptions. The spoken material was collected from members of the Slovenian minority in Carinthia, Austria, with the local community playing a key role in the data acquisition process. A dedicated set of transcription rules was created to capture the full range of acoustic and linguistic features of the Gail Valley dialect, which differs significantly from standard Slovenian. A preliminary speech recognition experiment was conducted to analyze these differences further. 
The Zila project demonstrates how spoken language technologies can help to preserve the cultural and linguistic heritage of an endangered dialect.",{"paper_id":7012,"title":7013,"year":7,"month":188,"day":63,"doi":7014,"resource_url":7015,"first_page":7016,"last_page":7017,"pdf_url":7018,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7019,"paper_type":860,"authors":7020,"abstract":7042},"lrec2026-main-263","Nawatl Context-Free Grammars for Natural Language Processing","10.63317\u002F5e2etyunwu7g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-263","3333","3342","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.263.pdf","landa-etal-2026-nawatl",[7021,7024,7027,7030,7033,7036,7037,7039],{"paper_id":7012,"author_seq":247,"given_name":7022,"surname":7023,"affiliation":63,"orcid":63},"Juan Jose Guzman","Landa",{"paper_id":7012,"author_seq":232,"given_name":7025,"surname":7026,"affiliation":63,"orcid":63},"Juan-Manuel","Torres-Moreno",{"paper_id":7012,"author_seq":218,"given_name":7028,"surname":7029,"affiliation":63,"orcid":63},"Graham","Ranger",{"paper_id":7012,"author_seq":203,"given_name":7031,"surname":7032,"affiliation":63,"orcid":63},"Miguel","Figueroa-Saavedra",{"paper_id":7012,"author_seq":188,"given_name":7034,"surname":7035,"affiliation":63,"orcid":63},"Ligia Quintana","Torres",{"paper_id":7012,"author_seq":172,"given_name":5214,"surname":5215,"affiliation":63,"orcid":63},{"paper_id":7012,"author_seq":155,"given_name":7038,"surname":4548,"affiliation":63,"orcid":63},"Luis Gil Moreno",{"paper_id":7012,"author_seq":138,"given_name":7040,"surname":7041,"affiliation":63,"orcid":63},"Martha Lorena Avendaño","Garrido","The aim of this article is to introduce Context-Free Grammars (CFG) for the Nawatl language. Nawatl is an Amerindian language of the π-language type, i.e. a language with few digital resources. 
For this reason the corpora available for the learning of Large Language Models (LLMs) are virtually non-existent, posing a significant challenge. The goal is to produce a substantial number of syntactically valid artificial Nawatl sentences and thereby to expand the corpora for the purpose of learning embeddings (static models or probably LLMs). For this objective, we introduce two new Nawatl CFGs and use them in generative mode. Thanks to these grammars, it is possible to expand Nawatl corpus significantly and subsequently to use it to learn embeddings (such as FastText) and to evaluate their relevance in semantic similarity tasks. The results show an improvement compared to the results obtained using only the original corpus without artificial expansion, and also demonstrate that economic embeddings often perform better than some LLMs.",{"paper_id":7044,"title":7045,"year":7,"month":188,"day":63,"doi":7046,"resource_url":7047,"first_page":7048,"last_page":7049,"pdf_url":7050,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7051,"paper_type":860,"authors":7052,"abstract":7061},"lrec2026-main-264","Physical Commonsense Reasoning for Lower-Resourced Languages and Dialects: A Study on Basque","10.63317\u002F3p6tfvjmet58","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-264","3343","3354","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.264.pdf","bengoetxea-etal-2026-physical",[7053,7056,7058],{"paper_id":7044,"author_seq":247,"given_name":7054,"surname":7055,"affiliation":63,"orcid":63},"Jaione","Bengoetxea",{"paper_id":7044,"author_seq":232,"given_name":2128,"surname":7057,"affiliation":63,"orcid":63},"Gonzalez-Dios",{"paper_id":7044,"author_seq":218,"given_name":7059,"surname":7060,"affiliation":63,"orcid":63},"Rodrigo","Agerri","Physical commonsense reasoning represents a fundamental capability of human intelligence, enabling individuals to understand their environment, predict 
future events, and navigate physical spaces. Recent years have witnessed growing interest in reasoning tasks within Natural Language Processing (NLP). However, no prior research has examined the performance of Large Language Models (LLMs) on non-question-answering (non-QA) physical commonsense reasoning tasks in low-resource languages such as Basque. Taking the Italian GITA as a starting point, this paper addresses this gap by presenting BasPhyCo, the first non-QA physical commonsense reasoning dataset for Basque, available in both standard and dialectal variants. We evaluate model performance across three hierarchical levels of commonsense understanding: (1) distinguishing between plausible and implausible narratives (accuracy), (2) identifying the conflicting element that renders a narrative implausible (consistency), and (3) determining the specific physical state that creates the implausibility (verifiability). These tasks were assessed using multiple multilingual LLMs as well as models pretrained specifically for Italian and Basque. 
Results indicate that, in terms of verifiability, LLMs exhibit limited physical commonsense capabilities in low-resource languages such as Basque, especially when processing dialectal variants.",{"paper_id":7063,"title":7064,"year":7,"month":188,"day":63,"doi":7065,"resource_url":7066,"first_page":7067,"last_page":7068,"pdf_url":7069,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7070,"paper_type":860,"authors":7071,"abstract":7077},"lrec2026-main-265","Common Voice for Pakistan: Developing an Open Speech Corpus for Low-Resource Pakistani Languages","10.63317\u002F4r3mie85u8cq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-265","3355","3359","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.265.pdf","alam-etal-2026-common",[7072,7074],{"paper_id":7063,"author_seq":247,"given_name":7073,"surname":5462,"affiliation":63,"orcid":63},"Meesum",{"paper_id":7063,"author_seq":232,"given_name":7075,"surname":7076,"affiliation":63,"orcid":63},"Francis","Tyers","Pakistan is home to more than 70 languages out of which 30 languages are endangered. Most of Pakistani languages remain absent from modern speech and text technologies, with resources focused on Urdu and a few major tongues. Through Mozilla’s Open Multilingual Speech Fund, this paper documents one year project for the development of an open, community driven speech corpus for 39 indigenous languages of Pakistan. The dataset includes locally authored texts, daily life sentences, poetry, and folk songs to make a culturally balanced. 
The project not only supports Automatic Speech Recognition but also promote linguistic preservation and digital inclusion.",{"paper_id":7079,"title":7080,"year":7,"month":188,"day":63,"doi":7081,"resource_url":7082,"first_page":7083,"last_page":7084,"pdf_url":7085,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7086,"paper_type":860,"authors":7087,"abstract":7094},"lrec2026-main-266","Amulwe Kimün: A Community-Grounded Demo, Resource, and ASR Baseline for Mapuzugun","10.63317\u002F386vegsicf5x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-266","3360","3368","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.266.pdf","oliva-etal-2026-amulwe",[7088,7091],{"paper_id":7079,"author_seq":247,"given_name":7089,"surname":7090,"affiliation":63,"orcid":63},"Cristian Eduardo Ahumada","Oliva",{"paper_id":7079,"author_seq":232,"given_name":7092,"surname":7093,"affiliation":63,"orcid":63},"Fatiha","Sadat","This paper introduces Amulwe Kimün (\"a means or path for knowledge\" in Mapuzugun), a community-grounded multimodal quiz application co-created with Mapuche speakers to support the revitalization of Mapuzugun. Developed within a FACSO–CONADI collaboration during an intensive language course, the platform integrates multiple-choice, ordering and free-text exercises, as well as forums and chat functions to promote language practice, peer learning, and a sense of community. A pilot involving 32 learners produced 562 responses across 43 questions, with accuracies of 92.3% (multiple choice), 55.2% (ordering), and 7.1% (free-text), offering insights for refining item design and evaluation strategies. The low open-answer accuracy is related to a strict exact-match scoring and orthographic variation of the language. In addition, we present an initial Automatic Speech Recognition (ASR) prototype (Whisper-small + LoRA), establishing a fine-tuned baseline relative to zero-shot performance. 
The demo illustrates how community-grounded design, language resources, and lightweight evaluation can productively meet in a practical tool for an endangered language.",{"paper_id":7096,"title":7097,"year":7,"month":188,"day":63,"doi":7098,"resource_url":7099,"first_page":7100,"last_page":7101,"pdf_url":7102,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7103,"paper_type":860,"authors":7104,"abstract":7113},"lrec2026-main-267","Development of Serbian QA Datasets through Prompt-Based Generation and Human Validation","10.63317\u002F3fmp6mwp2ugy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-267","3369","3378","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.267.pdf","raenovi-etal-2026-development",[7105,7108,7111,7112],{"paper_id":7096,"author_seq":247,"given_name":7106,"surname":7107,"affiliation":63,"orcid":63},"Jovana","Rađenović",{"paper_id":7096,"author_seq":232,"given_name":7109,"surname":7110,"affiliation":63,"orcid":63},"Olivera","Kitanović",{"paper_id":7096,"author_seq":218,"given_name":3566,"surname":3567,"affiliation":63,"orcid":63},{"paper_id":7096,"author_seq":203,"given_name":3578,"surname":3579,"affiliation":63,"orcid":63},"LLMs capable of answering questions, fulfilling diverse user requests, and functioning as chatbots rely heavily on extensive datasets. However, for the Serbian language, there is a significant lack of high-quality datasets structured in a question-and-answer (QA) format. To address this, we extracted a portion of the SQuAD-sr dataset, which, to the best of our knowledge, is the largest QA dataset in Serbian and contains over 87k samples. While this dataset is an incredibly valuable resource, it was translated using an adapted Translate-Align-Retrieve method and contains errors and terminological inaccuracies. 
In this work, we systematically reviewed and corrected more than 7k samples from the SQuAD-sr dataset, significantly improving the dataset’s reliability and quality. We call this modified subset of the SQuAD-sr dataset, the SQuAD-sr-md dataset. The corrections that were made are crucial for training accurate and robust QA models in Serbian, ensuring that AI systems can leverage the full potential of this dataset. We also introduce an additional QA dataset generated from encyclopedia articles, Wikipedia pages, and scientific paper abstracts using LLMs, which contains 74k samples. We name this dataset the SerbianQA-Gen.",{"paper_id":7115,"title":7116,"year":7,"month":188,"day":63,"doi":7117,"resource_url":7118,"first_page":7119,"last_page":7120,"pdf_url":7121,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7122,"paper_type":860,"authors":7123,"abstract":7133},"lrec2026-main-268","An Enhanced Pipeline for the Manzini-Savoia Dialect Corpus","10.63317\u002F2pyh7a7tfdjo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-268","3379","3393","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.268.pdf","fusco-etal-2026-enhanced",[7124,7127,7130],{"paper_id":7115,"author_seq":247,"given_name":7125,"surname":7126,"affiliation":63,"orcid":63},"Achille","Fusco",{"paper_id":7115,"author_seq":232,"given_name":7128,"surname":7129,"affiliation":63,"orcid":63},"Greta","Mazzaggio",{"paper_id":7115,"author_seq":218,"given_name":7131,"surname":7132,"affiliation":63,"orcid":63},"Carlo","Zoli","This paper presents a semi-automatic workflow for enriching the Manzini–Savoia Corpus (MSC) of Italian dialects with extended glosses, normalized transcriptions, and projected morpho-syntactic annotations. While the MSC is a unique resource for Romance microvariation, its partial glossing and phonetic transcription in the International Phonetic Alphabet (IPA) pose major challenges for computational processing. 
We introduce a pipeline for gloss coverage expansion and reliable morpho-syntactic annotation combining rule-based and data-driven components, which includes: (i) automatic completion of truncated verbal paradigms; (ii) hybrid lexical alignment between dialectal tokens and Italian glosses, integrating per-region lexical priors with a dynamic programming alignment algorithm; and (iii) projection-based morpho-syntactic tagging from aligned glosses. The proposed methods offer a reproducible framework for extending partially glossed dialect corpora and contribute new annotated data for research in computational dialectology and cross-variety language modeling.",{"paper_id":7135,"title":7136,"year":7,"month":188,"day":63,"doi":7137,"resource_url":7138,"first_page":7139,"last_page":7140,"pdf_url":7141,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7142,"paper_type":860,"authors":7143,"abstract":7149},"lrec2026-main-269","Are Language Models Borrowing-Blind? A Multilingual Evaluation of Loanword Identification across 10 Languages","10.63317\u002F2mtum74avv3g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-269","3394","3401","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.269.pdf","silva-etal-2026-are",[7144,7146],{"paper_id":7135,"author_seq":247,"given_name":7145,"surname":4040,"affiliation":63,"orcid":63},"Merilin Sousa",{"paper_id":7135,"author_seq":232,"given_name":7147,"surname":7148,"affiliation":63,"orcid":63},"Sina","Ahmadi","Throughout language history, words are borrowed from one language to another and gradually become integrated into the recipient’s lexicon. Speakers can often differentiate these loanwords from native vocabulary, particularly in bilingual communities where a dominant language continuously imposes lexical items on a minority language. 
This paper investigates whether pretrained language models, including large language models, possess similar capabilities for loanword identification. We evaluate multiple models across 10 languages. Despite explicit instructions and contextual information, our results show that models perform poorly in distinguishing loanwords from native ones. These findings corroborate previous evidence that modern NLP systems exhibit a bias toward loanwords rather than native equivalents. Our work has implications for developing NLP tools for minority languages and supporting language preservation in communities under lexical pressure from dominant languages.",{"paper_id":7151,"title":7152,"year":7,"month":188,"day":63,"doi":7153,"resource_url":7154,"first_page":7155,"last_page":7156,"pdf_url":7157,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7158,"paper_type":860,"authors":7159,"abstract":7166},"lrec2026-main-270","Comparing Approaches to Automatic Summarization in Less-Resourced Languages","10.63317\u002F2pi7c62tdqsr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-270","3402","3422","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.270.pdf","palenmichel-etal-2026-comparing",[7160,7163],{"paper_id":7151,"author_seq":247,"given_name":7161,"surname":7162,"affiliation":63,"orcid":63},"Chester","Palen-Michel",{"paper_id":7151,"author_seq":232,"given_name":7164,"surname":7165,"affiliation":63,"orcid":63},"Constantine","Lignos","Automatic text summarization has achieved high performance in higher-resourced languages like English, but comparatively less attention has been given to summarization in less-resourced languages. This work compares a variety of approaches to summarization from zero-shot prompting of LLMs large and small to fine-tuning smaller models like mT5 with and without three data augmentation approaches and multilingual transfer. 
We also explore an LLM translation pipeline approach, translating from the source language to English, summarizing and translating back. Evaluating with five different metrics, we find that there is variation across LLMs in their performance at similar model sizes, that our multilingual fine-tuned mT5 baseline outperforms most other approaches including zero-shot LLM performance for most metrics, and that LLM as judge may be unreliable on less-resourced languages.",{"paper_id":7168,"title":7169,"year":7,"month":188,"day":63,"doi":7170,"resource_url":7171,"first_page":7172,"last_page":7173,"pdf_url":7174,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7175,"paper_type":860,"authors":7176,"abstract":7184},"lrec2026-main-271","PsihoRo: Depression and Anxiety Romanian Text Corpus","10.63317\u002F2958cfzsyg7a","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-271","3423","3433","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.271.pdf","ciobotaru-etal-2026-psihoro",[7177,7180,7183],{"paper_id":7168,"author_seq":247,"given_name":7178,"surname":7179,"affiliation":63,"orcid":63},"Alexandra","Ciobotaru",{"paper_id":7168,"author_seq":232,"given_name":7181,"surname":7182,"affiliation":63,"orcid":63},"Ana-Maria","Bucur",{"paper_id":7168,"author_seq":218,"given_name":5687,"surname":5688,"affiliation":63,"orcid":63},"Psychological corpora in NLP are collections of texts used to analyze human psychology, emotions, and mental health. These texts allow researchers to study psychological constructs, identify patterns related to mental health problems and analyze emotional language. However, collecting accurate mental health data from social media can be challenging due to the assumptions made by data collectors. A more effective approach involves gathering data through open-ended questions and then assessing participants’ mental health status using self-report screening surveys. 
This method was successfully employed for English, a language with a lot of psychological NLP resources. However, the same cannot be stated for Romanian, which currently has no open-source mental health corpus. To address this gap, we have collected the first open-source corpus focused on depression and anxiety in Romanian, by utilizing a form with 6 open-ended questions along with the standardized PHQ-9 and GAD-7 screening questionnaires. Although the PsihoRo corpus contains texts from only 205 respondents, it represents an important first step toward understanding and analyzing mental health issues within the Romanian population. We employ statistical analysis, text analysis using Romanian LIWC, emotion detection, and topic modeling to identify the most important features of this newly introduced resource for the NLP community. The data is publicly available at https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FAlegzandra\u002FPsihoRo.",{"paper_id":7186,"title":7187,"year":7,"month":188,"day":63,"doi":7188,"resource_url":7189,"first_page":7190,"last_page":7191,"pdf_url":7192,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7193,"paper_type":860,"authors":7194,"abstract":7201},"lrec2026-main-272","Aligned Parallel Corpus of the Vedic Saṁhitās for Machine Translation","10.63317\u002F3faoikwftvwt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-272","3434","3444","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.272.pdf","tsukagoshi-etal-2026-aligned",[7195,7198],{"paper_id":7186,"author_seq":247,"given_name":7196,"surname":7197,"affiliation":63,"orcid":63},"Yuzuki","Tsukagoshi",{"paper_id":7186,"author_seq":232,"given_name":7199,"surname":7200,"affiliation":63,"orcid":63},"Ikki","Ohmukai","We introduce a verse-\u002Fparagraph-aligned parallel corpus for three Vedic Saṁhitās –the R̥gveda (R̥V), the Atharvaveda Śaunaka (AVŚ), and the Taittirīya Saṁhitā (TS)– paired with authoritative 
public-domain translations (Geldner for R̥V, Whitney for AVŚ, and Keith for TS). The source texts are drawn from established digital editions (e.g., TITUS and VedaWeb) and normalized under ISO 15919. Each Sanskrit segment is aligned to exactly one translated unit (verse or paragraph for TS prose), yielding a unified, model-ready format. Using this resource, we fine-tune and evaluate three large language models –GPT-4.1 nano, Gemini 2.5 Flash, and Mitra– on Vedic→German\u002FEnglish translation. Evaluation combines surface and semantic metrics (case-insensitive sacreBLEU and COMET), enabling a balanced assessment of form and meaning. Results show consistent in-domain gains after supervised fine-tuning, but substantial cross-domain degradation when models are tested on unseen Saṁhitās, indicating pronounced stylistic and lexical divergence among R̥V, AVŚ, and TS. These findings motivate domain-aware training and reporting practices for Vedic machine translation. We release the corpus with standardized splits and preprocessing to support reproducibility and future d research on historical language modeling, alignment, and translation for low-resource ancient languages.",{"paper_id":7203,"title":7204,"year":7,"month":188,"day":63,"doi":7205,"resource_url":7206,"first_page":7207,"last_page":7208,"pdf_url":7209,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7210,"paper_type":860,"authors":7211,"abstract":7227},"lrec2026-main-273","FormosanMT: A Multilingual Parallel Corpus of the Formosan Language 
Family","10.63317\u002F3ut33xim33nt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-273","3445","3455","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.273.pdf","scheppat-etal-2026-formosanmt",[7212,7215,7218,7221,7224],{"paper_id":7203,"author_seq":247,"given_name":7213,"surname":7214,"affiliation":63,"orcid":63},"Hunter","Scheppat",{"paper_id":7203,"author_seq":232,"given_name":7216,"surname":7217,"affiliation":63,"orcid":63},"Joshua K.","Hartshorne",{"paper_id":7203,"author_seq":218,"given_name":7219,"surname":7220,"affiliation":63,"orcid":63},"Sema","Koc",{"paper_id":7203,"author_seq":203,"given_name":7222,"surname":7223,"affiliation":63,"orcid":63},"Éric Le","Ferrand",{"paper_id":7203,"author_seq":188,"given_name":7225,"surname":7226,"affiliation":63,"orcid":63},"Emily","Prud'hommeaux","While the quality of machine translation (MT) between widely-spoken languages has improved dramatically in recent years, training robust MT systems for languages with fewer resources remains a challenge. Endangered languages, which often lack the speaker population and written tradition needed to create text resources, are at a particular disadvantage. Developing robust MT architectures for very low-resource settings is hampered by the lack of suitable parallel corpora. To address this challenge, we introduce FormosanMT, a set of MT-ready parallel corpora for the Formosan family of endangered languages indigenous to Taiwan. Together the corpora total nearly 500,000 Formosan-Mandarin and Formosan-English sentence pairs. We share scripts for extracting these corpora from public sources, along with customizable tools for filtering, normalizing, and partitioning the data. In addition, we provide a new tokenizer for Traditional Chinese writing compatible with the popular No Language Left Behind (NLLB) MT architecture, along with updated and improved code for fine-tuning NLLB for any low-resource language pair. 
Finally we distribute our fully trained NLLB and OpenNMT models for the Formosan languages to and from both Mandarin and English. In addition to serving as a valuable resource for the Formosan language speaker communities, our data, code, and models will be available to NLP researchers working on endangered and low-resource language MT.",{"paper_id":7229,"title":7230,"year":7,"month":188,"day":63,"doi":7231,"resource_url":7232,"first_page":7233,"last_page":7234,"pdf_url":7235,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7236,"paper_type":860,"authors":7237,"abstract":7256},"lrec2026-main-274","The Construction of a Mixe Variant Parallel Corpus","10.63317\u002F4iizmd3in9aj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-274","3456","3461","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.274.pdf","ruiz-etal-2026-construction",[7238,7241,7244,7247,7250,7253],{"paper_id":7229,"author_seq":247,"given_name":7239,"surname":7240,"affiliation":63,"orcid":63},"Ivan Vladimir Meza","Ruiz",{"paper_id":7229,"author_seq":232,"given_name":7242,"surname":7243,"affiliation":63,"orcid":63},"Delfino Zacarias","Marquez",{"paper_id":7229,"author_seq":218,"given_name":7245,"surname":7246,"affiliation":63,"orcid":63},"Martha Elba Ramírez","Andrés",{"paper_id":7229,"author_seq":203,"given_name":7248,"surname":7249,"affiliation":63,"orcid":63},"Victoriano Santiago","Cayetano",{"paper_id":7229,"author_seq":188,"given_name":7251,"surname":7252,"affiliation":63,"orcid":63},"Jonathan Santiago","Antonio",{"paper_id":7229,"author_seq":172,"given_name":7254,"surname":7255,"affiliation":63,"orcid":63},"Carlos Daniel Hernández","Mena","We present the progress and challenges of constructing a Mixe-Spanish parallel corpus for Machine Translation. Mixe is a Mexican Indigenous Language that is spoken by more than 100,000 speakers. In particular, we focus on the San Juan Guivicovic Mixe variant (mir). 
The resulting resource is available under an open research license (CC BY-NC-SA). It was created following a previous state-of-the-art methodology for Mexican indigenous languages. In this case, we used paid translators from the variant region. We present a baseline system.",{"paper_id":7258,"title":7259,"year":7,"month":188,"day":63,"doi":7260,"resource_url":7261,"first_page":7262,"last_page":7263,"pdf_url":7264,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7265,"paper_type":860,"authors":7266,"abstract":7274},"lrec2026-main-275","Nepali Lemmatization with Multilingual Transformers: Intrinsic and Extrinsic Evaluation in a Low-Resource Setting","10.63317\u002F2o6euz7qakr5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-275","3462","3469","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.275.pdf","regmi-etal-2026-nepali",[7267,7270,7273],{"paper_id":7258,"author_seq":247,"given_name":7268,"surname":7269,"affiliation":63,"orcid":63},"Sunil","Regmi",{"paper_id":7258,"author_seq":232,"given_name":7271,"surname":7272,"affiliation":63,"orcid":63},"Sundeep","Dawadi",{"paper_id":7258,"author_seq":218,"given_name":6705,"surname":6706,"affiliation":63,"orcid":63},"The Nepali language has a rich and complex morphology. Existing lemmatization research focuses on traditional rule-based or TRIE-based approaches. These methods often fail when encountering out-of-vocabulary or misspelled words. This paper investigates neural lemmatization for the under-resourced Nepali language using multilingual transformer models. We formulate lemmatization as a text-to-text generation problem and evaluate its impacts on downstream tasks by finetuning mBART-large-50, mT5-base, and mT5-small. The models were trained on a combination of publicly available and human-annotated word-lemma pair (8,000 instances) dataset. 
The performance is evaluated using Character Error Rate (CER), accuracy, character-level Bilingual Evaluation Understudy (BLEU), and morphological coverage. The mT5-base model achieved the highest overall performance. The model achieved 96.1% accuracy and a 1.1% CER using a learning rate of 5 × 10−4. However, it showed slightly weaker performance in handling complex morphological variations. The mBART-large-50 model followed closely with 96.0% accuracy and 0.970 morphological coverage. To assess the efficacy of these models, we applied lemmatization to downstream tasks. In Hindi-Nepali cross-lingual alignment, performance improved significantly from 12.86% to 41.61% using mBART model. In information retrieval, the Mean Average Precision (MAP)@1 using binary index increased from 0.71 to 0.90 using mBART model. These results demonstrate that multilingual transformers effectively learn morphological transformations for low-resource languages through text-to-text generation.",{"paper_id":7276,"title":7277,"year":7,"month":188,"day":63,"doi":7278,"resource_url":7279,"first_page":7280,"last_page":7281,"pdf_url":7282,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7283,"paper_type":860,"authors":7284,"abstract":7302},"lrec2026-main-276","Diacritic Restoration for Low-Resource Indigenous Languages: Case Study with Bribri and Cook Islands Māori","10.63317\u002F4c9samfkamqy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-276","3470","3483","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.276.pdf","cotosolano-etal-2026-diacritic",[7285,7288,7289,7292,7294,7296,7299],{"paper_id":7276,"author_seq":247,"given_name":7286,"surname":7287,"affiliation":63,"orcid":63},"Rolando","Coto-Solano",{"paper_id":7276,"author_seq":232,"given_name":3320,"surname":3446,"affiliation":63,"orcid":63},{"paper_id":7276,"author_seq":218,"given_name":7290,"surname":7291,"affiliation":63,"orcid":63},"Manoela 
Teleginski","Ferraz",{"paper_id":7276,"author_seq":203,"given_name":2019,"surname":7293,"affiliation":63,"orcid":63},"Sasse",{"paper_id":7276,"author_seq":188,"given_name":6016,"surname":7295,"affiliation":63,"orcid":63},"Krupka",{"paper_id":7276,"author_seq":172,"given_name":7297,"surname":7298,"affiliation":63,"orcid":63},"Sharid","Loaiciga",{"paper_id":7276,"author_seq":155,"given_name":7300,"surname":7301,"affiliation":63,"orcid":63},"Sally Akevai Tenamu","Nicholas","We present experiments on diacritic restoration, a form of text normalization essential for creating and processing data in natural language processing (NLP) tasks. Our study focuses on two extremely under-resourced languages: Bribri, a Chibchan language spoken in Costa Rica, and Cook Islands Māori, a Polynesian language spoken in the Cook Islands. Specifically, this paper: (i) compares algorithms for diacritics restoration in under-resourced languages, including tonal diacritics, (ii) examines the amount of data required to achieve target performance levels, (iii) contrasts results across varying resource conditions, and (iv) explores the related task of diacritic correction. We find that fine-tuned, character-level LLMs perform best, likely due to their ability to decompose complex characters into their UTF-8 byte representations. In contrast, massively multilingual models perform less effectively given our data constraints. Across all models, reliable performance begins to emerge with data budgets of around 10,000 words. Zero-shot approaches perform poorly in all cases. 
This study responds both to requests from the language communities and to broader NLP research questions concerning model performance and generalization in under-resourced contexts.",{"paper_id":7304,"title":7305,"year":7,"month":188,"day":63,"doi":7306,"resource_url":7307,"first_page":7308,"last_page":7309,"pdf_url":7310,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7311,"paper_type":860,"authors":7312,"abstract":7322},"lrec2026-main-277","A Modern Online Learning Platform for ʻŌlelo Hawaiʻi Classrooms","10.63317\u002F547hw8gqc3ph","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-277","3484","3489","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.277.pdf","castro-etal-2026-modern",[7313,7315,7317,7320],{"paper_id":7304,"author_seq":247,"given_name":3643,"surname":7314,"affiliation":63,"orcid":63},"Castro",{"paper_id":7304,"author_seq":232,"given_name":7316,"surname":3843,"affiliation":63,"orcid":63},"Keneth",{"paper_id":7304,"author_seq":218,"given_name":7318,"surname":7319,"affiliation":63,"orcid":63},"Winston","Wu",{"paper_id":7304,"author_seq":203,"given_name":7321,"surname":6546,"affiliation":63,"orcid":63},"William H.","We present Hōʻoi Aʻo, a browser-based platform designed to streamline the teaching workflow and enhance the learning experience for students in Hawaiian language classes. Built with modern technologies including FastAPI, React, and MongoDB, the platform provides an intuitive and specialized environment for both instructors and students of ʻŌlelo Hawaiʻi. Our platform enables instructors to add content, create or import quizzes in multiple formats, view and analyze common student mistakes, and ultimately save time through automatic grading. 
Students can access chapters, lessons, assignments, and quizzes all in one place, with automatically graded quizzes for instant feedback and unlimited randomly-generated practice questions created using an innovative synchronous context-free grammar approach, allowing students to obtain extra language practice outside of class. Currently, the platform supports content from Book 1 of Nā Kai ʻEwalu, a popular Hawaiian textbook. Hōʻoi Aʻo not only makes language practice more accessible for a language with few existing learning resources, but also represents a step toward a more modern and effective digital ecosystem for teaching and learning ʻŌlelo Hawaiʻi.",{"paper_id":7324,"title":7325,"year":7,"month":188,"day":63,"doi":7326,"resource_url":7327,"first_page":7328,"last_page":7329,"pdf_url":7330,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7331,"paper_type":860,"authors":7332,"abstract":7335},"lrec2026-main-278","Glossed Data in Northern Interior Salish","10.63317\u002F2isngefy6ags","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-278","3490","3495","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.278.pdf","stacey-2026-glossed",[7333],{"paper_id":7324,"author_seq":247,"given_name":2742,"surname":7334,"affiliation":63,"orcid":63},"Stacey","The Northern Interior subgroup of the Salish language family, spoken in the Pacific Northwest of North America, comprises three languages: St’át’imcets, nɬeʔkepmxcín, and Secwepemctsín. Each has a small number of first-language (L1) speakers remaining due to the effects of colonization, though language revitalization efforts are ongoing. This work introduces the first compiled and cleaned language datasets in these languages, useable in natural language processing (NLP) projects. 
This data is in glossed format, with transcriptions in the language, translations into English, and linguistic segmentations and glosses that provide a detailed breakdown of meaning. In order to achieve consistently formatted data within and across each language, extensive data cleaning was conducted. This paper provides the glossed data standards that were developed and recounts the cleaning process. Scripts that help to automate parts of the data preparation processes are included. Finally, this work strives to keep the interconnectedness of language and community as a central consideration.",{"paper_id":7337,"title":7338,"year":7,"month":188,"day":63,"doi":7339,"resource_url":7340,"first_page":7341,"last_page":7342,"pdf_url":7343,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7344,"paper_type":860,"authors":7345,"abstract":7351},"lrec2026-main-279","CEFR-Cymraeg: A Dataset and Baseline Models for Language Proficiency Assessment in Welsh","10.63317\u002F2dvuy5ucr9g2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-279","3496","3505","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.279.pdf","waqar-etal-2026-cefr",[7346,7348,7349,7350],{"paper_id":7337,"author_seq":247,"given_name":7347,"surname":2204,"affiliation":63,"orcid":63},"Eeshan",{"paper_id":7337,"author_seq":232,"given_name":2837,"surname":2838,"affiliation":63,"orcid":63},{"paper_id":7337,"author_seq":218,"given_name":2843,"surname":2844,"affiliation":63,"orcid":63},{"paper_id":7337,"author_seq":203,"given_name":2846,"surname":2847,"affiliation":63,"orcid":63},"We introduce CEFR-Cymraeg, the first dataset annotated with Common European Framework of Reference (CEFR) levels for Welsh. The dataset is built from learning materials for adult learners, carefully extracted from widely used coursebooks and verified by teachers of Welsh as a second language. 
It spans levels A1 to B2 and includes multiple units of analysis: sentences, dialogues, paragraphs, and documents. In total, 2,658 entries are provided with gold-standard CEFR annotations, making CEFR-Cymraeg a valuable resource for research on language learning and low-resourced Celtic languages. To illustrate its potential applications, we define language proficiency assessment as a multi-class classification task and fine-tune multilingual pre-trained language models. Given the limited size of the dataset, we also experiment with data augmentation. Results show that these models successfully capture proficiency distinctions and generalise well to Welsh, with the best-performing model reaching a weighted F1-score of 0.83. Qualitative analysis confirmed that most apparent errors reflected valid pedagogical variation rather than model inconsistencies. CEFR-Cymraeg establishes a benchmark resource for Welsh and opens new opportunities for educational NLP, corpus linguistics, and multilingual proficiency research.",{"paper_id":7353,"title":7354,"year":7,"month":188,"day":63,"doi":7355,"resource_url":7356,"first_page":7357,"last_page":7358,"pdf_url":7359,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7360,"paper_type":860,"authors":7361,"abstract":7370},"lrec2026-main-280","Singlish to English Translation with Precision: A Dataset and Language Detection-Driven Masked Modeling for Singlish to English Translation","10.63317\u002F4fw7s9vepnr9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-280","3506","3516","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.280.pdf","kumar-etal-2026-singlish",[7362,7364,7367,7368,7369],{"paper_id":7353,"author_seq":247,"given_name":7363,"surname":2247,"affiliation":63,"orcid":63},"Sujit",{"paper_id":7353,"author_seq":232,"given_name":7365,"surname":7366,"affiliation":63,"orcid":63},"Gerome 
Kusuma","Ang",{"paper_id":7353,"author_seq":218,"given_name":912,"surname":913,"affiliation":63,"orcid":63},{"paper_id":7353,"author_seq":203,"given_name":918,"surname":919,"affiliation":63,"orcid":63},{"paper_id":7353,"author_seq":188,"given_name":921,"surname":922,"affiliation":63,"orcid":63},"Singlish, a creole rooted in English and influenced by Singapore’s multilingual and multicultural environment, poses significant challenges for those proficient in standard English due to its unique and often complex lexical and syntactic structures. Despite significant advancements in language translation for both high- and low-resource languages, translating Singlish to English remains largely underexplored. This gap is primarily due to the lack of dedicated datasets for language detection and Singlish-to-English translation, as well as the absence of robust models capable of addressing the unique linguistic challenges posed by Singlish. In this work, we curate a word-level language detection dataset, a Singlish-to-English translation dataset, and propose a Language Detection-driven Masked Language Modelling approach for translating Singlish into English. We evaluate the performance of existing models and the proposed approach on two Singlish-to-English translation datasets, including our proposed SEAT dataset. 
The results demonstrate that the proposed LD-MLMTrans approach outperforms the baseline model and exhibits high proficiency in Singlish-to-English translation.",{"paper_id":7372,"title":7373,"year":7,"month":188,"day":63,"doi":7374,"resource_url":7375,"first_page":7376,"last_page":7377,"pdf_url":7378,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7379,"paper_type":860,"authors":7380,"abstract":7384},"lrec2026-main-281","LLMs in Ottoman Turkish: From MLM to NER","10.63317\u002F2ttbxopqx25z","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-281","3517","3522","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.281.pdf","ylandilolu-2026-llms",[7381],{"paper_id":7372,"author_seq":247,"given_name":7382,"surname":7383,"affiliation":63,"orcid":63},"Enes","Yılandiloğlu","This paper introduces three foundational contributions to Digital Ottoman Turkish Studies. It presents: (1) three masked language models (MLMs) trained on over 11 million words from 144 works spanning from the 15th to 20th century, (2) a state-of-the-art Named Entity Recognition (NER) model (F1 = 89.94%) trained on 9,960 manually annotated entities, and (3) a state-of-the-art Universal Dependency (UD) parsing model for Ottoman Turkish. This work differs from others by deploying IJMES-transliterated documents for training and evaluation in order to prevent loss of information due to the change of the script from Perso-Arabic to Latin. The paper further explores probabilistic manuscript reconstruction in preliminary experiments, showing that MLMs can recover unread sections in historical documents with 77.8% top-1 accuracy when a list of candidate words is provided. 
Followed by a discussion, the paper outlines the future directions as building century‐aware MLMs and expanding the training data across genres to enhance model generalization.",{"paper_id":7386,"title":7387,"year":7,"month":188,"day":63,"doi":7388,"resource_url":7389,"first_page":7390,"last_page":7391,"pdf_url":7392,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7393,"paper_type":860,"authors":7394,"abstract":7400},"lrec2026-main-282","SloPal: A 60-Million-Word Slovak Parliamentary Corpus with Aligned Speech and Fine-Tuned ASR Models","10.63317\u002F3wovntemmb6u","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-282","3523","3533","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.282.pdf","bok-etal-2026-slopal",[7395,7397],{"paper_id":7386,"author_seq":247,"given_name":2022,"surname":7396,"affiliation":63,"orcid":63},"Božík",{"paper_id":7386,"author_seq":232,"given_name":7398,"surname":7399,"affiliation":63,"orcid":63},"Marek","Suppa","Slovak remains a low-resource language for automatic speech recognition (ASR), with fewer than 100 hours of publicly available training data. We present SloPal, a comprehensive Slovak parliamentary corpus comprising 330,000 speaker-segmented transcripts (66 million words, 220 million tokens) spanning 2001–2024, with rich metadata including speaker names, roles, and session information. From this collection, we derive SloPalSpeech, a 2,806-hour aligned speech dataset with segments up to 30 seconds, constructed using a language-agnostic anchor-based alignment pipeline and optimized for Whisper-based ASR training. Fine-tuning Whisper on SloPalSpeech reduces Word Error Rate (WER) by up to 70%, with the fine-tuned small model (244M parameters) approaching base large-v3 (1.5B parameters) performance at 6× fewer parameters. 
We publicly release the SloPal text corpus, SloPalSpeech aligned audio, and four fine-tuned Whisper models at https:\u002F\u002Fhuggingface.co\u002Fcollections\u002FNaiveNeuron\u002Fslopal, providing the most comprehensive open Slovak parliamentary language resource to date.",{"paper_id":7402,"title":7403,"year":7,"month":188,"day":63,"doi":7404,"resource_url":7405,"first_page":7406,"last_page":7407,"pdf_url":7408,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7409,"paper_type":860,"authors":7410,"abstract":7415},"lrec2026-main-283","SlovKE: A Large-Scale Dataset and LLM Evaluation for Slovak Keyphrase Extraction","10.63317\u002F322dk2ztk5dj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-283","3534","3546","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.283.pdf","tevak-etal-2026-slovke",[7411,7414],{"paper_id":7402,"author_seq":247,"given_name":7412,"surname":7413,"affiliation":63,"orcid":63},"Dávid","Števaňák",{"paper_id":7402,"author_seq":232,"given_name":7398,"surname":7399,"affiliation":63,"orcid":63},"Keyphrase extraction for morphologically rich, low-resource languages remains understudied, largely due to the scarcity of suitable evaluation datasets. We address this gap for Slovak by constructing a dataset of 227,432 scientific abstracts with author-assigned keyphrases—scraped and systematically cleaned from the Slovak Central Register of Theses—representing a 25-fold increase over the largest prior Slovak resource and approaching the scale of established English benchmarks such as KP20K. Using this dataset, we benchmark three unsupervised baselines (YAKE, TextRank, KeyBERT with SlovakBERT embeddings) and evaluate KeyLLM, an LLM-based extraction method using GPT-3.5-turbo. 
Unsupervised baselines achieve at most 11.6% exact-match F1@6, with a large gap to partial matching (up to 51.5%), reflecting the difficulty of matching inflected surface forms to author-assigned keyphrases. KeyLLM narrows this exact–partial gap, producing keyphrases closer to the canonical forms assigned by authors, while manual evaluation on 100 documents (κ = 0.61) confirms that KeyLLM captures relevant concepts that automated exact matching underestimates. Our analysis identifies morphological mismatch as the dominant failure mode for statistical methods—a finding relevant to other inflected languages. The dataset (https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FNaiveNeuron\u002FSlovKE) and evaluation code (https:\u002F\u002Fgithub.com\u002FNaiveNeuron\u002FSlovKE) are publicly available.",{"paper_id":7417,"title":7418,"year":7,"month":188,"day":63,"doi":7419,"resource_url":7420,"first_page":7421,"last_page":7422,"pdf_url":7423,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7424,"paper_type":860,"authors":7425,"abstract":7434},"lrec2026-main-284","Automatic Speech Recognition for Documenting Endangered Languages: Case Study of Ikema Miyakoan","10.63317\u002F4im4f6vuxk42","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-284","3547","3555","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.284.pdf","taguchi-etal-2026-automatic",[7426,7429,7432],{"paper_id":7417,"author_seq":247,"given_name":7427,"surname":7428,"affiliation":63,"orcid":63},"Chihiro","Taguchi",{"paper_id":7417,"author_seq":232,"given_name":7430,"surname":7431,"affiliation":63,"orcid":63},"Yukinori","Takubo",{"paper_id":7417,"author_seq":218,"given_name":1061,"surname":7433,"affiliation":63,"orcid":63},"Chiang","Language endangerment poses a major challenge to linguistic diversity worldwide, and technological advances have opened new avenues for documentation and revitalization. 
Among these, automatic speech recognition (ASR) has shown increasing potential to assist in the transcription of endangered language data. This study focuses on Ikema, a severely endangered Ryukyuan language spoken in Okinawa, Japan, with approximately 1,300 remaining speakers, most of whom are over 60 years old. We present an ongoing effort to develop an ASR system for Ikema based on field recordings. Specifically, we (1) construct a 6.33-hour speech corpus from field recordings, (2) train an ASR model that achieves a character error rate as low as 15%, and (3) evaluate the impact of ASR-assisted transcription on annotation efficiency. Our results demonstrate that ASR integration can substantially reduce transcription time and cognitive load, offering a practical pathway toward scalable, technology-supported documentation of endangered languages.",{"paper_id":7436,"title":7437,"year":7,"month":188,"day":63,"doi":7438,"resource_url":7439,"first_page":7440,"last_page":7441,"pdf_url":7442,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":7443,"bibkey":7444,"paper_type":860,"authors":7445,"abstract":7457},"lrec2026-main-285","Adaptive Method for Self-Supervised Learning Models on Automatic Dialect Speech Recognition Based on Shared Knowledge of Japanese Dialects and Standard 
Japanese","10.63317\u002F2tb4unmnwikb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-285","3556","3565","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.285.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.285_OptionalSupplementaryMaterial.zip","asakawa-etal-2026-adaptive",[7446,7449,7452,7454],{"paper_id":7436,"author_seq":247,"given_name":7447,"surname":7448,"affiliation":63,"orcid":63},"Naoru","Asakawa",{"paper_id":7436,"author_seq":232,"given_name":7450,"surname":7451,"affiliation":63,"orcid":63},"Naoki","Takahashi",{"paper_id":7436,"author_seq":218,"given_name":7453,"surname":3610,"affiliation":63,"orcid":63},"Atsuhiko",{"paper_id":7436,"author_seq":203,"given_name":7455,"surname":7456,"affiliation":63,"orcid":63},"Seiichi","Nakagawa","Speech recognition for Japanese dialects is challenging, and recognition accuracy tends to be lower compared to standard Japanese. Previous research proposed a three-step learning method based on the self-supervised learning (SSL) model XLS-R as the base model, incorporating three multi-task learning tasks: SSL, ASR, and dialect identification (DID). While this achieved improved recognition performance for dialect speech, it faced the issue of degraded recognition performance for standard Japanese. This study proposes an adaptation method to construct a single speech recognition model, based on the prior model, that is suitable for both Japanese dialects and standard Japanese. We explored the use of diverse speech corpora, including ReazonSpeech based on TV broadcast audio and CEJC based on everyday conversational speech, in addition to the standard Japanese speech corpus CSJ and the dialect speech corpus COJADS used in prior research, aiming for knowledge sharing between dialects and standard Japanese. 
As a result, we confirmed improved recognition performance for both dialects and standard Japanese by including both in the final step of a three-step learning method. We also examined the impact of differences in corpus type and domain on recognition performance.",{"paper_id":7459,"title":7460,"year":7,"month":188,"day":63,"doi":7461,"resource_url":7462,"first_page":7463,"last_page":7464,"pdf_url":7465,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7466,"paper_type":860,"authors":7467,"abstract":7479},"lrec2026-main-286","ATLAS: Article Tracking, Linking, and Analysis of Swedish Encyclopedias","10.63317\u002F5idqekcpsxyg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-286","3566","3576","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.286.pdf","andersson-etal-2026-atlas",[7468,7471,7474,7477],{"paper_id":7459,"author_seq":247,"given_name":7469,"surname":7470,"affiliation":63,"orcid":63},"Albin","Andersson",{"paper_id":7459,"author_seq":232,"given_name":7472,"surname":7473,"affiliation":63,"orcid":63},"Salam","Jonasson",{"paper_id":7459,"author_seq":218,"given_name":7475,"surname":7476,"affiliation":63,"orcid":63},"Fredrik","Wastring",{"paper_id":7459,"author_seq":203,"given_name":1159,"surname":7478,"affiliation":63,"orcid":63},"Nugues","The digitization of old encyclopedias represents an important step to improve access to historically structured knowledge. Often, however, this process does not go beyond an optical character recognition, leaving all the underlying structure unexploited. In addition, many encyclopedias had multiple editions reflecting the evolution of knowledge. The lack of structure in the raw text makes it difficult to track changes across these editions. In this work, we built a pipeline to restore the text structure, where we extract the headwords and identify entries; categorize the entities; match entries across editions; and link entries to a Wikidata item. 
We applied this pipeline to the four major editions of _Nordisk familjebok_, an authoritative Swedish encyclopedia published between 1876 and 1951. We could extract the headwords with an F1 score of 97.8% and we obtained an F1 score of 93.4% on the headword classification. On a small-scale evaluation, we reached a 93% precision on the cross-edition matching, 85% precision and 16.5% recall on the Wikidata linking. This shows that an automated approach to digitized historical knowledge is possible. This should facilitate the preservation of general knowledge and the understanding of knowledge transmission. The datasets and programs are available online.",{"paper_id":7481,"title":7482,"year":7,"month":188,"day":63,"doi":7483,"resource_url":7484,"first_page":7485,"last_page":7486,"pdf_url":7487,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7488,"paper_type":860,"authors":7489,"abstract":7507},"lrec2026-main-287","Evaluating Embedding Models on Danish Historical Newspapers: A Corpus and Benchmark 
Resource","10.63317\u002F2hzf85aou6dw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-287","3577","3589","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.287.pdf","lassche-etal-2026-evaluating",[7490,7493,7496,7499,7502,7505],{"paper_id":7481,"author_seq":247,"given_name":7491,"surname":7492,"affiliation":63,"orcid":63},"Alie","Lassche",{"paper_id":7481,"author_seq":232,"given_name":7494,"surname":7495,"affiliation":63,"orcid":63},"Pascale","Feldkamp",{"paper_id":7481,"author_seq":218,"given_name":7497,"surname":7498,"affiliation":63,"orcid":63},"Yuri","Bizzoni",{"paper_id":7481,"author_seq":203,"given_name":7500,"surname":7501,"affiliation":63,"orcid":63},"Katrine","Baunvig",{"paper_id":7481,"author_seq":188,"given_name":7503,"surname":7504,"affiliation":63,"orcid":63},"Kristoffer","Nielbo",{"paper_id":7481,"author_seq":172,"given_name":1915,"surname":7506,"affiliation":63,"orcid":63},"Heinsen","We present an enriched dataset of almost five million Danish historical newspaper articles from the late seventeenth to nineteenth century, augmented with semantic embeddings and an annotated subset, to enable semi-automated classification as well as thematic and linguistic exploration. Through three historical benchmark tasks that evaluate the performance of Danish and multilingual embedding models on this historical Danish corpus, we discuss how the choice for an embedding model depends on the type of task, and enrich our corpus with embeddings from the overall best performing model. As a showcase experiment, we look at the distribution of article categories in the three subgenres that can be observed in the corpus. This experiment highlights the corpus and article-level embeddings’ potential for further exploration and analysis of the Danish historical mediascape. 
The resource is freely available for research use and aims to foster reproducible, data-driven studies of language and culture in the Danish nineteenth century.",{"paper_id":7509,"title":7510,"year":7,"month":188,"day":63,"doi":7511,"resource_url":7512,"first_page":7513,"last_page":7514,"pdf_url":7515,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7516,"paper_type":860,"authors":7517,"abstract":7524},"lrec2026-main-288","Leveraging Linguistic Similarity for Low-Resource Speech Transcription","10.63317\u002F5mcp4ektu7mw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-288","3590","3598","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.288.pdf","fedchenko-etal-2026-leveraging",[7518,7521],{"paper_id":7509,"author_seq":247,"given_name":7519,"surname":7520,"affiliation":63,"orcid":63},"Valentina","Fedchenko",{"paper_id":7509,"author_seq":232,"given_name":7522,"surname":7523,"affiliation":63,"orcid":63},"Eric","Jordan","This study investigates how large-scale, self-supervised acoustic models (like XLSR and MMS) represent linguistic similarity and whether this can optimize Automatic Speech Recognition (ASR) for low-resource and dialectally diverse languages. While these models excel at cross-lingual transfer learning, their internal representations of fine-grained dialectal variation remain opaque. We focus on Yiddish, a language with a complex dialect continuum, to test if a model’s internal acoustic similarity metric—Acoustic Token Distribution Similarity (ATDS)—predicts ASR performance. Our methodology involved fine-tuning models on Yiddish dialects and measuring ATDS between Yiddish and related languages. Results confirm that ATDS is a meaningful predictor: higher acoustic similarity in the model’s latent space correlates with lower character error rates (CER) after fine-tuning. This relationship is strongest in mid-to-upper layers of the MMS model and for in-domain data. 
Crucially, ATDS captures model-dependent acoustic similarity, which does not always align with genealogical linguistic relationships but remains a practical indicator of transfer learning potential. We conclude that ATDS is a valuable tool for selecting donor languages to develop more efficient, dialect-sensitive ASR systems for language documentation, even if its absolute values require careful interpretation against linguistic knowledge.",{"paper_id":7526,"title":7527,"year":7,"month":188,"day":63,"doi":7528,"resource_url":7529,"first_page":7530,"last_page":7531,"pdf_url":7532,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7533,"paper_type":860,"authors":7534,"abstract":7551},"lrec2026-main-289","A Corpus of Persuasion Techniques in Slavic Languages","10.63317\u002F46rmmqosdrgt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-289","3599","3616","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.289.pdf","piskorski-etal-2026-corpus",[7535,7537,7540,7541,7544,7546,7549],{"paper_id":7526,"author_seq":247,"given_name":5422,"surname":7536,"affiliation":63,"orcid":63},"Piskorski",{"paper_id":7526,"author_seq":232,"given_name":7538,"surname":7539,"affiliation":63,"orcid":63},"Dimitar Iliyanov","Dimitrov",{"paper_id":7526,"author_seq":218,"given_name":4854,"surname":4855,"affiliation":63,"orcid":63},{"paper_id":7526,"author_seq":203,"given_name":7542,"surname":7543,"affiliation":63,"orcid":63},"Jacek","Haneczok",{"paper_id":7526,"author_seq":188,"given_name":1631,"surname":7545,"affiliation":63,"orcid":63},"Marcinczuk",{"paper_id":7526,"author_seq":172,"given_name":7547,"surname":7548,"affiliation":63,"orcid":63},"Arkadiusz","Modzelewski",{"paper_id":7526,"author_seq":155,"given_name":1064,"surname":7550,"affiliation":63,"orcid":63},"Yangarber","We present a new corpus of persuasion techniques for Slavic languages. 
The corpus contains documents from parliamentary debates in Bulgarian and Polish, and from social media in Russian, annotated with persuasion techniques at text-span and sentence level. The techniques come from a taxonomy of 25 fine-grained persuasion techniques, grouped under six broader categories of rhetorical persuasion strategies. The corpus contains approximately 7500 text spans annotated with persuasion techniques, from 222 documents that cover hotly debated topics at both international and national level. We describe the process of corpus creation, provide related statistics, elaborate on topic and persuasion technique correlations. We provide baseline models and benchmark results for detection and classification of persuasion techniques at the text-span level and sentence level, which use classic ML-based and generative AI-based models.",{"paper_id":7553,"title":7554,"year":7,"month":188,"day":63,"doi":7555,"resource_url":7556,"first_page":7557,"last_page":7558,"pdf_url":7559,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7560,"paper_type":860,"authors":7561,"abstract":7572},"lrec2026-main-290","GePaDeSE: A New Resource for Clause-Level Aspect in German Parliamentary 
Debates","10.63317\u002F5jshypsv6t3k","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-290","3617","3632","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.290.pdf","schlenker-etal-2026-gepadese",[7562,7563,7564,7567,7569,7571],{"paper_id":7553,"author_seq":247,"given_name":1296,"surname":1297,"affiliation":63,"orcid":63},{"paper_id":7553,"author_seq":232,"given_name":1290,"surname":1291,"affiliation":63,"orcid":63},{"paper_id":7553,"author_seq":218,"given_name":7565,"surname":7566,"affiliation":63,"orcid":63},"Lilly","Brauner",{"paper_id":7553,"author_seq":203,"given_name":2175,"surname":7568,"affiliation":63,"orcid":63},"Ertz",{"paper_id":7553,"author_seq":188,"given_name":1290,"surname":7570,"affiliation":63,"orcid":63},"Reinig",{"paper_id":7553,"author_seq":172,"given_name":1299,"surname":1300,"affiliation":63,"orcid":63},"This paper presents GePaDeSE, a new resource with annotations of clause-level aspect in German parliamentary debates, also known as Situation Entity types. The new resource includes 250 political speeches from the German Bundestag, given by 192 speakers, with over 220,000 tokens. In the paper, we first describe the new corpus and the annotation process. 
Then we present experiments on automatically classifying clause-level aspect and present an in-depth analysis where we show the potential of Situation Entities for the analysis of political discourse.",{"paper_id":7574,"title":7575,"year":7,"month":188,"day":63,"doi":7576,"resource_url":7577,"first_page":7578,"last_page":7579,"pdf_url":7580,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7581,"paper_type":860,"authors":7582,"abstract":7594},"lrec2026-main-291","FrameNet Semantic Role Classification by Analogy","10.63317\u002F4orqo3vca85v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-291","3633","3644","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.291.pdf","ngo-etal-2026-framenet",[7583,7586,7589,7592],{"paper_id":7574,"author_seq":247,"given_name":7584,"surname":7585,"affiliation":63,"orcid":63},"Van Duy","Ngo",{"paper_id":7574,"author_seq":232,"given_name":7587,"surname":7588,"affiliation":63,"orcid":63},"Stergos","Afantenos",{"paper_id":7574,"author_seq":218,"given_name":7590,"surname":7591,"affiliation":63,"orcid":63},"Emiliano","Lorini",{"paper_id":7574,"author_seq":203,"given_name":7031,"surname":7593,"affiliation":63,"orcid":63},"Couceiro","In this paper, we adopt a relational view of analogies applied to Semantic Role Classification in FrameNet. We define analogies as formal relations over the Cartesian product of frame evoking lexical units and frame element pairs, which we use to construct a new dataset. Each element of this binary relation is labelled as a valid analogical instance if the frame elements share the same semantic role, or as invalid otherwise. This formulation allows us to transform Semantic Role Classification into binary classification and train a lightweight Artificial Neural Network (ANN) that exhibits rapid convergence with minimal parameters. Crucially, no Semantic Role information is introduced to the neural network during training. 
We recover semantic roles during inference by computing probability distributions over candidates of all semantic roles within a given frame through random sampling and analogical transfer. This approach allows us to surpass previous State of the Art results while maintaining computational efficiency and frugality.",{"paper_id":7596,"title":7597,"year":7,"month":188,"day":63,"doi":7598,"resource_url":7599,"first_page":7600,"last_page":7601,"pdf_url":7602,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7603,"paper_type":860,"authors":7604,"abstract":7619},"lrec2026-main-292","CEFR-Annotated WordNet: LLM-Based Proficiency-Guided Semantic Database for Language Learning","10.63317\u002F3egsd9wawd56","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-292","3645","3661","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.292.pdf","kikuchi-etal-2026-cefr",[7605,7607,7610,7613,7616],{"paper_id":7596,"author_seq":247,"given_name":7606,"surname":4250,"affiliation":63,"orcid":63},"Masato",{"paper_id":7596,"author_seq":232,"given_name":7608,"surname":7609,"affiliation":63,"orcid":63},"Masatsugu","Ono",{"paper_id":7596,"author_seq":218,"given_name":7611,"surname":7612,"affiliation":63,"orcid":63},"Toshioki","Soga",{"paper_id":7596,"author_seq":203,"given_name":7614,"surname":7615,"affiliation":63,"orcid":63},"Tetsu","Tanabe",{"paper_id":7596,"author_seq":188,"given_name":7617,"surname":7618,"affiliation":63,"orcid":63},"Tadachika","Ozono","Although WordNet is a valuable resource because of its structured semantic networks and extensive vocabulary, its fine-grained sense distinctions can be challenging for second-language learners. To address this issue, we developed a version of WordNet annotated with the Common European Framework of Reference for Languages (CEFR), integrating its semantic networks with language-proficiency levels. 
We automated this process using a large language model to measure the semantic similarity between sense definitions in WordNet and entries in the English Vocabulary Profile Online. To validate our approach, we constructed a large-scale corpus containing both sense and CEFR-level information from the annotated WordNet and used it to develop contextual lexical classifiers. Our experiments demonstrate that models fine-tuned on this corpus perform comparably to those fine-tuned on gold-standard annotations. Furthermore, by combining this corpus with the gold-standard data, we developed a practical classifier that achieves a Macro-F1 score of 0.81. This result provides indirect evidence that the transferred labels are largely consistent with the gold-standard levels. The annotated WordNet, corpus, and classifiers are publicly available to help bridge the gap between natural language processing and language education, thereby facilitating more effective and efficient language learning.",{"paper_id":7621,"title":7622,"year":7,"month":188,"day":63,"doi":7623,"resource_url":7624,"first_page":7625,"last_page":7626,"pdf_url":7627,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7628,"paper_type":860,"authors":7629,"abstract":7636},"lrec2026-main-293","Towards a Gold Standard for Adjectival Hypernymy: Enriching the Open English WordNet with a Hybrid Approach","10.63317\u002F5eaqyq2rwc43","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-293","3662","3671","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.293.pdf","augello-etal-2026-gold",[7630,7633,7634],{"paper_id":7621,"author_seq":247,"given_name":7631,"surname":7632,"affiliation":63,"orcid":63},"Lorenzo","Augello",{"paper_id":7621,"author_seq":232,"given_name":2271,"surname":2272,"affiliation":63,"orcid":63},{"paper_id":7621,"author_seq":218,"given_name":2146,"surname":7635,"affiliation":63,"orcid":63},"Passarotti","Adjectival hypernymy is an 
underexplored lexical-semantic relation essential for Natural Language Processing (NLP) and hierarchical semantic organization of the lexicon. While hypernymy in nouns and verbs has been extensively modeled in resources such as WordNet, adjectives remain largely unstructured due to their gradability and context-dependence. We present a hybrid Large Language Model (LLM)-Human approach towards the creation of a gold-standard dataset for adjectival hypernymy. Our method integrates three LLMs with systematic human evaluation, guided by a specifically developed theoretical framework ensuring consistency and linguistically-based principles, compiling a resource of 3,836 validated adjective hyponym-hypernym pairs. Results demonstrate high precision for consensus predictions (87%), confirming the utility of cross-model agreement as a proxy for semantic validity. This method highlights how LLMs can complement human effort and expertise to support the construction of lexical resources. The resulting dataset aims to enrich the Open English WordNet (OEWN) with explicit adjectival hierarchies and serves as a benchmark for hypernymy detection and lexical entailment evaluation.",{"paper_id":7638,"title":7639,"year":7,"month":188,"day":63,"doi":7640,"resource_url":7641,"first_page":7642,"last_page":7643,"pdf_url":7644,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7645,"paper_type":860,"authors":7646,"abstract":7660},"lrec2026-main-294","PREMOVE in LiLa: Integrating Latin Preverbed Motion Verbs with WordNet and 
VerbNet","10.63317\u002F3ifm66wvmf86","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-294","3672","3683","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.294.pdf","farina-etal-2026-premove",[7647,7649,7650,7653,7655,7658],{"paper_id":7638,"author_seq":247,"given_name":1104,"surname":7648,"affiliation":63,"orcid":63},"Farina",{"paper_id":7638,"author_seq":232,"given_name":2146,"surname":7635,"affiliation":63,"orcid":63},{"paper_id":7638,"author_seq":218,"given_name":7651,"surname":7652,"affiliation":63,"orcid":63},"Francesco","Mambrini",{"paper_id":7638,"author_seq":203,"given_name":5045,"surname":7654,"affiliation":63,"orcid":63},"Pellegrini",{"paper_id":7638,"author_seq":188,"given_name":7656,"surname":7657,"affiliation":63,"orcid":63},"Eleonora","Litta",{"paper_id":7638,"author_seq":172,"given_name":1509,"surname":7659,"affiliation":63,"orcid":63},"Moretti","PREMOVE is a diachronic dataset of Ancient Greek and Latin PREverbed MOtion VErbs, providing manually curated morphological, syntactic, and semantic annotations for almost three thousand verbal occurrences. This paper presents the integration of PREMOVE into the LiLa Knowledge Base of Latin, linking its semantic annotations to WordNet (WN) and VerbNet (VN). We describe the RDF conversion using OntoLex-Lemon and FrAC, enabling explicit modelling of token-level attestations and dataset-level provenance. The resulting linked resource achieves full FAIR compliance and supports complex SPARQL queries, allowing users to explore motion semantics across lexical, textual, and semantic layers. 
Example SPARQL queries demonstrate how researchers can retrieve attested forms for specific WN synsets or VN classes, supporting reproducible linguistic research and cross-resource exploration of motion semantics in ancient languages.",{"paper_id":7662,"title":7663,"year":7,"month":188,"day":63,"doi":7664,"resource_url":7665,"first_page":7666,"last_page":7667,"pdf_url":7668,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7669,"paper_type":860,"authors":7670,"abstract":7680},"lrec2026-main-295","From Incidents to Framing: A Dutch and English Frame-semantic Corpus and Lexicon","10.63317\u002F2hg8e9h7cus9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-295","3684","3696","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.295.pdf","vossen-etal-2026-incidents",[7671,7674,7677],{"paper_id":7662,"author_seq":247,"given_name":7672,"surname":7673,"affiliation":63,"orcid":63},"Piek T.J.M.","Vossen",{"paper_id":7662,"author_seq":232,"given_name":7675,"surname":7676,"affiliation":63,"orcid":63},"Pia","Sommerauer",{"paper_id":7662,"author_seq":218,"given_name":7678,"surname":7679,"affiliation":63,"orcid":63},"Levi","Remijnse","This paper reports on the final results of the Dutch FrameNet project. The project followed a new approach to aggregate an event corpus starting from registered events in Wikidata and collecting text in different languages that refer to these events. The resulting corpus is not only referentially grounded, but it is also grouped by the type of event, e.g. mass shootings, elections, sports events. A subset of the texts has been annotated with FrameNet's frames for all references to the registered events and participants. The result is a unique corpus with comparable texts across languages that make reference to the same and similar events. From the annotations, we derived Dutch and English FrameNet lexicons, as well as reference lexicons. 
These lexicons allow us to infer abstractions from the annotations that also reflect sociocultural differences in framing the same entities and events.",{"paper_id":7682,"title":7683,"year":7,"month":188,"day":63,"doi":7684,"resource_url":7685,"first_page":7686,"last_page":7687,"pdf_url":7688,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7689,"paper_type":860,"authors":7690,"abstract":7696},"lrec2026-main-296","AI Safety Lost in Translation: Evaluating the Effectiveness of English-Italian Cross-Lingual LLM Safety Alignment","10.63317\u002F24nruqbycv2a","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-296","3697","3713","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.296.pdf","wu-etal-2026-ai",[7691,7693],{"paper_id":7682,"author_seq":247,"given_name":7692,"surname":7319,"affiliation":63,"orcid":63},"Alessio",{"paper_id":7682,"author_seq":232,"given_name":7694,"surname":7695,"affiliation":63,"orcid":63},"Martim","Brandao","Large Language Models (LLMs) have been shown to be vulnerable to various issues of bias and safety, for which new safety alignment techniques have been proposed. In this paper, we investigate the degree to which such techniques improve safety in a non-English language, specifically in Italian, both when they have and don’t have access to safety training data in that language. We evaluate standard mitigation techniques and assess cross-lingual safety transfer by comparing English-only versus bilingual Supervised Fine-Tuning (SFT), on several open-source small LLMs: Qwen3, Llama3.2, and Gemma3. Results confirm a significant cross-lingual safety gap, with most models performing worse in Italian. We find that while prompt engineering is generally effective, the impact of SFT is highly inconsistent. English-only SFT occasionally failed to transfer safety improvements into Italian and even deteriorated the performance of some models. 
Furthermore, bilingual SFT repeatedly underperformed other mitigation methods. These findings demonstrate that safety alignment does not always generalize across languages and models, and standard mitigation strategies can lead to unpredictable effects. We thus highlight the critical necessity for language-specific evaluation and dedicated multilingual safety research to ensure AI is developed equitably and safely for a global audience.",{"paper_id":7698,"title":7699,"year":7,"month":188,"day":63,"doi":7700,"resource_url":7701,"first_page":7702,"last_page":7703,"pdf_url":7704,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7705,"paper_type":860,"authors":7706,"abstract":7721},"lrec2026-main-297","Semantic Label Drift in Cross-Cultural Translation","10.63317\u002F5ae9txdv2s3g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-297","3714","3724","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.297.pdf","kabir-etal-2026-semantic",[7707,7710,7712,7715,7718],{"paper_id":7698,"author_seq":247,"given_name":7708,"surname":7709,"affiliation":63,"orcid":63},"Mohsinul","Kabir",{"paper_id":7698,"author_seq":232,"given_name":7711,"surname":2400,"affiliation":63,"orcid":63},"Tasnim",{"paper_id":7698,"author_seq":218,"given_name":7713,"surname":7714,"affiliation":63,"orcid":63},"Md Mezbaur","Rahman",{"paper_id":7698,"author_seq":203,"given_name":7716,"surname":7717,"affiliation":63,"orcid":63},"Polydoros","Giannouris",{"paper_id":7698,"author_seq":188,"given_name":7719,"surname":7720,"affiliation":63,"orcid":63},"Sophia","Ananiadou","Machine Translation (MT) is widely employed to address resource scarcity in low-resource languages by translating data from high-resource languages. While sentiment preservation in translation has long been studied, a critical but underexplored factor is the role of cultural alignment between source and target languages. 
In this paper, we hypothesize that semantic labels drift or are altered during MT due to cultural divergence. Through a series of experiments across culturally sensitive and neutral domains, we establish three key findings: (1) MT systems, including modern Large Language Models (LLMs), induce label drift during translation, particularly in culturally sensitive domains; (2) unlike earlier statistical MT tools, LLMs encode cultural knowledge, and leveraging this knowledge can amplify label drift; and (3) cultural similarity or dissimilarity between source and target languages is a crucial determinant of label preservation. Our findings highlight that neglecting cultural factors in MT not only undermines label fidelity but also risks misinterpretation and cultural conflict in downstream applications. We release our codebase to facilitate future research in cross-cultural translation: https:\u002F\u002Fgithub.com\u002Fmohsinulkabir14\u002Flabel_drift",{"paper_id":7723,"title":7724,"year":7,"month":188,"day":63,"doi":7725,"resource_url":7726,"first_page":7727,"last_page":7728,"pdf_url":7729,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7730,"paper_type":860,"authors":7731,"abstract":7741},"lrec2026-main-298","Chain-of-Thought Reasoning Improves Context-Aware Translation with Large Language Models","10.63317\u002F37kz9rawf9fn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-298","3725","3741","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.298.pdf","ataee-etal-2026-chain",[7732,7735,7738],{"paper_id":7723,"author_seq":247,"given_name":7733,"surname":7734,"affiliation":63,"orcid":63},"Shabnam","Ataee",{"paper_id":7723,"author_seq":232,"given_name":7736,"surname":7737,"affiliation":63,"orcid":63},"Hugo","Huart",{"paper_id":7723,"author_seq":218,"given_name":7739,"surname":7740,"affiliation":63,"orcid":63},"Andrei","Popescu-Belis","This paper assesses the ability of large language models 
(LLMs) to translate texts that include inter-sentential dependencies. We use the English-French DiscEvalMT benchmark (Bawden et al., 2018) with pairs of sentences containing translation challenges for pronominal anaphora and lexical cohesion. We evaluate 12 LLMs from the DeepSeek-R1, GPT, Llama, Mistral and Phi families on two tasks: (1) distinguish a correct translation from a wrong but plausible one; and (2) generate a correct translation. We compare prompts that encourage chain-of-thought reasoning with those that do not. The best models take advantage of reasoning and reach about 90% accuracy on the first task and COMET scores of about 92% on the second task, with GPT-4, GPT-4o and Phi standing out. Moreover, we observe a \"wise get wiser\" effect: the improvements through reasoning are larger for models that already perform well without reasoning.",{"paper_id":7743,"title":7744,"year":7,"month":188,"day":63,"doi":7745,"resource_url":7746,"first_page":7747,"last_page":7748,"pdf_url":7749,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7750,"paper_type":860,"authors":7751,"abstract":7756},"lrec2026-main-299","Adja-French Parallel Corpus: A New Resource for Machine Translation of a West African Under-Resourced Language","10.63317\u002F5bk4g2k7mpmu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-299","3742","3749","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.299.pdf","godeme-etal-2026-adja",[7752,7755],{"paper_id":7743,"author_seq":247,"given_name":7753,"surname":7754,"affiliation":63,"orcid":63},"Josue Frejus","Godeme",{"paper_id":7743,"author_seq":232,"given_name":7286,"surname":7287,"affiliation":63,"orcid":63},"We present the first parallel text corpus for Adja machine translation, an under-resourced Gbe language spoken by approximately 1,000,000 people in Benin and Togo. 
The corpus contains 10,000 French-Adja sentence pairs, providing a foundation for machine translation research. We establish baseline results using fine-tuned NLLB and ByT5 models, achieving a chrF++ of 28 in the French→Adja direction, and up to a chrF++ of 34 in the Adja→French direction. This work represents the first public machine translation resource for Adja. It provides benchmarks for future studies on this under-resourced West African language. The dataset is available at https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FJosueG\u002Ffrench-adja-parallel-corpus.",{"paper_id":7758,"title":7759,"year":7,"month":188,"day":63,"doi":7760,"resource_url":7761,"first_page":7762,"last_page":7763,"pdf_url":7764,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7765,"paper_type":860,"authors":7766,"abstract":7776},"lrec2026-main-300","Goldfish: Monolingual Language Models for 350 Languages","10.63317\u002F5ceec3hhv4d5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-300","3750","3781","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.300.pdf","chang-etal-2026-goldfish",[7767,7769,7771,7774],{"paper_id":7758,"author_seq":247,"given_name":7768,"surname":864,"affiliation":63,"orcid":63},"Tyler A.",{"paper_id":7758,"author_seq":232,"given_name":3860,"surname":7770,"affiliation":63,"orcid":63},"Arnett",{"paper_id":7758,"author_seq":218,"given_name":7772,"surname":7773,"affiliation":63,"orcid":63},"Zhuowen","Tu",{"paper_id":7758,"author_seq":203,"given_name":4797,"surname":7775,"affiliation":63,"orcid":63},"Bergen","For many low-resource languages, the only available language models are large multilingual models trained on many languages simultaneously. Despite state-of-the-art performance on reasoning tasks, we find that these models still struggle with basic grammatical text generation in many languages. First, large multilingual models perform worse than bigrams for many languages (e.g. 
24% of languages in XGLM 4.5B; 43% in BLOOM 7.1B) using FLORES perplexity as an evaluation metric. Second, when we train small monolingual models with only 125M parameters on 1GB or less data for 350 languages, these small models outperform large multilingual models both in perplexity and on a massively multilingual grammaticality benchmark. To facilitate future work on low-resource language modeling, we release Goldfish, a suite of over 1,000 small monolingual language models trained comparably for 350 languages. These models represent the first publicly-available monolingual language models for 215 of the languages included.",{"paper_id":7778,"title":7779,"year":7,"month":188,"day":63,"doi":7780,"resource_url":7781,"first_page":7782,"last_page":7783,"pdf_url":7784,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7785,"paper_type":860,"authors":7786,"abstract":7822},"lrec2026-main-301","Dynaword: From One-shot to Continuously Developed Datasets","10.63317\u002F4x9cdkge22vb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-301","3782","3793","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.301.pdf","enevoldsen-etal-2026-dynaword",[7787,7789,7791,7793,7796,7799,7802,7803,7804,7805,7806,7808,7811,7813,7816,7819,7820,7821],{"paper_id":7778,"author_seq":247,"given_name":2007,"surname":7788,"affiliation":63,"orcid":63},"Enevoldsen",{"paper_id":7778,"author_seq":232,"given_name":7790,"surname":1507,"affiliation":63,"orcid":63},"Kristian 
Nørgaard",{"paper_id":7778,"author_seq":218,"given_name":1380,"surname":7792,"affiliation":63,"orcid":63},"Kostkan",{"paper_id":7778,"author_seq":203,"given_name":7794,"surname":7795,"affiliation":63,"orcid":63},"Balázs","Szabó",{"paper_id":7778,"author_seq":188,"given_name":7797,"surname":7798,"affiliation":63,"orcid":63},"Márton","Kardos",{"paper_id":7778,"author_seq":172,"given_name":7800,"surname":7801,"affiliation":63,"orcid":63},"Kirsten","Vad",{"paper_id":7778,"author_seq":155,"given_name":1915,"surname":7506,"affiliation":63,"orcid":63},{"paper_id":7778,"author_seq":138,"given_name":1622,"surname":1623,"affiliation":63,"orcid":63},{"paper_id":7778,"author_seq":121,"given_name":1619,"surname":1620,"affiliation":63,"orcid":63},{"paper_id":7778,"author_seq":104,"given_name":1606,"surname":1607,"affiliation":63,"orcid":63},{"paper_id":7778,"author_seq":87,"given_name":1512,"surname":7807,"affiliation":63,"orcid":63},"Larsen",{"paper_id":7778,"author_seq":73,"given_name":7809,"surname":7810,"affiliation":63,"orcid":63},"Rob van der","Goot",{"paper_id":7778,"author_seq":55,"given_name":1625,"surname":7812,"affiliation":63,"orcid":63},"Vahlstrup",{"paper_id":7778,"author_seq":38,"given_name":7814,"surname":7815,"affiliation":63,"orcid":63},"Per Møldrup","Dalum",{"paper_id":7778,"author_seq":17,"given_name":7817,"surname":7818,"affiliation":63,"orcid":63},"Desmond","Elliott",{"paper_id":7778,"author_seq":2971,"given_name":1634,"surname":1635,"affiliation":63,"orcid":63},{"paper_id":7778,"author_seq":2974,"given_name":1625,"surname":1626,"affiliation":63,"orcid":63},{"paper_id":7778,"author_seq":857,"given_name":7503,"surname":7504,"affiliation":63,"orcid":63},"Large-scale datasets are foundational for research and development in natural language processing. 
However, current approaches face three key challenges: (1) reliance on ambiguously licensed sources restricting use, sharing, and derivative works; (2) static dataset releases that prevent community contributions and diminish longevity; and (3) quality assurance processes restricted to publishing teams rather than leveraging community expertise. To address these limitations, we introduce two contributions: the Dynaword approach and Danish Dynaword. The Dynaword approach is a framework for creating large-scale, open datasets that can be continuously updated through community collaboration. Danish Dynaword is a concrete implementation that validates this approach and demonstrates its potential. Danish Dynaword contains over five times as many tokens as comparable releases, is exclusively openly licensed, and has received multiple contributions across industry, the public sector and research institutions. The repository includes light-weight tests to ensure data formatting, quality, and documentation, establishing a sustainable framework for ongoing community contributions and dataset evolution.",{"paper_id":7824,"title":7825,"year":7,"month":188,"day":63,"doi":7826,"resource_url":7827,"first_page":7828,"last_page":7829,"pdf_url":7830,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7831,"paper_type":860,"authors":7832,"abstract":7838},"lrec2026-main-302","From Bones to Rocks: A Systematic Evaluation of Specialized Definition Generation for 
Portuguese","10.63317\u002F4w8a83i8b3wy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-302","3794","3804","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.302.pdf","nunes-etal-2026-bones",[7833,7834,7835],{"paper_id":7824,"author_seq":247,"given_name":4030,"surname":4031,"affiliation":63,"orcid":63},{"paper_id":7824,"author_seq":232,"given_name":4020,"surname":4021,"affiliation":63,"orcid":63},{"paper_id":7824,"author_seq":218,"given_name":7836,"surname":7837,"affiliation":63,"orcid":63},"Joel Luís","Carbonera","This work presents a systematic evaluation of Large Language Models (LLMs) for generating specialized definitions in Portuguese, focusing on the medical and geological domains. We introduce a robust benchmark and employ a rigorous, statistically grounded evaluation framework, including 5-fold cross-validation and significance testing, to ensure the reliability and generalizability of our findings. Our comprehensive experiments with various open-source, decoder-only LLMs explore in-context learning (ICL) with diverse prompting strategies, ranging from zero-shot to few-shot and contextual information. The evaluated models include multilingual architectures and one model that underwent continued pretraining specifically for Portuguese, allowing us to assess the impact of language adaptation on definition generation quality. The results indicate that most evaluated models perform effectively in this task, with relatively small performance differences among the top models. Statistical analyses confirmed that these differences are not consistently significant, suggesting that several open LLMs, regardless of their size, multilingual capacity, or language specialization, offer comparable effectiveness for Portuguese definition generation. 
These findings provide valuable insights for selecting and adapting models for specialized NLP tasks in low-resource languages like Portuguese.",{"paper_id":7840,"title":7841,"year":7,"month":188,"day":63,"doi":7842,"resource_url":7843,"first_page":7844,"last_page":7845,"pdf_url":7846,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7847,"paper_type":860,"authors":7848,"abstract":7855},"lrec2026-main-303","Bangla Key2Text: Text Generation from Keywords for a Low Resource Language","10.63317\u002F4wkpwaxktwfn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-303","3805","3822","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.303.pdf","talukder-etal-2026-bangla",[7849,7852],{"paper_id":7840,"author_seq":247,"given_name":7850,"surname":7851,"affiliation":63,"orcid":63},"Tonmoy","Talukder",{"paper_id":7840,"author_seq":232,"given_name":7853,"surname":7854,"affiliation":63,"orcid":63},"G M","Shahariar","This paper introduces Bangla Key2Text, a large-scale dataset of 2.6 million Bangla keyword-text pairs designed for keyword-driven text generation in a low-resource language. The dataset is constructed using a BERT-based keyword extraction pipeline applied to millions of Bangla news texts, transforming raw articles into structured keyword-text pairs suitable for supervised learning. To establish baseline performance on this new benchmark, we fine-tune two sequence-to-sequence models, mT5 and BanglaT5, and evaluate them using multiple automatic metrics and human judgments. Experimental results show that task-specific fine-tuning substantially improves keyword-conditioned text generation in Bangla compared to zero-shot large language models. 
The dataset, trained models, and code are publicly released to support future research in Bangla natural language generation and keyword-to-text generation tasks.",{"paper_id":7857,"title":7858,"year":7,"month":188,"day":63,"doi":7859,"resource_url":7860,"first_page":7861,"last_page":7862,"pdf_url":7863,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7864,"paper_type":860,"authors":7865,"abstract":7868},"lrec2026-main-304","Beyond Lemmas and Syntax: Comparing Human and LLM-Generated Scientific Abstracts","10.63317\u002F4t9vnvcb943b","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-304","3823","3832","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.304.pdf","bagdasarov-etal-2026-beyond",[7866,7867],{"paper_id":7857,"author_seq":247,"given_name":5318,"surname":5319,"affiliation":63,"orcid":63},{"paper_id":7857,"author_seq":232,"given_name":5321,"surname":5322,"affiliation":63,"orcid":63},"In this study, we compare human-written (HWT) and machine-generated (MGT) abstracts of scientific papers, going beyond traditional lexical and syntactic analyses. We use an extensive corpus of publications on computational linguistics submitted to the Association of Computational Linguistics from mid 1950s to 2022. First, we generate abstracts with three state-of-the-art models (GPT-4o, Llama 3.1 and Qwen 2.5), providing the models with full texts of papers, and subsequently we compare these abstracts to those written by humans. We study the overall information content of abstracts, operationalised as surprisal, and the distribution of information in abstracts quantified as local Uniform Information Density (UID), both metrics related to the processing effort. Subsequently, we perform an extrinsic evaluation through topic modelling and clustering applying the BERTopic model. 
Our results show significant differences both in surprisal and UID, suggesting that abstracts generated by Llama are less cognitively demanding and show a more uniform distribution of information. Our topic modelling experiments show greater divergence between humans and LLMs than between LLM pairs. At the same time, Llama abstracts seem to be more semantically similar to those written by humans, standing in line with previous findings suggesting such similarity on lexical and syntactic level.",{"paper_id":7870,"title":7871,"year":7,"month":188,"day":63,"doi":7872,"resource_url":7873,"first_page":7874,"last_page":7875,"pdf_url":7876,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7877,"paper_type":860,"authors":7878,"abstract":7900},"lrec2026-main-305","Systematic Multi-Aspect Evaluation of Time Series-Based Report Generation: The Case of Financial Analysis from Stock Data","10.63317\u002F2u7a679u9rkq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-305","3833","3850","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.305.pdf","fons-etal-2026-systematic",[7879,7881,7883,7886,7888,7891,7894,7897],{"paper_id":7870,"author_seq":247,"given_name":3607,"surname":7880,"affiliation":63,"orcid":63},"Fons",{"paper_id":7870,"author_seq":232,"given_name":2968,"surname":7882,"affiliation":63,"orcid":63},"Kochkina",{"paper_id":7870,"author_seq":218,"given_name":7884,"surname":7885,"affiliation":63,"orcid":63},"Rachneet","Kaur",{"paper_id":7870,"author_seq":203,"given_name":7887,"surname":4137,"affiliation":63,"orcid":63},"Zhen",{"paper_id":7870,"author_seq":188,"given_name":7889,"surname":7890,"affiliation":63,"orcid":63},"Berowne","Hlavaty",{"paper_id":7870,"author_seq":172,"given_name":7892,"surname":7893,"affiliation":63,"orcid":63},"Charese","Smiley",{"paper_id":7870,"author_seq":155,"given_name":7895,"surname":7896,"affiliation":63,"orcid":63},"Svitlana","Vyetrenko",{"paper_id":7870,"author_seq"
:138,"given_name":7898,"surname":7899,"affiliation":63,"orcid":63},"Manuela","Veloso","This paper explores the capability of large language models (LLMs) to generate coherent textual reports from time series data, using financial reports from stock data as the use case. We conduct a comprehensive multi-aspect evaluation across four model families, including linguistic quality, content source attribution, automated metrics, and expert human assessment. We evaluate models using four major stock indices and two synthetic time series to assess generalization. We assess reports based on single and multiple time series data, and experiment with plain text and multi-modal prompting. We examine temporal effects by analyzing report quality as data approaches model knowledge cutoffs and testing synthetic future intervals. Our evaluation shows that LLMs are capable of creating high-quality financial analyst reports, with larger models demonstrating superior performance, however even those require human oversight and have potential for temporal logic errors. 
Our findings reveal model-specific behavioral patterns that enable tailored generation pipelines and inform future research about model pitfalls in time series-to-text generation tasks.",{"paper_id":7902,"title":7903,"year":7,"month":188,"day":63,"doi":7904,"resource_url":7905,"first_page":7906,"last_page":7907,"pdf_url":7908,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7909,"paper_type":860,"authors":7910,"abstract":7919},"lrec2026-main-306","Towards Reliable AI Fairness: Challenges in Steering Features within Bias-Implicated Neurons","10.63317\u002F2iexsnkqn3j6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-306","3851","3860","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.306.pdf","garridomunoz-etal-2026-reliable",[7911,7914,7917],{"paper_id":7902,"author_seq":247,"given_name":7912,"surname":7913,"affiliation":63,"orcid":63},"Ismael","Garrido-Munoz",{"paper_id":7902,"author_seq":232,"given_name":7915,"surname":7916,"affiliation":63,"orcid":63},"Arturo","Montejo-Raez",{"paper_id":7902,"author_seq":218,"given_name":2846,"surname":7918,"affiliation":63,"orcid":63},"Martínez-Santiago","LLMs perpetuate societal biases, such as gender stereotypes, reinforcing harmful norms and posing significant fairness risks in real-world applications. We investigate a fine-grained mitigation technique that moves beyond surface-level fixes. Our approach uses attribution graphs to identify and directly steer bias-implicated features within a Sparse Autoencoder’s (SAE) latent space. This method, known as feature steering, offers a theoretically precise, surgical intervention aimed at correcting bias at its neural source without costly retraining. We critically examine its practical reliability across various contexts. We find that steering effectiveness is highly sensitive to parameter tuning, often requiring unpredictable, context-specific adjustments. 
The intervention’s success exists in narrow \"sweet spots,\" outside of which performance can degrade catastrophically. This demonstrates that while direct intervention on learned features is a powerful analytical tool, significant challenges of brittleness and instability hinder its application as a consistent, broad-scale debiasing solution, necessitating research into more robust control mechanisms.",{"paper_id":7921,"title":7922,"year":7,"month":188,"day":63,"doi":7923,"resource_url":7924,"first_page":7925,"last_page":7926,"pdf_url":7927,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7928,"paper_type":860,"authors":7929,"abstract":7933},"lrec2026-main-307","From Body to Mind: Analyzing Gender Representation in Spanish Generative Language Models","10.63317\u002F5fz5bb6tihu3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-307","3861","3874","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.307.pdf","garridomunoz-etal-2026-body",[7930,7931,7932],{"paper_id":7921,"author_seq":247,"given_name":7912,"surname":7913,"affiliation":63,"orcid":63},{"paper_id":7921,"author_seq":232,"given_name":2846,"surname":7918,"affiliation":63,"orcid":63},{"paper_id":7921,"author_seq":218,"given_name":7915,"surname":7916,"affiliation":63,"orcid":63},"While Large Language Models (LLMs) demonstrate remarkable text generation capabilities, they also risk inheriting and perpetuating harmful societal biases present in their vast training data. This study presents a rigorous, large-scale analysis of gender bias in a diverse set of 20 publicly available Spanish generative LLMs, ranging from 760M to 11B parameters. Our methodology utilizes a comprehensive set of specifically designed sentence templates to elicit adjectival descriptions associated with men and women in neutral contexts. 
We then extract and manually classify these adjectives using the Supersenses lexicosemantic framework, focusing on four key domains: BODY, BEHAVIOR, FEELING, and MIND. Our research uncovers systematic patterns consistent with pervasive cultural stereotypes, echoing findings from earlier masked language models. Women are disproportionately described by physical and emotional attributes, whereas men are more frequently associated with behavioral and cognitive traits. Finally, we investigate the relationship between model size and the intensity of these observed gender biases, offering crucial insights into how scaling affects fairness and equity in non-English models.",{"paper_id":7935,"title":7936,"year":7,"month":188,"day":63,"doi":7937,"resource_url":7938,"first_page":7939,"last_page":7940,"pdf_url":7941,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7942,"paper_type":860,"authors":7943,"abstract":7950},"lrec2026-main-308","Incivility and Rigidity: Evaluating the Risks of Fine-Tuning LLMs for Political Argumentation","10.63317\u002F5g48jjks5n35","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-308","3875","3883","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.308.pdf","churina-etal-2026-incivility",[7944,7947],{"paper_id":7935,"author_seq":247,"given_name":7945,"surname":7946,"affiliation":63,"orcid":63},"Svetlana","Churina",{"paper_id":7935,"author_seq":232,"given_name":7948,"surname":7949,"affiliation":63,"orcid":63},"Kokil","Jaidka","Incivility on platforms such as Twitter (now X) and Reddit complicates the development of AI systems that can support productive, rhetorically sound political argumentation. We present experiments with GPT-3.5 Turbo fine-tuned on two contrasting datasets of political discourse: high-incivility Twitter replies to U.S. Congress and low-incivility posts from Reddit’s r\u002FChangeMyView. 
Our evaluation examines how data composition and prompting strategies affect the rhetorical framing and deliberative quality of model-generated arguments. Results show that Reddit-finetuned models generate safer but rhetorically rigid arguments, while cross-platform fine-tuning amplifies adversarial tone and toxicity. Prompt-based steering reduces overt toxicity (e.g., personal attacks) but cannot fully offset the influence of noisy training data. We introduce a rhetorical evaluation rubric—covering justification, reciprocity, alignment, and authority—and provide implementation guidelines for authoring, moderation, and deliberation-support systems.",{"paper_id":7952,"title":7953,"year":7,"month":188,"day":63,"doi":7954,"resource_url":7955,"first_page":7956,"last_page":7957,"pdf_url":7958,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7959,"paper_type":860,"authors":7960,"abstract":7980},"lrec2026-main-309","EsBBQ and CaBBQ: The Spanish and Catalan Bias Benchmarks for Question Answering","10.63317\u002F2u47873noowf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-309","3884","3907","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.309.pdf","ruizfernndez-etal-2026-esbbq",[7961,7964,7967,7970,7973,7975,7977],{"paper_id":7952,"author_seq":247,"given_name":7962,"surname":7963,"affiliation":63,"orcid":63},"Valle","Ruiz-Fernández",{"paper_id":7952,"author_seq":232,"given_name":7965,"surname":7966,"affiliation":63,"orcid":63},"Mario","Mina",{"paper_id":7952,"author_seq":218,"given_name":7968,"surname":7969,"affiliation":63,"orcid":63},"Júlia","Falcão",{"paper_id":7952,"author_seq":203,"given_name":7971,"surname":7972,"affiliation":63,"orcid":63},"Luis Antonio 
Vasquez","Reina",{"paper_id":7952,"author_seq":188,"given_name":2742,"surname":7974,"affiliation":63,"orcid":63},"Salles",{"paper_id":7952,"author_seq":172,"given_name":1276,"surname":7976,"affiliation":63,"orcid":63},"Gonzalez-Agirre",{"paper_id":7952,"author_seq":155,"given_name":7978,"surname":7979,"affiliation":63,"orcid":63},"Olatz","Perez-de-Viñaspre","Previous literature has largely shown that Large Language Models (LLMs) perpetuate social biases learnt from their pre-training data. Given the notable lack of resources for social bias evaluation in languages other than English, and for social contexts outside of the United States, this paper introduces the Spanish and the Catalan Bias Benchmarks for Question Answering (EsBBQ and CaBBQ). Based on the original BBQ, these two parallel datasets are designed to assess social bias across 10 categories using a multiple-choice QA setting, now adapted to the Spanish and Catalan languages and to the social context of Spain. We report evaluation results on different LLMs, factoring in model family, size and variant. 
Our results show that models tend to fail to choose the correct answer in ambiguous scenarios, and that high QA accuracy often correlates with greater reliance on social biases.",{"paper_id":7982,"title":7983,"year":7,"month":188,"day":63,"doi":7984,"resource_url":7985,"first_page":7986,"last_page":7987,"pdf_url":7988,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7989,"paper_type":860,"authors":7990,"abstract":7998},"lrec2026-main-310","ToxSyn-PT: A Synthetic Fine-Grained Dataset of Minority-Targeted Toxic Language in Portuguese","10.63317\u002F3ne367tx8hvj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-310","3908","3920","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.310.pdf","brito-etal-2026-toxsyn",[7991,7992,7993,7994,7997],{"paper_id":7982,"author_seq":247,"given_name":5389,"surname":5390,"affiliation":63,"orcid":63},{"paper_id":7982,"author_seq":232,"given_name":5392,"surname":5393,"affiliation":63,"orcid":63},{"paper_id":7982,"author_seq":218,"given_name":5386,"surname":5387,"affiliation":63,"orcid":63},{"paper_id":7982,"author_seq":203,"given_name":7995,"surname":7996,"affiliation":63,"orcid":63},"Diogo","Fernandes",{"paper_id":7982,"author_seq":188,"given_name":5401,"surname":5402,"affiliation":63,"orcid":63},"The development of robust hate speech detection systems remains limited by the lack of large-scale, fine-grained training data, especially for languages beyond English. Existing corpora typically rely on simplistic toxic and non-toxic labels, and the few that capture hate directed at specific minority groups lack the positive counterexamples required to distinguish genuine hate from mere discussion. In this work, we introduce ToxSyn-PT, the first Portuguese large-scale corpus explicitly designed for multi-label hate speech detection across nine protected minority groups, including the non-toxic counterexamples absent in all other public datasets. 
Generated via a controllable four-stage pipeline, ToxSyn contains discourse-type annotations to capture rhetorical strategies of toxic\u002Fnon-toxic language, such as sarcasm, dehumanization, and cultural appreciation. Our experiments reveal a catastrophic, mutual generalization failure compared to existing datasets from social-media domains: models trained on social media struggle to generalize to minority-specific contexts, and vice-versa. This finding indicates they are distinct tasks and exposes summary metrics like Macro F1 can be unreliable indicators of true model behavior, as they completely mask model failure. We publicly release ToxSyn on HuggingFace to support reproducible research on synthetic data generation and benchmark progress in hate-speech detection for low- and mid-resource languages.",{"paper_id":8000,"title":8001,"year":7,"month":188,"day":63,"doi":8002,"resource_url":8003,"first_page":8004,"last_page":8005,"pdf_url":8006,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8007,"paper_type":860,"authors":8008,"abstract":8022},"lrec2026-main-311","AnswerCarefully: Creating a Dataset for LLM Safety in 
Japanese","10.63317\u002F289xxmwcworn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-311","3921","3931","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.311.pdf","suzuki-etal-2026-answercarefully",[8009,8012,8015,8016,8018,8019],{"paper_id":8000,"author_seq":247,"given_name":8010,"surname":8011,"affiliation":63,"orcid":63},"Hisami","Suzuki",{"paper_id":8000,"author_seq":232,"given_name":8013,"surname":8014,"affiliation":63,"orcid":63},"Satoru","Katsumata",{"paper_id":8000,"author_seq":218,"given_name":2796,"surname":3504,"affiliation":63,"orcid":63},{"paper_id":8000,"author_seq":203,"given_name":8017,"surname":7451,"affiliation":63,"orcid":63},"Tetsuro",{"paper_id":8000,"author_seq":188,"given_name":3506,"surname":3507,"affiliation":63,"orcid":63},{"paper_id":8000,"author_seq":172,"given_name":8020,"surname":8021,"affiliation":63,"orcid":63},"Satoshi","Sekine","In this paper we present JLLMSafety, a dataset for promoting the safety of Japanese LLM outputs. The dataset consists of 1,800 pairs of questions and reference answers, where the questions require special attention in answering. It covers a wide range of risk categories established in prior English-language datasets, but the data samples are original in that they are manually curated to reflect the socio-cultural context of LLM usage in Japan. We show that using this dataset for instruction to fine-tune a Japanese LLM led to improved output safety without compromising the utility of general responses. We also report the results of a safety evaluation of 12 Japanese LLMs using this dataset as a benchmark. Finally, we discuss the significance of creating regionally specific datasets of LLM safety, and describe the meta tags we added to the dataset to facilitate the creation of similar datasets in different languages and regions. 
The dataset is made available publicly for the sole purpose of improving LLM safety without any other usage restrictions.",{"paper_id":8024,"title":8025,"year":7,"month":188,"day":63,"doi":8026,"resource_url":8027,"first_page":8028,"last_page":8029,"pdf_url":8030,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8031,"paper_type":860,"authors":8032,"abstract":8043},"lrec2026-main-312","A Dutch Benchmark to Assess Social Bias in LLMs within a Hiring Decision Setting","10.63317\u002F3gdjhdj7otjm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-312","3932","3943","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.312.pdf","burema-etal-2026-dutch",[8033,8036,8039,8042],{"paper_id":8024,"author_seq":247,"given_name":8034,"surname":8035,"affiliation":63,"orcid":63},"Renate","Burema",{"paper_id":8024,"author_seq":232,"given_name":8037,"surname":8038,"affiliation":63,"orcid":63},"Anne","Schuth",{"paper_id":8024,"author_seq":218,"given_name":8040,"surname":8041,"affiliation":63,"orcid":63},"Christopher","Spelt",{"paper_id":8024,"author_seq":203,"given_name":5518,"surname":2395,"affiliation":63,"orcid":63},"In this paper, we present a Dutch benchmark to assess whether large language models (LLMs) exhibit social biases in hiring decisions, focusing on gender and country of origin. We experiment with two approaches: explicit descriptions of the applicants’ demographics and using first names as proxies. We evaluate both monolingual and multilingual LLMs and find that all tested models, gpt-4o-mini, claude-3.5-haiku, Geitje-7B-Ultra and EuroLLM-9B-Instruct, exhibit some degree of social bias in their decisions. Furthermore, all models tested are sensitive to the manner in which the prompts are written. We make our benchmark publicly available under an EUPL-1.2 license. 
The benchmark is available at https:\u002F\u002Fgithub.com\u002FMinBZK\u002Fllm-benchmark\u002Ftree\u002Fmain\u002Fbenchmarks\u002Fsocial-bias.",{"paper_id":8045,"title":8046,"year":7,"month":188,"day":63,"doi":8047,"resource_url":8048,"first_page":8049,"last_page":8050,"pdf_url":8051,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8052,"paper_type":860,"authors":8053,"abstract":8073},"lrec2026-main-313","PBBQ: A Persian Bias Benchmark Dataset Curated with Human-AI Collaboration for Large Language Models","10.63317\u002F2ee2xn7cdmrr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-313","3944","3960","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.313.pdf","farsi-etal-2026-pbbq",[8054,8057,8058,8061,8064,8067,8070],{"paper_id":8045,"author_seq":247,"given_name":8055,"surname":8056,"affiliation":63,"orcid":63},"Farhan","Farsi",{"paper_id":8045,"author_seq":232,"given_name":6568,"surname":6569,"affiliation":63,"orcid":63},{"paper_id":8045,"author_seq":218,"given_name":8059,"surname":8060,"affiliation":63,"orcid":63},"Fatemeh","Valeh",{"paper_id":8045,"author_seq":203,"given_name":8062,"surname":8063,"affiliation":63,"orcid":63},"Parsa","Ghofrani",{"paper_id":8045,"author_seq":188,"given_name":8065,"surname":8066,"affiliation":63,"orcid":63},"Alireza","Pakniat",{"paper_id":8045,"author_seq":172,"given_name":8068,"surname":8069,"affiliation":63,"orcid":63},"Seyedkian","Kashfipour",{"paper_id":8045,"author_seq":155,"given_name":8071,"surname":8072,"affiliation":63,"orcid":63},"Amir H.","Payberah","With the increasing adoption of large language models (LLMs), ensuring their alignment with social norms has become a critical concern. While prior research has examined bias detection in various languages, there remains a significant gap in resources addressing social biases within Persian cultural contexts. 
In this work, we introduce PBBQ, a comprehensive benchmark dataset designed to evaluate social biases in Persian LLMs. Our benchmark, which encompasses 16 cultural categories, was developed through anonymous questionnaires completed by 250 diverse individuals across multiple demographics, in close collaboration with social science experts to ensure its validity. The resulting PBBQ dataset contains over 37,000 carefully curated questions, providing a foundation for the evaluation and mitigation of bias in Persian language models. We benchmark several open-source LLMs, a closed-source model, and Persian-specific fine-tuned models on PBBQ. Our findings reveal that current LLMs exhibit significant social biases across Persian culture. Additionally, by comparing model outputs to human responses, we observe that LLMs often replicate human bias patterns, highlighting the complex interplay between learned representations and cultural stereotypes. Our PBBQ dataset is also publicly available for use in future work. 
Content warning: This paper contains unsafe content.",{"paper_id":8075,"title":8076,"year":7,"month":188,"day":63,"doi":8077,"resource_url":8078,"first_page":8079,"last_page":8080,"pdf_url":8081,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":8082,"bibkey":8083,"paper_type":860,"authors":8084,"abstract":8091},"lrec2026-main-314","Contextualizing Toxicity: An Annotation Framework for Unveiling Pragmatics in Conversations of Online Discussion Forums","10.63317\u002F2iicz26v9ckq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-314","3961","3974","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.314.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.314_OptionalSupplementaryMaterial.zip","fu-etal-2026-contextualizing",[8085,8088],{"paper_id":8075,"author_seq":247,"given_name":8086,"surname":8087,"affiliation":63,"orcid":63},"Yingxue","Fu",{"paper_id":8075,"author_seq":232,"given_name":8089,"surname":8090,"affiliation":63,"orcid":63},"Anais","Ollagnier","The role of context has attracted increasing attention in research on toxicity detection. Interpreting toxic language remains a complex and multifaceted challenge, shaped by numerous linguistic, contextual, and social factors. However, current approaches often define \"context\" narrowly, focusing primarily on surface lexical cues such as hate lexicons, profanity markers, or sentiment polarity. These features, while useful, are insufficient to capture the interactional dynamics, user behaviors, and intentionality that shape such phenomena. To address this gap, this paper introduces a novel and systematic annotation framework, grounded in Speech Act Theory (Austin, 1962), aimed at deciphering the illocutionary and perlocutionary dimensions of conversation, which are unexplored in existing studies. 
We apply this framework to a new dataset of complete Reddit conversation threads, sampled to include discussions that turn toxic (124 conversations, 1990 messages). We evaluate the performance of GPT models (GPT-3, GPT-4, and GPT-5) on this challenging annotation task, providing insights into how large language models capture pragmatic and contextual dimensions of online toxicity.",{"paper_id":8093,"title":8094,"year":7,"month":188,"day":63,"doi":8095,"resource_url":8096,"first_page":8097,"last_page":8098,"pdf_url":8099,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8100,"paper_type":860,"authors":8101,"abstract":8116},"lrec2026-main-315","How Far Can Bias Go? Tracing Bias from Pre-Training Data to Alignment","10.63317\u002F4zeoky6waeng","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-315","3975","3995","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.315.pdf","thaler-etal-2026-how",[8102,8105,8108,8110,8113],{"paper_id":8093,"author_seq":247,"given_name":8103,"surname":8104,"affiliation":63,"orcid":63},"Marion","Thaler",{"paper_id":8093,"author_seq":232,"given_name":8106,"surname":8107,"affiliation":63,"orcid":63},"Abdullatif","Köksal",{"paper_id":8093,"author_seq":218,"given_name":5684,"surname":8109,"affiliation":63,"orcid":63},"Leidinger",{"paper_id":8093,"author_seq":203,"given_name":8111,"surname":8112,"affiliation":63,"orcid":63},"Anna","Korhonen",{"paper_id":8093,"author_seq":188,"given_name":8114,"surname":8115,"affiliation":63,"orcid":63},"Hinrich","Schütze","As LLMs are increasingly integrated into user-facing applications, addressing biases that perpetuate societal inequalities is crucial. While much work has gone into measuring and mitigating biases, fewer studies have investigated their origins. Therefore, this study examines the propagation of representational gender-occupation bias from pre-training data to LLM generations. 
Using zero-shot prompting and token co-occurrence analyses, we explore how biases in the pre-training data influence model generations. Our findings reveal that representational biases present in the pre-training data are amplified in the model generations, regardless of hyperparameters and prompting type. By comparing gender representation in the pre-training data with real-world distributions, our research highlights discrepancies between the data and the model, underscoring the importance of further work in mitigating bias at the data level.",{"paper_id":8118,"title":8119,"year":7,"month":188,"day":63,"doi":8120,"resource_url":8121,"first_page":8122,"last_page":8123,"pdf_url":8124,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8125,"paper_type":860,"authors":8126,"abstract":8135},"lrec2026-main-316","Robust Bias Evaluation with FilBBQ: A Filipino Bias Benchmark for Question-Answering Language Models","10.63317\u002F2evzxdoyq8i6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-316","3996","4008","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.316.pdf","gamboa-etal-2026-robust",[8127,8130,8133],{"paper_id":8118,"author_seq":247,"given_name":8128,"surname":8129,"affiliation":63,"orcid":63},"Lance Calvin Lim","Gamboa",{"paper_id":8118,"author_seq":232,"given_name":8131,"surname":8132,"affiliation":63,"orcid":63},"Yue","Feng",{"paper_id":8118,"author_seq":218,"given_name":8134,"surname":1359,"affiliation":63,"orcid":63},"Mark","With natural language generation becoming a popular use case for language models, the Bias Benchmark for Question-Answering (BBQ) has grown to be an important benchmark format for evaluating stereotypical associations exhibited by generative models. 
We expand the linguistic scope of BBQ and construct FilBBQ through a four-phase development process consisting of template categorization, culturally aware translation, new template construction, and prompt generation. These processes resulted in a bias test composed of more than 10,000 prompts which assess whether models demonstrate sexist and homophobic prejudices relevant to the Philippine context. We then apply FilBBQ on models trained in Filipino but do so with a robust evaluation protocol that improves upon the reliability and accuracy of previous BBQ implementations. Specifically, we account for models’ response instability by obtaining prompt responses across multiple seeds and averaging the bias scores calculated from these distinctly seeded runs. Our results confirm both the variability of bias scores across different seeds and the presence of sexist and homophobic biases relating to emotion, domesticity, stereotyped queer interests, and polygamy. FilBBQ will be available via GitHub.",{"paper_id":8137,"title":8138,"year":7,"month":188,"day":63,"doi":8139,"resource_url":8140,"first_page":8141,"last_page":8142,"pdf_url":8143,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8144,"paper_type":860,"authors":8145,"abstract":8152},"lrec2026-main-317","Uncovering Hidden Violent Tendencies in LLMs: A Demographic Analysis via Behavioral Vignettes","10.63317\u002F3b7ht2jn59d3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-317","4009","4018","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.317.pdf","myers-etal-2026-uncovering",[8146,8149],{"paper_id":8137,"author_seq":247,"given_name":8147,"surname":8148,"affiliation":63,"orcid":63},"Quintin","Myers",{"paper_id":8137,"author_seq":232,"given_name":8150,"surname":8151,"affiliation":63,"orcid":63},"Yanjun","Gao","Large language models (LLMs) are increasingly proposed for detecting and responding to violent content online, yet their 
ability to reason about morally ambiguous, real-world scenarios remains underexamined. We present the first study to evaluate LLMs using a validated social science instrument designed to measure human response to everyday conflict, namely the Violent Behavior Vignette Questionnaire (VBVQ). To assess potential bias, we introduce persona-based prompting that varies race, age, and geographic identity within the United States. Six LLMs developed across different geopolitical and organizational contexts are evaluated under a unified zero-shot setting. Our study reveals two key findings: (1) LLMs’ surface-level text generation often diverges from their internal preference for violent responses; (2) their violent tendencies vary across demographics, frequently contradicting established findings in criminology, social science, and psychology.",{"paper_id":8154,"title":8155,"year":7,"month":188,"day":63,"doi":8156,"resource_url":8157,"first_page":8158,"last_page":8159,"pdf_url":8160,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8161,"paper_type":860,"authors":8162,"abstract":8177},"lrec2026-main-318","Exploring Social Bias in Slovenia: The EEC-SL 
Dataset","10.63317\u002F2pdt2x4ci6e5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-318","4019","4030","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.318.pdf","caporusso-etal-2026-exploring",[8163,8166,8169,8172,8173,8174],{"paper_id":8154,"author_seq":247,"given_name":8164,"surname":8165,"affiliation":63,"orcid":63},"Jaya","Caporusso",{"paper_id":8154,"author_seq":232,"given_name":8167,"surname":8168,"affiliation":63,"orcid":63},"Damar","Hoogland",{"paper_id":8154,"author_seq":218,"given_name":8170,"surname":8171,"affiliation":63,"orcid":63},"Boshko","Koloski",{"paper_id":8154,"author_seq":203,"given_name":6059,"surname":6060,"affiliation":63,"orcid":63},{"paper_id":8154,"author_seq":188,"given_name":4307,"surname":4308,"affiliation":63,"orcid":63},{"paper_id":8154,"author_seq":172,"given_name":8175,"surname":8176,"affiliation":63,"orcid":63},"Spela","Vintar","We introduce the EEC-SL dataset, an adaptation of the Equity Evaluation Corpus from English to Slovenian. Based on 11 sentence templates, the dataset contains 8,640 sentences, including pairs of minimally-distant sentences, varying with regard to one of two variables: gender (female or male), and ethnicity (Slovenian or not-Slovenian). In order to validate our selection of personal names, we create a localised version of the Implicit Association Test for ethnic bias, in which participants show a significant implicit bias favouring Slovenian over non-Slovenian names. We use the dataset to evaluate social bias in three computational language models (large language models and an encoder-only transformer) to perform sentiment analysis—specifically, valence. We analyse the results in terms of differences in sentiment between minimally-distant groups of sentences and inferential tests. 
We found limited evidence for social bias with regard to ethnicity, and no evidence for gender bias, in any of the employed models.",{"paper_id":8179,"title":8180,"year":7,"month":188,"day":63,"doi":8181,"resource_url":8182,"first_page":8183,"last_page":8184,"pdf_url":8185,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8186,"paper_type":860,"authors":8187,"abstract":8194},"lrec2026-main-319","The MISOMEM-Val Dataset for Identifying Human Values in Misogynistic Memes","10.63317\u002F2x5tsyz2ejnm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-319","4031","4047","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.319.pdf","ailneni-etal-2026-misomem",[8188,8191],{"paper_id":8179,"author_seq":247,"given_name":8189,"surname":8190,"affiliation":63,"orcid":63},"Rakshitha Rao","Ailneni",{"paper_id":8179,"author_seq":232,"given_name":8192,"surname":8193,"affiliation":63,"orcid":63},"Sanda","Harabagiu","We present MISOMEM-Val, the first dataset that systematically annotates human values across Frames of Misogyny (FoMs) derived from misogynistic memes. Extending the Taxonomy of Misogyny, each frame is linked to the Human Value Hierarchy (HVH) with annotated support and ignore stances and accompanying rationales. In total, 1089 frames were annotated, comprising 3,051 support and 7,007 ignore value instances. We introduce Hierarchical Value Discovery with Human Feedback (HVD-HF), an LLM-assisted annotation framework combining Chain-of-Thought prompting and self-consistency verification to ensure transparency and quality. 
The annotation analysis reveals systematic asymmetries—Conservation and Self-Enhancement are frequently supported, while Self-Transcendence is often ignored, thus highlighting how misogynistic memes distort core human values.",{"paper_id":8196,"title":8197,"year":7,"month":188,"day":63,"doi":8198,"resource_url":8199,"first_page":8200,"last_page":8201,"pdf_url":8202,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8203,"paper_type":860,"authors":8204,"abstract":8213},"lrec2026-main-320","ConGA: Guidelines for Contextual Gender Annotation. a Framework for Annotating Gender in Machine Translation","10.63317\u002F5nai3xa6oon7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-320","4048","4057","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.320.pdf","rescigno-etal-2026-conga",[8205,8208,8210],{"paper_id":8196,"author_seq":247,"given_name":8206,"surname":8207,"affiliation":63,"orcid":63},"Argentina Anna","Rescigno",{"paper_id":8196,"author_seq":232,"given_name":2161,"surname":8209,"affiliation":63,"orcid":63},"Vanmassenhove",{"paper_id":8196,"author_seq":218,"given_name":8211,"surname":8212,"affiliation":63,"orcid":63},"Johanna","Monti","Handling gender across languages remains a persistent challenge for Machine Translation (MT) and Large Language Models (LLMs), especially when translating from gender-neutral languages into morphologically gendered ones, such as English to Italian. English largely omits grammatical gender, while Italian requires explicit agreement across multiple grammatical categories. This asymmetry often leads MT systems to default to masculine forms, reinforcing bias and reducing translation accuracy. To address this issue, we present the Contextual Gender Annotation (ConGA) framework, a linguistically grounded set of guidelines for word-level gender annotation. 
The scheme distinguishes between semantic gender in English through three tags, Masculine (M), Feminine (F), and Ambiguous (A), and grammatical gender realisation in Italian (Masculine (M), Feminine (F)), combined with entity-level identifiers for cross-sentence tracking. We apply ConGA to the gENder-IT dataset, creating a gold-standard resource for evaluating gender bias in translation. Our results reveal systematic masculine overuse and inconsistent feminine realisation, highlighting persistent limitations of current MT systems. By combining fine-grained linguistic annotation with quantitative evaluation, this work offers both a methodology and a benchmark for building more gender-aware and multilingual NLP systems.",{"paper_id":8215,"title":8216,"year":7,"month":188,"day":63,"doi":8217,"resource_url":8218,"first_page":8219,"last_page":8220,"pdf_url":8221,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8222,"paper_type":860,"authors":8223,"abstract":8235},"lrec2026-main-321","University Speaking for Everyone: Assessing Changes in Italian Higher Education Statutes toward Gender-Inclusive Language","10.63317\u002F3nhtc8w4hd4i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-321","4058","4069","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.321.pdf","salto-etal-2026-university",[8224,8227,8230,8233],{"paper_id":8215,"author_seq":247,"given_name":8225,"surname":8226,"affiliation":63,"orcid":63},"Sebastiano Vecellio","Salto",{"paper_id":8215,"author_seq":232,"given_name":8228,"surname":8229,"affiliation":63,"orcid":63},"Camilla","Casula",{"paper_id":8215,"author_seq":218,"given_name":8231,"surname":8232,"affiliation":63,"orcid":63},"Alessio Palmero","Aprosio",{"paper_id":8215,"author_seq":203,"given_name":2548,"surname":8234,"affiliation":63,"orcid":63},"Tonelli","We examine the editorial evolution of Italian university statutes toward inclusive language, analyzing how institutions 
represent female and non-binary identities and how these representations affect administrative communication. To this end, we compile and annotate a corpus of university statutes, tracing the changes that have led some universities to move from the use of the generic masculine to more inclusive formulations. We also experiment with tools for the automatic detection of non-inclusive language in institutional communication and methods for the automatic rewriting of texts into inclusive language.",{"paper_id":8237,"title":8238,"year":7,"month":188,"day":63,"doi":8239,"resource_url":8240,"first_page":8241,"last_page":8242,"pdf_url":8243,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":8244,"bibkey":8245,"paper_type":860,"authors":8246,"abstract":8264},"lrec2026-main-322","Breaking the Benchmark: Revealing LLM Bias via Minimal Contextual Augmentation","10.63317\u002F5a6nbh2tnoeb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-322","4070","4092","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.322.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.322_OptionalSupplementaryMaterial.zip","miandoab-etal-2026-breaking",[8247,8250,8253,8256,8258,8261],{"paper_id":8237,"author_seq":247,"given_name":8248,"surname":8249,"affiliation":63,"orcid":63},"Kaveh Eskandari","Miandoab",{"paper_id":8237,"author_seq":232,"given_name":8251,"surname":8252,"affiliation":63,"orcid":63},"Mahammed","Kamruzzaman",{"paper_id":8237,"author_seq":218,"given_name":8254,"surname":8255,"affiliation":63,"orcid":63},"Arshia","Gharooni",{"paper_id":8237,"author_seq":203,"given_name":8257,"surname":5173,"affiliation":63,"orcid":63},"Gene Louis",{"paper_id":8237,"author_seq":188,"given_name":8259,"surname":8260,"affiliation":63,"orcid":63},"Vasanth","Sarathy",{"paper_id":8237,"author_seq":172,"given_name":8262,"surname":8263,"affiliation":63,"orcid":63},"Ninareh","Mehrabi","Large 
Language Models have been shown to demonstrate stereotypical biases in their representations and behavior due to the discriminative nature of the data that they have been trained on. Despite significant progress in the development of methods and models that refrain from using stereotypical information in their decision-making, recent work has shown that approaches used for bias alignment are brittle. In this work, we introduce a novel and general augmentation framework that involves three plug-and-play steps and is applicable to a number of fairness evaluation benchmarks. Through application of augmentation to a fairness evaluation dataset (Bias Benchmark for Question Answering (BBQ)), we find that Large Language Models (LLMs), including state-of-the-art open and closed weight models, are susceptible to perturbations to their inputs, showcasing a higher likelihood to behave stereotypically. Furthermore, we find that such models are more likely to have biased behavior in cases where the target demographic belongs to a community less studied by the literature, underlining the need to expand the fairness and safety research to include more diverse communities.",{"paper_id":8266,"title":8267,"year":7,"month":188,"day":63,"doi":8268,"resource_url":8269,"first_page":8270,"last_page":8271,"pdf_url":8272,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8273,"paper_type":860,"authors":8274,"abstract":8284},"lrec2026-main-323","TryggLLM: A Benchmark for Evaluating LLM Safety in 
Norwegian","10.63317\u002F2rhfg2a92wim","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-323","4093","4102","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.323.pdf","touileb-etal-2026-tryggllm",[8275,8278,8281],{"paper_id":8266,"author_seq":247,"given_name":8276,"surname":8277,"affiliation":63,"orcid":63},"Samia","Touileb",{"paper_id":8266,"author_seq":232,"given_name":8279,"surname":8280,"affiliation":63,"orcid":63},"Truls","Pedersen",{"paper_id":8266,"author_seq":218,"given_name":8282,"surname":8283,"affiliation":63,"orcid":63},"Isabell Stinessen","Haugen","We introduce TryggLLM, the first safety benchmark dataset for Norwegian. The dataset is intended for benchmarking different types of safety issues that can occur when using Norwegian generative language models. We have manually translated two English benchmark datasets, while modifying the content to be aligned with the Norwegian context. The benchmark dataset is composed of two sub-parts: i) prompts annotated by four native speakers, in both the written variants of Norwegian Bokmål (BM) and Nynorsk (NN), such that each native speaker wrote in their preferred variants (two BM and two NN); ii) prompts and target responses, where each of them has a BM and a NN version. We provide detailed descriptions of the data creation process. We also present a thorough manual evaluation of benchmarking existing open Norwegian LLMs using TryggLLM. 
Our results show that between 18% and 48% of the generated responses are unsafe, across all tested models.",{"paper_id":8286,"title":8287,"year":7,"month":188,"day":63,"doi":8288,"resource_url":8289,"first_page":8290,"last_page":8291,"pdf_url":8292,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8293,"paper_type":860,"authors":8294,"abstract":8299},"lrec2026-main-324","KOCOH: Korean Context-Dependent Hate Speech Dataset","10.63317\u002F5pnu2jn6awun","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-324","4103","4114","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.324.pdf","park-etal-2026-kocoh",[8295,8297],{"paper_id":8286,"author_seq":247,"given_name":8296,"surname":6876,"affiliation":63,"orcid":63},"Eunah",{"paper_id":8286,"author_seq":232,"given_name":8298,"surname":1913,"affiliation":63,"orcid":63},"Sanghoun","We introduce the KOrean COntext-dependent Hate speech dataset (KOCOH) to evaluate large language models’ ability to detect context-dependent hate speech in Korean. KOCOH consists of 3,000 context-comment pairs collected from Korean online communities (Dcinside, FMkorea) with detailed annotations, including labels for hate speech and hate target groups. We assess the context-dependent hate speech detection capabilities of both humans and 11 state-of-the-art large language models, including GPT-5, Claude Sonnet 4, and Gemini 2.5 Flash. Our results show that humans outperform language models, with GPT-5 achieving the highest performance among the evaluated models. While humans demonstrate balanced recall and specificity, language models generally show significantly higher specificity compared to recall. The performance of both humans and models is affected by factors such as Honam-related vocabulary and sentiment polarity. This study contributes resources to Korean hate speech research and empirically demonstrates the performance gap between humans and language models. 
Through both quantitative and qualitative analyses, we explore the similarities and differences between humans and language models, offering insights for future developments in language models and AI ethics research. KOCOH is available at https:\u002F\u002Fgithub.com\u002Feparkatgithub\u002FKOCOH.",{"paper_id":8301,"title":8302,"year":7,"month":188,"day":63,"doi":8303,"resource_url":8304,"first_page":8305,"last_page":8306,"pdf_url":8307,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8308,"paper_type":860,"authors":8309,"abstract":8329},"lrec2026-main-325","Towards Fair Speech Recognition: Mitigating Demographic Bias in End-to-End ASR Systems","10.63317\u002F5e5f93jkhzt5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-325","4115","4125","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.325.pdf","jahan-etal-2026-fair",[8310,8313,8315,8318,8321,8323,8326],{"paper_id":8301,"author_seq":247,"given_name":8311,"surname":8312,"affiliation":63,"orcid":63},"Maliha","Jahan",{"paper_id":8301,"author_seq":232,"given_name":1316,"surname":8314,"affiliation":63,"orcid":63},"Thebaud",{"paper_id":8301,"author_seq":218,"given_name":8316,"surname":8317,"affiliation":63,"orcid":63},"Zsuzsanna","Fagyal",{"paper_id":8301,"author_seq":203,"given_name":8319,"surname":8320,"affiliation":63,"orcid":63},"Jesus","Villalba",{"paper_id":8301,"author_seq":188,"given_name":8134,"surname":8322,"affiliation":63,"orcid":63},"Hasegawa-Johnson",{"paper_id":8301,"author_seq":172,"given_name":8324,"surname":8325,"affiliation":63,"orcid":63},"Laureano Moro","Velazquez",{"paper_id":8301,"author_seq":155,"given_name":8327,"surname":8328,"affiliation":63,"orcid":63},"Najim","Dehak","Demographic bias in the performance of speech and language technology has been an active area of recent research. A lot of studies have shown the existence of demographic biases in Automatic Speech Recognition (ASR) systems. 
In this work, we propose a novel model-agnostic and demographic label-agnostic approach, called DARe, to mitigate any existing bias in an ASR system towards certain speaker groups. We built a debiasing module that goes between the feature extractor of an ASR and the rest of that ASR. The module includes content-group disentanglers to separate content and group, a demographic classifier, and adversarial reweighting. To eliminate the need for demographic labels, we generated pseudo-group labels by extracting speaker embeddings and clustering them. We worked with three ASR systems–Wav2Vec2 base, SEW tiny, and Whisper small. We used the FAI dataset, which contains naturalistic conversations with speakers who self-identify their demographic attributes. We used Word Error Rate (WER) as a metric of ASR performance and a Poisson regression-based approach to evaluate the racial fairness of the models. We compared the racial bias of the models before and after applying our proposed approach and observed a significant improvement in fairness.",{"paper_id":8331,"title":8332,"year":7,"month":188,"day":63,"doi":8333,"resource_url":8334,"first_page":8335,"last_page":8336,"pdf_url":8337,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8338,"paper_type":860,"authors":8339,"abstract":8345},"lrec2026-main-326","RuBIN: A Russian Benchmark for Evaluating LLMs with Cultural Insights","10.63317\u002F3um9hpbgpxph","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-326","4126","4140","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.326.pdf","lazukova-etal-2026-rubin",[8340,8343],{"paper_id":8331,"author_seq":247,"given_name":8341,"surname":8342,"affiliation":63,"orcid":63},"Polina","Lazukova",{"paper_id":8331,"author_seq":232,"given_name":6855,"surname":8344,"affiliation":63,"orcid":63},"Piontkovskaya","Understanding culture-specific knowledge is essential for developing language models that perform reliably 
across diverse social and linguistic settings. This work explores both methodological and practical aspects of evaluating culture-specific knowledge in large language models. Special attention is given to the multiple-choice question answering format as a tool for identifying and measuring such knowledge. An analysis of existing benchmarks reveals several limitations, including insufficient cultural sensitivity and the presence of uninformative distractor options. In response, the RuBIN benchmark is introduced – a dataset consisting of questions based on phrases that are widely known in Russian culture. The paper describes the process of selecting and filtering culturally relevant topics, generating plausible incorrect answers using LLMs, and annotating and testing the benchmark for cross-linguistic robustness. RuBIN helps identify current LLMs’ weaknesses in transferring cultural knowledge and can serve as a tool for further adapting these models to diverse linguistic and cultural contexts.",{"paper_id":8347,"title":8348,"year":7,"month":188,"day":63,"doi":8349,"resource_url":8350,"first_page":8351,"last_page":8352,"pdf_url":8353,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8354,"paper_type":860,"authors":8355,"abstract":8359},"lrec2026-main-327","Evaluating Phonetically Weighted and Unweighted Distance Measures in Dialectometry","10.63317\u002F38ndhg759wui","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-327","4141","4151","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.327.pdf","lameli-2026-evaluating",[8356],{"paper_id":8347,"author_seq":247,"given_name":8357,"surname":8358,"affiliation":63,"orcid":63},"Alfred","Lameli","This paper compares phonetically weighted and unweighted string distance measures in dialectometry, examining how explicit phonetic modeling affects the quantitative representation of linguistic similarity. 
Using narrow IPA transcriptions from the German REDE corpus, we evaluate nine measures–Levenshtein distance, bigram and trigram overlap, cosine distance, Jaro-Winkler, Jaccard similarity, the Herrgen-Schmidt measure, and the Relative Identity Value–through correlational analysis, distributional comparison, stabilization testing, and multidimensional scaling. The phonetically weighted Herrgen-Schmidt measure consistently achieves the most balanced distance dispersion, earliest stabilization, and highest linguistic plausibility. Unweighted edit-based measures reproduce the same topological structure in compressed form; distributional and overlap-based metrics introduce systematic scale distortions through exaggeration or compression. These findings establish explicit phonetic weighting as a principled and analytically efficient extension of standard dialectometric procedures. Explicit phonetic weighting enhances resolution and interpretive precision without altering the underlying relational geometry of dialect classifications.",{"paper_id":8361,"title":8362,"year":7,"month":188,"day":63,"doi":8363,"resource_url":8364,"first_page":8365,"last_page":8366,"pdf_url":8367,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8368,"paper_type":860,"authors":8369,"abstract":8382},"lrec2026-main-328","Piecing Together Cross-Document Coreference Resolution Datasets: Systematic Dataset Analysis and Unification","10.63317\u002F3pt7r7gj2dq3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-328","4152","4172","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.328.pdf","zhukova-etal-2026-piecing",[8370,8373,8376,8379],{"paper_id":8361,"author_seq":247,"given_name":8371,"surname":8372,"affiliation":63,"orcid":63},"Anastasia","Zhukova",{"paper_id":8361,"author_seq":232,"given_name":8374,"surname":8375,"affiliation":63,"orcid":63},"Terry 
Lima","Ruas",{"paper_id":8361,"author_seq":218,"given_name":8377,"surname":8378,"affiliation":63,"orcid":63},"Jan Philip","Wahle",{"paper_id":8361,"author_seq":203,"given_name":8380,"surname":8381,"affiliation":63,"orcid":63},"Bela","Gipp","Work in Natural Language Understanding increasingly relies on the ability to identify and track entities and events across large, heterogeneous text collections. This task, known as cross-document coreference resolution (CDCR), has a wide range of downstream applications, including multi-document summarization, information retrieval, and knowledge base population. Research in this area remains fragmented due to heterogeneous dataset formats, varying annotation standards, and the predominance of the CDCR definition as the event coreference resolution (ECR). To address these challenges, we introduce uCDCR, a unified dataset that consolidates diverse publicly available English CDCR corpora across various domains into a consistent format, which we analyze with standardized metrics and evaluation protocols. uCDCR incorporates both entity and event coreference, corrects known inconsistencies, and enriches datasets with missing attributes to facilitate reproducible research. We establish a cohesive framework for fair, interpretable, and cross-dataset analysis in CDCR and compare the datasets on their lexical properties, e.g., lexical composition of the annotated mentions, lexical diversity and ambiguity metrics, discuss the annotation rules and principles that lead to high lexical diversity, and examine how these metrics influence performance on the same-head-lemma baseline. Our dataset analysis shows that ECB+, the state-of-the-art benchmark for CDCR, has one of the lowest lexical diversities, and its CDCR complexity, measured by the same-head-lemma baseline, lies in the middle among all uCDCR datasets. 
Moreover, comparing document and mention distributions between ECB+ and uCDCR shows that using all uCDCR datasets for model training and evaluation will improve the generalizability of CDCR models. Finally, the almost identical performance on the same-head-lemma baseline, separately applied to events and entities, shows that resolving both types is a complex task and should not be steered toward ECR alone. The uCDCR dataset is available at https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FAnZhu\u002FuCDCR, and the code for parsing, analyzing, and scoring the dataset is available at https:\u002F\u002Fgithub.com\u002Fanastasia-zhukova\u002FuCDCR.",{"paper_id":8384,"title":8385,"year":7,"month":188,"day":63,"doi":8386,"resource_url":8387,"first_page":8388,"last_page":8389,"pdf_url":8390,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8391,"paper_type":860,"authors":8392,"abstract":8397},"lrec2026-main-329","Spotlights and Blindspots: Evaluating Machine-Generated Text Detection","10.63317\u002F3mzmq2zqni35","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-329","4173","4187","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.329.pdf","stowe-etal-2026-spotlights",[8393,8395],{"paper_id":8384,"author_seq":247,"given_name":4481,"surname":8394,"affiliation":63,"orcid":63},"Stowe",{"paper_id":8384,"author_seq":232,"given_name":8396,"surname":4530,"affiliation":63,"orcid":63},"Kailash","With the rise of generative language models, machine-generated text detection has become a critical challenge. A wide variety of models is available, but inconsistent datasets, evaluation metrics, and assessment strategies obscure comparisons of model effectiveness. To address this, we evaluate 15 different detection models from six distinct systems, as well as seven trained models, across seven English-language textual test sets and three creative human-written datasets. 
We provide an empirical analysis of model performance, the influence of training and evaluation data, and the impact of key metrics. We find that no single system excels in all areas and nearly all are effective for certain tasks, and the representation of model performance is critically linked to dataset and metric choices. We find high variance in model ranks based on datasets and metrics, and overall poor performance on novel human-written texts in high-risk domains. Across datasets and metrics, we find that methodological choices that are often assumed or overlooked are essential for clearly and accurately reflecting model performance.",{"paper_id":8399,"title":8400,"year":7,"month":188,"day":63,"doi":8401,"resource_url":8402,"first_page":8403,"last_page":8404,"pdf_url":8405,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8406,"paper_type":860,"authors":8407,"abstract":8414},"lrec2026-main-330","JAPAS: A Benchmark and Neural Approach for Japanese Patent Support Relation Extraction","10.63317\u002F4yxeqpyo7gna","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-330","4188","4198","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.330.pdf","chousa-etal-2026-japas",[8408,8411],{"paper_id":8399,"author_seq":247,"given_name":8409,"surname":8410,"affiliation":63,"orcid":63},"Katsuki","Chousa",{"paper_id":8399,"author_seq":232,"given_name":8412,"surname":8413,"affiliation":63,"orcid":63},"Ryosuke","Sugiura","Efficient analysis of patent literature is crucial for technological development and protecting intellectual property. A key task is verifying the “support requirement,” which mandates that the detailed description must fully describe the claimed invention. This requirement is fundamental to a patent’s validity. Manual verification is a labor-intensive process that demands technical and legal expertise, making automation highly desirable. 
However, research on this task has been hampered by two key challenges: (1) the absence of a public benchmark, and (2) the reliance of prior work on lexical matching, which fails to capture semantic equivalence. To address these issues, we introduce JAPAS, the first public benchmark for this task, comprising over 2,000 instances manually annotated for Japanese patents. Each instance is labeled with a claim span, a supporting description paragraph, a relation type, and the annotator’s confidence level. Using this benchmark, we also establish modern baselines that capture semantic similarity, such as embeddings and LLMs. Our experiments show that a fine-tuned Qwen3-14B model achieves an F1 score of 0.50, outperforming the conventional lexical-based baseline. This result, which demonstrates that the task is feasible yet challenging, highlights the utility of JAPAS as a research foundation and provides a performance target for future work.",{"paper_id":8416,"title":8417,"year":7,"month":188,"day":63,"doi":8418,"resource_url":8419,"first_page":8420,"last_page":8421,"pdf_url":8422,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8423,"paper_type":860,"authors":8424,"abstract":8427},"lrec2026-main-331","A Teacher-Student Approach to Creating Verified Synthetic Clarification and Correction Dialogues for TableQA Tasks","10.63317\u002F2qropbkccqtp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-331","4199","4212","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.331.pdf","poelitz-etal-2026-teacher",[8425,8426],{"paper_id":8416,"author_seq":247,"given_name":3643,"surname":6551,"affiliation":63,"orcid":63},{"paper_id":8416,"author_seq":232,"given_name":6539,"surname":6540,"affiliation":63,"orcid":63},"Real dialogues with AI assistants for solving table questions-answering tasks often follow dynamic, unpredictable paths due to imperfect information provided by the user or in the data, which must 
be caught and handled. Developing datasets which capture such user-AI interactions is difficult and time-consuming. In this work, we develop a novel framework for synthetically generating controlled, multi-turn conversations between a user and AI assistant for the task of table-based question answering (TableQA), which can be generated from an existing dataset with fully specified TableQA examples for any target domain. Each conversation aims to solve a table-based reasoning question through collaborative effort, modeling one of two real-world scenarios: (1) an AI-initiated clarification, or (2) a user-initiated correction. Critically, we employ a strong teacher LLM to verify our synthetic conversations by functional correctness, ensuring high quality. Finally, we demonstrate synthetic datasets generated from TableQA tasks as benchmarks of frontier LLMs. We find that even larger models struggle to effectively issue clarification questions and accurately integrate user feedback for corrections, demonstrating important areas for future research.",{"paper_id":8429,"title":8430,"year":7,"month":188,"day":63,"doi":8431,"resource_url":8432,"first_page":8433,"last_page":8434,"pdf_url":8435,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8436,"paper_type":860,"authors":8437,"abstract":8443},"lrec2026-main-332","Persona-Aware Evaluation of Cognitive Bias in LLMs: From Benchmark to Applied 
Decision-Making","10.63317\u002F2dvjjaywrket","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-332","4213","4225","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.332.pdf","yoshikawa-etal-2026-persona",[8438,8439,8440],{"paper_id":8429,"author_seq":247,"given_name":6181,"surname":6182,"affiliation":63,"orcid":63},{"paper_id":8429,"author_seq":232,"given_name":6172,"surname":6173,"affiliation":63,"orcid":63},{"paper_id":8429,"author_seq":218,"given_name":8441,"surname":8442,"affiliation":63,"orcid":63},"Takato","Yamazaki","We present a persona-aware evaluation suite that couples a 12-category cognitive-bias benchmark with 100 applied financial framing tasks to assess how large language models (LLMs) respond under systematically varied persona conditions. Using a factorized set of 162 personas spanning gender, age, political orientation, income, and education, we analyze how persona conditioning modulates bias-consistent responding across ten instruction-tuned models. On applied tasks, persona conditioning reduces framing reversals on average and slightly increases decision confidence, with substantial variation across model families and scales. Correlation analyses further reveal that benchmark bias tendencies—particularly availability, social proof, and framing—predict applied framing sensitivity, suggesting that standardized bias scores can serve as indicators of real-world decision variability. This work provides a unified framework for linking cognitive-bias evaluation with persona-conditioned decision behavior in LLMs. 
(All data and prompts will be released after acceptance to preserve anonymity.)",{"paper_id":8445,"title":8446,"year":7,"month":188,"day":63,"doi":8447,"resource_url":8448,"first_page":8449,"last_page":8450,"pdf_url":8451,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8452,"paper_type":860,"authors":8453,"abstract":8463},"lrec2026-main-333","ArtistMus: A Globally Diverse, Artist-Centric Benchmark for Retrieval-Augmented Music Question Answering","10.63317\u002F5crq45yka6ru","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-333","4226","4238","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.333.pdf","kwon-etal-2026-artistmus",[8454,8457,8460],{"paper_id":8445,"author_seq":247,"given_name":8455,"surname":8456,"affiliation":63,"orcid":63},"Daeyong","Kwon",{"paper_id":8445,"author_seq":232,"given_name":8458,"surname":8459,"affiliation":63,"orcid":63},"SeungHeon","Doh",{"paper_id":8445,"author_seq":218,"given_name":8461,"surname":8462,"affiliation":63,"orcid":63},"Juhan","Nam","Recent advances in Large Language Models (LLMs) have transformed open-domain question answering, yet their effectiveness in music-related reasoning remains limited due to sparse music knowledge in pretraining data. While music information retrieval and computational musicology have explored structured and multimodal understanding, few resources support factual and contextual music question answering (MQA) grounded in artist metadata or historical context. We introduce MusWikiDB, a vector database of 3.2M passages from 144K music-related Wikipedia pages, and ArtistMus, a benchmark of 1,000 questions on 500 diverse artists with metadata such as genre, debut year, and topic. These resources enable systematic evaluation of retrieval augmented generation (RAG) for MQA. 
Experiments show that RAG markedly improves factual accuracy—open-source models gain up to +56.8 percentage points (pp; Qwen3 8B: 35.0→91.8), approaching proprietary performance. RAG-style fine-tuning further boosts both factual recall and contextual reasoning, yielding strong improvements on both in-domain and out-of-domain benchmarks. MusWikiDB also yields +6 pp higher accuracy and 67% faster retrieval than the general Wikipedia corpus. We release MusWikiDB and ArtistMus to advance research in music information retrieval and domain-specific QA, establishing a foundation for retrieval augmented reasoning in culturally rich domains such as music.",{"paper_id":8465,"title":8466,"year":7,"month":188,"day":63,"doi":8467,"resource_url":8468,"first_page":8469,"last_page":8470,"pdf_url":8471,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8472,"paper_type":860,"authors":8473,"abstract":8480},"lrec2026-main-334","MATA: Mindful Assessment of the Telugu Abilities of Large Language Models","10.63317\u002F2qyza2xt6xac","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-334","4239","4256","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.334.pdf","kranti-etal-2026-mata",[8474,8477],{"paper_id":8465,"author_seq":247,"given_name":8475,"surname":8476,"affiliation":63,"orcid":63},"Chalamalasetti","Kranti",{"paper_id":8465,"author_seq":232,"given_name":8478,"surname":8479,"affiliation":63,"orcid":63},"Sowmya","Vajjala","In this paper, we introduce MATA, a novel evaluation dataset to assess the ability of Large Language Models (LLMs) in Telugu language, comprising 729 carefully curated multiple-choice and open-ended questions that span diverse linguistic dimensions. We evaluate 11 open-weight and closed-source LLMs on our dataset and present a fine-grained analysis of their performance. 
Further, we empirically show how LLMs rely on superficial heuristics such as answer position and distractor patterns for multiple-choice questions. Finally, we also compare LLM-as-a-judge evaluation with human evaluation for open-ended questions assess its reliability in a low-resource language. We argue that such fine-grained evaluation is essential for understanding model limitations and can inform the development of more linguistically capable LLMs, while also serving as a foundation for future research in Telugu NLP. Our dataset is available at:https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTeluguLLMResearch\u002FMATA",{"paper_id":8482,"title":8483,"year":7,"month":188,"day":63,"doi":8484,"resource_url":8485,"first_page":8486,"last_page":8487,"pdf_url":8488,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8489,"paper_type":860,"authors":8490,"abstract":8497},"lrec2026-main-335","Estonian Native Large Language Model Benchmark","10.63317\u002F5kocg97rooga","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-335","4257","4267","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.335.pdf","lillepalu-etal-2026-estonian",[8491,8494],{"paper_id":8482,"author_seq":247,"given_name":8492,"surname":8493,"affiliation":63,"orcid":63},"Helena Grete","Lillepalu",{"paper_id":8482,"author_seq":232,"given_name":8495,"surname":8496,"affiliation":63,"orcid":63},"Tanel","Alumäe","The availability of LLM benchmarks for the Estonian language is limited, and a comprehensive evaluation comparing the performance of different LLMs on Estonian tasks has yet to be conducted. We introduce a new benchmark for evaluating LLMs in Estonian, based on seven diverse datasets. These datasets assess general and domain-specific knowledge, understanding of Estonian grammar and vocabulary, summarization abilities, contextual comprehension, and more. 
The datasets are all generated from native Estonian sources without using machine translation. We compare the performance of base models, instruction-tuned open-source models, and commercial models. Our evaluation includes 6 base models and 26 instruction-tuned models. To assess the results, we employ both human evaluation and LLM-as-a-judge methods. Human evaluation scores showed moderate to high correlation with benchmark evaluations, depending on the dataset. Claude 3.7 Sonnet, used as an LLM judge, demonstrated strong alignment with human ratings, indicating that top-performing LLMs can effectively support the evaluation of Estonian-language models.",{"paper_id":8499,"title":8500,"year":7,"month":188,"day":63,"doi":8501,"resource_url":8502,"first_page":8503,"last_page":8504,"pdf_url":8505,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8506,"paper_type":860,"authors":8507,"abstract":8512},"lrec2026-main-336","Indirect Question Answering in English, German and Bavarian: A Challenging Task for High- and Low-Resource Languages Alike","10.63317\u002F3qtihanapruj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-336","4268","4289","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.336.pdf","winkler-etal-2026-indirect",[8508,8510,8511],{"paper_id":8499,"author_seq":247,"given_name":3821,"surname":8509,"affiliation":63,"orcid":63},"Winkler",{"paper_id":8499,"author_seq":232,"given_name":2810,"surname":3170,"affiliation":63,"orcid":63},{"paper_id":8499,"author_seq":218,"given_name":3175,"surname":3176,"affiliation":63,"orcid":63},"Indirectness is a common feature of daily communication, yet is underexplored in NLP research for both low-resource as well as high-resource languages. Indirect Question Answering (IQA) aims at classifying the polarity of indirect answers. 
In this paper, we present two multilingual corpora for IQA of varying quality that both cover English, Standard German and Bavarian, a German dialect without standard orthography: InQA+, a small high-quality evaluation dataset with hand-annotated labels, and GenIQA, a larger training dataset, that contains artificial data generated by GPT-4o-mini. We find that IQA is a pragmatically hard task that comes with various challenges, based on several experiment variations with multilingual transformer models (mBERT, XLM-R and mDeBERTa). We suggest and employ recommendations to tackle these challenges. Our results reveal low performance, even for English, and severe overfitting. We analyse various factors that influence these results, including label ambiguity, label set and dataset size. We find that the IQA performance is poor in high- (English, German) and low-resource languages (Bavarian) and that it is beneficial to have a large amount of training data. Further, GPT-4o-mini does not possess enough pragmatic understanding to generate high-quality IQA data in any of our tested languages.",{"paper_id":8514,"title":8515,"year":7,"month":188,"day":63,"doi":8516,"resource_url":8517,"first_page":8518,"last_page":8519,"pdf_url":8520,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8521,"paper_type":860,"authors":8522,"abstract":8533},"lrec2026-main-337","Benchmarking Large Language Models for Chinese and Japanese IMEs: Phonetic-to-Character Generation and Textual Error 
Correction","10.63317\u002F42jiimjriyga","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-337","4290","4311","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.337.pdf","zou-etal-2026-benchmarking",[8523,8526,8528,8531],{"paper_id":8514,"author_seq":247,"given_name":8524,"surname":8525,"affiliation":63,"orcid":63},"Yuchun","Zou",{"paper_id":8514,"author_seq":232,"given_name":8527,"surname":1359,"affiliation":63,"orcid":63},"Tedd",{"paper_id":8514,"author_seq":218,"given_name":8529,"surname":8530,"affiliation":63,"orcid":63},"Xiaodi","Fan",{"paper_id":8514,"author_seq":203,"given_name":8532,"surname":3446,"affiliation":63,"orcid":63},"Jun","Efficient text entry for complex writing systems like Chinese and Japanese necessitates the use of Input Method Editors (IMEs). While Large Language Models (LLMs) are emerging as powerful, context-aware language resources for this task, we present a comprehensive benchmark and evaluation methodology to assess the viability of LLMs for next-generation IMEs. We conduct a comparative analysis of a diverse set of LLMs against established baseline methods on two core tasks: phonetic-to-character generation (using Pinyin and Romaji) and textual error correction. Our experiments demonstrate that top-tier LLMs achieve superior accuracy by leveraging deep contextual understanding, significantly outperforming traditional systems in ambiguity resolution and the correction of complex errors. However, our analysis also reveals a crucial trade-off between accuracy and computational efficiency across different models. 
The datasets, evaluation scripts, and results from this study serve as a vital public resource for future research, providing a robust baseline for developing and selecting models that balance performance with the low-latency demands of real-world text input.",{"paper_id":8535,"title":8536,"year":7,"month":188,"day":63,"doi":8537,"resource_url":8538,"first_page":8539,"last_page":8540,"pdf_url":8541,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":8542,"bibkey":8543,"paper_type":860,"authors":8544,"abstract":8551},"lrec2026-main-338","DaLA: Danish Linguistic Acceptability Evaluation Guided by Real World Errors","10.63317\u002F4kcbotaa3zgo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-338","4312","4326","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.338.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.338_OptionalSupplementaryMaterial.zip","barmina-etal-2026-dala",[8545,8546,8549,8550],{"paper_id":8535,"author_seq":247,"given_name":1619,"surname":1620,"affiliation":63,"orcid":63},{"paper_id":8535,"author_seq":232,"given_name":8547,"surname":8548,"affiliation":63,"orcid":63},"Nathalie Carmen Hau","Norman",{"paper_id":8535,"author_seq":218,"given_name":1625,"surname":1626,"affiliation":63,"orcid":63},{"paper_id":8535,"author_seq":203,"given_name":1634,"surname":1635,"affiliation":63,"orcid":63},"We present an enhanced benchmark for evaluating linguistic acceptability in Danish. We first analyze the most common errors found in written Danish. Based on this analysis, we introduce a set of fourteen corruption functions that generate incorrect sentences by systematically introducing errors into existing correct Danish sentences. To ensure the accuracy of these corruptions, we assess their validity using both manual and automatic methods. 
The results are then used as a benchmark for evaluating Large Language Models on a linguistic acceptability judgement task. Our findings demonstrate that this extension is both broader and more comprehensive than the current state of the art. By incorporating a greater variety of corruption types, our benchmark provides a more rigorous assessment of linguistic acceptability, increasing task difficulty, as evidenced by the lower performance of LLMs on our benchmark compared to existing ones. Our results also suggest that our benchmark has a higher discriminatory power which allows to better distinguish well-performing models from low-performing ones.",{"paper_id":8553,"title":8554,"year":7,"month":188,"day":63,"doi":8555,"resource_url":8556,"first_page":8557,"last_page":8558,"pdf_url":8559,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8560,"paper_type":860,"authors":8561,"abstract":8573},"lrec2026-main-339","KCIF: Knowledge-Conditioned Instruction Following","10.63317\u002F2vmfhqzwvdkw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-339","4327","4345","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.339.pdf","murthy-etal-2026-kcif",[8562,8565,8568,8570],{"paper_id":8553,"author_seq":247,"given_name":8563,"surname":8564,"affiliation":63,"orcid":63},"Rudra","Murthy",{"paper_id":8553,"author_seq":232,"given_name":8566,"surname":8567,"affiliation":63,"orcid":63},"Praveen","Venkateswaran",{"paper_id":8553,"author_seq":218,"given_name":8569,"surname":2247,"affiliation":63,"orcid":63},"Prince",{"paper_id":8553,"author_seq":203,"given_name":8571,"surname":8572,"affiliation":63,"orcid":63},"Danish","Contractor","LLM evaluation benchmarks have traditionally separated the testing of knowledge\u002Freasoning capabilities from instruction following. 
In this work, we study the interaction between knowledge and instruction following, and observe that LLMs struggle to follow simple answer modifying instructions, and are also distracted by instructions that should have no bearing on the original knowledge task answer. We leverage existing multiple-choice answer based knowledge benchmarks and apply a set of simple instructions which include manipulating text (eg.: change case), numeric quantities (eg.: increase value, change formatting), operate on lists (eg.: sort answer candidates) and distractor instructions (eg.: change case of numeric answers). We evaluate models at varying parameter sizes (1B-405B) from different model families and find that, surprisingly, all models report a significant drop in performance on such simple task compositions. While large-sized and frontier models report performance drops of 40-50%, in small and medium sized models the drop is severe (sometimes exceeding 80%). Our results highlight a limitation in the traditional separation of knowledge\u002Freasoning and instruction following, and suggest that joint-study of these capabilities are important. 
We release our benchmark dataset, evaluation framework code, and results for future work.",{"paper_id":8575,"title":8576,"year":7,"month":188,"day":63,"doi":8577,"resource_url":8578,"first_page":8579,"last_page":8580,"pdf_url":8581,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8582,"paper_type":860,"authors":8583,"abstract":8592},"lrec2026-main-340","GAIN: A Benchmark for Goal-Aligned Decision-Making of Large Language Models under Imperfect Norms","10.63317\u002F2wrdx2f8mez2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-340","4346","4357","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.340.pdf","kawarada-etal-2026-gain",[8584,8587,8589],{"paper_id":8575,"author_seq":247,"given_name":8585,"surname":8586,"affiliation":63,"orcid":63},"Masayuki","Kawarada",{"paper_id":8575,"author_seq":232,"given_name":8588,"surname":4450,"affiliation":63,"orcid":63},"Kodai",{"paper_id":8575,"author_seq":218,"given_name":8590,"surname":8591,"affiliation":63,"orcid":63},"Soichiro","Murakami","We introduce GAIN(Goal-Aligned Decision-Making under Imperfect Norms), a benchmark designed to evaluate how large language models (LLMs) balance adherence to norms against business goals. Existing benchmarks typically focus on abstract scenarios rather than real-world business applications. Furthermore, they provide limited insights into the factors influencing LLM decision-making. This restricts their ability to measure models’ adaptability to complex, real-world norm-goal conflicts. In GAIN, models receive a goal, a specific situation, a norm, and additional contextual pressures. These pressure, explicitly designed to encourage potential norm deviations, are a unique feature that differentiates GAIN from other benchmarks, enabling a systematic evaluation of the factors influencing decision-making. 
We define five types of pressures: Goal Alignment, Risk Aversion, Emotional\u002FEthical Appeal, Social\u002FAuthoritative Influence, and Personal Incentive. The benchmark comprises 1,200 scenarios across four domains: hiring, customer support, advertising and finance. Our experiments show that advanced LLMs frequently mirror human decision-making patterns. However, when Personal Incentive pressure is present, they diverge significantly, showing a strong tendency to adhere to norms rather than deviate from them.",{"paper_id":8594,"title":8595,"year":7,"month":188,"day":63,"doi":8596,"resource_url":8597,"first_page":8598,"last_page":8599,"pdf_url":8600,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8601,"paper_type":860,"authors":8602,"abstract":8614},"lrec2026-main-341","Can LLMs Evaluate What They Cannot Annotate? Revisiting LLM Reliability in Hate Speech Detection","10.63317\u002F22n5hekovvrz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-341","4358","4370","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.341.pdf","piot-etal-2026-can",[8603,8606,8608,8611],{"paper_id":8594,"author_seq":247,"given_name":8604,"surname":8605,"affiliation":63,"orcid":63},"Paloma","Piot",{"paper_id":8594,"author_seq":232,"given_name":1061,"surname":8607,"affiliation":63,"orcid":63},"Otero",{"paper_id":8594,"author_seq":218,"given_name":8609,"surname":8610,"affiliation":63,"orcid":63},"Patricia","Martin-Rodilla",{"paper_id":8594,"author_seq":203,"given_name":8612,"surname":8613,"affiliation":63,"orcid":63},"Javier","Parapar","Hate speech spreads widely online and harms both individuals and communities, making automatic detection essential for large-scale moderation. However, accurately detecting hate speech remains a difficult task. Part of the challenge lies in subjectivity: what one person flags as hate speech, another may see as benign. 
Traditional annotation agreement metrics, such as Cohen’s k, oversimplify this disagreement, treating it as an error rather than meaningful diversity. Meanwhile, Large Language Models (LLMs) promise scalable annotation, but prior studies demonstrate that they cannot fully replace human judgement, especially in subjective tasks. In this work, we reexamine LLM reliability using a subjectivity-aware framework, cross-Replication Reliability (xRR), revealing that even under fairer lens, LLMs still diverge from humans. Yet this limitation opens an opportunity: we find that LLM-generated annotations can reliably reflect performance trends across classification models, correlating with human evaluations. We test this by examining whether LLM-generated annotations preserve the relative ordering of model performance derived from human evaluation (i.e. whether models ranked as more reliable by human annotators preserve the same order when evaluated with LLM-generated labels). Our results show that, although LLMs differ from humans at the instance level, they reproduce similar ranking and classification patterns, suggesting their potential as proxy evaluators. 
While not a substitute for human annotators, they might serve as a scalable proxy for evaluation in subjective NLP tasks.",{"paper_id":8616,"title":8617,"year":7,"month":188,"day":63,"doi":8618,"resource_url":8619,"first_page":8620,"last_page":8621,"pdf_url":8622,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8623,"paper_type":860,"authors":8624,"abstract":8642},"lrec2026-main-342","PersianMedQA: Evaluating Large Language Models on a Persian-English Bilingual Medical Question Answering Benchmark","10.63317\u002F3yixio7ngbkh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-342","4371","4386","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.342.pdf","kalahroodi-etal-2026-persianmedqa",[8625,8628,8631,8634,8636,8639],{"paper_id":8616,"author_seq":247,"given_name":8626,"surname":8627,"affiliation":63,"orcid":63},"Mohammad Javad Ranjbar","Kalahroodi",{"paper_id":8616,"author_seq":232,"given_name":8629,"surname":8630,"affiliation":63,"orcid":63},"Amirhossein","Sheikholselami",{"paper_id":8616,"author_seq":218,"given_name":8632,"surname":8633,"affiliation":63,"orcid":63},"Sepehr Karimi","Arpanahi",{"paper_id":8616,"author_seq":203,"given_name":8635,"surname":8627,"affiliation":63,"orcid":63},"Sepideh Ranjbar",{"paper_id":8616,"author_seq":188,"given_name":8637,"surname":8638,"affiliation":63,"orcid":63},"Heshaam","Faili",{"paper_id":8616,"author_seq":172,"given_name":8640,"surname":8641,"affiliation":63,"orcid":63},"Azadeh","Shakery","Large Language Models (LLMs) have achieved remarkable performance on a wide range of Natural Language Processing (NLP) benchmarks, often surpassing human-level accuracy. However, their reliability in high-stakes domains such as medicine, particularly in low-resource languages, remains underexplored. 
In this work, we introduce PersianMedQA, a large-scale dataset of 20,785 expert-validated multiple-choice Persian medical questions from 14 years of Iranian national medical exams, spanning 23 medical specialties and designed to evaluate LLMs in both Persian and English. We benchmark 41 state-of-the-art models, including general-purpose, Persian, and medical LLMs, in zero-shot and chain-of-thought (CoT) settings. Our results show that closed-weight general models (e.g., GPT-4.1) consistently outperform all other categories, achieving 83.09% accuracy in Persian and 80.7% in English, while Persian LLMs such as Dorna underperform significantly (e.g., 34.9% in Persian), often struggling with both instruction-following and domain reasoning. We also analyze the impact of translation, showing that while English performance is generally higher, 3-10% of questions can only be answered correctly in Persian due to cultural and clinical contextual cues that are lost in translation. Finally, we demonstrate that model size alone is insufficient for robust performance without strong domain or language adaptation. PersianMedQA provides a foundation for evaluating bilingual and culturally grounded medical reasoning in LLMs. 
The dataset, along with a bilingual medical dictionary, is publicly available at: https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FMohammadJRanjbar\u002FPersianMedQA.",{"paper_id":8644,"title":8645,"year":7,"month":188,"day":63,"doi":8646,"resource_url":8647,"first_page":8648,"last_page":8649,"pdf_url":8650,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":8651,"bibkey":8652,"paper_type":860,"authors":8653,"abstract":8661},"lrec2026-main-343","HatePrototypes: Interpretable and Transferable Representations for Implicit and Explicit Hate Speech Detection","10.63317\u002F3opu4zq9p6pc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-343","4387","4399","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.343.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.343_OptionalSupplementaryMaterial.zip","proskurina-etal-2026-hateprototypes",[8654,8656,8659],{"paper_id":8644,"author_seq":247,"given_name":6855,"surname":8655,"affiliation":63,"orcid":63},"Proskurina",{"paper_id":8644,"author_seq":232,"given_name":8657,"surname":8658,"affiliation":63,"orcid":63},"Marc-Antoine","Carpentier",{"paper_id":8644,"author_seq":218,"given_name":1179,"surname":8660,"affiliation":63,"orcid":63},"Velcin","Optimization of offensive content moderation models for different types of hateful messages is typically achieved through continued pre-training or fine-tuning on new hate speech benchmarks. However, existing benchmarks mainly address explicit hate toward protected groups and often overlook implicit or indirect hate, such as demeaning comparisons, calls for exclusion or violence, and subtle discriminatory language that still causes harm. While explicit hate can often be captured through surface features, implicit hate requires deeper, full-model semantic processing. 
In this work, we question the need for repeated fine-tuning and analyze the role of HatePrototypes, class-level vector representations derived from language models optimized for hate speech detection and safety moderation. We find that these prototypes, built from as few as 50 examples per class, enable cross-task transfer between explicit and implicit hate, with interchangeable prototypes across benchmarks. Moreover, we show that parameter-free early exiting with prototypes is effective for both hate types. We release the code, prototype resources, and evaluation scripts to support future research on efficient and transferable hate speech detection.",{"paper_id":8663,"title":8664,"year":7,"month":188,"day":63,"doi":8665,"resource_url":8666,"first_page":8667,"last_page":8668,"pdf_url":8669,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8670,"paper_type":860,"authors":8671,"abstract":8676},"lrec2026-main-344","Investigating Memorization in Language Models Trained via Knowledge Distillation","10.63317\u002F39ec72wwr6ux","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-344","4400","4413","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.344.pdf","mcking-etal-2026-investigating",[8672,8674],{"paper_id":8663,"author_seq":247,"given_name":3750,"surname":8673,"affiliation":63,"orcid":63},"Mäcking",{"paper_id":8663,"author_seq":232,"given_name":5662,"surname":8675,"affiliation":63,"orcid":63},"Regneri","We analyze how knowledge distillation influences memorization in language models. Although knowledge distillation is a widely used technique to train smaller, more efficient models, its effect on memorization is not well understood, despite the importance of memorization for model utility and privacy. 
We demonstrate that when the student and teacher models are trained on different datasets, knowledge distillation substantially reduces memorization and accelerates the forgetting of sequences previously memorized by the student. However, knowledge distillation does not eliminate privacy risks: it accelerates memorization when the student is trained on sequences memorized by the teacher, and teachers can leak memorized content even when the student is trained on data that does not contain these sequences. Finally, we find that the size of the teacher model leads to a trade-off between how quickly memorized information is transferred to the student and how much the student ultimately memorizes. Overall, we provide practical insights for balancing the utility of distilled models against the privacy concerns associated with memorization.",{"paper_id":8678,"title":8679,"year":7,"month":188,"day":63,"doi":8680,"resource_url":8681,"first_page":8682,"last_page":8683,"pdf_url":8684,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8685,"paper_type":860,"authors":8686,"abstract":8711},"lrec2026-main-345","Redefining Evaluation Standards: A Unified Framework for Evaluating the Korean Capabilities of Language 
Models","10.63317\u002F46t2asmno5ng","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-345","4414","4422","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.345.pdf","lee-etal-2026-redefining",[8687,8689,8692,8694,8697,8700,8703,8706,8708],{"paper_id":8678,"author_seq":247,"given_name":8688,"surname":1359,"affiliation":63,"orcid":63},"Hanwool",{"paper_id":8678,"author_seq":232,"given_name":8690,"surname":8691,"affiliation":63,"orcid":63},"Dasol","Choi",{"paper_id":8678,"author_seq":218,"given_name":8693,"surname":5173,"affiliation":63,"orcid":63},"Sooyong",{"paper_id":8678,"author_seq":203,"given_name":8695,"surname":8696,"affiliation":63,"orcid":63},"Ilgyun","Jung",{"paper_id":8678,"author_seq":188,"given_name":8698,"surname":8699,"affiliation":63,"orcid":63},"Sangwon","Baek",{"paper_id":8678,"author_seq":172,"given_name":8701,"surname":8702,"affiliation":63,"orcid":63},"Guijin","Son",{"paper_id":8678,"author_seq":155,"given_name":8704,"surname":8705,"affiliation":63,"orcid":63},"Inseong","Hwang",{"paper_id":8678,"author_seq":138,"given_name":8707,"surname":1359,"affiliation":63,"orcid":63},"Naeun",{"paper_id":8678,"author_seq":121,"given_name":8709,"surname":8710,"affiliation":63,"orcid":63},"Seunghyeok","Hong","Recent advancements in Korean large language models (LLMs) have driven numerous benchmarks and evaluation methods, yet inconsistent protocols cause up to 10 p.p performance gaps across institutions. Overcoming these reproducibility gaps does not mean enforcing a one-size-fits-all evaluation. Rather, effective benchmarking requires diverse experimental approaches and a framework robust enough to support them. To this end, we introduce HRET (Haerae Evaluation Toolkit), an open-source, registry-based framework that unifies Korean LLM assessment. 
HRET integrates major Korean benchmarks, multiple inference backends, and multi-method evaluation, with language consistency enforcement to ensure genuine Korean outputs. Its modular registry design also enables rapid incorporation of new datasets, methods, and backends, ensuring the toolkit adapts to evolving research needs. Beyond standard accuracy metrics, HRET incorporates Korean-focused output analyses-morphology-aware Type-Token Ratio (TTR) for evaluating lexical diversity and systematic keyword-omission detection for identifying missing concepts-to provide diagnostic insights into language-specific behaviors. These targeted analyses help researchers pinpoint morphological and semantic shortcomings in model outputs, guiding focused improvements in Korean LLM development.",{"paper_id":8713,"title":8714,"year":7,"month":188,"day":63,"doi":8715,"resource_url":8716,"first_page":8717,"last_page":8718,"pdf_url":8719,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8720,"paper_type":860,"authors":8721,"abstract":8735},"lrec2026-main-346","Cross-Lingual Stability and Bias in Instruction-Tuned Language Models for Humanitarian NLP","10.63317\u002F59n42irmw53u","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-346","4423","4433","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.346.pdf","nemkova-etal-2026-cross",[8722,8725,8728,8730,8733],{"paper_id":8713,"author_seq":247,"given_name":8723,"surname":8724,"affiliation":63,"orcid":63},"Poli","Nemkova",{"paper_id":8713,"author_seq":232,"given_name":8726,"surname":8727,"affiliation":63,"orcid":63},"Amrit","Adhikari",{"paper_id":8713,"author_seq":218,"given_name":6059,"surname":8729,"affiliation":63,"orcid":63},"Pearson",{"paper_id":8713,"author_seq":203,"given_name":8731,"surname":8732,"affiliation":63,"orcid":63},"Vamsi Krishna","Sadu",{"paper_id":8713,"author_seq":188,"given_name":8734,"surname":8134,"affiliation":63,"orcid":63},"Albert 
V.","Humanitarian organizations face a critical choice: invest in costly commercial APIs or rely on free open-weight models for multilingual human rights monitoring. While commercial systems offer reliability, open-weight alternatives lack empirical validation - especially for low-resource languages common in conflict zones. This paper presents the first systematic comparison of commercial and open-weight large language models (LLMs) for human-rights-violation detection across seven languages, quantifying the cost-reliability trade-off facing resource-constrained organizations. Across 78,000 multilingual inferences, we evaluate six models - four instruction-aligned (Claude-Sonnet-4, DeepSeek-V3, Gemini-Flash-2.0, GPT-4.1-mini) and two open-weight (LLaMA-3-8B, Mistral-7B) - using both standard classification metrics and new measures of cross-lingual reliability: Calibration Deviation (CD), Decision Bias (ΔBias), Language Robustness Score (LRS), and Language Stability Score (LSS). Results show that alignment, not scale, determines stability: aligned models maintain near-invariant accuracy and balanced calibration across typologically distant and low-resource languages (e.g., Lingala, Burmese), while open-weight models exhibit significant prompt-language sensitivity and calibration drift. 
These findings demonstrate that multilingual alignment enables language-agnostic reasoning and provide practical guidance for humanitarian organizations balancing budget constraints with reliability in multilingual deployment.",{"paper_id":8737,"title":8738,"year":7,"month":188,"day":63,"doi":8739,"resource_url":8740,"first_page":8741,"last_page":8742,"pdf_url":8743,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8744,"paper_type":860,"authors":8745,"abstract":8748},"lrec2026-main-347","Counting on Consensus: Selecting the Right Inter-Annotator Agreement Metric for NLP Annotation and Evaluation","10.63317\u002F2wuxteb6uvcj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-347","4434","4446","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.347.pdf","james-2026-counting",[8746],{"paper_id":8737,"author_seq":247,"given_name":8747,"surname":4091,"affiliation":63,"orcid":63},"Joseph H. F.","Human annotation remains the foundation of reliable and interpretable data in Natural Language Processing (NLP). As annotation and evaluation tasks continue to expand, from categorical labelling to segmentation, subjective judgment, and continuous rating, measuring agreement between annotators has become increasingly more complex. This paper outlines how inter-annotator agreement (IAA) has been conceptualised and applied across NLP and related disciplines, describing the assumptions and limitations of common approaches. We organise agreement measures by task type and discuss how factors such as label imbalance and missing data influence reliability estimates. In addition, we highlight best practices for clear and transparent reporting, including the use of confidence intervals and the analysis of disagreement patterns. 
The paper aims to serve as a guide for selecting and interpreting agreement measures, promoting more consistent and reproducible human annotation and evaluation in NLP.",{"paper_id":8750,"title":8751,"year":7,"month":188,"day":63,"doi":8752,"resource_url":8753,"first_page":8754,"last_page":8755,"pdf_url":8756,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8757,"paper_type":860,"authors":8758,"abstract":8762},"lrec2026-main-348","Quadratic Weighted Kappa Is Not Enough for Evaluating Automated Essay Scoring Models","10.63317\u002F3co8wwdqqyf6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-348","4447","4456","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.348.pdf","albatarni-etal-2026-quadratic",[8759,8761],{"paper_id":8750,"author_seq":247,"given_name":7472,"surname":8760,"affiliation":63,"orcid":63},"Albatarni",{"paper_id":8750,"author_seq":232,"given_name":1492,"surname":1493,"affiliation":63,"orcid":63},"Quadratic Weighted Kappa (QWK) has been the standard evaluation metric in Automated Essay Scoring (AES) research for over two decades. Despite repeated criticisms highlighting its limitations, the community has largely continued to rely on QWK without adopting alternative metrics. This study aims to encourage a shift toward more suitable evaluation practices by systematically examining QWK’s behavior under three key conditions: dataset size, class imbalance, and score range. Using both a publicly available AES dataset and carefully synthesized datasets, we demonstrate scenarios where QWK produces unstable or misleading results. 
Our findings highlight the need for more robust evaluation practices and point to alternative metrics, particularly variants of Gwet’s AC2, that offer greater reliability across a variety of conditions.",{"paper_id":8764,"title":8765,"year":7,"month":188,"day":63,"doi":8766,"resource_url":8767,"first_page":8768,"last_page":8769,"pdf_url":8770,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8771,"paper_type":860,"authors":8772,"abstract":8781},"lrec2026-main-349","Evaluating the Homogeneity of Keyphrase Prediction Models","10.63317\u002F5d5bqer83n9q","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-349","4457","4469","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.349.pdf","houbre-etal-2026-evaluating",[8773,8776,8778],{"paper_id":8764,"author_seq":247,"given_name":8774,"surname":8775,"affiliation":63,"orcid":63},"Mael","Houbre",{"paper_id":8764,"author_seq":232,"given_name":2175,"surname":8777,"affiliation":63,"orcid":63},"Boudin",{"paper_id":8764,"author_seq":218,"given_name":8779,"surname":8780,"affiliation":63,"orcid":63},"Beatrice","Daille","Keyphrases which are useful in several NLP and IR applications are either extracted from text or predicted by generative models. Contrarily to keyphrase extraction approaches, keyphrase generation models can predict keyphrases that do not appear in a document’s text called ‘absent keyphrases’. This ability means that keyphrase generation models can associate a document to a notion that is not explicitly mentioned in its text. Intuitively, this suggests that for two documents treating the same subjects, a keyphrase generation model is more likely to be homogeneous in their indexing i.e. predict the same keyphrase for both documents, regardless of those keyphrases appearing in their respective text or not; something a keyphrase extraction model would fail to do. 
Yet, homogeneity of keyphrase prediction models is not covered by current benchmarks. In this work, we introduce a method to evaluate the homogeneity of keyphrase prediction models and study if absent keyphrase generation capabilities actually help the model to be more homogeneous. To our surprise, we show that keyphrase extraction methods are competitive with generative models, and that depending on the evaluation scenario, having the ability to generate absent keyphrases can actually act to the detriment of homogeneity. Our data, code and prompts are available on Huggingface and github.",{"paper_id":8783,"title":8784,"year":7,"month":188,"day":63,"doi":8785,"resource_url":8786,"first_page":8787,"last_page":8788,"pdf_url":8789,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8790,"paper_type":860,"authors":8791,"abstract":8803},"lrec2026-main-350","A Taxonomy of Safety: Harmonizing LLM Benchmarks in a Fragmented Landscape","10.63317\u002F4n7jrxunmvcp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-350","4470","4481","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.350.pdf","rastegar-etal-2026-taxonomy",[8792,8795,8798,8800],{"paper_id":8783,"author_seq":247,"given_name":8793,"surname":8794,"affiliation":63,"orcid":63},"Shadi","Rastegar",{"paper_id":8783,"author_seq":232,"given_name":8796,"surname":8797,"affiliation":63,"orcid":63},"Viktor","Hangya",{"paper_id":8783,"author_seq":218,"given_name":2109,"surname":8799,"affiliation":63,"orcid":63},"Kuech",{"paper_id":8783,"author_seq":203,"given_name":8801,"surname":8802,"affiliation":63,"orcid":63},"Darina","Gold","Understanding and mitigating the safety limitations of LLMs is of great importance to build trustworthy AI applications. Although a wide range of safety benchmarks are available, there is no standardized taxonomy of safety categories. 
As a result, some benchmarks focus on a specific subset of categories, they define test samples on different granularity levels, or they use different definitions or naming conventions. To mitigate these issues, we propose a two-level taxonomy of LLM safety categories, created by harmonizing existing resources. Our taxonomy gives an overview of important safety categories that helps researchers pinpoint potential safety risks and select the right benchmarks when evaluating or developing language models. Moreover, the taxonomy provides guidelines to categorize future benchmarks. Furthermore, since the majority of the available safety resources are English-focused, we check the cross-cultural validity of our taxonomy by translating datasets covering all top level categories to French, German, Italian, and Spanish. A manual review of a subset of translated samples by native speakers revealed no major cultural mismatches from a safety perspective. This supports not only the transferability of English benchmarks but also the transferability of the categories in our taxonomy, as well as its potential as a practical tool for guiding safety-focused dataset development and evaluation beyond English.",{"paper_id":8805,"title":8806,"year":7,"month":188,"day":63,"doi":8807,"resource_url":8808,"first_page":8809,"last_page":8810,"pdf_url":8811,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8812,"paper_type":860,"authors":8813,"abstract":8824},"lrec2026-main-351","Consistency of LLMs to Comparative Statements in Mathematical Reasoning Tasks","10.63317\u002F5c2k786wu6jm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-351","4482","4496","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.351.pdf","san-etal-2026-consistency",[8814,8817,8819,8821],{"paper_id":8805,"author_seq":247,"given_name":8815,"surname":8816,"affiliation":63,"orcid":63},"Aidan 
W.","San",{"paper_id":8805,"author_seq":232,"given_name":8818,"surname":8702,"affiliation":63,"orcid":63},"Daniel Juyoung",{"paper_id":8805,"author_seq":218,"given_name":8820,"surname":3916,"affiliation":63,"orcid":63},"Xiaodong",{"paper_id":8805,"author_seq":203,"given_name":8822,"surname":8823,"affiliation":63,"orcid":63},"Yangfeng","Ji","Large language models (LLMs) have the potential to significantly expand access to quality education through applications such as mathematics tutoring. However, a key challenge is that student writing often contains redundancies, and prior research has shown that LLMs can be sensitive to such irrelevant information. This raises a critical research question: How consistent are LLMs when faced with extraneous comparative statements? To address this, we propose a systematic framework for evaluating LLM consistency. Our approach involves a hybrid strategy that integrates template-based and model-based methods to generate comparative statements (e.g., \"One of the apples was tastier than average\") and insert them into mathematical reasoning problems. The merit of our approach lies in its systematic and automated nature, enabling rigorous assessment across various models and datasets. Conducting experiments on the GSM8K, AQuA, and Hendrycks MATH benchmarks with a suite of open-source LLMs, we highlight two key results. First, LLM accuracy can drop by over 30% when presented with these statements. 
Furthermore, we uncover a trade-off between the diversity of the generated statements and the magnitude of the performance drop, where less diverse and more repetitive perturbations lead to greater accuracy degradation.",{"paper_id":8826,"title":8827,"year":7,"month":188,"day":63,"doi":8828,"resource_url":8829,"first_page":8830,"last_page":8831,"pdf_url":8832,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8833,"paper_type":860,"authors":8834,"abstract":8844},"lrec2026-main-352","PersianAnonymizer: Evaluating LLM-Labeled Training for Efficient NER-based Anonymization in Persian","10.63317\u002F57u2ica9225o","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-352","4497","4506","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.352.pdf","shalchian-etal-2026-persiananonymizer",[8835,8838,8841],{"paper_id":8826,"author_seq":247,"given_name":8836,"surname":8837,"affiliation":63,"orcid":63},"Mohammad Hossein","Shalchian",{"paper_id":8826,"author_seq":232,"given_name":8839,"surname":8840,"affiliation":63,"orcid":63},"Mostafa","Amiri",{"paper_id":8826,"author_seq":218,"given_name":8842,"surname":8843,"affiliation":63,"orcid":63},"Amir Mahdi","Sadeghzadeh","We target practical anonymization of Persian customer chats by training a compact NER model from LLM-labeled supervision and selecting the best labeler for deployment. We compare three instruction-tuned LLMs—DEEPSEEKV3-0324, GPT-OSS-120B, and QWEN3-235B-A22B-INSTRUCT-2507—to produce span annotations under a shared JSON protocol, yielding four corpora (OSS_ZeroShot, Qwen_ZeroShot, Qwen_FewShot, DeepSeek_FewShot). A MATINAROBERTA-based token-classifier is trained per corpus and evaluated with token-level Precision\u002FRecall\u002FF1 (overall and per-class). We also report Label Coverage Recall (LCR), the proportion of gold non-O tokens predicted as non-O, and quantify cross-labeler behavior via a token-level Venn on test annotations. 
Finally, we contrast test-set annotation latency of the LLMs on H200 nodes with the trained NER’s test-time labeling on a single RTX 3090. Results show that supervision from OSS_ZeroShot yields the strongest macro-F1 and LCR, while the resulting NER labels an entire 40K-message test set in ∼2 minutes on one consumer GPU. This establishes a practical path to high-quality, low-cost anonymization for Persian industrial data.",{"paper_id":8846,"title":8847,"year":7,"month":188,"day":63,"doi":8848,"resource_url":8849,"first_page":8850,"last_page":8851,"pdf_url":8852,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8853,"paper_type":860,"authors":8854,"abstract":8862},"lrec2026-main-353","How Many Samples Do We Need? A Toolkit for Power-Aware Evaluation Design","10.63317\u002F4j37zxirsi26","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-353","4507","4513","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.353.pdf","basile-etal-2026-how",[8855,8856,8859],{"paper_id":8846,"author_seq":247,"given_name":4682,"surname":6937,"affiliation":63,"orcid":63},{"paper_id":8846,"author_seq":232,"given_name":8857,"surname":8858,"affiliation":63,"orcid":63},"Areg Mikael","Sarvazyan",{"paper_id":8846,"author_seq":218,"given_name":8860,"surname":8861,"affiliation":63,"orcid":63},"José Ángel","González","If datasets are the telescopes of our field, then statistical power is their resolution, i.e., their ability to reveal a true difference in model performance when one exists. Many NLP evaluations are underpowered, leading to overstated claims of improvement. This paper introduces sk-power, an open-source Python library that helps researchers and practitioners design well-powered evaluations. Built with familiar scikit-learn-style abstractions, sk-power enables users to simulate evaluation scenarios, estimate minimum detectable effects, and assess the reliability of reported gains. 
We also illustrate what can go wrong when power analysis isn’t carried out. Our goal is to position power analysis as a first-class, practical step in evaluation planning.",{"paper_id":8864,"title":8865,"year":7,"month":188,"day":63,"doi":8866,"resource_url":8867,"first_page":8868,"last_page":8869,"pdf_url":8870,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8871,"paper_type":860,"authors":8872,"abstract":8880},"lrec2026-main-354","Of Words and Meaning: A Grammatical and Semantic Benchmark for Faroese LLM Understanding","10.63317\u002F4u4i99hc8co8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-354","4514","4526","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.354.pdf","debess-etal-2026-words",[8873,8876,8878],{"paper_id":8864,"author_seq":247,"given_name":8874,"surname":8875,"affiliation":63,"orcid":63},"Iben Nyholm","Debess",{"paper_id":8864,"author_seq":232,"given_name":3175,"surname":8877,"affiliation":63,"orcid":63},"Scalvini",{"paper_id":8864,"author_seq":218,"given_name":8879,"surname":8280,"affiliation":63,"orcid":63},"Bolette","Evaluating language technology for low-resource languages faces a fundamental challenge: the scarcity of native benchmarks suitable for systematic assessment. For Faroese, no such evaluation frameworks exist. We address this gap by presenting the first benchmark suite for Faroese semantic understanding and grammatical competence. Our methodology transforms existing lexicographic resources, authoritative dictionaries and error corpora, into systematic evaluation tasks through computational restructuring, demonstrating a replicable approach for resource-constrained settings. The resulting benchmarks assess grammatical correctness, semantic relation classification, and metaphor comprehension. Evaluation across LLMs from compact open-source to large-scale commercial systems reveals consistent performance patterns favouring proprietary models. 
This work establishes a proof of concept for benchmark creation from traditional linguistic resources, and provides a methodological template for other low-resource language communities.",{"paper_id":8882,"title":8883,"year":7,"month":188,"day":63,"doi":8884,"resource_url":8885,"first_page":8886,"last_page":8887,"pdf_url":8888,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8889,"paper_type":860,"authors":8890,"abstract":8901},"lrec2026-main-355","TURING: Evaluating Human Abilities to Identify AI-Generated Texts","10.63317\u002F4e4ojwwryi8d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-355","4527","4535","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.355.pdf","kalashnikova-etal-2026-turing",[8891,8894,8897,8899],{"paper_id":8882,"author_seq":247,"given_name":8892,"surname":8893,"affiliation":63,"orcid":63},"Natalia","Kalashnikova",{"paper_id":8882,"author_seq":232,"given_name":8895,"surname":8896,"affiliation":63,"orcid":63},"Nicolas De","Bufala",{"paper_id":8882,"author_seq":218,"given_name":5149,"surname":8898,"affiliation":63,"orcid":63},"Fayad",{"paper_id":8882,"author_seq":203,"given_name":4364,"surname":8900,"affiliation":63,"orcid":63},"Cervoni","This study analyzes humans’ ability to identify AI-generated texts across 10 genres. We collected 9164 annotations from 214 participants on 500 texts (half human, half LLM-produced), and analyzed 7943 after quality screening. Our main findings are that the humans’ accuracy was above chance but far from perfect (around 59%), with a slight tendency to label texts as \"Human-generated\". Their performance is influenced by the text genre (structural\u002Ffactual formats easier to identify vs. complex genres) and by generating LLM. Annotators optionally selected three-level descriptors to justify decisions. 
While they had very limited effects on accuracy, their usage showed some association between text features (monotony, lack of cohesion or coherence) and \"AI-generated\" labeling. However, the linguistic features of the texts appear to have no robust impact after correction on human judgment. A small learning effect emerged but was practically negligible (0.1-0.2%), and personal characteristics of annotators had an impact on their accuracy, except age, which showed no effect. Finally, two automated detection tools were tested, reaching 88% accuracy on our distribution, clearly above humans, highlighting the value of human-tool combinations.",{"paper_id":8903,"title":8904,"year":7,"month":188,"day":63,"doi":8905,"resource_url":8906,"first_page":8907,"last_page":8908,"pdf_url":8909,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8910,"paper_type":860,"authors":8911,"abstract":8921},"lrec2026-main-356","JamC-QA: A Multiple-Choice Question Answering Benchmark for Japan-Specific Knowledge","10.63317\u002F33xwat5joobd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-356","4536","4546","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.356.pdf","oka-etal-2026-jamc",[8912,8915,8918],{"paper_id":8903,"author_seq":247,"given_name":8913,"surname":8914,"affiliation":63,"orcid":63},"Teruaki","Oka",{"paper_id":8903,"author_seq":232,"given_name":8916,"surname":8917,"affiliation":63,"orcid":63},"Tomohide","Shibata",{"paper_id":8903,"author_seq":218,"given_name":8919,"surname":8920,"affiliation":63,"orcid":63},"Nao","Yoshida","We introduce JamC-QA, a multiple-choice question answering benchmark specifically designed to evaluate Japan-specific knowledge. Existing Japanese QA benchmarks largely consist of questions translated from English or derived from professional exams, primarily targeting academic or generally shared knowledge. 
Consequently, this limits the usefulness of distinguishing the performance of high-performing Large Language Models on local knowledge acquisition. To address this, JamC-QA serves as a robust resource for assessing the acquisition of Japan-specific knowledge. It comprises 2,309 challenging instances that were created entirely from scratch by human annotators across eight categories: culture, custom, regional identity, geography, history, government, law, and healthcare. Instances that were easily answerable by weak models were filtered out. Evaluation results highlight the critical distinction between model types: while multilingual models scored highly on general benchmarks like MMLU and JMMLU, the results on JamC-QA indicate that they do not fully capture Japan-specific knowledge. Japanese-language models outperform multilingual models, especially on culture- and region-related knowledge such as proverbs, traditional events, and local customs. Furthermore, we find a notable division within Japanese models: models further pretrained on Japanese text excel at administrative and legal questions, while models trained from scratch perform strongly on local and cultural aspects.",{"paper_id":8923,"title":8924,"year":7,"month":188,"day":63,"doi":8925,"resource_url":8926,"first_page":8927,"last_page":8928,"pdf_url":8929,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8930,"paper_type":860,"authors":8931,"abstract":8938},"lrec2026-main-357","Towards Dynamic Metaphor Identification: Evaluating GPT O-Series Models on Five Metaphoricity Cues in U.S. 
Trade Corpora","10.63317\u002F2zr7e2gvwuo9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-357","4547","4559","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.357.pdf","bas-etal-2026-dynamic",[8932,8935,8936],{"paper_id":8923,"author_seq":247,"given_name":8933,"surname":8934,"affiliation":63,"orcid":63},"Berkay","Bas",{"paper_id":8923,"author_seq":232,"given_name":981,"surname":982,"affiliation":63,"orcid":63},{"paper_id":8923,"author_seq":218,"given_name":8937,"surname":916,"affiliation":63,"orcid":63},"Xiaojuan","Although recent advances have focused on detecting metaphors, existing models generally treat them as static entities. There has been little research into identifying dynamic metaphors in discourse. This article addresses this gap by focusing on metaphoricity cues: Linguistic signals that may indicate the activation of metaphoric meaning in different discourse contexts. This study examines the ability of OpenAI’s O-series models (O4-mini, O4-mini-high and O3) in detecting five metaphoricity cues in the U.S. trade discourse, including cues of explicit mapping, emphasis, marking, repetition and novelisation. 
Research results show that the models performed best on repetition and emphasis, while novelisation was the most difficult cue to detect.",{"paper_id":8940,"title":8941,"year":7,"month":188,"day":63,"doi":8942,"resource_url":8943,"first_page":8944,"last_page":8945,"pdf_url":8946,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8947,"paper_type":860,"authors":8948,"abstract":8957},"lrec2026-main-358","Evaluating Text Style Transfer: A Nine-language Benchmark for Text Detoxification","10.63317\u002F539pqaksijax","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-358","4560","4574","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.358.pdf","protasov-etal-2026-evaluating",[8949,8952,8954,8955],{"paper_id":8940,"author_seq":247,"given_name":8950,"surname":8951,"affiliation":63,"orcid":63},"Vitaly","Protasov",{"paper_id":8940,"author_seq":232,"given_name":3405,"surname":8953,"affiliation":63,"orcid":63},"Babakov",{"paper_id":8940,"author_seq":218,"given_name":866,"surname":867,"affiliation":63,"orcid":63},{"paper_id":8940,"author_seq":203,"given_name":869,"surname":8956,"affiliation":63,"orcid":63},"Panchenko","Despite notable advances in large language models (LLMs), reliable evaluation of text generation tasks such as text style transfer (TST) remains an open challenge. Existing research has shown that automatic metrics often correlate poorly with human judgments (Dementieva et al., 2024; Pauli et al., 2025), limiting our ability to assess model performance accurately. Furthermore, most prior work has focused primarily on English, while the evaluation of multilingual TST systems, particularly for text detoxification, remains largely underexplored. In this paper, we present the first comprehensive multilingual benchmarking study of evaluation metrics for text detoxification evaluation across nine languages: Arabic, Amharic, Chinese, English, German, Hindi, Russian, Spanish, Ukrainian. 
Drawing inspiration from machine translation evaluation, we compare neural-based automatic metrics with LLM-as-a-judge approaches together with experiments on task-specific fine-tuned models. Our analysis reveals that the proposed metrics achieve significantly higher correlation with human judgments compared to baseline approaches. We also provide actionable insights and practical guidelines for building robust and reliable multilingual evaluation pipelines for text detoxification and related TST tasks.",{"paper_id":8959,"title":8960,"year":7,"month":188,"day":63,"doi":8961,"resource_url":8962,"first_page":8963,"last_page":8964,"pdf_url":8965,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8966,"paper_type":860,"authors":8967,"abstract":8993},"lrec2026-main-359","Irish-BLiMP: A Linguistic Benchmark for Evaluating Human and Language Model Performance in a Low-Resource Setting","10.63317\u002F4krpnjca34as","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-359","4575","4586","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.359.pdf","mcgiff-etal-2026-irish",[8968,8971,8973,8975,8978,8981,8984,8986,8988,8990],{"paper_id":8959,"author_seq":247,"given_name":8969,"surname":8970,"affiliation":63,"orcid":63},"Josh","Mcgiff",{"paper_id":8959,"author_seq":232,"given_name":8972,"surname":2407,"affiliation":63,"orcid":63},"Tung Khanh",{"paper_id":8959,"author_seq":218,"given_name":1600,"surname":8974,"affiliation":63,"orcid":63},"Mulcahy",{"paper_id":8959,"author_seq":203,"given_name":8976,"surname":8977,"affiliation":63,"orcid":63},"Dáibhidh Ó","Luinín",{"paper_id":8959,"author_seq":188,"given_name":8979,"surname":8980,"affiliation":63,"orcid":63},"Jake","Dalzell",{"paper_id":8959,"author_seq":172,"given_name":8982,"surname":8983,"affiliation":63,"orcid":63},"Róisín 
Ní","Bhroin",{"paper_id":8959,"author_seq":155,"given_name":2397,"surname":8985,"affiliation":63,"orcid":63},"Burke",{"paper_id":8959,"author_seq":138,"given_name":3431,"surname":8987,"affiliation":63,"orcid":63},"O'Sullivan",{"paper_id":8959,"author_seq":121,"given_name":8989,"surname":2395,"affiliation":63,"orcid":63},"Hoang D.",{"paper_id":8959,"author_seq":104,"given_name":8991,"surname":8992,"affiliation":63,"orcid":63},"Nikola S.","Nikolov","We present Irish-BLiMP (Irish Benchmark of Linguistic Minimal Pairs), the first dataset and framework designed for fine-grained evaluation of linguistic competence in the Irish language, an endangered language. Drawing on a variety of linguistic literature and grammar reference works, a team of fluent Irish speakers manually constructed and reviewed 1020 minimal pairs across a taxonomy of 11 linguistic features. We evaluate both existing Large Language Models (LLMs) and fluent human participants on their syntactic knowledge of Irish. Our findings show that humans outperform all models across all linguistic features, achieving 16.6% higher accuracy on average. Moreover, a substantial performance gap of 18.1% persists between open- and closed-source LLMs, with even the strongest model (gpt-5) reaching only 73.5% accuracy compared to 90.1% by humans. Interestingly, human participants and models struggle on different aspects of Irish grammar, thus highlighting a difference in representation learned by the models. 
Overall, Irish-BLiMP provides the first systematic framework for evaluating the grammatical competence of LLMs in Irish and offers a valuable benchmark for advancing research on linguistic understanding in low-resource languages.",{"paper_id":8995,"title":8996,"year":7,"month":188,"day":63,"doi":8997,"resource_url":8998,"first_page":8999,"last_page":9000,"pdf_url":9001,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9002,"paper_type":860,"authors":9003,"abstract":9026},"lrec2026-main-360","EduBench: A Portuguese Benchmark for Open-Ended Discursive Question Answering","10.63317\u002F4nocig8f36r9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-360","4587","4596","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.360.pdf","paiola-etal-2026-edubench",[9004,9007,9010,9013,9016,9019,9021,9023],{"paper_id":8995,"author_seq":247,"given_name":9005,"surname":9006,"affiliation":63,"orcid":63},"Pedro Henrique","Paiola",{"paper_id":8995,"author_seq":232,"given_name":9008,"surname":9009,"affiliation":63,"orcid":63},"Luís Gabriel Damiati","Mendes",{"paper_id":8995,"author_seq":218,"given_name":9011,"surname":9012,"affiliation":63,"orcid":63},"Bruno de Oliveira","Monchelato",{"paper_id":8995,"author_seq":203,"given_name":9014,"surname":9015,"affiliation":63,"orcid":63},"André da Fonseca","Schuck",{"paper_id":8995,"author_seq":188,"given_name":9017,"surname":9018,"affiliation":63,"orcid":63},"Gabriel Lino","Garcia",{"paper_id":8995,"author_seq":172,"given_name":9020,"surname":4069,"affiliation":63,"orcid":63},"Douglas",{"paper_id":8995,"author_seq":155,"given_name":9022,"surname":4075,"affiliation":63,"orcid":63},"Helena de Medeiros",{"paper_id":8995,"author_seq":138,"given_name":9024,"surname":9025,"affiliation":63,"orcid":63},"João Paulo","Papa","Evaluating open-ended text generation in large language models remains challenging, particularly for non-English languages. 
We introduce EduBench, a comprehensive Portuguese-language benchmark comprising 3,149 discursive questions from Brazilian university entrance examinations spanning 2015–2025. Unlike multiple-choice or extractive QA benchmarks, EduBench requires extended, argumentative responses across diverse domains, including Humanities, Exact and Natural Sciences, and Languages. Each question includes expert-curated reference answers from official sources, rich metadata, and automated image descriptions to support text-only evaluation. We establish baseline results using nine contemporary models, ranging from 4B-parameter SLMs to state-of-the-art reasoning-capable LLMs, and evaluate them using complementary metrics (BLEU, BERTScore, G-Eval). Our results reveal substantial metric disagreement and highlight the complexity of assessing discursive generation, with models achieving 54–71% alignment with expert answers. We release EduBench publicly to support research on Portuguese NLP and open-ended generation evaluation.",{"paper_id":9028,"title":9029,"year":7,"month":188,"day":63,"doi":9030,"resource_url":9031,"first_page":9032,"last_page":9033,"pdf_url":9034,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9035,"paper_type":860,"authors":9036,"abstract":9048},"lrec2026-main-361","SemBench: A Universal Semantic Framework for LLM 
Evaluation","10.63317\u002F4fu76b2p67w6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-361","4597","4610","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.361.pdf","zubillaga-etal-2026-sembench",[9037,9040,9042,9045],{"paper_id":9028,"author_seq":247,"given_name":9038,"surname":9039,"affiliation":63,"orcid":63},"Mikel","Zubillaga",{"paper_id":9028,"author_seq":232,"given_name":9041,"surname":1180,"affiliation":63,"orcid":63},"Naiara",{"paper_id":9028,"author_seq":218,"given_name":9043,"surname":9044,"affiliation":63,"orcid":63},"Oscar","Sainz",{"paper_id":9028,"author_seq":203,"given_name":9046,"surname":9047,"affiliation":63,"orcid":63},"German","Rigau","Recent progress in Natural Language Processing (NLP) has been driven by the emergence of Large Language Models (LLMs), which exhibit remarkable generative and reasoning capabilities. However, despite their success, evaluating the true semantic understanding of these models remains a persistent challenge. Traditional benchmarks such as Word-in-Context (WiC) effectively probe this capability, but their creation is resource-intensive and often limited to high-resource languages. In this paper, we introduce SemBench, a framework for automatically generating synthetic benchmarks that assess the semantic competence of LLMs using only dictionary sense definitions and a sentence encoder. This approach eliminates the need for curated example sentences, making it both scalable and language-independent. We evaluate SemBench in three languages (English, Spanish, and Basque) spanning different levels of linguistic resources, and across a wide range of LLMs. Our results show that rankings derived from SemBench strongly correlate with those obtained from standard WiC datasets. Furthermore, our analysis demonstrates that only a small number of examples is required to achieve stable and meaningful rankings. 
Overall, SemBench provides a lightweight, adaptable, and data-efficient framework for cross-lingual evaluation of semantic understanding in LLMs.",{"paper_id":9050,"title":9051,"year":7,"month":188,"day":63,"doi":9052,"resource_url":9053,"first_page":9054,"last_page":9055,"pdf_url":9056,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9057,"paper_type":860,"authors":9058,"abstract":9067},"lrec2026-main-362","EL-MIA: Quantifying Membership Inference Risks of Sensitive Entities in LLMs","10.63317\u002F59k6vkt3biya","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-362","4611","4625","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.362.pdf","satvaty-etal-2026-el",[9059,9061,9064],{"paper_id":9050,"author_seq":247,"given_name":2207,"surname":9060,"affiliation":63,"orcid":63},"Satvaty",{"paper_id":9050,"author_seq":232,"given_name":9062,"surname":9063,"affiliation":63,"orcid":63},"Suzan","Verberne",{"paper_id":9050,"author_seq":218,"given_name":9065,"surname":9066,"affiliation":63,"orcid":63},"Fatih","Turkmen","Membership inference attacks (MIA) aim to infer whether a particular data point is part of the training dataset of a model. In this paper, we propose a new task in the context of LLM privacy: entity-level discovery of membership risk focused on sensitive information (PII, credit card numbers, etc). Existing methods for MIA can detect the presence of entire prompts or documents in the LLM training data, but they fail to capture risks at a finer granularity. We propose the “EL-MIA” framework for auditing entity-level membership risks in LLMs. We construct a benchmark dataset for the evaluation of MIA methods on this task. Using this benchmark, we conduct a systematic comparison of existing MIA techniques as well as two newly proposed methods. 
We provide a comprehensive analysis of the results, trying to explain the relation of the entity level MIA susceptibility with the model scale, training epochs, and other surface level factors. Our findings reveal that existing MIA methods are limited when it comes to entity-level membership inference of the sensitive attributes, while this susceptibility can be outlined with relatively straightforward methods, highlighting the need for stronger adversaries to stress test the provided threat model.",{"paper_id":9069,"title":9070,"year":7,"month":188,"day":63,"doi":9071,"resource_url":9072,"first_page":9073,"last_page":9074,"pdf_url":9075,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9076,"paper_type":860,"authors":9077,"abstract":9088},"lrec2026-main-363","Same Meaning, Different Scores: Lexical and Syntactic Sensitivity in LLM Evaluation","10.63317\u002F297j5gohq2ip","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-363","4626","4637","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.363.pdf","kosti-etal-2026-same",[9078,9081,9084,9086],{"paper_id":9069,"author_seq":247,"given_name":9079,"surname":9080,"affiliation":63,"orcid":63},"Bogdan","Kostić",{"paper_id":9069,"author_seq":232,"given_name":9082,"surname":9083,"affiliation":63,"orcid":63},"Conor","Fallon",{"paper_id":9069,"author_seq":218,"given_name":1296,"surname":9085,"affiliation":63,"orcid":63},"Risch",{"paper_id":9069,"author_seq":203,"given_name":869,"surname":9087,"affiliation":63,"orcid":63},"Loeser","The rapid advancement of Large Language Models (LLMs) has established standardized evaluation benchmarks as the primary instrument for model comparison. Yet, their reliability is increasingly questioned due to sensitivity to shallow variations in input prompts. 
This paper examines how controlled, truth-conditionally equivalent lexical and syntactic perturbations affect the absolute performance and relative ranking of 23 contemporary LLMs across three benchmarks: MMLU, SQuAD, and AMEGA. We employ two linguistically principled pipelines to generate meaning-preserving variations: one performing synonym substitution for lexical changes, and another using dependency parsing to determine applicable syntactic transformations. Results show that lexical perturbations consistently induce substantial, statistically significant performance degradation across nearly all models and tasks, while syntactic perturbations have more heterogeneous effects, occasionally improving results. Both perturbation types destabilize model leaderboards on complex tasks. Furthermore, model robustness did not consistently scale with model size, revealing strong task dependence. Overall, the findings suggest that LLMs rely more on surface-level lexical patterns than on abstract linguistic competence, underscoring the need for robustness testing as a standard component of LLM evaluation.",{"paper_id":9090,"title":9091,"year":7,"month":188,"day":63,"doi":9092,"resource_url":9093,"first_page":9094,"last_page":9095,"pdf_url":9096,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9097,"paper_type":860,"authors":9098,"abstract":9103},"lrec2026-main-364","The Potential for Misleading Results in Text Sanitisation with Standard Evaluation Metrics","10.63317\u002F4ubbuzpc4hpu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-364","4638","4646","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.364.pdf","zhang-etal-2026-potential",[9099,9101],{"paper_id":9090,"author_seq":247,"given_name":9100,"surname":1519,"affiliation":63,"orcid":63},"Dan",{"paper_id":9090,"author_seq":232,"given_name":8134,"surname":9102,"affiliation":63,"orcid":63},"Anderson","Data privacy is an important facet of 
modern life. It is especially important when considering data that carries potentially sensitive information such as in medical or legal documents. However, it is particularly difficult to ensure private information has been removed or masked in unstructured data, e.g. free-flowing text. The evaluation of systems that automatically detect and remove personal identifiable information (PII) from text is also challenging. Here we present a case study of a system that seemingly performed well, but under closer scrutiny the high performance was due to the shortcomings of standard binary classification metrics in the context of high target class prevalence. We then give a short analysis of different possible metrics in these high-prevalence scenarios, clearly showing the superiority of the Matthews Correlation Coefficient. This is particularly important because readily available data in this domain is rare and often systems are compared using biographies from Wikipedia which have a naturally high prevalence. 
This can be further aggravated by certain reasonable pre-processing or evaluation formalisms as in the case study discussed here.",{"paper_id":9105,"title":9106,"year":7,"month":188,"day":63,"doi":9107,"resource_url":9108,"first_page":9109,"last_page":9110,"pdf_url":9111,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9112,"paper_type":860,"authors":9113,"abstract":9119},"lrec2026-main-365","Mind the Language Gap: Assessing LLM Safety in Italian","10.63317\u002F4mvfemxb3wvv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-365","4647","4657","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.365.pdf","marafatto-etal-2026-mind",[9114,9116],{"paper_id":9105,"author_seq":247,"given_name":2968,"surname":9115,"affiliation":63,"orcid":63},"Marafatto",{"paper_id":9105,"author_seq":232,"given_name":9117,"surname":9118,"affiliation":63,"orcid":63},"Roberto","Navigli","The rapid diffusion of Large Language Models (LLMs) across linguistic and cultural contexts underscores the need for systematic safety evaluations beyond English. As LLMs are increasingly applied in multilingual settings, ensuring their safe and appropriate behavior in other languages is essential. This paper presents a methodology for building safety evaluation datasets that comprehensively cover the full spectrum of sensitive topics relevant to LLM safety. The resulting resources include a collection of Italian Wikipedia pages encompassing all major categories of sensitive content, and a companion dataset containing three challenging Italian-language questions per page designed to probe model behavior on high-risk issues. Each prompt was annotated into four safety outcome categories: correct refusal, safe informative, unsafe, and ambiguous. Together, these datasets provide a robust foundation for evaluating and benchmarking LLM safety in Italian. 
To demonstrate their utility, we used them to assess four LLMs, identifying systematic differences in refusal consistency and compliance across sensitive domains. To support transparency and reproducibility, we release a public repository containing the list of categorized Italian Wikipedia pages, the automatically generated prompts, and the standard prompt template used for safety testing. With this work, we aim to advance language-specific safety assessment and support the responsible, culturally grounded deployment of LLMs beyond English.",{"paper_id":9121,"title":9122,"year":7,"month":188,"day":63,"doi":9123,"resource_url":9124,"first_page":9125,"last_page":9126,"pdf_url":9127,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9128,"paper_type":860,"authors":9129,"abstract":9158},"lrec2026-main-366","Bulgarian Massive Multitask Language Understanding Benchmark","10.63317\u002F57jy4fetjmc4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-366","4658","4672","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.366.pdf","koeva-etal-2026-bulgarian",[9130,9133,9136,9139,9142,9144,9146,9149,9152,9155],{"paper_id":9121,"author_seq":247,"given_name":9131,"surname":9132,"affiliation":63,"orcid":63},"Svetla Peneva","Koeva",{"paper_id":9121,"author_seq":232,"given_name":9134,"surname":9135,"affiliation":63,"orcid":63},"Ivelina","Stoyanova",{"paper_id":9121,"author_seq":218,"given_name":9137,"surname":9138,"affiliation":63,"orcid":63},"Dimiter","Georgiev",{"paper_id":9121,"author_seq":203,"given_name":9140,"surname":9141,"affiliation":63,"orcid":63},"Svetlozara","Leseva",{"paper_id":9121,"author_seq":188,"given_name":7519,"surname":9143,"affiliation":63,"orcid":63},"Stefanova",{"paper_id":9121,"author_seq":172,"given_name":2960,"surname":9145,"affiliation":63,"orcid":63},"Todorova",{"paper_id":9121,"author_seq":155,"given_name":9147,"surname":9148,"affiliation":63,"orcid":63},"Tsvetana 
Ivanova","Dimitrova",{"paper_id":9121,"author_seq":138,"given_name":9150,"surname":9151,"affiliation":63,"orcid":63},"Hristina","Kukova",{"paper_id":9121,"author_seq":121,"given_name":9153,"surname":9154,"affiliation":63,"orcid":63},"Mihaela","Moskova",{"paper_id":9121,"author_seq":104,"given_name":9156,"surname":9157,"affiliation":63,"orcid":63},"Tinko","Tinchev","Assessing the broad general knowledge of Large Language Models (LLMs) across multiple domains in Bulgarian remains challenging due to the limited availability of Bulgarian evaluation benchmarks. To address this gap, we introduce the Bulgarian Massive Multitask Language Understanding benchmark (MMLU-BG), designed to evaluate whether LLMs possess generalised knowledge capabilities beyond simple text prediction in Bulgarian. This paper presents the structure, the development protocol, and the size of the MMLU-BG benchmark. It is tested in comparison with the original MMLU for English across seven LLMs selected according to specific criteria. 
The experiments demonstrate that the MMLU-BG benchmark assesses multi-domain versatility and highlights the models’ strengths and weaknesses across different subject areas.",{"paper_id":9160,"title":9161,"year":7,"month":188,"day":63,"doi":9162,"resource_url":9163,"first_page":9164,"last_page":9165,"pdf_url":9166,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9167,"paper_type":860,"authors":9168,"abstract":9193},"lrec2026-main-367","PHEB: An European Portuguese High School-Level LLM Benchmark","10.63317\u002F2o3fvueefvwj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-367","4673","4683","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.367.pdf","tavares-etal-2026-pheb",[9169,9172,9174,9177,9180,9183,9186,9188,9190],{"paper_id":9160,"author_seq":247,"given_name":9170,"surname":9171,"affiliation":63,"orcid":63},"Diogo C.","Tavares",{"paper_id":9160,"author_seq":232,"given_name":1586,"surname":9173,"affiliation":63,"orcid":63},"Ferreira",{"paper_id":9160,"author_seq":218,"given_name":9175,"surname":9176,"affiliation":63,"orcid":63},"Afonso","Simplício",{"paper_id":9160,"author_seq":203,"given_name":9178,"surname":9179,"affiliation":63,"orcid":63},"Gonçalo","Vinagre",{"paper_id":9160,"author_seq":188,"given_name":9181,"surname":9182,"affiliation":63,"orcid":63},"Ana Carolina","Condez",{"paper_id":9160,"author_seq":172,"given_name":9184,"surname":9185,"affiliation":63,"orcid":63},"Inês","Calvo",{"paper_id":9160,"author_seq":155,"given_name":9184,"surname":9187,"affiliation":63,"orcid":63},"Vieira",{"paper_id":9160,"author_seq":138,"given_name":1061,"surname":9189,"affiliation":63,"orcid":63},"Semedo",{"paper_id":9160,"author_seq":121,"given_name":9191,"surname":9192,"affiliation":63,"orcid":63},"Joao","Magalhaes","We present PHEB, a comprehensive benchmark designed to evaluate Large Language Models (LLMs) on real high school level national exams in European Portuguese. 
The goal is to promote the development of NLP tools and provide a reliable resource for benchmarking multilingual and educational capabilities of LLMs. Covering over 3,500 questions spanning 18 years (2006–2023) across six core subjects, the benchmark compiles high-quality questions from Portuguese National Exams, written and thoroughly curated by professors to ensure topic diversity, linguistic accuracy, and alignment with national curricula. PHEB spans a wide range of subjects, including Mathematics, Portuguese Language and Literature, History, Geography, Biology\u002FGeology, and Philosophy. Questions incorporate both multiple-choice and long-form answers to assess factual knowledge, reasoning capabilities, and language understanding. We comprehensively benchmark state-of-the-art LLMs, shedding light on key challenges such as models’ knowledge, language coverage, answer format biases and robustness to machine translation.",{"paper_id":9195,"title":9196,"year":7,"month":188,"day":63,"doi":9197,"resource_url":9198,"first_page":9199,"last_page":9200,"pdf_url":9201,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9202,"paper_type":860,"authors":9203,"abstract":9210},"lrec2026-main-368","S-GRADES - Studying Generalization of Student Response Assessments in Diverse Evaluative Settings","10.63317\u002F2hybhgodz2en","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-368","4684","4701","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.368.pdf","seuti-etal-2026-grades",[9204,9207],{"paper_id":9195,"author_seq":247,"given_name":9205,"surname":9206,"affiliation":63,"orcid":63},"Tasfia","Seuti",{"paper_id":9195,"author_seq":232,"given_name":9208,"surname":9209,"affiliation":63,"orcid":63},"Sagnik Ray","Choudhury","Evaluating student responses, from long essays to short factual answers, is a key challenge in educational NLP. 
Automated Essay Scoring (AES) focuses on holistic writing qualities such as coherence and argumentation, while Automatic Short Answer Grading (ASAG) emphasizes factual correctness and conceptual understanding. Despite their shared goal, these paradigms have progressed in isolation with fragmented datasets, inconsistent metrics, and separate communities. We introduce S-GRADES (Studying Generalization of Student Response Assessments in Diverse Evaluative Settings), a web-based benchmark that consolidates 14 diverse grading datasets under a unified interface with standardized access and reproducible evaluation protocols. The benchmark is fully open-source and designed for extensibility, enabling continuous integration of new datasets and evaluation settings. To demonstrate the utility of S-GRADES, we evaluate three state-of-the-art large language models across the benchmark using multiple reasoning strategies in prompting. We further examine the effects of exemplar selection and cross-dataset exemplar transfer. Our analyses illustrate how benchmark-driven evaluation reveals reliability and generalization gaps across essay and short-answer grading tasks, highlighting the importance of standardized, cross-paradigm assessment.",{"paper_id":9212,"title":9213,"year":7,"month":188,"day":63,"doi":9214,"resource_url":9215,"first_page":9216,"last_page":9217,"pdf_url":9218,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9219,"paper_type":860,"authors":9220,"abstract":9236},"lrec2026-main-369","Who Benchmarks the Benchmarks? 
A Case Study of LLM Evaluation in Icelandic","10.63317\u002F5nxcp3zw7vdz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-369","4702","4715","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.369.pdf","ingimundarson-etal-2026-who",[9221,9224,9227,9230,9233],{"paper_id":9212,"author_seq":247,"given_name":9222,"surname":9223,"affiliation":63,"orcid":63},"Finnur Ágúst","Ingimundarson",{"paper_id":9212,"author_seq":232,"given_name":9225,"surname":9226,"affiliation":63,"orcid":63},"Steinunn Rut","Friðriksdóttir",{"paper_id":9212,"author_seq":218,"given_name":9228,"surname":9229,"affiliation":63,"orcid":63},"Bjarki","Ármannsson",{"paper_id":9212,"author_seq":203,"given_name":9231,"surname":9232,"affiliation":63,"orcid":63},"Iris","Nowenstein",{"paper_id":9212,"author_seq":188,"given_name":9234,"surname":9235,"affiliation":63,"orcid":63},"Steinþór","Steingrímsson","This paper evaluates current Large Language Model (LLM) benchmarking for Icelandic, identifies problems, and calls for improved evaluation methods in low\u002Fmedium-resource languages in particular. We show that benchmarks that include synthetic or machine-translated data that have not been verified in any way, commonly contain severely flawed test examples that are likely to skew the results and undermine the tests’ validity. We warn against the use of such methods without verification in low\u002Fmedium-resource settings as the translation quality can, at best, only be as good as MT quality for a given language at any given time. Indeed, the results of our quantitative error analysis on existing benchmarks for Icelandic show clear differences between human-authored\u002F-translated benchmarks vs. 
synthetic or machine-translated benchmarks.",{"paper_id":9238,"title":9239,"year":7,"month":188,"day":63,"doi":9240,"resource_url":9241,"first_page":9242,"last_page":9243,"pdf_url":9244,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":9245,"bibkey":9246,"paper_type":860,"authors":9247,"abstract":9253},"lrec2026-main-370","Is This Idea Novel? An Automated Benchmark for Judgment of Research Ideas","10.63317\u002F4c3gy3f7epnj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-370","4716","4727","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.370.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.370_OptionalSupplementaryMaterial.zip","schopf-etal-2026-is",[9248,9251],{"paper_id":9238,"author_seq":247,"given_name":9249,"surname":9250,"affiliation":63,"orcid":63},"Tim","Schopf",{"paper_id":9238,"author_seq":232,"given_name":1732,"surname":9252,"affiliation":63,"orcid":63},"Färber","Judging the novelty of research ideas is crucial for advancing science, enabling the identification of unexplored directions, and ensuring contributions meaningfully extend existing knowledge rather than reiterate minor variations. However, given the exponential growth of scientific literature, manually judging the novelty of research ideas through literature reviews is labor-intensive, subjective, and infeasible at scale. Therefore, recent efforts have proposed automated approaches for research idea novelty judgment. Yet, evaluation of these approaches remains largely inconsistent and is typically based on non-standardized human evaluations, hindering large-scale, comparable evaluations. To address this, we introduce RINoBench, the first comprehensive benchmark for large-scale evaluation of research idea novelty judgments. 
It comprises 1,381 research ideas derived from and judged by human experts as well as nine automated evaluation metrics designed to assess both rubric-based novelty scores and textual justifications of novelty judgments. Using this benchmark, we evaluate several state-of-the-art large language models (LLMs) on their ability to judge the novelty of research ideas. Our findings reveal that while LLM-generated reasoning closely mirrors human rationales, this alignment does not reliably translate into accurate novelty judgments, which diverge significantly from human gold standard judgments—even among leading reasoning-capable models. Data and code available at: https:\u002F\u002Fgithub.com\u002FTimSchopf\u002FRINoBench",{"paper_id":9255,"title":9256,"year":7,"month":188,"day":63,"doi":9257,"resource_url":9258,"first_page":9259,"last_page":9260,"pdf_url":9261,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9262,"paper_type":860,"authors":9263,"abstract":9271},"lrec2026-main-371","Questionnaire Meets LLM: A Benchmark and Empirical Study of Structural Skills for Understanding Questions and Responses","10.63317\u002F438xkvmy2xd9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-371","4728","4746","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.371.pdf","nguyen-etal-2026-questionnaire",[9264,9266,9269,9270],{"paper_id":9255,"author_seq":247,"given_name":9265,"surname":2395,"affiliation":63,"orcid":63},"Duc-Hai",{"paper_id":9255,"author_seq":232,"given_name":9267,"surname":9268,"affiliation":63,"orcid":63},"Vijayakumar","Nanjappan",{"paper_id":9255,"author_seq":218,"given_name":3431,"surname":8987,"affiliation":63,"orcid":63},{"paper_id":9255,"author_seq":203,"given_name":8989,"surname":2395,"affiliation":63,"orcid":63},"Millions of people take surveys every day, from market polls to medical questionnaires and customer feedback forms. 
These datasets capture valuable insights, but the ability of large language models (LLMs) to process questionnaire data, where lists of questions are crossed with hundreds of respondent rows, remains underexplored. Current survey analysis tools (e.g., Qualtrics, SPSS, REDCap) are designed for human operators, leaving practitioners without evidence-based guidance on how to best represent questionnaires for LLM consumption. We address this gap by introducing QASU (Questionnaire Analysis and Structural Understanding), a benchmark that probes six structural skills, including answer lookup, respondent count, and multi-hop inference, across six serialization formats and multiple prompt strategies. Experiments on five LLMs (GPT-5-mini, Gemini-2.5-Flash, Qwen3-32B, Llama3-70B, Amazon Nova Lite) show that format choice significantly impacts performance, with up to 9 percentage points improvement over baseline formats, and reveal substantial gaps (10 to 30 percentage points) between proprietary and open-weight models. Self-augmented prompting yields model-dependent benefits, proving effective for proprietary models but unreliable for open-weight alternatives. 
By systematically isolating format and prompting effects, our open-source benchmark offers practical guidance for advancing both research and real-world practice in LLM-based questionnaire analysis.",{"paper_id":9273,"title":9274,"year":7,"month":188,"day":63,"doi":9275,"resource_url":9276,"first_page":9277,"last_page":9278,"pdf_url":9279,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9280,"paper_type":860,"authors":9281,"abstract":9291},"lrec2026-main-372","Assessing the Effectiveness of LLMs in Delivering Cognitive Behavioral Therapy","10.63317\u002F5davpvjizv9x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-372","4747","4756","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.372.pdf","bedi-etal-2026-assessing",[9282,9285,9286,9289],{"paper_id":9273,"author_seq":247,"given_name":9283,"surname":9284,"affiliation":63,"orcid":63},"Navdeep Singh","Bedi",{"paper_id":9273,"author_seq":232,"given_name":7181,"surname":7182,"affiliation":63,"orcid":63},{"paper_id":9273,"author_seq":218,"given_name":9287,"surname":9288,"affiliation":63,"orcid":63},"Noriko","Kando",{"paper_id":9273,"author_seq":203,"given_name":5741,"surname":9290,"affiliation":63,"orcid":63},"Crestani","As mental health issues continue to rise globally, there is an increasing demand for accessible and scalable therapeutic solutions. Many individuals currently seek support from Large Language Models (LLMs), even though these models have not been validated for use in counseling services. In this paper, we evaluate LLMs’ ability to emulate professional therapists practicing Cognitive Behavioral Therapy (CBT). Using anonymized, transcribed role-play sessions between licensed therapists and clients, we compare two approaches: (1) a generation-only method and (2) a Retrieval-Augmented Generation (RAG) approach using CBT guidelines. 
We evaluate both proprietary and open-source models for linguistic quality, semantic coherence, and therapeutic fidelity using standard natural language generation (NLG) metrics, natural language inference (NLI), and automated scoring for skills assessment. Our results indicate that while LLMs can generate CBT-like dialogues, they are limited in their ability to convey empathy and maintain consistency.",{"paper_id":9293,"title":9294,"year":7,"month":188,"day":63,"doi":9295,"resource_url":9296,"first_page":9297,"last_page":9298,"pdf_url":9299,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9300,"paper_type":860,"authors":9301,"abstract":9310},"lrec2026-main-373","Transcription Accuracy in the Icelandic Gigaword Corpus: Evaluating Automatic and Manual Annotation","10.63317\u002F4f2rpzig5h8p","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-373","4757","4764","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.373.pdf","mechler-etal-2026-transcription",[9302,9304,9307],{"paper_id":9293,"author_seq":247,"given_name":8211,"surname":9303,"affiliation":63,"orcid":63},"Mechler",{"paper_id":9293,"author_seq":232,"given_name":9305,"surname":9306,"affiliation":63,"orcid":63},"Lilja Björk","Stefánsdóttir",{"paper_id":9293,"author_seq":218,"given_name":9308,"surname":9309,"affiliation":63,"orcid":63},"Anton Karl","Ingason","This paper aims to compare automatic and manually corrected annotation data in the Icelandic Gigaword Corpus. We focus on the variable use of Stylistic Fronting (SF) in Icelandic, an optional movement of words or phrases, which indicates a more formal style. Examining SF rates across time, we find that manual coding results in slightly lower SF rates than automatic coding. 
This difference can be explained by the different sources used in the coding process: For automatic coding, written transcripts compiled by parliament employees are used, and for manual correction, coding relies on audio files of the parliament speeches. Importantly, both types of coding are well suited to trace changing patterns of SF over a span of 16 years, suggesting that the automatic feature extraction reliably reflects the speeches that have been transcribed.",{"paper_id":9312,"title":9313,"year":7,"month":188,"day":63,"doi":9314,"resource_url":9315,"first_page":9316,"last_page":9317,"pdf_url":9318,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9319,"paper_type":860,"authors":9320,"abstract":9335},"lrec2026-main-374","Benchmark Data Contamination in Underrepresented Languages: A Comprehensive Analysis Using Brazilian Data","10.63317\u002F39wbjvajnh7t","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-374","4765","4777","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.374.pdf","vilar-etal-2026-benchmark",[9321,9324,9327,9330,9332],{"paper_id":9312,"author_seq":247,"given_name":9322,"surname":9323,"affiliation":63,"orcid":63},"Iriedson Souto Maior de Moraes","Vilar",{"paper_id":9312,"author_seq":232,"given_name":9325,"surname":9326,"affiliation":63,"orcid":63},"David Candeia","Maia",{"paper_id":9312,"author_seq":218,"given_name":9328,"surname":9329,"affiliation":63,"orcid":63},"João","Brunet",{"paper_id":9312,"author_seq":203,"given_name":5741,"surname":9331,"affiliation":63,"orcid":63},"Morais",{"paper_id":9312,"author_seq":188,"given_name":9333,"surname":9334,"affiliation":63,"orcid":63},"Leandro Balby","Marinho","Large Language Models (LLMs) are typically evaluated using standardized benchmarks to enable consistent performance measurement and model comparison. 
However, the reliability of these benchmarks can be undermined by data contamination, which occurs when evaluation items are inadvertently included in training corpora. While this issue has been investigated primarily in high-resource languages such as English and Chinese, its impact on underrepresented languages — such as Brazilian Portuguese — remains understudied. In this paper, we present one of the first systematic investigations of benchmark data contamination (BDC) in an underrepresented language setting, using Brazilian Portuguese as a case study. Using validated methodologies from the literature, we evaluate specialized and multilingual models across four benchmarks: BLUEX, ENEM Challenge, OAB Exams, and HealthQA-BR. Our approach applies TS-Guessing to detect contamination via memorized knowledge, alongside a 50-character n-gram similarity strategy to identify benchmark items leaked into training data. Our results provide consistent evidence of contamination, revealing that models with stronger memorization and retrieval abilities tend to achieve artificially inflated benchmark scores. 
Our contributions include: (i) classifying models according to their contamination risk, (ii) identifying the benchmarks most affected by data leakage, and (iii) reporting contaminated training corpora.",{"paper_id":9337,"title":9338,"year":7,"month":188,"day":63,"doi":9339,"resource_url":9340,"first_page":9341,"last_page":9342,"pdf_url":9343,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":9344,"bibkey":9345,"paper_type":860,"authors":9346,"abstract":9362},"lrec2026-main-375","TTSVowelViz: A Tool for Visualising Text-to-Speech Model Training via Vowel Spaces","10.63317\u002F57peripccxqx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-375","4778","4786","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.375.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.375_OptionalSupplementaryMaterial.zip","udawatta-etal-2026-ttsvowelviz",[9347,9350,9352,9355,9357,9359],{"paper_id":9337,"author_seq":247,"given_name":9348,"surname":9349,"affiliation":63,"orcid":63},"Pasindu","Udawatta",{"paper_id":9337,"author_seq":232,"given_name":9351,"surname":4091,"affiliation":63,"orcid":63},"Jesin",{"paper_id":9337,"author_seq":218,"given_name":9353,"surname":9354,"affiliation":63,"orcid":63},"Balamurali B","T",{"paper_id":9337,"author_seq":203,"given_name":9356,"surname":5981,"affiliation":63,"orcid":63},"Catherine Inez",{"paper_id":9337,"author_seq":188,"given_name":9358,"surname":7301,"affiliation":63,"orcid":63},"Ake",{"paper_id":9337,"author_seq":172,"given_name":9360,"surname":9361,"affiliation":63,"orcid":63},"Binu Nisal","Abeysinghe","In text-to-speech (TTS) model training, the saturation of the loss curve indicates how well a model learns the characteristics of the training dataset. But it does not reveal the linguistic properties learned by the model. Existing TTS approaches miss the potential to incorporate linguistic insights into model training. 
We introduce TTSVowelViz, a novel tool that visualises static and dynamic vowel spaces during model training, bridging linguistic knowledge and TTS model development. It helps identify which vowel sounds are accurately learned and how the vowel spaces evolve during training. To assess TTSVowelViz, we fine-tuned a TTS model from General American English to New Zealand English and conducted a perception test. Our results show that the formants of specific vowels in the vowel spaces generated by TTSVowelViz align with human perception, effectively visualising the perceived accent shift. This work highlights vowel space visualisation as a valuable interpretability tool for TTS training.",{"paper_id":9364,"title":9365,"year":7,"month":188,"day":63,"doi":9366,"resource_url":9367,"first_page":9368,"last_page":9369,"pdf_url":9370,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9371,"paper_type":860,"authors":9372,"abstract":9383},"lrec2026-main-376","A Sociophonetic Analysis of Racial Bias in Commercial ASR Systems Using the Pacific Northwest English Corpus","10.63317\u002F4qkgjt7n9ehb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-376","4787","4796","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.376.pdf","scott-etal-2026-sociophonetic",[9373,9374,9377,9380],{"paper_id":9364,"author_seq":247,"given_name":1732,"surname":1395,"affiliation":63,"orcid":63},{"paper_id":9364,"author_seq":232,"given_name":9375,"surname":9376,"affiliation":63,"orcid":63},"Siyu","Liang",{"paper_id":9364,"author_seq":218,"given_name":9378,"surname":9379,"affiliation":63,"orcid":63},"Alicia","Wassink",{"paper_id":9364,"author_seq":203,"given_name":9381,"surname":9382,"affiliation":63,"orcid":63},"Gina-Anne","Levow","This paper presents a systematic evaluation of racial bias in four major commercial automatic speech recognition (ASR) systems using the Pacific Northwest English (PNWE) corpus. 
We analyze transcription accuracy across speakers from four ethnic backgrounds (African American, Caucasian American, ChicanX, and Yakama) and examine how sociophonetic variation contributes to differential system performance. We introduce a heuristically-determined Phonetic Error Rate (PER) metric that links recognition errors to specific linguistically motivated variables derived from sociophonetic annotation. Our analysis of eleven sociophonetic features reveals that vowel quality variation, particularly resistance to the low-back merger and pre-nasal merger patterns, is systematically associated with differential error rates across ethnic groups, with the most pronounced effects for African American speakers across all evaluated systems. These findings demonstrate that acoustic modeling of dialectal phonetic variation, rather than lexical or syntactic factors, remains a primary source of bias in commercial ASR systems. The study establishes the PNWE corpus as a valuable resource for bias evaluation in speech technologies and provides actionable guidance for improving ASR performance through targeted representation of sociophonetic diversity in training data.",{"paper_id":9385,"title":9386,"year":7,"month":188,"day":63,"doi":9387,"resource_url":9388,"first_page":9389,"last_page":9390,"pdf_url":9391,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9392,"paper_type":860,"authors":9393,"abstract":9403},"lrec2026-main-377","ParliaBench: An Evaluation and Benchmarking Framework for LLM-Generated Parliamentary 
Speech","10.63317\u002F447dqkef7ks7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-377","4797","4818","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.377.pdf","koniaris-etal-2026-parliabench",[9394,9397,9400],{"paper_id":9385,"author_seq":247,"given_name":9395,"surname":9396,"affiliation":63,"orcid":63},"Marios","Koniaris",{"paper_id":9385,"author_seq":232,"given_name":9398,"surname":9399,"affiliation":63,"orcid":63},"Argyro","Tsipi",{"paper_id":9385,"author_seq":218,"given_name":9401,"surname":9402,"affiliation":63,"orcid":63},"Panayiotis","Tsanakas","Parliamentary speech generation presents specific challenges for large language models beyond standard text generation tasks. Unlike general text generation, parliamentary speeches require not only linguistic quality but also political authenticity and ideological consistency. Current language models lack specialized training for parliamentary contexts, and existing evaluation methods focus on standard NLP metrics rather than political authenticity. To address this, we present ParliaBench, a benchmark for parliamentary speech generation. We constructed a dataset of 448k speeches from UK Parliament to enable systematic model training. We introduce an evaluation framework combining computational metrics with LLM-as-a-judge assessments for measuring generation quality across three dimensions: linguistic quality, semantic coherence, and political authenticity. We propose two novel embedding-based metrics, Political Spectrum Alignment and Party Alignment, to quantify ideological positioning. We fine-tuned five large language models (LLMs), generated 28k speeches, and evaluated them using our framework, comparing baseline and fine-tuned models. 
Results show that fine-tuning produces statistically significant improvements across the majority of metrics and our novel metrics demonstrate strong discriminative power for political dimensions otherwise absent from conventional evaluation, while domain fine-tuning reveals a measurable trade-off between political authenticity and lexical diversity.",{"paper_id":9405,"title":9406,"year":7,"month":188,"day":63,"doi":9407,"resource_url":9408,"first_page":9409,"last_page":9410,"pdf_url":9411,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9412,"paper_type":860,"authors":9413,"abstract":9486},"lrec2026-main-378","PARSEME 2.0 Multilingual Corpus of Multiword Expressions","10.63317\u002F2iy5qf38yhay","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-378","4819","4834","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.378.pdf","savary-etal-2026-parseme",[9414,9417,9419,9422,9424,9426,9427,9430,9433,9436,9438,9439,9440,9443,9446,9449,9451,9454,9457,9460,9463,9464,9467,9470,9473,9476,9477,9480,9483],{"paper_id":9405,"author_seq":247,"given_name":9415,"surname":9416,"affiliation":63,"orcid":63},"Agata","Savary",{"paper_id":9405,"author_seq":232,"given_name":5363,"surname":9418,"affiliation":63,"orcid":63},"Scholivet",{"paper_id":9405,"author_seq":218,"given_name":9420,"surname":9421,"affiliation":63,"orcid":63},"Carlos","Ramisch",{"paper_id":9405,"author_seq":203,"given_name":4258,"surname":9423,"affiliation":63,"orcid":63},"Nakamura",{"paper_id":9405,"author_seq":188,"given_name":7522,"surname":9425,"affiliation":63,"orcid":63},"Bilinski",{"paper_id":9405,"author_seq":172,"given_name":2548,"surname":2549,"affiliation":63,"orcid":63},{"paper_id":9405,"author_seq":155,"given_name":9428,"surname":9429,"affiliation":63,"orcid":63},"Voula","Giouli",{"paper_id":9405,"author_seq":138,"given_name":9431,"surname":9432,"affiliation":63,"orcid":63},"Stella","Markantonatou",{"paper_id":9405,"author_seq":121
,"given_name":9434,"surname":9435,"affiliation":63,"orcid":63},"Vasile","Pais",{"paper_id":9405,"author_seq":104,"given_name":2960,"surname":9437,"affiliation":63,"orcid":63},"Mitrofan",{"paper_id":9405,"author_seq":87,"given_name":1012,"surname":3734,"affiliation":63,"orcid":63},{"paper_id":9405,"author_seq":73,"given_name":3696,"surname":1150,"affiliation":63,"orcid":63},{"paper_id":9405,"author_seq":55,"given_name":9441,"surname":9442,"affiliation":63,"orcid":63},"Verginica Barbu","Mititelu",{"paper_id":9405,"author_seq":38,"given_name":9444,"surname":9445,"affiliation":63,"orcid":63},"Jaka","Čibej",{"paper_id":9405,"author_seq":17,"given_name":9447,"surname":9448,"affiliation":63,"orcid":63},"Roberto Díaz","Hernández",{"paper_id":9405,"author_seq":2971,"given_name":2981,"surname":9450,"affiliation":63,"orcid":63},"Fendel",{"paper_id":9405,"author_seq":2974,"given_name":9452,"surname":9453,"affiliation":63,"orcid":63},"Polona","Gantar",{"paper_id":9405,"author_seq":857,"given_name":9455,"surname":9456,"affiliation":63,"orcid":63},"Olha","Kanishcheva",{"paper_id":9405,"author_seq":877,"given_name":9458,"surname":9459,"affiliation":63,"orcid":63},"Cvetana","Krstev",{"paper_id":9405,"author_seq":2984,"given_name":9461,"surname":9462,"affiliation":63,"orcid":63},"Chaya","Liebeskind",{"paper_id":9405,"author_seq":2988,"given_name":6855,"surname":6856,"affiliation":63,"orcid":63},{"paper_id":9405,"author_seq":2992,"given_name":9465,"surname":9466,"affiliation":63,"orcid":63},"Aleksandra M.","Marković",{"paper_id":9405,"author_seq":2996,"given_name":9468,"surname":9469,"affiliation":63,"orcid":63},"Gunta","Nešpore-Bērzkalne",{"paper_id":9405,"author_seq":3000,"given_name":9471,"surname":9472,"affiliation":63,"orcid":63},"Adriana 
S.","Pagano",{"paper_id":9405,"author_seq":3004,"given_name":9474,"surname":9475,"affiliation":63,"orcid":63},"Mehrnoush","Shamsfard",{"paper_id":9405,"author_seq":3008,"given_name":3566,"surname":3567,"affiliation":63,"orcid":63},{"paper_id":9405,"author_seq":3478,"given_name":9478,"surname":9479,"affiliation":63,"orcid":63},"Vahide","Tajalli",{"paper_id":9405,"author_seq":3482,"given_name":9481,"surname":9482,"affiliation":63,"orcid":63},"Carole","Tiberius",{"paper_id":9405,"author_seq":3486,"given_name":9484,"surname":9485,"affiliation":63,"orcid":63},"Aakanksha","Padhye","We present edition 2.0 of the PARSEME multilingual corpus annotated for multiword expressions (MWEs), resulting from efforts of the PARSEME community towards universality-driven modeling of idiomaticity. With respect to previous editions, we extend the annotation scope to all syntactic MWE categories: verbal, nominal, adjectival, adverbial and functional. We cover 17 languages, of which 7 are new. The annotation process is based on cross-lingually unified guidelines, phrased as decision diagrams over linguistic tests, and a typology of 18 MWE categories. The corpus contains almost 5 million tokens, over 250,000 sentences and 140,000 MWE annotations. The applicability of the corpus is tested in baseline experiments with a prompt-based MWE identification system. 
Results show that generic large language models do not encode sufficient knowledge to solve the MWE identification task.",{"paper_id":9488,"title":9489,"year":7,"month":188,"day":63,"doi":9490,"resource_url":9491,"first_page":9492,"last_page":9493,"pdf_url":9494,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9495,"paper_type":860,"authors":9496,"abstract":9500},"lrec2026-main-379","Introducing PerMet 1.0: A Metaphor-Annotated Corpus for Persian","10.63317\u002F26xmdq7f998f","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-379","4835","4845","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.379.pdf","miri-2026-introducing",[9497],{"paper_id":9488,"author_seq":247,"given_name":9498,"surname":9499,"affiliation":63,"orcid":63},"Mohammad Saeid","Miri","Metaphor plays a central role in human language and thought, and corpus-linguistic approaches enable its systematic investigation. Such research requires large, representative collections of metaphor-annotated linguistic data from diverse contexts. Despite the increasing availability of metaphor corpora in various languages, Persian remains underrepresented, with few publicly available resources and no large-scale register-diverse metaphor corpus. This paper introduces PerMet 1.0, a metaphor-annotated corpus for Persian. The corpus consists of approximately 120,000 tokens (about 99,000 lexical units) drawn from five registers: academic, news, fiction, social media, and spoken discourse. Five independent annotators labeled the corpus using Metaphor Identification Procedure Vrije Universiteit (MIPVU), with adaptations for Persian. Inter-annotator agreement showed a high level of consistency (κ = 0.952), confirming the reliability of the annotation. Preliminary analysis shows that 13.1% of the lexical units are related to metaphor, with the academic register showing the highest proportion, followed by news, social media, spoken, and fiction. 
PerMet 1.0 offers a foundational resource for research on metaphor in Persian, cross-linguistic comparative studies, and the development and fine-tuning of machine learning or large language models for automatic metaphor identification.",{"paper_id":9502,"title":9503,"year":7,"month":188,"day":63,"doi":9504,"resource_url":9505,"first_page":9506,"last_page":9507,"pdf_url":9508,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9509,"paper_type":860,"authors":9510,"abstract":9515},"lrec2026-main-380","Multi-SimLex for Dutch: Benchmarking Embedding- and Prompt-Based Model Performance on Semantic Similarity","10.63317\u002F2q9dcx9cvnu9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-380","4846","4860","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.380.pdf","brans-etal-2026-multi",[9511,9514],{"paper_id":9502,"author_seq":247,"given_name":9512,"surname":9513,"affiliation":63,"orcid":63},"Lizzy","Brans",{"paper_id":9502,"author_seq":232,"given_name":981,"surname":982,"affiliation":63,"orcid":63},"We introduce Dutch Multi-SimLex, a 1,888–pair extension of the Multi-SimLex benchmark for evaluating lexical semantic similarity in Dutch. The dataset was rated by 100 native speakers on a 0–6 scale and shows high reliability (overall ICC(2,k)=0.82) as well as strong alignment with English (ρ=0.73). Using this resource, we evaluate eighteen models across four architectural families: static embeddings, encoder-only transformers, encoder–decoders, and decoder-only LLMs. We evaluate models using two complementary approaches: embedding-based cosine similarity and prompted similarity judgments in Dutch. In embedding-based evaluation, FastText (ρ=0.485) and the monolingual Dutch encoder BERTje (ρ=0.468) achieve the strongest alignment with human ratings, while multilingual encoders such as mBERT (ρ=0.208) and XLM-R (ρ=0.186) perform weaker. 
Prompt-based evaluation yields substantially higher correlations, with GPT-4 (ρ=0.761) performing best, followed by DeepSeek-V3 (ρ=0.753) and Gemini 1.5 Pro (ρ=0.722). Together, the results show that model performance depends strongly on how meaning is tested. Dutch Multi-SimLex provides a reliable foundation for evaluating meaning across architectures and advancing Dutch semantic evaluation.",{"paper_id":9517,"title":9518,"year":7,"month":188,"day":63,"doi":9519,"resource_url":9520,"first_page":9521,"last_page":9522,"pdf_url":9523,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":9524,"bibkey":9525,"paper_type":860,"authors":9526,"abstract":9534},"lrec2026-main-381","MultiCoS: A Multilingual Dataset of Connective Semantics with Context–Sentence Compatibility","10.63317\u002F28ccty7yu9hn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-381","4861","4871","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.381.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.381_OptionalSupplementaryMaterial.zip","mucha-etal-2026-multicos",[9527,9529,9531],{"paper_id":9517,"author_seq":247,"given_name":8037,"surname":9528,"affiliation":63,"orcid":63},"Mucha",{"paper_id":9517,"author_seq":232,"given_name":9530,"surname":4200,"affiliation":63,"orcid":63},"Ciyang",{"paper_id":9517,"author_seq":218,"given_name":9532,"surname":9533,"affiliation":63,"orcid":63},"Wataru","Uegaki","We present a multilingual dataset of connective semantics. The dataset contains the semantic annotations of clausal connectives (e.g. and and or in English) from 24 languages, based on our original native-speaker elicitation data. Unlike existing lexica on connectives, the dataset includes systematic evidence for the annotations in the form of context-sentence compatibility judgments, including negative evidence. 
The paper describes the methodology of data collection and the format of the dataset. We also discuss its potential use cases for the validation of cross-linguistic generalizations, examinations of their potential counterexamples, and for benchmarking felicity judgments by NLU systems.",{"paper_id":9536,"title":9537,"year":7,"month":188,"day":63,"doi":9538,"resource_url":9539,"first_page":9540,"last_page":9541,"pdf_url":9542,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9543,"paper_type":860,"authors":9544,"abstract":9553},"lrec2026-main-382","Adverbs Revisited: Enhancing WordNet Coverage of Adverbs with a Supersense Taxonomy","10.63317\u002F3usz5zfjpmqu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-382","4872","4880","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.382.pdf","lee-etal-2026-adverbs",[9545,9547,9550],{"paper_id":9536,"author_seq":247,"given_name":9546,"surname":1359,"affiliation":63,"orcid":63},"Jooyoung",{"paper_id":9536,"author_seq":232,"given_name":9548,"surname":9549,"affiliation":63,"orcid":63},"Jader Martins Camboim de","Sá",{"paper_id":9536,"author_seq":218,"given_name":9551,"surname":9552,"affiliation":63,"orcid":63},"Cedric","Pruski","WordNet offers rich supersense hierarchies for nouns and verbs, yet adverbs remain underdeveloped, lacking a systematic semantic classification. We introduce a linguistically grounded supersense typology for adverbs, empirically validated through annotation, that captures major semantic domains including manner, temporal, frequency, degree, domain, speaker-oriented, and subject-oriented functions. Results from a pilot annotation study demonstrate that these categories provide broad coverage of adverbs in natural text and can be reliably assigned by human annotators. 
Incorporating this typology extends WordNet’s coverage, aligns it more closely with linguistic theory, and facilitates downstream NLP applications such as word sense disambiguation, event extraction, sentiment analysis, and discourse modeling. We present the proposed supersense categories, annotation outcomes, and directions for future work.",{"paper_id":9555,"title":9556,"year":7,"month":188,"day":63,"doi":9557,"resource_url":9558,"first_page":9559,"last_page":9560,"pdf_url":9561,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9562,"paper_type":860,"authors":9563,"abstract":9573},"lrec2026-main-383","KinyCOMET: Automatic Evaluation of Machine Translation Systems for Kinyarwanda-English","10.63317\u002F3nawgmkiq3mu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-383","4881","4888","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.383.pdf","mazimpaka-etal-2026-kinycomet",[9564,9567,9568,9571],{"paper_id":9555,"author_seq":247,"given_name":9565,"surname":9566,"affiliation":63,"orcid":63},"Prince Chris","Mazimpaka",{"paper_id":9555,"author_seq":232,"given_name":1380,"surname":4652,"affiliation":63,"orcid":63},{"paper_id":9555,"author_seq":218,"given_name":9569,"surname":9570,"affiliation":63,"orcid":63},"Samuel","Rutunda",{"paper_id":9555,"author_seq":203,"given_name":5732,"surname":9572,"affiliation":63,"orcid":63},"España-Bonet","This paper presents KinyCOMET, a new automatic evaluation metric for Kinyarwanda–English machine translation (MT). Current MT evaluation in Rwanda relies mainly on BLEU and chrF, which have been shown to correlate poorly with human judgments. To address this gap, we created a Direct Assessment (DA) dataset for Kinyarwanda-English translations and used it to fine-tune COMET models for this language pair. 
We evaluate two variants: KinyCOMET XLM-RoBERTa, trained from a multilingual encoder without Kinyarwanda data, and KinyCOMET Unbabel, a fine-tuned version of the Unbabel COMET model. Both models achieve strong correlations with human evaluations, with KinyCOMET Unbabel outperforming all baselines, including AfriCOMET, chrF, and BLEU. Our results show that fine-tuning pre-trained multilingual models can yield high-quality evaluators even for low-resource languages that the base model was not trained on. We release both the models and the annotated dataset publicly to foster further research on African language evaluation.",{"paper_id":9575,"title":9576,"year":7,"month":188,"day":63,"doi":9577,"resource_url":9578,"first_page":9579,"last_page":9580,"pdf_url":9581,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9582,"paper_type":860,"authors":9583,"abstract":9591},"lrec2026-main-384","Multiway Parallel Corpus in Forced Migration Domain for Multilingual Machine Translation","10.63317\u002F3gxsf4vr3pjb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-384","4889","4901","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.384.pdf","azadi-etal-2026-multiway",[9584,9586,9588],{"paper_id":9575,"author_seq":247,"given_name":8059,"surname":9585,"affiliation":63,"orcid":63},"Azadi",{"paper_id":9575,"author_seq":232,"given_name":9569,"surname":9587,"affiliation":63,"orcid":63},"Larkin",{"paper_id":9575,"author_seq":218,"given_name":9589,"surname":9590,"affiliation":63,"orcid":63},"Chi-kiu","Lo","High-quality domain-specific parallel corpora play a significant role in improving the performance of machine translation (MT) and multilingual natural language processing (NLP) systems in a target domain. However, most existing multilingual parallel corpora focus on general-purpose data, and a majority of highly specialized domains such as forced migration are suffering from lack of multilingual data. 
In this work, we present a new high-quality 4-way parallel corpus in the forced migration domain. The corpus consists of human-translated journal articles from Forced Migration Review in English, French, Spanish, and Arabic. Our corpus contains data aligned at both document and sentence level in four languages and provides a clean and reliable 4-way parallel resource for multilingual research in forced migration. Using this dataset, we benchmark several open-weight large language models (LLMs), an open-weight multilingual MT system, online closed MT systems, and a closed LLM across 12 translation directions. We further leverage our corpus to improve the MT quality of a top-performing multilingual foundation model with two common domain adaptation approaches, fine-tuning and few-shot prompting. Our results demonstrate the effectiveness of our corpus in improving the translation performance of current models in the forced migration domain.",{"paper_id":9593,"title":9594,"year":7,"month":188,"day":63,"doi":9595,"resource_url":9596,"first_page":9597,"last_page":9598,"pdf_url":9599,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":9600,"bibkey":9601,"paper_type":860,"authors":9602,"abstract":9608},"lrec2026-main-385","Context-8: A Data Set for Evaluating Context Sensitivity in Machine Translation","10.63317\u002F3cawdf257c7e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-385","4902","4920","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.385.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.385_OptionalSupplementaryMaterial.zip","wang-etal-2026-context",[9603,9605],{"paper_id":9593,"author_seq":247,"given_name":9604,"surname":3676,"affiliation":63,"orcid":63},"Dongyue",{"paper_id":9593,"author_seq":232,"given_name":9606,"surname":9607,"affiliation":63,"orcid":63},"Kyo","Kageura","Context plays a crucial role in translation, enhancing both 
accuracy and fluency. With the advancement of machine translation (MT), the concept of context is now considered across an increasingly broader range of phenomena. Despite its importance, however, systematic definitions of context provided by communication studies and translation studies remain fragmented, and the concept of context remains elusive in MT research. To the best of our knowledge, no dataset currently exists that comprehensively evaluates MT’s sensitivity to context. In this study, we propose a systematic taxonomy of context and introduce Context-8, an evaluation dataset designed to assess context sensitivity in MT for English-to-Japanese translation. The initial release includes 130 groups comprising 533 English-to-Japanese translation examples, each requiring different context categories to produce accurate and fluent translations. The data are taken from both hand-crafted and online materials. We release Context-8 to support the evaluation and benchmarking of MT systems with respect to context sensitivity.",{"paper_id":9610,"title":9611,"year":7,"month":188,"day":63,"doi":9612,"resource_url":9613,"first_page":9614,"last_page":9615,"pdf_url":9616,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9617,"paper_type":860,"authors":9618,"abstract":9641},"lrec2026-main-386","AssamLegalTrans: A Parallel Corpus, Benchmark and Analysis for English-Assamese Machine Translation of Legal Judgments","10.63317\u002F5q53i6shk3nm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-386","4921","4930","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.386.pdf","singh-etal-2026-assamlegaltrans",[9619,9621,9624,9625,9628,9631,9633,9634,9637,9638],{"paper_id":9610,"author_seq":247,"given_name":9620,"surname":2253,"affiliation":63,"orcid":63},"Telem 
Joyson",{"paper_id":9610,"author_seq":232,"given_name":9622,"surname":9623,"affiliation":63,"orcid":63},"Hemanta","Baruah",{"paper_id":9610,"author_seq":218,"given_name":2252,"surname":2253,"affiliation":63,"orcid":63},{"paper_id":9610,"author_seq":203,"given_name":9626,"surname":9627,"affiliation":63,"orcid":63},"Anindita","Talukdar",{"paper_id":9610,"author_seq":188,"given_name":9629,"surname":9630,"affiliation":63,"orcid":63},"Nasrin","Shahnaz",{"paper_id":9610,"author_seq":172,"given_name":9632,"surname":2253,"affiliation":63,"orcid":63},"Okram Jimmy",{"paper_id":9610,"author_seq":155,"given_name":4467,"surname":4468,"affiliation":63,"orcid":63},{"paper_id":9610,"author_seq":138,"given_name":9635,"surname":9636,"affiliation":63,"orcid":63},"Pallav Kumar","Dutta",{"paper_id":9610,"author_seq":121,"given_name":2255,"surname":2256,"affiliation":63,"orcid":63},{"paper_id":9610,"author_seq":104,"given_name":9639,"surname":9640,"affiliation":63,"orcid":63},"Pranab","Duara","In India, the official language for writing judgments in higher courts is English, which creates a language barrier for citizens not proficient in English. Machine Translation (MT) provides a scalable solution, but its progress for low-resource languages like Assamese is significantly limited due to the lack of legal domain data. To address this gap, we introduce the first-of-its-kind English-Assamese parallel corpus for the translation of Indian court judgments. This dataset consists of over 55,000 manually translated and validated sentence pairs from over 500 judgments of the Gauhati High Court and the Supreme Court of India. Using this dataset, we perform a comprehensive evaluation of state-of-the-art multilingual models, including NLLB-200 and Sarvam-Translate, in both zero-shot and fine-tuned settings, comparing their performance against commercial systems. Our experiments show that fine-tuning on our legal-domain dataset significantly improves the translation quality. 
We also conduct a thorough error analysis that points out important issues in legal translation. These include precisely translating legal terms, properly transliterating named entities, expanding abbreviations, and transforming sentence structures, such as changing passive voice to active voice, when translating from English to Assamese. By creating a publicly available dataset and examining the specific challenges, this work offers a reproducible foundation and a clear way to develop more accurate and reliable legal machine translation systems. This will help improve access to justice for Assamese speakers.",{"paper_id":9643,"title":9644,"year":7,"month":188,"day":63,"doi":9645,"resource_url":9646,"first_page":9647,"last_page":9648,"pdf_url":9649,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9650,"paper_type":860,"authors":9651,"abstract":9659},"lrec2026-main-387","Coordinate Structure Extraction for Patent Claims Using Multilingual LLMs","10.63317\u002F36wbpiacwyxf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-387","4931","4941","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.387.pdf","ishimaru-etal-2026-coordinate",[9652,9655,9656],{"paper_id":9643,"author_seq":247,"given_name":9653,"surname":9654,"affiliation":63,"orcid":63},"Tsukasa","Ishimaru",{"paper_id":9643,"author_seq":232,"given_name":4723,"surname":4724,"affiliation":63,"orcid":63},{"paper_id":9643,"author_seq":218,"given_name":9657,"surname":9658,"affiliation":63,"orcid":63},"Masaaki","Nagata","This study proposes a simple, one-stage approach to coordinate structure extraction using multilingual Large Language Models (LLMs) with Translation between Augmented Natural Languages (TANL) to develop an error detection system for coordinate structure translation. 
Unlike conventional multi-component methods such as CoRec, our method employs an end-to-end Transformer decoder (LLM) trained via Continual Pre-Training (CPT) and\u002For Supervised Fine-Tuning (SFT) on English and Japanese datasets obtained from parsed treebanks that include coordinate structures. We evaluated the proposed models on 100 English and Japanese patent claims manually annotated with coordinate structure tags. The proposed method using open-weight models such as Llama-3.2-8B or gemma-3-4b-it significantly outperformed GPT-5 and CoRec by approximately 0.02-0.03 in F1 score for the English task. The proposed method using open-weight models such as llama-3-youko-8b and Llama-3-swallow-8B-0.1v significantly outperformed GPT-5 by approximately 0.02-0.05 in F1 score for the Japanese task. In addition, models using both English and Japanese training data significantly outperform those using monolingual training data only.",{"paper_id":9661,"title":9662,"year":7,"month":188,"day":63,"doi":9663,"resource_url":9664,"first_page":9665,"last_page":9666,"pdf_url":9667,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9668,"paper_type":860,"authors":9669,"abstract":9681},"lrec2026-main-388","Human Label Variation in Implicit Discourse Relation 
Recognition","10.63317\u002F3nah4z4ha8r4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-388","4942","4954","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.388.pdf","yung-etal-2026-human",[9670,9673,9675,9678,9680],{"paper_id":9661,"author_seq":247,"given_name":9671,"surname":9672,"affiliation":63,"orcid":63},"Frances","Yung",{"paper_id":9661,"author_seq":232,"given_name":3652,"surname":9674,"affiliation":63,"orcid":63},"Ignatev",{"paper_id":9661,"author_seq":218,"given_name":9676,"surname":9677,"affiliation":63,"orcid":63},"Merel","Scholman",{"paper_id":9661,"author_seq":203,"given_name":2545,"surname":9679,"affiliation":63,"orcid":63},"Demberg",{"paper_id":9661,"author_seq":188,"given_name":4386,"surname":4387,"affiliation":63,"orcid":63},"There is growing recognition that many NLP tasks lack a single ground truth, as human judgments reflect diverse perspectives. To capture this variation, models have been developed to predict full annotation distributions rather than majority labels, while perspectivist models aim to reproduce the interpretations of individual annotators. In this work, we compare these approaches on Implicit Discourse Relation Recognition (IDRR), a highly ambiguous task where disagreement often arises from cognitive complexity rather than ideological bias. Our experiments show that existing annotator-specific models perform poorly in IDRR unless ambiguity is reduced, whereas models trained on label distributions yield more stable predictions. 
Further analysis indicates that frequent cognitively demanding cases drive inconsistency in human interpretation, posing challenges for perspectivist modeling in IDRR.",{"paper_id":9683,"title":9684,"year":7,"month":188,"day":63,"doi":9685,"resource_url":9686,"first_page":9687,"last_page":9688,"pdf_url":9689,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9690,"paper_type":860,"authors":9691,"abstract":9697},"lrec2026-main-389","Conversational Implicatures through the Lens of LLMs","10.63317\u002F5dqc2g73d3do","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-389","4955","4966","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.389.pdf","lombardi-etal-2026-conversational",[9692,9695],{"paper_id":9683,"author_seq":247,"given_name":9693,"surname":9694,"affiliation":63,"orcid":63},"Agnese","Lombardi",{"paper_id":9683,"author_seq":232,"given_name":1712,"surname":9696,"affiliation":63,"orcid":63},"Lenci","Recent research has explored the capacity of Large Language Models (LLMs) to perform pragmatic reasoning and interpret complex pragmatic phenomena. However, such phenomena are inherently ambiguous, and even human evaluations are highly variable. Many existing studies directly compare human and model responses while assuming a single \"correct\" interpretation, thereby overlooking the natural variability that characterizes human pragmatic understanding. This raises two key issues: (1) the need for novel evaluation methods that account for interpretive variability and allow for meaningful comparison between humans and models, and (2) the potential limitations of current linguistic theories in capturing the richness of human pragmatic behavior. We propose that LLMs can serve not only as benchmarks for human-model alignment, but also as tools for investigating the nature of pragmatic phenomena and their relationship to linguistic theory. 
To this end, we developed a handcrafted dataset encompassing eight types of conversational implicatures. Our study addresses three main research questions: (1) Do LLMs process conversational implicatures differently from humans? (2) If so, how do these differences manifest? (3) What do these findings reveal about the cognitive capacities of LLMs and the explanatory adequacy of pragmatic theory?",{"paper_id":9699,"title":9700,"year":7,"month":188,"day":63,"doi":9701,"resource_url":9702,"first_page":9703,"last_page":9704,"pdf_url":9705,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9706,"paper_type":860,"authors":9707,"abstract":9713},"lrec2026-main-390","The Emergence of the Pragmatic Dimension in Instructed-LMs","10.63317\u002F4w4mg24sz9bc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-390","4967","4973","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.390.pdf","mazzaccara-etal-2026-emergence",[9708,9710],{"paper_id":9699,"author_seq":247,"given_name":1709,"surname":9709,"affiliation":63,"orcid":63},"Mazzaccara",{"paper_id":9699,"author_seq":232,"given_name":9711,"surname":9712,"affiliation":63,"orcid":63},"Raffaella","Bernardi","Instruction-tuning fundamentally transforms how language models process linguistic input and interact with the user. Through the lens of speech act theory, we investigate whether instruction-tuning causes models to shift from prioritizing syntactical form to pragmatic intent. We create a controlled dataset of 400 sentences systematically varying along two dimensions: syntactical structure (declarative vs. interrogative) and communicative intent (assertive vs. request). 
Using Principal Component Analysis on hidden state representations from Qwen2.5 (1.5B-7B) and models from two other families (Gemma3-1B, and Llama3.2-3B), we reveal a consistent pattern: base models cluster sentences by syntactical form, while instruction-tuned models reorganize representations around pragmatic intent. This syntactic-to-pragmatic shift occurs in middle layers, with declarative requests and interrogative requests—maximally separated in base models—becoming the most similar categories after instruction-tuning. The phenomenon explains how instruction-tuned models correctly interpret indirect speech acts, treating polite declaratives like “I’d appreciate corrections” as functionally equivalent to direct interrogatives. Our findings demonstrate that instruction-tuning teaches models to prioritize the communicative dimension over surface form, a fundamental reorganization consistent across model scales and architectures.",{"paper_id":9715,"title":9716,"year":7,"month":188,"day":63,"doi":9717,"resource_url":9718,"first_page":9719,"last_page":9720,"pdf_url":9721,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9722,"paper_type":860,"authors":9723,"abstract":9741},"lrec2026-main-391","Distributed Partial Information Puzzles: Examining Common Ground Construction under Epistemic 
Asymmetry","10.63317\u002F36sdnib5usq7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-391","4974","4987","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.391.pdf","zhu-etal-2026-distributed",[9724,9727,9730,9731,9734,9737,9738],{"paper_id":9715,"author_seq":247,"given_name":9725,"surname":9726,"affiliation":63,"orcid":63},"Yifan","Zhu",{"paper_id":9715,"author_seq":232,"given_name":9728,"surname":9729,"affiliation":63,"orcid":63},"Mariah","Bradford",{"paper_id":9715,"author_seq":218,"given_name":2007,"surname":3861,"affiliation":63,"orcid":63},{"paper_id":9715,"author_seq":203,"given_name":9732,"surname":9733,"affiliation":63,"orcid":63},"Timothy","Obiso",{"paper_id":9715,"author_seq":188,"given_name":9735,"surname":9736,"affiliation":63,"orcid":63},"Videep","Venkatesha",{"paper_id":9715,"author_seq":172,"given_name":4091,"surname":4092,"affiliation":63,"orcid":63},{"paper_id":9715,"author_seq":155,"given_name":9739,"surname":9740,"affiliation":63,"orcid":63},"Nikhil","Krishnaswamy","Establishing *common ground*, a shared set of beliefs and mutually recognized facts, is fundamental to collaboration, yet remains a challenge for current AI systems, especially in multimodal, multiparty settings, where the collaborators bring different information to the table. We introduce the **Distributed Partial Information Puzzle (DPIP)**, a collaborative construction task that elicits rich multimodal communication under epistemic asymmetry. We present a multimodal dataset of these interactions, annotated and temporally aligned across speech, gesture, and action modalities to support reasoning over propositional content and belief dynamics. We then evaluate two paradigms for modeling common ground (CG): (1) state-of-the-art large language models (LLMs), prompted to infer shared beliefs from multimodal updates, and (2) an axiomatic pipeline grounded in Dynamic Epistemic Logic (DEL) that incrementally performs the same task. 
Results on the annotated DPIP data indicate that it poses a challenge to modern LLMs’ abilities to track both task progression and belief state.",{"paper_id":9743,"title":9744,"year":7,"month":188,"day":63,"doi":9745,"resource_url":9746,"first_page":9747,"last_page":9748,"pdf_url":9749,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9750,"paper_type":860,"authors":9751,"abstract":9758},"lrec2026-main-392","Grounded Misunderstandings in Asymmetric Dialogue: A Perspectivist Annotation Scheme for MapTask","10.63317\u002F59anbt78wyj7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-392","4988","5001","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.392.pdf","li-etal-2026-grounded",[9752,9754,9757],{"paper_id":9743,"author_seq":247,"given_name":9753,"surname":3446,"affiliation":63,"orcid":63},"Nan",{"paper_id":9743,"author_seq":232,"given_name":9755,"surname":9756,"affiliation":63,"orcid":63},"Albert","Gatt",{"paper_id":9743,"author_seq":218,"given_name":4386,"surname":4387,"affiliation":63,"orcid":63},"Collaborative dialogue relies on participants incrementally establishing common ground, yet in asymmetric settings they may believe they agree while referring to different entities. We introduce a perspectivist annotation scheme for the HCRC MapTask corpus (Anderson et al., 1991) that separately captures speaker and addressee grounded interpretations for each reference expression, enabling us to trace how understanding emerges, diverges, and repairs over time. Using a scheme-constrained LLM annotation pipeline, we obtain 13k annotated reference expressions with reliability estimates and analyze the resulting understanding states. The results show that full misunderstandings are rare once lexical variants are unified, but multiplicity discrepancies systematically induce divergences, revealing how apparent grounding can mask referential misalignment. 
Our framework provides both a resource and an analytic lens for studying grounded misunderstanding and for evaluating (V)LLMs’ capacity to model perspective-dependent grounding in collaborative dialogue.",{"paper_id":9760,"title":9761,"year":7,"month":188,"day":63,"doi":9762,"resource_url":9763,"first_page":9764,"last_page":9765,"pdf_url":9766,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":9767,"bibkey":9768,"paper_type":860,"authors":9769,"abstract":9781},"lrec2026-main-393","Assessing LLM Reasoning through Implicit Causal Chain Discovery in Climate Discourse","10.63317\u002F2rgvpup3swpf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-393","5002","5014","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.393.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.393_OptionalSupplementaryMaterial.zip","allein-etal-2026-assessing",[9770,9773,9776,9778],{"paper_id":9760,"author_seq":247,"given_name":9771,"surname":9772,"affiliation":63,"orcid":63},"Liesbeth","Allein",{"paper_id":9760,"author_seq":232,"given_name":9774,"surname":9775,"affiliation":63,"orcid":63},"Nataly","Pineda-Castañeda",{"paper_id":9760,"author_seq":218,"given_name":1104,"surname":9777,"affiliation":63,"orcid":63},"Rocci",{"paper_id":9760,"author_seq":203,"given_name":9779,"surname":9780,"affiliation":63,"orcid":63},"Marie-Francine","Moens","How does a cause lead to an effect, and which intermediate causal steps explain their connection? This work scrutinizes the mechanistic causal reasoning capabilities of large language models (LLMs) to answer these questions through the task of implicit causal chain discovery. In a diagnostic evaluation framework, we instruct nine LLMs to generate all possible intermediate causal steps linking given cause-effect pairs in causal chain structures. 
These pairs are drawn from recent resources in argumentation studies featuring polarized discussion on climate change. Our analysis reveals that LLMs vary in the number and granularity of causal steps they produce. Although they are generally self-consistent and confident about the intermediate causal connections in the generated chains, their judgments are mainly driven by associative pattern matching rather than genuine causal reasoning. Nonetheless, human evaluations confirmed the logical coherence and integrity of the generated chains. Our baseline causal chain discovery approach, insights from our diagnostic evaluation, and benchmark dataset with causal chains lay a solid foundation for advancing future work in implicit, mechanistic causal reasoning in argumentation settings.",{"paper_id":9783,"title":9784,"year":7,"month":188,"day":63,"doi":9785,"resource_url":9786,"first_page":9787,"last_page":9788,"pdf_url":9789,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9790,"paper_type":860,"authors":9791,"abstract":9804},"lrec2026-main-394","AccurateRAG: A Framework for Building Accurate Retrieval-Augmented Question-Answering Applications","10.63317\u002F2ygvnkbv24j6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-394","5015","5023","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.394.pdf","nguyen-etal-2026-accuraterag",[9792,9794,9796,9798,9800,9802],{"paper_id":9783,"author_seq":247,"given_name":9793,"surname":2395,"affiliation":63,"orcid":63},"Linh The",{"paper_id":9783,"author_seq":232,"given_name":9795,"surname":2407,"affiliation":63,"orcid":63},"Chi",{"paper_id":9783,"author_seq":218,"given_name":9797,"surname":2395,"affiliation":63,"orcid":63},"Dung 
Ngoc",{"paper_id":9783,"author_seq":203,"given_name":9799,"surname":6474,"affiliation":63,"orcid":63},"Van-Cuong",{"paper_id":9783,"author_seq":188,"given_name":9801,"surname":7585,"affiliation":63,"orcid":63},"Hoang",{"paper_id":9783,"author_seq":172,"given_name":9803,"surname":2395,"affiliation":63,"orcid":63},"Dat Quoc","We introduce AccurateRAG—a novel framework for constructing high-performance question-answering applications based on retrieval-augmented generation (RAG). Our framework offers a pipeline for development efficiency with tools for raw dataset processing, fine-tuning data generation, text embedding & LLM fine-tuning, output evaluation, and building RAG systems locally. Experimental results show that our framework outperforms previous strong baselines and obtains new state-of-the-art question-answering performance on benchmark datasets.",{"paper_id":9806,"title":9807,"year":7,"month":188,"day":63,"doi":9808,"resource_url":9809,"first_page":9810,"last_page":9811,"pdf_url":9812,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9813,"paper_type":860,"authors":9814,"abstract":9824},"lrec2026-main-395","VideoEvent: Leveraging Relevance and LLMs for Video Question Answering","10.63317\u002F5dnrdsjog6tj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-395","5024","5034","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.395.pdf","lin-etal-2026-videoevent",[9815,9817,9819,9821],{"paper_id":9806,"author_seq":247,"given_name":9816,"surname":1816,"affiliation":63,"orcid":63},"Chen-Chen",{"paper_id":9806,"author_seq":232,"given_name":9818,"surname":1359,"affiliation":63,"orcid":63},"Ming-Han",{"paper_id":9806,"author_seq":218,"given_name":9820,"surname":7319,"affiliation":63,"orcid":63},"KunRu",{"paper_id":9806,"author_seq":203,"given_name":9822,"surname":9823,"affiliation":63,"orcid":63},"Yu-Chee","Tseng","We propose VideoEvent, a lightweight and efficient training-free framework 
for Video Question Answering (VQA) with large language models (LLMs). Although several training-free VQA methods have been proposed, they often neglect the temporal dependencies between frames or clips, treating them as isolated units and relying on complex or resource-intensive components. To address this limitation while maintaining performance and simplicity, we propose VideoEvent, a framework that segments an input video into question-relevant temporal events and selectively supplements them with low-level visual cues such as background and object layout. Our method selects semantically relevant time spans and retrieves one representative background frame to enrich the prompt to LLM. This design minimizes reliance on additional tools and reduces inference cost, making it highly suitable for practical deployment. Experimental results on EgoSchema and NExT-QA show that VideoEvent reduces inference cost by up to 30% while maintaining state-of-the-art accuracy, and its background module improves accuracy by 1–3% across multiple frameworks.",{"paper_id":9826,"title":9827,"year":7,"month":188,"day":63,"doi":9828,"resource_url":9829,"first_page":9830,"last_page":9831,"pdf_url":9832,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9833,"paper_type":860,"authors":9834,"abstract":9851},"lrec2026-main-396","MORQA: Benchmarking Evaluation Metrics for Medical Open-Ended Question Answering","10.63317\u002F5e46vrbnwfwe","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-396","5035","5054","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.396.pdf","yim-etal-2026-morqa",[9835,9838,9841,9843,9845,9848],{"paper_id":9826,"author_seq":247,"given_name":9836,"surname":9837,"affiliation":63,"orcid":63},"Wen-wai","Yim",{"paper_id":9826,"author_seq":232,"given_name":9839,"surname":9840,"affiliation":63,"orcid":63},"Asma 
Ben","Abacha",{"paper_id":9826,"author_seq":218,"given_name":9842,"surname":2998,"affiliation":63,"orcid":63},"Zixuan",{"paper_id":9826,"author_seq":203,"given_name":3172,"surname":9844,"affiliation":63,"orcid":63},"Doerning",{"paper_id":9826,"author_seq":188,"given_name":9846,"surname":9847,"affiliation":63,"orcid":63},"Fei","Xia",{"paper_id":9826,"author_seq":172,"given_name":9849,"surname":9850,"affiliation":63,"orcid":63},"Meliha","Yetisgen","Evaluating natural language generation (NLG) systems in the medical domain presents unique challenges due to the critical demands for accuracy, relevance, and domain-specific expertise. Traditional automatic evaluation metrics, such as BLEU, ROUGE, and BERTScore, often fall short in distinguishing between high-quality outputs, especially given the open-ended nature of medical question answering (QA) tasks where multiple valid responses exist. In this work, we introduce MORQA (Medical Open-Response QA), a new multilingual benchmark designed to assess the effectiveness of NLG evaluation metrics across three medical visual and text-based QA datasets in English and Chinese. Unlike prior resources, our datasets feature 2-4+ gold-standard answers authored by medical professionals, along with expert human ratings for three English and Chinese subsets. We benchmark both traditional metrics and large language model (LLM)-based evaluators, such as GPT-4 and Gemini, finding that LLM-based approaches significantly outperform traditional metrics in correlating with expert judgments. We further analyze factors driving this improvement, including LLMs’ sensitivity to semantic nuances and robustness to variability among reference answers. Our results provide the first comprehensive, multilingual qualitative study of NLG evaluation in the medical domain, highlighting the need for human-aligned evaluation methods. 
We release our code and annotations to support future research.",{"paper_id":9853,"title":9854,"year":7,"month":188,"day":63,"doi":9855,"resource_url":9856,"first_page":9857,"last_page":9858,"pdf_url":9859,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9860,"paper_type":860,"authors":9861,"abstract":9872},"lrec2026-main-397","LegalRikai: Open Benchmark – a Benchmark for Complex Japanese Corporate Legal Tasks","10.63317\u002F3cpdgfmzeav7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-397","5055","5077","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.397.pdf","fujita-etal-2026-legalrikai",[9862,9864,9867,9869],{"paper_id":9853,"author_seq":247,"given_name":9863,"surname":5923,"affiliation":63,"orcid":63},"Shogo",{"paper_id":9853,"author_seq":232,"given_name":9865,"surname":9866,"affiliation":63,"orcid":63},"Yuji","Naraki",{"paper_id":9853,"author_seq":218,"given_name":9868,"surname":9726,"affiliation":63,"orcid":63},"Yiqing",{"paper_id":9853,"author_seq":203,"given_name":9870,"surname":9871,"affiliation":63,"orcid":63},"Shinsuke","Mori","This paper introduces LegalRikai: Open Benchmark, a new benchmark comprising four complex tasks that emulate Japanese corporate legal practices. The benchmark was created by legal professionals under the supervision of an attorney. This benchmark has 100 samples that require long-form, structured outputs, and we evaluated them against multiple practical criteria. We conducted both human and automated evaluations using leading LLMs, including GPT-5, Gemini 2.5 Pro, and Claude Opus 4.1. Our human evaluation revealed that abstract instructions prompted unnecessary modifications, highlighting model weaknesses in document-level editing that were missed by conventional short-text tasks. 
Furthermore, our analysis reveals that automated evaluation aligns well with human judgment on criteria with clear linguistic grounding, and assessing structural consistency remains a challenge. The result demonstrates the utility of automated evaluation as a screening tool when expert availability is limited. We propose a dataset evaluation framework to promote more practice-oriented research in the legal domain.",{"paper_id":9874,"title":9875,"year":7,"month":188,"day":63,"doi":9876,"resource_url":9877,"first_page":9878,"last_page":9879,"pdf_url":9880,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9881,"paper_type":860,"authors":9882,"abstract":9892},"lrec2026-main-398","Integrating Arithmetic Learning Improves Mathematical Reasoning in Smaller Models","10.63317\u002F35u36mdkj4r7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-398","5078","5094","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.398.pdf","gangwar-etal-2026-integrating",[9883,9886,9889],{"paper_id":9874,"author_seq":247,"given_name":9884,"surname":9885,"affiliation":63,"orcid":63},"Neeraj","Gangwar",{"paper_id":9874,"author_seq":232,"given_name":9887,"surname":9888,"affiliation":63,"orcid":63},"Suma","Bhat",{"paper_id":9874,"author_seq":218,"given_name":9890,"surname":9891,"affiliation":63,"orcid":63},"Nickvash","Kani","While large models pre-trained on high-quality data exhibit excellent performance on mathematical reasoning (e.g., GSM8k, MultiArith), it remains challenging to specialize smaller models for these tasks. Common approaches to address this challenge include knowledge distillation from large teacher models and data augmentation (e.g., rephrasing questions and generating synthetic solutions). Despite these efforts, smaller models struggle with arithmetic computations, leading to errors in mathematical reasoning. 
In this work, we leverage a synthetic arithmetic dataset generated programmatically to enhance the reasoning capabilities of smaller models. We investigate two key approaches to incorporate this dataset: (1) intermediate fine-tuning, in which a model is fine-tuned on the arithmetic dataset before training it on a reasoning dataset, and (2) integrating the arithmetic dataset into an instruction-tuning mixture, allowing the model to learn arithmetic skills alongside general instruction-following abilities. Our experiments on multiple reasoning benchmarks demonstrate that incorporating an arithmetic dataset, whether through targeted fine-tuning or within an instruction-tuning mixture, enhances models’ arithmetic capabilities, thereby improving their mathematical reasoning performance.",{"paper_id":9894,"title":9895,"year":7,"month":188,"day":63,"doi":9896,"resource_url":9897,"first_page":9898,"last_page":9899,"pdf_url":9900,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9901,"paper_type":860,"authors":9902,"abstract":9910},"lrec2026-main-399","mSCoRe: A Multilingual and Scalable Benchmark for Skill-based Commonsense Reasoning","10.63317\u002F5kajwk9dj3j9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-399","5095","5115","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.399.pdf","ngo-etal-2026-mscore",[9903,9905,9908],{"paper_id":9894,"author_seq":247,"given_name":9904,"surname":7585,"affiliation":63,"orcid":63},"Nghia Trung",{"paper_id":9894,"author_seq":232,"given_name":9906,"surname":9907,"affiliation":63,"orcid":63},"Franck","Dernoncourt",{"paper_id":9894,"author_seq":218,"given_name":9909,"surname":2395,"affiliation":63,"orcid":63},"Thien Huu","Recent advancements in reasoning-reinforced Large Language Models (LLMs) have shown remarkable capabilities in complex reasoning tasks. 
However, the mechanism underlying their utilization of different human reasoning skills remains poorly investigated, especially for multilingual commonsense reasoning that involves everyday knowledge across different languages and cultures. To address this gap, we propose a Multilingual and Scalable Benchmark for Skill-based Commonsense Reasoning (mSCoRe). Our benchmark incorporates three key components that are designed to systematically evaluate LLM’s reasoning capabilities, including: (1) a novel taxonomy of reasoning skills that enables fine-grained analysis of models’ reasoning processes, (2) a robust data synthesis pipeline tailored specifically for commonsense reasoning evaluation, and (3) a complexity scaling framework allowing task difficulty to scale dynamically alongside future improvements in LLM abilities. Extensive experiments on eight state-of-the-art LLMs of varying sizes and training approaches demonstrate that mSCoRe remains significantly challenging for current models, particularly at higher complexity levels. Our results reveal the limitations of such reasoning-reinforced models when confronted with nuanced multilingual general and cultural commonsense. We further provide detailed analysis on the models’ reasoning processes, suggesting future directions for improving multilingual commonsense reasoning capabilities.",{"paper_id":9912,"title":9913,"year":7,"month":188,"day":63,"doi":9914,"resource_url":9915,"first_page":9916,"last_page":9917,"pdf_url":9918,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9919,"paper_type":860,"authors":9920,"abstract":9926},"lrec2026-main-400","A Binary Problem in Binary QA: Diverse LLMs or Diverse Question Interpretations? 
That Is the Ensembling Question","10.63317\u002F43t2yvgid7tw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-400","5116","5128","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.400.pdf","rosales-etal-2026-binary",[9921,9923],{"paper_id":9912,"author_seq":247,"given_name":1586,"surname":9922,"affiliation":63,"orcid":63},"Rosales",{"paper_id":9912,"author_seq":232,"given_name":9924,"surname":9925,"affiliation":63,"orcid":63},"Santiago","Miret","Effectively leveraging diversity has been shown to improve performance for various machine learning models, including large language models (LLMs). However, determining the most effective way of using diversity remains a challenge. In this work, we compare two diversity approaches for answering binary questions using LLMs: model diversity, which relies on multiple models answering the same question, and question interpretation diversity, which relies on using the same model to answer the same question framed in different ways. For both cases, we apply majority voting as the ensemble consensus heuristic to determine the final answer. Our experiments on boolq, strategyqa, and pubmedqa show that question interpretation diversity consistently leads to better ensemble accuracy compared to model diversity. 
Furthermore, our analysis of GPT and LLaMa shows that model diversity typically produces results between the best and the worst ensemble members without clear improvement.",{"paper_id":9928,"title":9929,"year":7,"month":188,"day":63,"doi":9930,"resource_url":9931,"first_page":9932,"last_page":9933,"pdf_url":9934,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9935,"paper_type":860,"authors":9936,"abstract":9948},"lrec2026-main-401","ObfusQAte: A Proposed Framework to Evaluate LLM Robustness on Obfuscated Factual Question Answering","10.63317\u002F4bcqprdhjoxv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-401","5129","5145","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.401.pdf","ghosh-etal-2026-obfusqate",[9937,9940,9943,9946],{"paper_id":9928,"author_seq":247,"given_name":9938,"surname":9939,"affiliation":63,"orcid":63},"Shubhra","Ghosh",{"paper_id":9928,"author_seq":232,"given_name":9941,"surname":9942,"affiliation":63,"orcid":63},"Abhilekh","Borah",{"paper_id":9928,"author_seq":218,"given_name":9944,"surname":9945,"affiliation":63,"orcid":63},"Aditya Kumar","Guru",{"paper_id":9928,"author_seq":203,"given_name":9947,"surname":9939,"affiliation":63,"orcid":63},"Kripabandhu","The rapid proliferation of Large Language Models (LLMs) has significantly contributed to the development of equitable AI systems capable of factual question-answering (QA). However, no known study tests the LLMs’ robustness when presented with obfuscated versions of questions. To systematically evaluate these limitations, we propose a novel technique, ObfusQAte and leveraging the same, introduce ObfusQA, a comprehensive, first of its kind, framework, with multi-tiered obfuscation levels designed to examine LLM capabilities across three distinct dimensions: (i) Named-Entity Indirection, (ii) Distractor Indirection, and (iii) Contextual Overload. 
By capturing these fine-grained distinctions in language, ObfusQA provides a comprehensive benchmark for evaluating LLM robustness and adaptability. Our study observes that LLMs exhibit a tendency to fail or generate hallucinated responses, when confronted with these increasingly nuanced variations. To foster research in this direction, we make ObfusQAte publicly available.",{"paper_id":9950,"title":9951,"year":7,"month":188,"day":63,"doi":9952,"resource_url":9953,"first_page":9954,"last_page":9955,"pdf_url":9956,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9957,"paper_type":860,"authors":9958,"abstract":9969},"lrec2026-main-402","POLAR: A Corpus of Questions, Responses and Argumentation in Polish Political Radio Discourse","10.63317\u002F5hu4iymus53n","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-402","5146","5157","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.402.pdf","ziembicki-etal-2026-polar",[9959,9961,9963,9966],{"paper_id":9950,"author_seq":247,"given_name":1668,"surname":9960,"affiliation":63,"orcid":63},"Ziembicki",{"paper_id":9950,"author_seq":232,"given_name":1313,"surname":9962,"affiliation":63,"orcid":63},"Zwierzchowska",{"paper_id":9950,"author_seq":218,"given_name":9964,"surname":9965,"affiliation":63,"orcid":63},"Ewelina","Sobol",{"paper_id":9950,"author_seq":203,"given_name":9967,"surname":9968,"affiliation":63,"orcid":63},"Katarzyna Anna","Przerada","In this paper, we present POLAR: an experimental dataset designed to investigate question–answer structures in political interviews. The study also aims to integrate this level of annotation with the identification of argumentative structures. The dataset comprises orthographic transcriptions of Polish political radio interviews conducted between December 2023 and March 2024, with a total duration of nearly 10 hours of recordings (94,015 tokens). 
Manual annotation was performed on three levels: (a) identification of questions as speech acts, (b) classification of responses to questions, and (c) argumentative structures in which interrogative sentences function as premises or conclusions. The results show that not all interrogative sentences function as questions in the sense of requesting information — 23% do not serve this function, while 13% were identified as components of argumentative structures. We also introduce a gold-standard corpus, together with baseline experiments and LLM-based evaluations, demonstrating the usefulness of the resource for both theoretical research and NLP applications.",{"paper_id":9971,"title":9972,"year":7,"month":188,"day":63,"doi":9973,"resource_url":9974,"first_page":9975,"last_page":9976,"pdf_url":9977,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9978,"paper_type":860,"authors":9979,"abstract":9992},"lrec2026-main-403","MHTS: Multi-Hop Tree Structure Framework for Generating Difficulty-Controllable QA Datasets for RAG Evaluation","10.63317\u002F5mcjipa88zjm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-403","5158","5168","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.403.pdf","lee-etal-2026-mhts",[9980,9982,9983,9986,9988,9991],{"paper_id":9971,"author_seq":247,"given_name":9981,"surname":1359,"affiliation":63,"orcid":63},"Jeongsoo",{"paper_id":9971,"author_seq":232,"given_name":8455,"surname":8456,"affiliation":63,"orcid":63},{"paper_id":9971,"author_seq":218,"given_name":9984,"surname":9985,"affiliation":63,"orcid":63},"Kyohoon","Jin",{"paper_id":9971,"author_seq":203,"given_name":9987,"surname":6008,"affiliation":63,"orcid":63},"JunNyeong",{"paper_id":9971,"author_seq":188,"given_name":9989,"surname":9990,"affiliation":63,"orcid":63},"Minwoo","Sim",{"paper_id":9971,"author_seq":172,"given_name":9989,"surname":5173,"affiliation":63,"orcid":63},"Existing RAG benchmarks often 
overlook query difficulty, leading to inflated performance on simpler questions and unreliable evaluations. A robust benchmark dataset must satisfy three key criteria: quality, ensuring complete and reliable ground truth (GT) responses; diversity, expanding semantic coverage to prevent overfitting; and difficulty, capturing the complexity of reasoning based on hops and the distribution of supporting evidence. However, current benchmarks lack a systematic approach to defining and controlling query difficulty at a fine-grained level. To address this, we propose MHTS (Multi-Hop Tree Structure), a novel dataset synthesis framework that systematically controls multi-hop reasoning complexity by leveraging a multi-hop tree structure to generate logically connected, multi-chunk queries. Our fine-grained difficulty estimation formula exhibits a strong correlation with the overall performance metrics of a RAG system, validating its effectiveness in assessing both retrieval and answer generation capabilities. By ensuring high-quality, diverse, and difficulty-controlled queries, our approach enhances RAG evaluation and benchmarking capabilities. 
This work contributes to the development of more reliable, efficient, and adaptable AI-driven research assistants, facilitating advancements in document-based reasoning and retrieval tasks.",{"paper_id":9994,"title":9995,"year":7,"month":188,"day":63,"doi":9996,"resource_url":9997,"first_page":9998,"last_page":9999,"pdf_url":10000,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10001,"paper_type":860,"authors":10002,"abstract":10013},"lrec2026-main-404","CareMedEval Dataset: Evaluating Critical Appraisal and Reasoning in the Biomedical Field","10.63317\u002F2erqq5fuxpge","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-404","5169","5181","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.404.pdf","bonzi-etal-2026-caremedeval",[10003,10006,10008,10011,10012],{"paper_id":9994,"author_seq":247,"given_name":10004,"surname":10005,"affiliation":63,"orcid":63},"Doria","Bonzi",{"paper_id":9994,"author_seq":232,"given_name":1515,"surname":10007,"affiliation":63,"orcid":63},"Guiggi",{"paper_id":9994,"author_seq":218,"given_name":10009,"surname":10010,"affiliation":63,"orcid":63},"Frederic","Bechet",{"paper_id":9994,"author_seq":203,"given_name":9420,"surname":9421,"affiliation":63,"orcid":63},{"paper_id":9994,"author_seq":188,"given_name":5599,"surname":5600,"affiliation":63,"orcid":63},"Critical appraisal of scientific literature is an essential skill in the biomedical field. While large language models (LLMs) can offer promising support in this task, their reliability remains limited, particularly for critical reasoning in specialized domains. We introduce CareMedEval, an original dataset designed to evaluate LLMs on biomedical critical appraisal and reasoning tasks. Derived from authentic exams taken by French medical students, the dataset contains 534 questions based on 37 scientific articles. 
Unlike existing benchmarks, CareMedEval explicitly evaluates critical reading and reasoning grounded in scientific papers. Benchmarking state-of-the-art generalist and biomedical-specialized LLMs under various context conditions reveals the difficulty of the task: open and commercial models fail to exceed an Exact Match Rate of 0.5 even though generating intermediate reasoning tokens considerably improves the results. Yet, models remain challenged especially on questions about study limitations and statistical analysis. CareMedEval provides a challenging benchmark for grounded reasoning, exposing current LLM limitations and paving the way for future development of automated support for critical appraisal.",{"paper_id":10015,"title":10016,"year":7,"month":188,"day":63,"doi":10017,"resource_url":10018,"first_page":10019,"last_page":10020,"pdf_url":10021,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10022,"paper_type":860,"authors":10023,"abstract":10037},"lrec2026-main-405","LongTailQA: Benchmarking LLMs and RAG Models on Disambiguated Long-Tail 
Entities","10.63317\u002F4tdekxzqph7x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-405","5182","5191","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.405.pdf","xion-etal-2026-longtailqa",[10024,10026,10029,10031,10033,10035],{"paper_id":10015,"author_seq":247,"given_name":1600,"surname":10025,"affiliation":63,"orcid":63},"Xion",{"paper_id":10015,"author_seq":232,"given_name":10027,"surname":10028,"affiliation":63,"orcid":63},"Uwe","Hadler",{"paper_id":10015,"author_seq":218,"given_name":9249,"surname":10030,"affiliation":63,"orcid":63},"Cofala",{"paper_id":10015,"author_seq":203,"given_name":1101,"surname":10032,"affiliation":63,"orcid":63},"Idahl",{"paper_id":10015,"author_seq":188,"given_name":10034,"surname":1874,"affiliation":63,"orcid":63},"Soumyadeep",{"paper_id":10015,"author_seq":172,"given_name":4552,"surname":10036,"affiliation":63,"orcid":63},"Nejdl","Large Language Models (LLMs) struggle with memorizing long-tail facts. Retrieval-Augmented Generation (RAG) models show better performance on long-tail Question Answering (QA) by offloading memory to external knowledge sources. We demonstrate that popular QA benchmarks such as PopQA, WITQA, and EntityQA contain significant entity ambiguity, with 8-30% of long-tail questions referencing entities with non-unique names. This ambiguity confounds evaluation, obscuring true model capabilities. To perform robust benchmarking, we disambiguate these questions with the Wikipedia knowledge graph to develop LongTailQA, an improved QA benchmark that mitigates entity ambiguity in long-tail entity questions. We evaluate various recent LLMs and RAG models, such as Self-RAG and InstructRAG, investigating retriever quality and retrieval depth impacts on QA performance. 
We observe that: (i) disambiguation improves model accuracy up to 24.7%, (ii) RAG models benefit significantly more than vanilla LLMs, (iii) simply increasing retrieval depth does not improve RAG performance, and (iv) RAG models achieve high accuracy with perfect information, highlighting the need to filter noisy documents during retrieval. The LongTailQA benchmark facilitates robust evaluation of long-tail knowledge recall and RAG system effectiveness. We make the codebase and datasets publicly available at https:\u002F\u002Fgithub.com\u002Fwilliamx854\u002FLongTailQA-Benchmark",{"paper_id":10039,"title":10040,"year":7,"month":188,"day":63,"doi":10041,"resource_url":10042,"first_page":10043,"last_page":10044,"pdf_url":10045,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10046,"paper_type":860,"authors":10047,"abstract":10054},"lrec2026-main-406","CRaFT: An Explanation-Based Framework for Evaluating Cultural Reasoning in Multilingual Language Models","10.63317\u002F583qrad7zvbi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-406","5192","5202","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.406.pdf","hossain-etal-2026-craft",[10048,10051],{"paper_id":10039,"author_seq":247,"given_name":10049,"surname":10050,"affiliation":63,"orcid":63},"Shehenaz","Hossain",{"paper_id":10039,"author_seq":232,"given_name":10052,"surname":10053,"affiliation":63,"orcid":63},"Haithem","Afli","Correct answers do not necessarily reflect cultural understanding. We introduce CRaFT, an explanation-based multilingual evaluation framework designed to assess how large language models (LLMs) reason across cultural contexts. Rather than scoring outputs solely based on accuracy, CRaFT evaluates model explanations using four interpretable metrics: Cultural Fluency, Deviation, Consistency, and Linguistic Adaptation. 
We apply the framework to 50 culturally grounded questions from the World Values Survey, translated into Arabic, Bengali, and Spanish, and evaluate three models (GPT-4o, DeepSeek, FANAR) across over 2,100 answer–explanation pairs. Results reveal significant cross-lingual variation in reasoning: Arabic reduces fluency, Bengali enhances it, and Spanish remains largely stable. While GPT-4o adapts more effectively across languages, it exhibits lower consistency; FANAR shows stable but rigid reasoning. These findings suggest that cultural awareness in LLMs is not intrinsic but emerges through linguistic framing. CRaFT offers a new lens for evaluating cross-cultural reasoning in multilingual settings, providing actionable insights for building culturally adaptive language models.",{"paper_id":10056,"title":10057,"year":7,"month":188,"day":63,"doi":10058,"resource_url":10059,"first_page":10060,"last_page":10061,"pdf_url":10062,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10063,"paper_type":860,"authors":10064,"abstract":10071},"lrec2026-main-407","HEAD-QA v2: Expanding a Healthcare Benchmark for Reasoning","10.63317\u002F2dvxxrgarr9d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-407","5203","5214","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.407.pdf","correa-etal-2026-head",[10065,10067,10069],{"paper_id":10056,"author_seq":247,"given_name":5114,"surname":10066,"affiliation":63,"orcid":63},"Correa",{"paper_id":10056,"author_seq":232,"given_name":9420,"surname":10068,"affiliation":63,"orcid":63},"Gómez-Rodríguez",{"paper_id":10056,"author_seq":218,"given_name":1061,"surname":10070,"affiliation":63,"orcid":63},"Vilares","We introduce HEAD-QA v2, an expanded and updated version of a Spanish\u002FEnglish healthcare multiple-choice reasoning dataset originally released by Vilares and Gómez-Rodríguez (2019). 
The update responds to the growing need for high-quality datasets that capture the linguistic and conceptual complexity of healthcare reasoning. We extend the dataset to over 12,000 questions from ten years of Spanish professional exams, benchmark several open-source LLMs using prompting, RAG, and probability-based answer selection, and provide additional multilingual versions to support future work. Results indicate that performance is mainly driven by model scale and intrinsic reasoning ability, with complex inference strategies obtaining limited gains. Together, these results establish HEAD-QA v2 as a reliable resource for advancing research on biomedical reasoning and model improvement.",{"paper_id":10073,"title":10074,"year":7,"month":188,"day":63,"doi":10075,"resource_url":10076,"first_page":10077,"last_page":10078,"pdf_url":10079,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10080,"paper_type":860,"authors":10081,"abstract":10087},"lrec2026-main-408","Beyond MCQ: An Open-Ended Arabic Cultural QA Benchmark with Dialect Variants","10.63317\u002F2smjp2wega4e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-408","5215","5231","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.408.pdf","bhatti-etal-2026-beyond",[10082,10085],{"paper_id":10073,"author_seq":247,"given_name":10083,"surname":10084,"affiliation":63,"orcid":63},"Hunzalah Hassan","Bhatti",{"paper_id":10073,"author_seq":232,"given_name":10086,"surname":5462,"affiliation":63,"orcid":63},"Firoj","Large Language Models (LLMs) are increasingly used to answer everyday questions, yet their performance on culturally grounded and dialectal content remains limited across languages and their varieties. 
We propose a comprehensive method that (i) translates Modern Standard Arabic (MSA) multiple-choice questions (MCQs) into English and several Arabic dialects, (ii) converts them into open-ended questions (OEQs), (iii) benchmarks a range of zero-shot and fine-tuned LLMs under both MCQ and OEQ settings, and (iv) generates chain-of-thought (CoT) rationales to fine-tune models for step-by-step reasoning. Using this method, we extend an existing dataset in which QAs are parallelly aligned across language varieties, making it, to our knowledge, the first of its kind. A large portion of the resulting test set is further validated through targeted human annotation and native-speaker post-editing. We conduct extensive experiments with both open and closed models. Our findings show that (i) models underperform on Arabic dialects, showing persistent gaps in culturally grounded and dialect-specific knowledge; (ii) Arabic-centric models perform well on MCQs but struggle with OEQs; and (iii) CoT improves judged correctness while yielding mixed n-gram-based metrics.",{"paper_id":10089,"title":10090,"year":7,"month":188,"day":63,"doi":10091,"resource_url":10092,"first_page":10093,"last_page":10094,"pdf_url":10095,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10096,"paper_type":860,"authors":10097,"abstract":10110},"lrec2026-main-409","Automatic Inter-document Multi-hop Scientific QA 
Generation","10.63317\u002F3tv9zuz8heht","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-409","5232","5245","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.409.pdf","lee-etal-2026-automatic",[10098,10100,10102,10105,10108],{"paper_id":10089,"author_seq":247,"given_name":10099,"surname":1359,"affiliation":63,"orcid":63},"Seungmin",{"paper_id":10089,"author_seq":232,"given_name":10101,"surname":5173,"affiliation":63,"orcid":63},"Dongha",{"paper_id":10089,"author_seq":218,"given_name":10103,"surname":10104,"affiliation":63,"orcid":63},"Yuni","Jeon",{"paper_id":10089,"author_seq":203,"given_name":10106,"surname":10107,"affiliation":63,"orcid":63},"Junyoung","Koh",{"paper_id":10089,"author_seq":188,"given_name":10109,"surname":1913,"affiliation":63,"orcid":63},"Min","Existing automatic scientific question generation studies mainly focus on single-document factoid QA, overlooking the inter-document reasoning crucial for scientific understanding. We present AIM-SciQA, an automated framework for generating multi-document, multi-hop scientific QA datasets. AIM-SciQA extracts single-hop QAs using large language models (LLMs) with machine reading comprehension and constructs cross-document relations based on embedding-based semantic alignment while selectively leveraging citation information. Applied to 8,211 PubMed Central papers, it produced 411,409 single-hop and 13,672 multi-hop QAs, forming the IM-SciQA dataset. Human and automatic validation confirmed high factual consistency, and experimental results demonstrate that IM-SciQA effectively differentiates reasoning capabilities across retrieval and QA stages, providing a realistic and interpretable benchmark for retrieval-augmented scientific reasoning. 
We further extend this framework to construct CIM-SciQA, a citation-guided variant achieving comparable performance to the Oracle setting, reinforcing the dataset’s validity and generality.",{"paper_id":10112,"title":10113,"year":7,"month":188,"day":63,"doi":10114,"resource_url":10115,"first_page":10116,"last_page":10117,"pdf_url":10118,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10119,"paper_type":860,"authors":10120,"abstract":10127},"lrec2026-main-410","CRiT-QA: Evaluating Multi-hop Reasoning with Counterfactual Chains and Distractor Traps","10.63317\u002F2jvxj7kwecuo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-410","5246","5255","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.410.pdf","yun-etal-2026-crit",[10121,10124,10126],{"paper_id":10112,"author_seq":247,"given_name":10122,"surname":10123,"affiliation":63,"orcid":63},"Jungmin","Yun",{"paper_id":10112,"author_seq":232,"given_name":10125,"surname":8456,"affiliation":63,"orcid":63},"June Hyoung",{"paper_id":10112,"author_seq":218,"given_name":6880,"surname":5173,"affiliation":63,"orcid":63},"Evaluating the multi-hop reasoning capabilities of large language models remains a significant challenge. Although current models achieve strong results on existing multi-hop question answering datasets, such performance often masks two critical vulnerabilities: (1) reliance on internal parametric knowledge rather than adherence to the provided context, and (2) exploitation of dataset shortcuts, such as single-document cues or type-matching, that diminish the need for genuine evidence aggregation across multiple documents. We introduce CRiT-QA (Counterfactual Reasoning with Traps), a dataset explicitly designed to address both limitations. To neutralize reliance on memorized knowledge and enforce strict context dependency, CRiT-QA transforms factual reasoning chains with counterfactual entities. 
Furthermore, it injects multi-anchor distractor chains, plausible but incorrect reasoning paths that diverge at different hops. These traps require models to follow the entire reasoning process rather than exploiting shallow heuristics. Our experiments show that LLMs exhibit substantial performance degradation on CRiT-QA compared to standard datasets, exposing their vulnerability to counterfactual conditions and distractor traps. CRiT-QA thus serves as a rigorous diagnostic tool for evaluating genuine multi-hop reasoning and provides a foundation for developing more reliable, evidence-grounded LLMs.",{"paper_id":10129,"title":10130,"year":7,"month":188,"day":63,"doi":10131,"resource_url":10132,"first_page":10133,"last_page":10134,"pdf_url":10135,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10136,"paper_type":860,"authors":10137,"abstract":10152},"lrec2026-main-411","TARAZ: Persian Short-Answer Question Benchmark for Cultural Evaluation of Language Models","10.63317\u002F2ophy9wqyzrd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-411","5256","5267","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.411.pdf","iranmanesh-etal-2026-taraz",[10138,10141,10143,10146,10149],{"paper_id":10129,"author_seq":247,"given_name":10139,"surname":10140,"affiliation":63,"orcid":63},"Reihaneh","Iranmanesh",{"paper_id":10129,"author_seq":232,"given_name":6574,"surname":10142,"affiliation":63,"orcid":63},"Davoudi",{"paper_id":10129,"author_seq":218,"given_name":10144,"surname":10145,"affiliation":63,"orcid":63},"Pasha","Abrishamchian",{"paper_id":10129,"author_seq":203,"given_name":10147,"surname":10148,"affiliation":63,"orcid":63},"Ophir","Frieder",{"paper_id":10129,"author_seq":188,"given_name":10150,"surname":10151,"affiliation":63,"orcid":63},"Nazli","Goharian","This paper presents a comprehensive evaluation framework for assessing the cultural competence of large language models (LLMs) in Persian. 
Existing Persian cultural benchmarks rely predominantly on multiple-choice formats and English-centric metrics that fail to capture Persian’s morphological complexity and semantic nuance. Our framework introduces a Persian-specific short-answer evaluation that combines rule-based morphological normalization with a hybrid syntactic and semantic similarity module, enabling robust soft-match scoring beyond exact string overlap. Through systematic evaluation of 15 state-of-the-art open- and closed-source models across three culturally grounded Persian datasets, we demonstrate that our hybrid evaluation improves scoring consistency by +10 compared to exact-match baselines by capturing meaning that surface-level methods cannot detect. Our human evaluation further confirms that the proposed semantic similarity metric achieves higher agreement with human judgments than LLM-based judges. We publicly release our evaluation framework, providing the first standardized benchmark for measuring cultural understanding in Persian and establishing a reproducible foundation for cross-cultural LLM evaluation research.",{"paper_id":10154,"title":10155,"year":7,"month":188,"day":63,"doi":10156,"resource_url":10157,"first_page":10158,"last_page":10159,"pdf_url":10160,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":10161,"bibkey":10162,"paper_type":860,"authors":10163,"abstract":10171},"lrec2026-main-412","Benchmarking Mathematical Reasoning in a Low-Resource Language: Structured Prompting and Evaluation in 
Basque","10.63317\u002F48gd2ysjuh8s","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-412","5268","5289","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.412.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.412_OptionalSupplementaryMaterial.zip","martinezcriado-etal-2026-benchmarking",[10164,10167,10168],{"paper_id":10154,"author_seq":247,"given_name":10165,"surname":10166,"affiliation":63,"orcid":63},"Inigo","Martinez-Criado",{"paper_id":10154,"author_seq":232,"given_name":1276,"surname":1277,"affiliation":63,"orcid":63},{"paper_id":10154,"author_seq":218,"given_name":10169,"surname":10170,"affiliation":63,"orcid":63},"Jeremy","Barnes","Large Language Models (LLMs) have shown impressive performance on tasks requiring complex reasoning, but most evaluations tend to focus on English and other high-resource languages. This work investigates how well LLMs perform mathematical reasoning in low-resource languages, using Basque as a primary case study. To support this analysis, we introduce MASEU, a benchmark designed to evaluate reasoning in Basque across arithmetic, algebraic, and logical tasks. We then use this dataset to address three key questions: 1) how well do LLMs support Basque in reasoning tasks, 2) to what extent can including English in prompts improve results, and 3) what is the effect of continued pretraining in Basque? 
To explore these aspects, we use prompting strategies adapted for mathematical reasoning, building upon the foundations of CoT prompting and one of its subsequent evolutions, DUP prompting, which together allow for more precise experimentation across zero-shot and few-shot settings, providing insights into how multilingual models handle reasoning tasks in underrepresented languages.",{"paper_id":10173,"title":10174,"year":7,"month":188,"day":63,"doi":10175,"resource_url":10176,"first_page":10177,"last_page":10178,"pdf_url":10179,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10180,"paper_type":860,"authors":10181,"abstract":10189},"lrec2026-main-413","Assessing the Difficulty of Inference Types in Natural Language Inference for Clinical Trials","10.63317\u002F359toazp33g8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-413","5290","5300","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.413.pdf","aguiar-etal-2026-assessing",[10182,10185,10186],{"paper_id":10173,"author_seq":247,"given_name":10183,"surname":10184,"affiliation":63,"orcid":63},"Mathilde","Aguiar",{"paper_id":10173,"author_seq":232,"given_name":1159,"surname":1160,"affiliation":63,"orcid":63},{"paper_id":10173,"author_seq":218,"given_name":10187,"surname":10188,"affiliation":63,"orcid":63},"Nona","Naderi","Large Language Models (LLMs) achieve competitive results on Natural Language Inference when applied to clinical trials; however, it is not yet clear which type of inference LLMs perform well or poorly on. We address this by proposing new supplementary annotations for the existing NLI4CT dataset on the types of inferences observed in clinical trials. Our dataset supplements NLI4CT with a total of 1,949 new annotations using our carefully crafted guidelines for 17 types of inferences. 
To investigate how inference types affect the performance of LLMs, we prompt Flan-T5, Llama, Mistral, and Qwen and evaluate their performance using our newly annotated dataset. We found that logical inferences negatively affect the overall performance of Qwen3-4B, Qwen2.5-7B, and Qwen2.5-14B, whereas numerical inferences negatively affect the performance of Flan-T5-XL and Mixtral. Further analysis shows that MMed-Llama-3 struggles to understand the structure of clinical trial reports. Other parameters, such as the number of inference types involved or the section type in the premise, also influence the performance of the models. Our code and dataset are publicly available.",{"paper_id":10191,"title":10192,"year":7,"month":188,"day":63,"doi":10193,"resource_url":10194,"first_page":10195,"last_page":10196,"pdf_url":10197,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10198,"paper_type":860,"authors":10199,"abstract":10224},"lrec2026-main-414","Reasoning Graph-Structured Question Answering: Datasets and Insights from LLM Benchmarking","10.63317\u002F4zjpmtqxxtx4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-414","5301","5316","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.414.pdf","yone-etal-2026-reasoning",[10200,10203,10206,10209,10212,10215,10218,10220,10221,10222],{"paper_id":10191,"author_seq":247,"given_name":10201,"surname":10202,"affiliation":63,"orcid":63},"Khin","Yone",{"paper_id":10191,"author_seq":232,"given_name":10204,"surname":10205,"affiliation":63,"orcid":63},"Devasha","Trivedi",{"paper_id":10191,"author_seq":218,"given_name":10207,"surname":10208,"affiliation":63,"orcid":63},"Anish","Pahilajani",{"paper_id":10191,"author_seq":203,"given_name":10210,"surname":10211,"affiliation":63,"orcid":63},"Jincen","Shuai",{"paper_id":10191,"author_seq":188,"given_name":10213,"surname":10214,"affiliation":63,"orcid":63},"Samyak 
Rajesh","Jain",{"paper_id":10191,"author_seq":172,"given_name":10216,"surname":10217,"affiliation":63,"orcid":63},"Ryan","Rossi",{"paper_id":10191,"author_seq":155,"given_name":10219,"surname":2400,"affiliation":63,"orcid":63},"Nesreen K.",{"paper_id":10191,"author_seq":138,"given_name":9906,"surname":9907,"affiliation":63,"orcid":63},{"paper_id":10191,"author_seq":121,"given_name":2998,"surname":3676,"affiliation":63,"orcid":63},{"paper_id":10191,"author_seq":104,"given_name":10223,"surname":6876,"affiliation":63,"orcid":63},"Namyong","Large Language Models (LLMs) have shown remarkable success in multi-hop question-answering (M-QA) due to their advanced reasoning capabilities. However, the influence of reasoning structures on their performance remains underexplored, primarily due to the lack of M-QA datasets that explicitly encode the reasoning pathways underlying each question-answer pair. To address this gap, we introduce the reasoning graph-structured question answering dataset (GRS-QA), which provides both semantic contexts and reasoning structures for the QA pairs. Unlike existing M-QA datasets, GRS-QA explicitly captures intricate reasoning pathways through reasoning graphs, where nodes correspond to textual contexts and edges denote logical flows. Using GRS-QA, we systematically evaluate LLM performance across varying context structures, prompting styles, and data domains. Our empirical analysis reveals that LLMs perform differently based on the reasoning structure, context, and prompting styles, indicating their varying ability to leverage graph-structured knowledge. 
Notably, providing explicit reasoning guidance proves more effective than supplying contextual information alone.",{"paper_id":10226,"title":10227,"year":7,"month":188,"day":63,"doi":10228,"resource_url":10229,"first_page":10230,"last_page":10231,"pdf_url":10232,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10233,"paper_type":860,"authors":10234,"abstract":10247},"lrec2026-main-415","JBE-QA: Japanese Bar Exam QA Dataset for Assessing Legal Domain Knowledge","10.63317\u002F5knc9vm8vgda","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-415","5317","5327","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.415.pdf","cao-etal-2026-jbe",[10235,10238,10241,10243,10244,10245],{"paper_id":10226,"author_seq":247,"given_name":10236,"surname":10237,"affiliation":63,"orcid":63},"Zhihan","Cao",{"paper_id":10226,"author_seq":232,"given_name":10239,"surname":10240,"affiliation":63,"orcid":63},"Fumihito","Nishino",{"paper_id":10226,"author_seq":218,"given_name":5852,"surname":10242,"affiliation":63,"orcid":63},"Yamada",{"paper_id":10226,"author_seq":203,"given_name":4994,"surname":2395,"affiliation":63,"orcid":63},{"paper_id":10226,"author_seq":188,"given_name":4606,"surname":4607,"affiliation":63,"orcid":63},{"paper_id":10226,"author_seq":172,"given_name":5824,"surname":10246,"affiliation":63,"orcid":63},"Satoh","We introduce JBE-QA, a Japanese Bar Exam Question–Answering dataset to evaluate large language models’ legal knowledge. Derived from the multiple-choice (tantō-shiki) section of the Japanese bar exam (2015–2024), JBE-QA provides the first comprehensive benchmark for Japanese legal-domain evaluation of LLMs. It covers the Civil Code, the Penal Code, and the Constitution, extending beyond the Civil Code focus of prior Japanese resources. Each question is decomposed into independent true\u002Ffalse judgments with structured contextual fields. 
The dataset contains 3,464 items with balanced labels. We evaluate 26 LLMs, including proprietary, open-weight, Japanese-specialised, and reasoning models. Our results show that proprietary models with reasoning enabled perform best, and the Constitution questions are generally easier than the Civil Code or the Penal Code questions.",{"paper_id":10249,"title":10250,"year":7,"month":188,"day":63,"doi":10251,"resource_url":10252,"first_page":10253,"last_page":10254,"pdf_url":10255,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10256,"paper_type":860,"authors":10257,"abstract":10261},"lrec2026-main-416","A Diagnostic Benchmark for Sweden-Related Factual Knowledge","10.63317\u002F5c6ftec8tnio","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-416","5328","5334","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.416.pdf","kunz-2026-diagnostic",[10258],{"paper_id":10249,"author_seq":247,"given_name":10259,"surname":10260,"affiliation":63,"orcid":63},"Jenny","Kunz","Many Swedish benchmarks are translations of US-centric benchmarks and are therefore not suitable for testing knowledge that is particularly relevant, or even specific, to Sweden. We therefore introduce a manually written question-answering benchmark specifically targeted at Sweden-related personalities and events, many of which receive very limited coverage in international media. Our annotators drew inspiration from a popular radio program featuring public figures from culture and media, as well as major sports events in Sweden. The dataset can be used to measure factual recall across models of varying sizes and degrees of Swedish coverage, and allows probing of cross-lingual factual consistency, as it contains English translations. Using the dataset, we find that smaller models with stronger Swedish coverage perform comparably to a multilingual model three times larger in recalling Sweden-related facts. 
We also observe that continued pre-training on Swedish generally improves factual knowledge but leads to partial forgetting of previously known information. These results demonstrate the dataset’s potential as a diagnostic tool for studying language adaptation and knowledge retention in multilingual models during language adaptation.",{"paper_id":10263,"title":10264,"year":7,"month":188,"day":63,"doi":10265,"resource_url":10266,"first_page":10267,"last_page":10268,"pdf_url":10269,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10270,"paper_type":860,"authors":10271,"abstract":10283},"lrec2026-main-417","GeoBenchmark: Probing Large Language Models for Geo-Spatial Knowledge","10.63317\u002F26pwz3584735","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-417","5335","5348","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.417.pdf","abayomi-etal-2026-geobenchmark",[10272,10275,10278,10280],{"paper_id":10263,"author_seq":247,"given_name":10273,"surname":10274,"affiliation":63,"orcid":63},"Ayomide","Abayomi",{"paper_id":10263,"author_seq":232,"given_name":10276,"surname":10277,"affiliation":63,"orcid":63},"Jose G.","Moreno",{"paper_id":10263,"author_seq":218,"given_name":2216,"surname":10279,"affiliation":63,"orcid":63},"Radouane",{"paper_id":10263,"author_seq":203,"given_name":10281,"surname":10282,"affiliation":63,"orcid":63},"Lynda","Tamine","Large Language Models (LLMs) demonstrate strong factual recall of general-purpose knowledge but struggle with grounded geospatial knowledge. To measure and help probe LLMs for spatial knowledge, we present GeoBenchmark, a benchmark for evaluating geographic commonsense along three core spatial relations: direction, distance, and topology. 
Using data extracted from YAGO2geo and Ordnance Survey ward geometries, spatial relations were formalized as structured triplets and systematically transformed into balanced binary (Yes\u002FNo) and Multiple-Choice (MCQ) question-answer pairs. Besides, we consider atomic and composite questions based on the number of spatial relations involved. The resulting dataset comprises 26k binary and 13k MCQ samples, uniformly distributed across atomic, binary, and ternary relation levels. We establish baselines with LLaMA-8B and Mistral-7B under zero-shot prompting, achieving 52-63% accuracy on atomic questions but below 35% on ternary relations, which exposes the models’ limited compositional spatial understanding and strong option bias. GeoBenchmark provides a comprehensive, reproducible resource for probing and advancing LLMs’ geographic commonsense, paving the way for future research in spatial and geographic probing of LLMs as well as knowledge editing.",{"paper_id":10285,"title":10286,"year":7,"month":188,"day":63,"doi":10287,"resource_url":10288,"first_page":10289,"last_page":10290,"pdf_url":10291,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10292,"paper_type":860,"authors":10293,"abstract":10306},"lrec2026-main-418","FactOReS: Fact-checking with an Evidence-based Open Resource in 
Spanish","10.63317\u002F44m8nbvp7z85","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-418","5349","5366","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.418.pdf","bravo-etal-2026-factores",[10294,10297,10298,10299,10302,10305],{"paper_id":10285,"author_seq":247,"given_name":10295,"surname":10296,"affiliation":63,"orcid":63},"Nagore","Bravo",{"paper_id":10285,"author_seq":232,"given_name":7054,"surname":7055,"affiliation":63,"orcid":63},{"paper_id":10285,"author_seq":218,"given_name":1058,"surname":1059,"affiliation":63,"orcid":63},{"paper_id":10285,"author_seq":203,"given_name":10300,"surname":10301,"affiliation":63,"orcid":63},"Alba Bonet","Jover",{"paper_id":10285,"author_seq":188,"given_name":10303,"surname":10304,"affiliation":63,"orcid":63},"Estela","Saquete",{"paper_id":10285,"author_seq":172,"given_name":7059,"surname":7060,"affiliation":63,"orcid":63},"Automated Fact-Checking (AFC) has become a popular research area in Natural Language Processing (NLP), intending to support human verification through evidence-based veracity prediction systems that provide transparency at each stage of the process. Despite the global significance of misinformation and the substantial progress made in AFC research, multilingual approaches to evidence-based fact-checking remain inadequately addressed. This work introduces FactOReS, the first publicly available dataset evaluated for evidence-based veracity prediction in Spanish, constructed from real Spanish-language claims and verified fact-checking articles. We establish performance baselines by systematically applying In-Context Learning (ICL) with Large Language Models (LLMs) to both an established English dataset and our novel Spanish dataset. Despite good zero-shot and few-shot performance, results in both languages demonstrate that each step requires further research in order to improve the overall results in the evidence-based veracity prediction task. 
Finally, we propose a semi-automated methodology that integrates computational processing with human validation, offering a reproducible framework for developing multilingual evidence-based fact-checking resources for the benefit of the NLP research community. Data and code available: https:\u002F\u002Fgithub.com\u002Fhitz-zentroa\u002FAFC_FactOReS",{"paper_id":10308,"title":10309,"year":7,"month":188,"day":63,"doi":10310,"resource_url":10311,"first_page":10312,"last_page":10313,"pdf_url":10314,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10315,"paper_type":860,"authors":10316,"abstract":10335},"lrec2026-main-419","Stands to Reason: Investigating the Effect of Reasoning on Idiomaticity Detection","10.63317\u002F24iojjct3mvu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-419","5367","5376","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.419.pdf","phelps-etal-2026-stands",[10317,10320,10322,10325,10328,10331,10333],{"paper_id":10308,"author_seq":247,"given_name":10318,"surname":10319,"affiliation":63,"orcid":63},"Dylan","Phelps",{"paper_id":10308,"author_seq":232,"given_name":7059,"surname":10321,"affiliation":63,"orcid":63},"Wilkens",{"paper_id":10308,"author_seq":218,"given_name":10323,"surname":10324,"affiliation":63,"orcid":63},"Edward","Gow-Smith",{"paper_id":10308,"author_seq":203,"given_name":10326,"surname":10327,"affiliation":63,"orcid":63},"Thomas M. R.","Pickard",{"paper_id":10308,"author_seq":188,"given_name":10329,"surname":10330,"affiliation":63,"orcid":63},"Maggie","Mi",{"paper_id":10308,"author_seq":172,"given_name":2146,"surname":10332,"affiliation":63,"orcid":63},"Idiart",{"paper_id":10308,"author_seq":155,"given_name":4071,"surname":10334,"affiliation":63,"orcid":63},"Villavicencio","The recent trend towards utilisation of reasoning models has improved the performance of Large Language Models (LLMs) across many tasks which involve logical steps. 
One linguistic task that could benefit from this framing is idiomaticity detection, as a potentially idiomatic expression must first be understood in relation to the context before it can be disambiguated. In this paper, we explore how reasoning capabilities in LLMs affect idiomaticity detection performance and examine the effect of model size. We evaluate, as open source representative models, the suite of DeepSeek-R1 distillation models ranging from 1.5B to 70B parameters across four idiomaticity detection datasets. We find the effect of reasoning to be smaller and more varied than expected. For smaller models, producing chain-of-thought (CoT) reasoning increases performance from Math-tuned intermediate models, but not to the levels of the base models, whereas larger models (14B, 32B, and 70B) show modest improvements. Our in-depth analyses reveal that larger models demonstrate good understanding of idiomaticity, successfully producing accurate definitions of expressions, while smaller models often fail to output the actual meaning. 
For this reason, we also experiment with providing definitions in the prompts of smaller models, which we show can improve performance in some cases.",{"paper_id":10337,"title":10338,"year":7,"month":188,"day":63,"doi":10339,"resource_url":10340,"first_page":10341,"last_page":10342,"pdf_url":10343,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10344,"paper_type":860,"authors":10345,"abstract":10366},"lrec2026-main-420","ESG-QA: Building a Dataset for Question Answering on Environmental, Social, and Governance Pillars","10.63317\u002F23kmozdtq6yp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-420","5377","5388","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.420.pdf","assis-etal-2026-esg",[10346,10349,10352,10355,10357,10359,10362,10365],{"paper_id":10337,"author_seq":247,"given_name":10347,"surname":10348,"affiliation":63,"orcid":63},"Gabriel","Assis",{"paper_id":10337,"author_seq":232,"given_name":10350,"surname":10351,"affiliation":63,"orcid":63},"Ayrton","Surica",{"paper_id":10337,"author_seq":218,"given_name":10353,"surname":10354,"affiliation":63,"orcid":63},"Pedro","Kroll",{"paper_id":10337,"author_seq":203,"given_name":10356,"surname":9009,"affiliation":63,"orcid":63},"Gabriela Aires",{"paper_id":10337,"author_seq":188,"given_name":10358,"surname":1871,"affiliation":63,"orcid":63},"Darian",{"paper_id":10337,"author_seq":172,"given_name":10360,"surname":10361,"affiliation":63,"orcid":63},"Edson","Bollis",{"paper_id":10337,"author_seq":155,"given_name":10363,"surname":10364,"affiliation":63,"orcid":63},"Lucas Francisco Amaral Orosco","Pellicer",{"paper_id":10337,"author_seq":138,"given_name":4071,"surname":4072,"affiliation":63,"orcid":63},"Environmental, Social, and Governance (ESG) factors are becoming increasingly central to corporate accountability and sustainable development. However, benchmarks for evaluating large language models (LLMs) in this domain remain scarce. 
To alleviate this gap, we present ESG-QA, a dataset of 87,261 question–answer–context triplets spanning the three ESG pillars. ESG-QA was built using an LLM-based Question Answer (QA) generation pipeline, enhanced through rule-based and semantic filtering, and validated by human inspection, enabling both abstractive QA and retrieval-augmented setups. We benchmark three open-weight LLM families (Llama-3, Gemma-3, and Qwen-3) across multiple dimensions, including correctness, environmental impact, and readability. Results show that Qwen-3 with retrieval achieves the highest absolute QA performance, while Gemma-3 provides the strongest overall balance between correctness, efficiency, and clarity. By releasing ESG-QA and its generation framework, this work establishes a comprehensive benchmark for advancing ESG-oriented QA and promoting more transparent and responsible AI evaluation.",{"paper_id":10368,"title":10369,"year":7,"month":188,"day":63,"doi":10370,"resource_url":10371,"first_page":10372,"last_page":10373,"pdf_url":10374,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10375,"paper_type":860,"authors":10376,"abstract":10387},"lrec2026-main-421","Enhancing and Evaluating Tabular Models on the Fly via Synthetic Question–Answer Generation","10.63317\u002F27e3cist39z2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-421","5389","5413","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.421.pdf","grijalba-etal-2026-enhancing",[10377,10380,10383,10386],{"paper_id":10368,"author_seq":247,"given_name":10378,"surname":10379,"affiliation":63,"orcid":63},"Jorge Osés","Grijalba",{"paper_id":10368,"author_seq":232,"given_name":10381,"surname":10382,"affiliation":63,"orcid":63},"Eugenio Martínez","Cámara",{"paper_id":10368,"author_seq":218,"given_name":10384,"surname":10385,"affiliation":63,"orcid":63},"L. 
Alfonso","Ureña-López",{"paper_id":10368,"author_seq":203,"given_name":1318,"surname":1319,"affiliation":63,"orcid":63},"Question Answering (QA) over Tabular Data has been traditionally a challenging task, but LLMs have recently shown the ability to respond to questions related to this type of structured data. However, current tabular QA datasets are skewed toward Wikipedia tables and SQL-style answers composed of human-crafted question–answer pairs. This limits the evaluation of LLMs on this task to a narrow genre of data and language, while also requiring extensive human effort for dataset or benchmark creation. To address this, we introduce SynTabQA, a methodology for the automatic generation of synthetic question–answer pairs from any unannotated table. SynTabQA defines a detailed question typology, enabling fine-grained evaluation and facilitating the creation of diverse QA datasets. Our approach not only provides an automated test bed for any tabular dataset but can also be used in few-shot settings to supply LLMs with tailored examples, improving their focus and accuracy. 
We validate SynTabQA on two large, manually constructed tabular QA benchmarks of distinct nature.",{"paper_id":10389,"title":10390,"year":7,"month":188,"day":63,"doi":10391,"resource_url":10392,"first_page":10393,"last_page":10394,"pdf_url":10395,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10396,"paper_type":860,"authors":10397,"abstract":10410},"lrec2026-main-422","VIVID: A Culturally Grounded Benchmark Exposing the Figurative Language Gap in Vietnamese NLP","10.63317\u002F3b4ag6rpijwb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-422","5414","5430","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.422.pdf","do-etal-2026-vivid",[10398,10401,10403,10404,10405,10408],{"paper_id":10389,"author_seq":247,"given_name":10399,"surname":10400,"affiliation":63,"orcid":63},"Tu Tran","Do",{"paper_id":10389,"author_seq":232,"given_name":10402,"surname":2395,"affiliation":63,"orcid":63},"Nhat Ngoc",{"paper_id":10389,"author_seq":218,"given_name":8972,"surname":2407,"affiliation":63,"orcid":63},{"paper_id":10389,"author_seq":203,"given_name":8989,"surname":2395,"affiliation":63,"orcid":63},{"paper_id":10389,"author_seq":188,"given_name":10406,"surname":10407,"affiliation":63,"orcid":63},"Tu Minh","Phuong",{"paper_id":10389,"author_seq":172,"given_name":10409,"surname":1727,"affiliation":63,"orcid":63},"Long Hoang","We present VIVID (Vietnamese Idioms for Validation and Interpretation Depth), the first systematic benchmark for evaluating culturally grounded figurative language understanding in Vietnamese. VIVID comprises 1,636 idioms and proverbs annotated with five complexity traits (literal expressions, pragmatic nuances, Sino-Vietnamese terms, uncommon vocabulary, folk knowledge) and seven semantic themes. 
We establish an evaluation framework combining generative and discriminative tasks, proposing an LLM-as-a-Judge approach with aspect-based prompting validated against human judgment (Cohen’s κ = 0.792). Evaluating eight state-of-the-art models reveals critical gaps: Vietnamese-specialized models drastically underperform multilingual systems (VinaLLaMA-7B: 0.13 vs. GPT-4o: 2.46), and even top models achieve less than 50% of maximum scores. Notably, few-shot prompting does not universally improve performance, with GPT-4o exhibiting degradation due to stylistic overfitting. Our analysis exposes systematic failures including literal over-interpretation, lexical gaps, and pragmatic flattening, demonstrating that current models lack cultural competence for nuanced figurative interpretation. VIVID provides an essential tool for advancing figurative language understanding in culturally rich contexts.",{"paper_id":10412,"title":10413,"year":7,"month":188,"day":63,"doi":10414,"resource_url":10415,"first_page":10416,"last_page":10417,"pdf_url":10418,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10419,"paper_type":860,"authors":10420,"abstract":10430},"lrec2026-main-423","Assessing Logical Coherence of LLMs via Fine-Grained NLI","10.63317\u002F4prei82n6ev9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-423","5431","5444","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.423.pdf","larraya-etal-2026-assessing",[10421,10424,10427,10428],{"paper_id":10412,"author_seq":247,"given_name":10422,"surname":10423,"affiliation":63,"orcid":63},"Jon Felix Apaolaza","Larraya",{"paper_id":10412,"author_seq":232,"given_name":10425,"surname":10426,"affiliation":63,"orcid":63},"Begoña","Altuna",{"paper_id":10412,"author_seq":218,"given_name":1276,"surname":1277,"affiliation":63,"orcid":63},{"paper_id":10412,"author_seq":203,"given_name":10165,"surname":10429,"affiliation":63,"orcid":63},"Lopez-Gazpio","Natural 
Language Inference (NLI) is a long-standing probe of models’ reasoning capabilities, yet it remains unclear how state-of-the-art systems represent and combine logical clauses in a way that supports robust generalization. We study directional effects in deductive NLI and introduce causal coherence, an evaluation paradigm that tests whether predictions remain consistent when the directionality of inference is reversed. Using fine-grained minimal-pair phrase data from PhrasIS, we evaluate encoder, decoder, and encoder–decoder transformers and analyze their behavior under both standard and manipulated settings. Our results show that models frequently fail to maintain logical stability when directionality varies, indicating shallow pattern matching rather than genuine clause composition. We formalize soft and hard causal coherence to disentangle directional consistency from correctness, and we provide an error analysis that highlights systematic failures involving semantic relations. Our findings suggest that deductive causal reasoning and coherence remain missing components in current transformer architectures, and that addressing them is necessary for reliable NLI.",{"paper_id":10432,"title":10433,"year":7,"month":188,"day":63,"doi":10434,"resource_url":10435,"first_page":10436,"last_page":10437,"pdf_url":10438,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10439,"paper_type":860,"authors":10440,"abstract":10449},"lrec2026-main-424","Counter-Hypothesis Generation: Towards Evaluating How LLMs Reason about 
Alternatives","10.63317\u002F336pssnozxaw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-424","5445","5449","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.424.pdf","abdolmaleki-etal-2026-counter",[10441,10444,10447,10448],{"paper_id":10432,"author_seq":247,"given_name":10442,"surname":10443,"affiliation":63,"orcid":63},"Marzieh","Abdolmaleki",{"paper_id":10432,"author_seq":232,"given_name":10445,"surname":10446,"affiliation":63,"orcid":63},"Aaron","Maladry",{"paper_id":10432,"author_seq":218,"given_name":2182,"surname":2183,"affiliation":63,"orcid":63},{"paper_id":10432,"author_seq":203,"given_name":2294,"surname":2295,"affiliation":63,"orcid":63},"Reasoning about alternatives is a fundamental component of human cognition and argumentation, yet it remains unclear whether large language models (LLMs) can coherently generate and assess them. This paper introduces Counter-Hypothesis Generation (CHG), a novel task for evaluating how LLMs construct plausible hypotheses when contextual information changes. Inspired by open-domain commonsense reasoning, where models infer and compare multiple explanations, CHG bridges commonsense and counterfactual reasoning by requiring models to generate hypotheses that remain logically consistent with modified premises. We present a test set annotated by a human expert and complemented with counter-hypotheses generated by OpenAI-o3 and DeepSeek-r1. 
Experimental results reveal that even advanced reasoning models exhibit notable limitations in counter-hypothesis generation.",{"paper_id":10451,"title":10452,"year":7,"month":188,"day":63,"doi":10453,"resource_url":10454,"first_page":10455,"last_page":10456,"pdf_url":10457,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10458,"paper_type":860,"authors":10459,"abstract":10466},"lrec2026-main-425","LFQA-HP-1M: A Large-Scale Human Preference Dataset for Long-Form Question Answering","10.63317\u002F2oq3nh8a3zff","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-425","5450","5465","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.425.pdf","jahan-etal-2026-lfqa",[10460,10462,10465],{"paper_id":10451,"author_seq":247,"given_name":10461,"surname":8312,"affiliation":63,"orcid":63},"Rafid Ishrak",{"paper_id":10451,"author_seq":232,"given_name":10463,"surname":10464,"affiliation":63,"orcid":63},"Fahmid Shahriar","Iqbal",{"paper_id":10451,"author_seq":218,"given_name":9208,"surname":9209,"affiliation":63,"orcid":63},"Long-form question answering (LFQA) demands nuanced evaluation of multi-sentence explanatory responses, yet existing metrics often fail to reflect human judgment. We present LFQA-HP-1M, a large-scale dataset comprising 1.3M human pairwise preference annotations for LFQA. We propose nine rubrics for answer quality evaluation, and show that simple linear models based on these features perform comparably to state-of-the-art LLM evaluators. We further examine transitivity consistency, positional bias, and verbosity biases in LLM evaluators and demonstrate their vulnerability to adversarial perturbations. 
Overall, this work provides one of the largest public LFQA preference datasets and a rubric-driven framework for transparent and reliable evaluation.",{"paper_id":10468,"title":10469,"year":7,"month":188,"day":63,"doi":10470,"resource_url":10471,"first_page":10472,"last_page":10473,"pdf_url":10474,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10475,"paper_type":860,"authors":10476,"abstract":10482},"lrec2026-main-426","Orthographic Constraint Satisfaction and Human Difficulty Alignment in Large Language Models","10.63317\u002F3erhoom72odv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-426","5466","5481","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.426.pdf","tuck-etal-2026-orthographic",[10477,10480],{"paper_id":10468,"author_seq":247,"given_name":10478,"surname":10479,"affiliation":63,"orcid":63},"Bryan E.","Tuck",{"paper_id":10468,"author_seq":232,"given_name":10481,"surname":2347,"affiliation":63,"orcid":63},"Rakesh","Large language models must satisfy hard orthographic constraints during controlled text generation, yet systematic cross-family evaluation remains limited. We evaluate 39 configurations spanning three model families (Qwen3, Claude Haiku 4.5, GPT-5-mini) on 58 word puzzles requiring character-level constraint satisfaction. Cross-family differences produce substantially larger performance gaps (2.0–2.2×, F1 = 0.761 vs. 0.343) than parameter scaling within families (83% gain from 4B to 32B scaling), and a partial-correlation analysis rules out tokenizer design as a confound for within-family scaling. Thinking budget sensitivity proves heterogeneous: high-capacity models show strong returns (+0.102 to +0.136 F1), while mid-sized variants saturate or degrade, showing inconsistent compute benefits. 
Using difficulty ratings from 10,000 human solvers per puzzle, we establish modest but consistent calibration (ρ = 0.28–0.42) across all families, yet identify systematic failures on common words with unusual orthography (\"data\", \"loll\", \"acai\": 83–91% human success, 94–98% model miss rate). These failures point to over-reliance on distributional plausibility that penalizes orthographically atypical but constraint-valid patterns.",{"paper_id":10484,"title":10485,"year":7,"month":188,"day":63,"doi":10486,"resource_url":10487,"first_page":10488,"last_page":10489,"pdf_url":10490,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10491,"paper_type":860,"authors":10492,"abstract":10506},"lrec2026-main-427","LIT-RAGBench: Benchmarking Generator Capabilities of Large Language Models in Retrieval-Augmented Generation","10.63317\u002F3ep7jffajytv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-427","5482","5493","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.427.pdf","itai-etal-2026-lit",[10493,10496,10498,10501,10504],{"paper_id":10484,"author_seq":247,"given_name":10494,"surname":10495,"affiliation":63,"orcid":63},"Koki","Itai",{"paper_id":10484,"author_seq":232,"given_name":10497,"surname":5822,"affiliation":63,"orcid":63},"Shunichi",{"paper_id":10484,"author_seq":218,"given_name":10499,"surname":10500,"affiliation":63,"orcid":63},"Yuta","Yamamoto",{"paper_id":10484,"author_seq":203,"given_name":10502,"surname":10503,"affiliation":63,"orcid":63},"Gouki","Minegishi",{"paper_id":10484,"author_seq":188,"given_name":2044,"surname":10505,"affiliation":63,"orcid":63},"Otsuki","Retrieval-Augmented Generation (RAG) is a framework in which a Generator, such as a Large Language Model (LLM), produces answers by retrieving documents from an external collection using a Retriever. 
In practice, Generators must integrate evidence from long contexts, perform multi-step reasoning, interpret tables, and abstain when evidence is missing. However, existing benchmarks for Generators provide limited coverage, with none enabling simultaneous evaluation of multiple capabilities under unified conditions. To bridge the gap between existing evaluations and practical use, we introduce LIT-RAGBench (the Logic, Integration, Table, Reasoning, and Abstention RAG Generator Benchmark), which defines five categories: Integration, Reasoning, Logic, Table, and Abstention—each further divided into practical evaluation aspects. LIT-RAGBench systematically covers patterns combining multiple aspects across categories. By using fictional entities and scenarios, LIT-RAGBench evaluates answers grounded in the provided external documents. The dataset consists of 114 human-constructed Japanese questions and an English version generated by machine translation with human curation. We use LLM-as-a-Judge for scoring and report category-wise and overall accuracy. Across API-based and open-weight models, no model exceeds 90% overall accuracy. 
By making strengths and weaknesses measurable within each category, LIT-RAGBench serves as a valuable metric for model selection in practical RAG deployments and for building RAG-specialized models.",{"paper_id":10508,"title":10509,"year":7,"month":188,"day":63,"doi":10510,"resource_url":10511,"first_page":10512,"last_page":10513,"pdf_url":10514,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10515,"paper_type":860,"authors":10516,"abstract":10526},"lrec2026-main-428","Investigating Reasoning with Hypotheses: The RIP2 Corpus","10.63317\u002F5crysfardw7h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-428","5494","5505","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.428.pdf","schad-etal-2026-investigating",[10517,10520,10523],{"paper_id":10508,"author_seq":247,"given_name":10518,"surname":10519,"affiliation":63,"orcid":63},"Ella","Schad",{"paper_id":10508,"author_seq":232,"given_name":10521,"surname":10522,"affiliation":63,"orcid":63},"Clara","Seyfried",{"paper_id":10508,"author_seq":218,"given_name":10524,"surname":10525,"affiliation":63,"orcid":63},"Chris","Reed","Analyses of hypothesis generation in fictionalised environments have significant potential for exploring factors influencing reasoning and decision-making in naturalistic contexts. Based on transcripts of 16 groups playing a murder mystery game, with a total of 42 human participants, RIP2 is a 177,000 word corpus exemplifying reasoning in the forensic domain. With a 80,000 word representative sample of the corpus annotated using an argumentation framework, RIP2 is nearly twice the size of the RIP Corpus of Collaborative Hypothesis-Making (RIP1), currently the only existing corpus of hypothesis-making in group environments. 
With a new experimental set-up and guidelines for annotating both cases of hypothesising and conjecturing, RIP2 offers insight into how participants generate, maintain, and reject hypotheses, as well as how they interact with others’ contributions. Based on its close exploration of six groups (three successful), this corpus particularly allows for group-level comparisons of factors influencing group success. Within this paper, we discuss the main contributions for understanding hypothesising and collaborative reasoning, and offer use cases for extended work demonstrating how analysis of hypothesis generation can be used for future research on argumentation quality and decision-making.",{"paper_id":10528,"title":10529,"year":7,"month":188,"day":63,"doi":10530,"resource_url":10531,"first_page":10532,"last_page":10533,"pdf_url":10534,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10535,"paper_type":860,"authors":10536,"abstract":10546},"lrec2026-main-429","Can Multimodal LLMs Generate Pedagogical Questions?","10.63317\u002F4z4gj3h8jmc7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-429","5506","5515","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.429.pdf","gerald-etal-2026-can",[10537,10539,10541,10543,10544],{"paper_id":10528,"author_seq":247,"given_name":1316,"surname":10538,"affiliation":63,"orcid":63},"Gerald",{"paper_id":10528,"author_seq":232,"given_name":3956,"surname":10540,"affiliation":63,"orcid":63},"Ghannay",{"paper_id":10528,"author_seq":218,"given_name":1616,"surname":10542,"affiliation":63,"orcid":63},"Lascar",{"paper_id":10528,"author_seq":203,"given_name":1216,"surname":1217,"affiliation":63,"orcid":63},{"paper_id":10528,"author_seq":188,"given_name":8037,"surname":10545,"affiliation":63,"orcid":63},"Vilnat","Educational materials frequently combine text, diagrams, tables, and charts to convey complex concepts. 
Understanding such materials often requires reasoning across modalities rather than relying solely on textual descriptions. In educational contexts, the main challenge lies in assessing the relevance and quality of the questions themselves. This raises a key issue: what defines a good question in a specialized learning environment? By comparison, evaluating answers is a more conventional task, although it requires examining criteria consistent with the targeted educational level. To the best of our knowledge, the use of LLMs for assessing the pedagogical relevance of questions remains unexplored. This gap highlights the need to define pedagogical relevance more clearly and to investigate the consistency of LLM judgments, as well as their alignment with human evaluations. We introduce a new Multimodal QA dataset in the education domain. To reduce the need for extensive human annotation, we leverage LLMs to help design questions on educational material, jointly with a human annotation. Contrary to most of QA Multimodal corpora, we focus on questions that could be asked by a teacher in his\u002Fher class, and that need dealing with different parts of the document to be answered. 
Results show that while LLMs as a judge is an efficient framework, many problems could arise and that aligning predictions with human annotators is a difficult task for complex criteria.",{"paper_id":10548,"title":10549,"year":7,"month":188,"day":63,"doi":10550,"resource_url":10551,"first_page":10552,"last_page":10553,"pdf_url":10554,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10555,"paper_type":860,"authors":10556,"abstract":10569},"lrec2026-main-430","The Riddle of Reflection: Evaluating Reasoning and Self-Awareness in Multilingual LLMs Using Indian Riddles","10.63317\u002F2pgfbjkdofoe","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-430","5516","5527","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.430.pdf","m-etal-2026-riddle",[10557,10560,10563,10566],{"paper_id":10548,"author_seq":247,"given_name":10558,"surname":10559,"affiliation":63,"orcid":63},"Abhinav P","M",{"paper_id":10548,"author_seq":232,"given_name":10561,"surname":10562,"affiliation":63,"orcid":63},"Ojasva","Saxena",{"paper_id":10548,"author_seq":218,"given_name":10564,"surname":10565,"affiliation":63,"orcid":63},"Oswald","C",{"paper_id":10548,"author_seq":203,"given_name":10567,"surname":10568,"affiliation":63,"orcid":63},"Parameswari","Krishnamurthy","The extent to which large language models (LLMs) can perform culturally grounded reasoning across non-English languages remains underexplored. This paper examines the reasoning and self-assessment abilities of LLMs across seven major Indian languages- Bengali, Gujarati, Hindi, Kannada, Malayalam, Tamil, and Telugu. We introduce a multilingual riddle dataset combining traditional riddles with context-reconstructed variants and evaluate five LLMs- Gemini 2.5 Pro, Gemini 2.5 Flash, Mistral-Saba, LLaMA-4-Scout, and LLaMA-4-Maverick under seven prompting strategies. 
In the first stage, we assess riddle-solving performance and find that while Gemini 2.5 Pro performs best overall, few-shot methods yield only marginal gains, and accuracy varies notably across languages. In the second stage, we conduct a self-evaluation experiment to measure reasoning consistency. The results reveal a key finding: a model’s initial accuracy is inversely correlated with its ability to identify its own mistakes. Top-performing models such as Gemini 2.5 Pro are overconfident (4.34% True Negative Rate), whereas lower-performing models like LLaMA-4-Scout are substantially more self-aware (42.09% True Negative Rate). These results point to clear gaps in multilingual reasoning and highlight the need for models that not only reason effectively but also recognize their own limitations.",{"paper_id":10571,"title":10572,"year":7,"month":188,"day":63,"doi":10573,"resource_url":10574,"first_page":10575,"last_page":10576,"pdf_url":10577,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10578,"paper_type":860,"authors":10579,"abstract":10583},"lrec2026-main-431","Using Songs to Improve Kazakh Automatic Speech Recognition","10.63317\u002F5hqonmz5roum","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-431","5528","5537","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.431.pdf","yeshpanov-2026-songs",[10580],{"paper_id":10571,"author_seq":247,"given_name":10581,"surname":10582,"affiliation":63,"orcid":63},"Rustem","Yeshpanov","Developing automatic speech recognition (ASR) systems for low-resource languages is hindered by the scarcity of transcribed corpora. This proof-of-concept study explores songs as an unconventional yet promising data source for Kazakh ASR. We curate a dataset of 3,013 audio-text pairs (about 4.5 hours) from 195 songs by 36 artists, segmented at the lyric-line level. 
Using Whisper as the base recogniser, we fine-tune models under seven training scenarios involving Songs, Common Voice Corpus (CVC), and FLEURS, and evaluate them on three benchmarks: CVC, FLEURS, and Kazakh Speech Corpus 2 (KSC2). Results show that song-based fine-tuning improves performance over zero-shot baselines. For instance, Whisper Large-V3 Turbo trained on a mixture of Songs, CVC, and FLEURS achieves 27.6% normalised WER on CVC and 11.8% on FLEURS, while halving the error on KSC2 (39.3% vs. 81.2%) relative to the zero-shot model. Although these gains remain below those of models trained on the 1,100-hour KSC2 corpus, they demonstrate that even modest song-speech mixtures can yield meaningful adaptation improvements in low-resource ASR. The dataset is released on Hugging Face for research purposes under a gated, non-commercial licence.",{"paper_id":10585,"title":10586,"year":7,"month":188,"day":63,"doi":10587,"resource_url":10588,"first_page":10589,"last_page":10590,"pdf_url":10591,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10592,"paper_type":860,"authors":10593,"abstract":10596},"lrec2026-main-432","Southern Kurdish Speech Recognition Resources and Benchmarking","10.63317\u002F2rkqhw7hmo2d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-432","5538","5544","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.432.pdf","mohammadamini-etal-2026-southern",[10594,10595],{"paper_id":10585,"author_seq":247,"given_name":1932,"surname":1933,"affiliation":63,"orcid":63},{"paper_id":10585,"author_seq":232,"given_name":1938,"surname":1939,"affiliation":63,"orcid":63},"This article introduces a dedicated speech recognition dataset for Southern Kurdish, which is a threatened variant of Kurdish macrolanguage. We present 30 hours of validated read speech for training and an evaluation benchmark for Southern Kurdish Automatic Speech Recognition (ASR). 
Both the training data and evaluation benchmark are read speech recorded by crowdsourcing campaigns. Besides a detailed description of the provided resources, we provide the ASR baselines using Whisper-turbo and wav2vec-bert CTC architectures. We achieved a 4.09 CER and 24.26 WER on our benchmark using the wav2vec-bert model. We also provide a categorization of errors to support further improvements in future studies. The resources and trained models are released under the CC BY-NC-ND 4.0 license and are publicly available at https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Faranemini\u002Fsouthern-kurdish-asr",{"paper_id":10598,"title":10599,"year":7,"month":188,"day":63,"doi":10600,"resource_url":10601,"first_page":10602,"last_page":10603,"pdf_url":10604,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10605,"paper_type":860,"authors":10606,"abstract":10617},"lrec2026-main-433","MASA: A Novel Multimodal Foundation Model for L2 Speaking Assessment in Picture-description Scenarios","10.63317\u002F2yc5vwhcevz5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-433","5545","5554","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.433.pdf","yan-etal-2026-masa",[10607,10610,10613,10615],{"paper_id":10598,"author_seq":247,"given_name":10608,"surname":10609,"affiliation":63,"orcid":63},"Bi-Cheng","Yan",{"paper_id":10598,"author_seq":232,"given_name":10611,"surname":10612,"affiliation":63,"orcid":63},"Fu-An","Chao",{"paper_id":10598,"author_seq":218,"given_name":10614,"surname":1816,"affiliation":63,"orcid":63},"Hong-Yun H.Y.",{"paper_id":10598,"author_seq":203,"given_name":10616,"surname":1840,"affiliation":63,"orcid":63},"Berlin","Automatic speaking assessment (ASA) manages to quantify the language competence of second language (L2) learners by providing a proficiency score based on their spoken responses. 
Existing efforts typically employ a neural grader coupled with a set of handcrafted features to gauge the competence of language in L2 learners from multiple facets. Despite their decent efficacy, these methods are limited by a laborious feature engineering process and largely overlook the utilization of scoring rubrics that are presented to human raters in speaking assessment. In light of this, we put forward a novel Multimodal foundation model for ASA, termed MASA, for use in picture-description scenarios. Our approach effectively streamlines the feature engineering process by leveraging the pre-trained encoders of a multimodal foundation model, and emulates the nuanced scoring behaviors of human raters by incorporating scoring rubrics directly into the modeling process. Furthermore, a simple, training-free method is introduced to alleviate the scoring bias in MASA by contrasting the output distributions derived from the multimodal and single-modal inputs. A series of experiments conducted on a picture-description task of the General English Proficiency Test (GEPT) dataset validates the feasibility and superiority of our method in comparison to several cutting-edge baselines.",{"paper_id":10619,"title":10620,"year":7,"month":188,"day":63,"doi":10621,"resource_url":10622,"first_page":10623,"last_page":10624,"pdf_url":10625,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10626,"paper_type":860,"authors":10627,"abstract":10637},"lrec2026-main-434","Tools for Estimating the Perceived Level of Phonetic 
Reduction","10.63317\u002F3qachmx9chqu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-434","5555","5565","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.434.pdf","ward-etal-2026-tools",[10628,10631,10632,10635],{"paper_id":10619,"author_seq":247,"given_name":10629,"surname":10630,"affiliation":63,"orcid":63},"Nigel","Ward",{"paper_id":10619,"author_seq":232,"given_name":8612,"surname":5548,"affiliation":63,"orcid":63},{"paper_id":10619,"author_seq":218,"given_name":10633,"surname":10634,"affiliation":63,"orcid":63},"Emma (Danny) R.","Boushka",{"paper_id":10619,"author_seq":203,"given_name":4392,"surname":10636,"affiliation":63,"orcid":63},"Niebuhr","Phonetic reduction is very common in casual speech, where it is associated with several important pragmatic functions. However these phenomena have been little studied. To support investigations and applications, we present tools that automatically estimate the level of perceived phonetic reduction. Trained on annotated dialog data and exploiting HuBert features, these handle American English and Northern Mexican Spanish. For English, word-level predictions correlate up to 0.55 with average human judgments. This is adequate at least for statistical studies of reduction in corpora, as seen in explorations of turn-yielding and prominence-marking behaviors. 
The tools are open-source and publicly available",{"paper_id":10639,"title":10640,"year":7,"month":188,"day":63,"doi":10641,"resource_url":10642,"first_page":10643,"last_page":10644,"pdf_url":10645,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10646,"paper_type":860,"authors":10647,"abstract":10675},"lrec2026-main-435","FalAR: A Large-scale Speaker-Annotated European Portuguese Speech Corpus of Parliamentary Sessions","10.63317\u002F2xhv97bm5dyd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-435","5566","5577","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.435.pdf","teixeira-etal-2026-falar",[10648,10651,10653,10656,10659,10662,10665,10667,10670,10673],{"paper_id":10639,"author_seq":247,"given_name":10649,"surname":10650,"affiliation":63,"orcid":63},"Francisco","Teixeira",{"paper_id":10639,"author_seq":232,"given_name":9420,"surname":10652,"affiliation":63,"orcid":63},"Carvalho",{"paper_id":10639,"author_seq":218,"given_name":10654,"surname":10655,"affiliation":63,"orcid":63},"Mariana","Julião",{"paper_id":10639,"author_seq":203,"given_name":10657,"surname":10658,"affiliation":63,"orcid":63},"Catarina","Botelho",{"paper_id":10639,"author_seq":188,"given_name":10660,"surname":10661,"affiliation":63,"orcid":63},"Rubén","Solera-Ureña",{"paper_id":10639,"author_seq":172,"given_name":10663,"surname":10664,"affiliation":63,"orcid":63},"Sérgio","Paulo",{"paper_id":10639,"author_seq":155,"given_name":1316,"surname":10666,"affiliation":63,"orcid":63},"Rolland",{"paper_id":10639,"author_seq":138,"given_name":10668,"surname":10669,"affiliation":63,"orcid":63},"Ben","Peters",{"paper_id":10639,"author_seq":121,"given_name":10671,"surname":10672,"affiliation":63,"orcid":63},"Isabel","Trancoso",{"paper_id":10639,"author_seq":104,"given_name":5238,"surname":10674,"affiliation":63,"orcid":63},"Abad","State-of-the-art performance for Automatic Speech Recognition (ASR) largely depends on the 
availability of large-scale labeled corpora. This creates a demand for increased data collection efforts, particularly for under-represented languages and dialectal varieties. Due to having considerably fewer speakers (around 11 million), European Portuguese (EP) is overshadowed by Brazilian Portuguese (BP) (around 200 million speakers) in currently available large-scale speech data resources, resulting in under-performing speech-based systems for EP users. To address this gap, and following similar data collection efforts for other languages, we present FalAR, a large-scale, speaker-annotated speech corpus of European Portuguese parliamentary sessions. Spanning approximately 20 years, FalAR comprises 5,800 hours of speech data. In addition, 4,850 hours have speaker identity annotations, for a total of 1,180 speakers with associated metadata including age, gender, political affiliation, and parliamentary role. The corpus was built using a state-of-the-art EP CAMÕES ASR model for transcription-reference alignment. In this paper, we describe the data collection process, together with the main characteristics of the FalAR corpus. 
Furthermore, we evaluate the trade-off between data quantity and alignment accuracy on ASR performance, with our experiments demonstrating that incorporating FalAR as pre-training data yields up to 14% relative WER improvement over baseline models.",{"paper_id":10677,"title":10678,"year":7,"month":188,"day":63,"doi":10679,"resource_url":10680,"first_page":10681,"last_page":10682,"pdf_url":10683,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10684,"paper_type":860,"authors":10685,"abstract":10695},"lrec2026-main-436","English to Central Kurdish Speech Translation: Corpus Creation, Evaluation, and Orthographic Standardization","10.63317\u002F4jy562hboezr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-436","5578","5587","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.436.pdf","mohammadamini-etal-2026-english",[10686,10687,10690,10693,10694],{"paper_id":10677,"author_seq":247,"given_name":1932,"surname":1933,"affiliation":63,"orcid":63},{"paper_id":10677,"author_seq":232,"given_name":10688,"surname":10689,"affiliation":63,"orcid":63},"Daban","Jaff",{"paper_id":10677,"author_seq":218,"given_name":10691,"surname":10692,"affiliation":63,"orcid":63},"Josep","Crego",{"paper_id":10677,"author_seq":203,"given_name":1938,"surname":1939,"affiliation":63,"orcid":63},{"paper_id":10677,"author_seq":188,"given_name":2409,"surname":4364,"affiliation":63,"orcid":63},"We present KUTED, a speech-to-text translation (S2TT) dataset for Central Kurdish, derived from TED and TEDx talks. The corpus comprises 91,000 sentence pairs, including 170 hours of English audio, 1.65 million English tokens, and 1.40 million Central Kurdish tokens. We evaluate KUTED on the S2TT task and find that orthographic variation significantly degrades Kurdish translation performance, producing nonstandard outputs. 
To address this, we propose a systematic text standardization approach that yields substantial performance gains and more consistent translations. On a test set separated from TED talks, a fine-tuned Seamless model achieves 15.18 BLEU, and we improve Seamless baseline by 3.0 BLEU on the FLEURS benchmark. We also train a Transformer model from scratch and evaluate a cascaded system that combines Seamless (ASR) with NLLB (MT).",{"paper_id":10697,"title":10698,"year":7,"month":188,"day":63,"doi":10699,"resource_url":10700,"first_page":10701,"last_page":10702,"pdf_url":10703,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10704,"paper_type":860,"authors":10705,"abstract":10722},"lrec2026-main-437","Automatic Prediction of Prominence and Boundary Strength from Text","10.63317\u002F3k3ii2w38tnj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-437","5588","5596","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.437.pdf","mas-etal-2026-automatic",[10706,10709,10712,10714,10716,10719],{"paper_id":10697,"author_seq":247,"given_name":10707,"surname":10708,"affiliation":63,"orcid":63},"Pauline","Mas",{"paper_id":10697,"author_seq":232,"given_name":10710,"surname":10711,"affiliation":63,"orcid":63},"Kévin","Vythelingum",{"paper_id":10697,"author_seq":218,"given_name":2837,"surname":10713,"affiliation":63,"orcid":63},"Chevelu",{"paper_id":10697,"author_seq":203,"given_name":8103,"surname":10715,"affiliation":63,"orcid":63},"Ouédraogo",{"paper_id":10697,"author_seq":188,"given_name":10717,"surname":10718,"affiliation":63,"orcid":63},"Damien","Lolive",{"paper_id":10697,"author_seq":172,"given_name":10720,"surname":10721,"affiliation":63,"orcid":63},"Olivier","Rosec","In Text-to-Speech synthesis (TTS), the prediction of prosodic information from text is a difficult challenge, since it requires information related to the context that may not be present in the text. 
Previous studies have shown that prosodic annotations from an oracle benefit TTS models and improve their prosodic rendering as well as their controllability. In this paper, we investigate different strategies to automatically predict prominence and boundary strength from text. We compare three prediction strategies on a French audiobook dataset: dedicated predictors jointly trained in a TTS model, a BERT-informed Prosody Predictor (BIPP) and its auto-regressive counterpart, both benefiting from semantic text embeddings. BIPP exhibits the best performance in our experiments, indicating that using phonetized syllables as complementary information to the semantic embedding provided by a BERT-like model is the best strategy to predict prosodic events.",{"paper_id":10724,"title":10725,"year":7,"month":188,"day":63,"doi":10726,"resource_url":10727,"first_page":10728,"last_page":10729,"pdf_url":10730,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10731,"paper_type":860,"authors":10732,"abstract":10743},"lrec2026-main-438","SOMVOICE: A First Dataset to Study the Effects of Sleep Deprivation on Voice Characteristics of Healthy French Speakers","10.63317\u002F5gzhbud3vbof","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-438","5597","5606","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.438.pdf","martin-etal-2026-somvoice",[10733,10735,10738,10741],{"paper_id":10724,"author_seq":247,"given_name":10734,"surname":3843,"affiliation":63,"orcid":63},"Vincent P.",{"paper_id":10724,"author_seq":232,"given_name":10736,"surname":10737,"affiliation":63,"orcid":63},"Jean-Luc","Rouas",{"paper_id":10724,"author_seq":218,"given_name":10739,"surname":10740,"affiliation":63,"orcid":63},"Colleen","Beaumard",{"paper_id":10724,"author_seq":203,"given_name":1159,"surname":10742,"affiliation":63,"orcid":63},"Philip","Excessive sleepiness is a significant public health issue and a critical personal health 
indicator associated with various disorders. Given its high prevalence in the general population, clinicians need tools to regularly measure patients’ sleepiness levels in natural settings, such as automatic speech analysis. In this article, we introduce the SOMVOICE corpus, the first French corpus containing read-speech recordings from the same participants either after a normal night or after a night of total sleep deprivation. Participants were included according to strict inclusion and exclusion criteria based on both medical characteristics and reading proficiency. The recordings were labelled with both objective and subjective measures of sleepiness, as well as fatigue and anxiety. After introducing the data-collection methodology, we use linear mixed models to conduct a preliminary investigation of the effect of total sleep deprivation on the collected sleepiness-related measures and on participants’ reading behaviour. Doing so, we found that sleep deprivation strongly influences objective and subjective sleepiness measurements as well as fatigue self-reports, but has a lesser effect on anxiety. 
Regarding reading behaviour, sleep deprivation is associated with a lower speech rate (duration of the recordings and phoneme rate) and more pauses (number of pauses and pause ratio)",{"paper_id":10745,"title":10746,"year":7,"month":188,"day":63,"doi":10747,"resource_url":10748,"first_page":10749,"last_page":10750,"pdf_url":10751,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10752,"paper_type":860,"authors":10753,"abstract":10762},"lrec2026-main-439","Automatic Prediction of Child Speech Fluency with Game-Based Data from German Preschoolers","10.63317\u002F48vj8xeqn5ok","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-439","5607","5616","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.439.pdf","kany-etal-2026-automatic",[10754,10756,10759],{"paper_id":10745,"author_seq":247,"given_name":5784,"surname":10755,"affiliation":63,"orcid":63},"Kany",{"paper_id":10745,"author_seq":232,"given_name":10757,"surname":10758,"affiliation":63,"orcid":63},"Bernd","Möbius",{"paper_id":10745,"author_seq":218,"given_name":10760,"surname":10761,"affiliation":63,"orcid":63},"Jürgen","Trouvain","This paper introduces an approach to automatically predict the speech fluency of preschool children as part of Language Proficiency Assessments. We use spontaneous speech data from children with German as native and second language aged 4–6 years, collected via a game–based elicitation method. The recordings were mainly annotated manually on various fluency-related phenomena. The resulting feature values were compared to human fluency ratings of the same data. The human ratings and the fluency-related acoustic features were used to build Cumulative Link Mixed Models (CLMMs) with and without splines to test their ability to predict the human ratings with multiple metrics (Spearman’s ρ, MAE, quadratic weighted κ). 
Results show that a parsimonious linear model already reaches near-human agreement (quadratic weighted kappa κ = 0.65) and that incorporating non-linear spline effects does not improve predictive accuracy. These findings suggest that relatively simple CLMMs can substitute additional human raters in fine-grained fluency assessment of preschool children, which is a task that is already challenging for trained listeners.",{"paper_id":10764,"title":10765,"year":7,"month":188,"day":63,"doi":10766,"resource_url":10767,"first_page":10768,"last_page":10769,"pdf_url":10770,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10771,"paper_type":860,"authors":10772,"abstract":10783},"lrec2026-main-440","Selective Augmentation: Improving Universal Automatic Phonetic Transcription via G2P Bootstrapping","10.63317\u002F53t62v2i3f8m","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-440","5617","5624","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.440.pdf","bystrich-etal-2026-selective",[10773,10775,10778,10781],{"paper_id":10764,"author_seq":247,"given_name":995,"surname":10774,"affiliation":63,"orcid":63},"Bystrich",{"paper_id":10764,"author_seq":232,"given_name":10776,"surname":10777,"affiliation":63,"orcid":63},"Julia Maria","Pritzen",{"paper_id":10764,"author_seq":218,"given_name":10779,"surname":10780,"affiliation":63,"orcid":63},"Christoph Andreas","Schmidt",{"paper_id":10764,"author_seq":203,"given_name":4283,"surname":10782,"affiliation":63,"orcid":63},"Wich-Reif","In the field of universal automatic phonetic transcription (APT), clean and diverse training transcriptions are required. However, such high-quality data is limited. We propose the bootstrapping approach Selective Augmentation to improve the available training transcriptions by selectively transferring distinctions between languages. 
Based on the model MultIPA, we exemplarily show that we could increase the accuracy of an existing feature (plosive voicing) and add a new feature (plosive aspiration) by augmenting the existing training data using information from a separate helper language (Hindi). We describe intrinsic challenges of the evaluation and develop objective metrics to determine the success: Voicing accuracy was increased by 17.6% by reducing the number of false positives. Additionally, aspiration recognition was introduced: While the baseline transcribed 0% of German \u002Fp, t, k\u002F as aspirated, our approach transcribed them as aspirated in 61.2% of the cases. Introducing aspiration recognition to APT models allowed for the tenuis class to be successfully reduced by 32.2%, which also reduces the conflations between the test language’s plosives.",{"paper_id":10785,"title":10786,"year":7,"month":188,"day":63,"doi":10787,"resource_url":10788,"first_page":10789,"last_page":10790,"pdf_url":10791,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10792,"paper_type":860,"authors":10793,"abstract":10800},"lrec2026-main-441","AURORA Model of Formant-to-tongue Inversion for Didactic and Clinical Applications","10.63317\u002F2sw67miiiyc7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-441","5625","5632","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.441.pdf","strycharczuk-etal-2026-aurora",[10794,10797],{"paper_id":10785,"author_seq":247,"given_name":10795,"surname":10796,"affiliation":63,"orcid":63},"Patrycja","Strycharczuk",{"paper_id":10785,"author_seq":232,"given_name":10798,"surname":10799,"affiliation":63,"orcid":63},"Sam","Kirkham","This paper outlines the conceptual and computational foundations of the AURORA (Acoustic Understanding and Real-time Observation of Resonant Articulations) model. AURORA predicts tongue displacement and shape in vowel sounds based on the first two formant values. 
It is intended as a didactic aid helping to explain the relationship between formants and the underlying articulation, as well as a foundation for biofeedback applications. The model is informed by ultrasound tongue imaging and acoustic data from 40 native speakers of English. In this paper we discuss the motivation for the model, the modelling objectives as well as the model architecture. We provide a qualitative evaluation of the model, focusing on selected tongue features. We then present two tools developed to make the model more accessible to a wider audience, a Shiny app and a prototype software for real-time tongue biofeedback. Potential users include students of phonetics, linguists in fields adjacent to phonetics, as well as speech and language therapy practitioners and clients.",{"paper_id":10802,"title":10803,"year":7,"month":188,"day":63,"doi":10804,"resource_url":10805,"first_page":10806,"last_page":10807,"pdf_url":10808,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10809,"paper_type":860,"authors":10810,"abstract":10824},"lrec2026-main-442","Investigating the Role of Synthetic Data Augmentation and Training Strategies on Improving Low-Resource Language ASR","10.63317\u002F4c4ad7t5i967","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-442","5633","5639","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.442.pdf","hao-etal-2026-investigating",[10811,10813,10815,10818,10821],{"paper_id":10802,"author_seq":247,"given_name":10123,"surname":10812,"affiliation":63,"orcid":63},"Hao",{"paper_id":10802,"author_seq":232,"given_name":10139,"surname":10814,"affiliation":63,"orcid":63},"Amooie",{"paper_id":10802,"author_seq":218,"given_name":10816,"surname":10817,"affiliation":63,"orcid":63},"Wietse de","Vries",{"paper_id":10802,"author_seq":203,"given_name":10819,"surname":10820,"affiliation":63,"orcid":63},"Rik 
van","Noord",{"paper_id":10802,"author_seq":188,"given_name":10822,"surname":10823,"affiliation":63,"orcid":63},"Martijn","Wieling","Low-resource automatic speech recognition (ASR) is challenging due to a scarcity of annotated data. While synthetic data from text-to-speech (TTS) systems can augment ASR training, its efficacy for low-resource languages remains unclear. In this study, we investigate under which conditions TTS-based data augmentation is most effective for low-resource languages. Experiments on six low-resource languages in Common Voice show that synthetic data is most beneficial under extremely low-resource ASR conditions (i.e., less than one hour of available real speech data), or for languages with larger amounts of TTS data (i.e., more than 10 hours). Additionally, increasing the amount and diversity of synthetic data while keeping an appropriate ratio of synthetic-to-real data can further improve ASR performance.",{"paper_id":10826,"title":10827,"year":7,"month":188,"day":63,"doi":10828,"resource_url":10829,"first_page":10830,"last_page":10831,"pdf_url":10832,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10833,"paper_type":860,"authors":10834,"abstract":10852},"lrec2026-main-443","AutoRPT: A Tool for Bootstrapping Prosodic 
Annotation","10.63317\u002F25i8wpabeno4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-443","5640","5648","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.443.pdf","heiney-etal-2026-autorpt",[10835,10838,10840,10843,10845,10847,10850],{"paper_id":10826,"author_seq":247,"given_name":10836,"surname":10837,"affiliation":63,"orcid":63},"Seth","Heiney",{"paper_id":10826,"author_seq":232,"given_name":1316,"surname":10839,"affiliation":63,"orcid":63},"Hicks",{"paper_id":10826,"author_seq":218,"given_name":10841,"surname":10842,"affiliation":63,"orcid":63},"Sally","Little",{"paper_id":10826,"author_seq":203,"given_name":4061,"surname":10844,"affiliation":63,"orcid":63},"Lourenco",{"paper_id":10826,"author_seq":188,"given_name":3610,"surname":10846,"affiliation":63,"orcid":63},"Retana",{"paper_id":10826,"author_seq":172,"given_name":10848,"surname":10849,"affiliation":63,"orcid":63},"Eliana","Stevens",{"paper_id":10826,"author_seq":155,"given_name":2837,"surname":10851,"affiliation":63,"orcid":63},"Howell","Automated Rapid Prosody Transcription (AutoRPT) is a tool for bootstrapping manual annotation of prosodic events in either corpora or standalone audio files using the Rapid Prosody Transcription (RPT) scheme. It functions by utilizing two Long-Short Term Memory (LSTM) models, trained on measures of pitch\u002FF0 and intensity. In addition to discrete, slightly over-generated predictions of prominence and boundary, AutoRPT produces continuous predictions between 0 and 1, similar to crowd-sourced RPT annotations averaged over listeners. Marginal predictions above a given threshold are also indicated discretely by question marks, as in the PoLaR Annotation Guidelines. Annotators achieved a statistically significant increase in annotation speed by modifying AutoRPT-generated annotations over creating annotations without assistance. 
In contrast with older tools such as AuToBI (Rosenberg, 2010), AutoRPT generates more theory-agnostic annotations which can support the work of non-expert annotators, and which we expect will offer greater flexibility in the prosodic annotation of other English language varieties.",{"paper_id":10854,"title":10855,"year":7,"month":188,"day":63,"doi":10856,"resource_url":10857,"first_page":10858,"last_page":10859,"pdf_url":10860,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10861,"paper_type":860,"authors":10862,"abstract":10875},"lrec2026-main-444","J-CHAT: Japanese Large-scale Spoken Dialogue Corpus for Spoken Dialogue Language Modeling","10.63317\u002F2rjcajopifoh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-444","5649","5656","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.444.pdf","nakata-etal-2026-chat",[10863,10864,10867,10868,10870,10873],{"paper_id":10854,"author_seq":247,"given_name":9532,"surname":4256,"affiliation":63,"orcid":63},{"paper_id":10854,"author_seq":232,"given_name":10865,"surname":10866,"affiliation":63,"orcid":63},"Kentaro","Seki",{"paper_id":10854,"author_seq":218,"given_name":4264,"surname":4265,"affiliation":63,"orcid":63},{"paper_id":10854,"author_seq":203,"given_name":1463,"surname":10869,"affiliation":63,"orcid":63},"Saito",{"paper_id":10854,"author_seq":188,"given_name":10871,"surname":10872,"affiliation":63,"orcid":63},"Shinnosuke","Takamichi",{"paper_id":10854,"author_seq":172,"given_name":4252,"surname":10874,"affiliation":63,"orcid":63},"Saruwatari","Spoken dialogue is essential for human-AI interactions, providing expressive capabilities beyond text. Developing effective spoken dialogue systems (SDSs) requires large-scale, high-quality, and diverse spoken dialogue corpora. However, existing datasets are often limited in size, spontaneity, or linguistic coherence. 
To address these limitations, we introduce J-CHAT, a 76,000-hour open-source Japanese spoken dialogue corpus. Constructed using an automated, language-independent methodology, J-CHAT ensures acoustic cleanliness, diversity, and natural spontaneity. The corpus is built from YouTube and podcast data, with extensive filtering and denoising to enhance quality. Experimental results with generative spoken dialogue language models trained on J-CHAT demonstrate its effectiveness for SDS development. By providing a robust foundation for training advanced dialogue models, we anticipate that J-CHAT will drive progress in human-AI dialogue research and applications.",{"paper_id":10877,"title":10878,"year":7,"month":188,"day":63,"doi":10879,"resource_url":10880,"first_page":10881,"last_page":10882,"pdf_url":10883,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10884,"paper_type":860,"authors":10885,"abstract":10908},"lrec2026-main-445","ViMedCSS: A Vietnamese Medical Code-Switching Speech Dataset & Benchmark","10.63317\u002F58uwrquo3znb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-445","5657","5665","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.445.pdf","nguyen-etal-2026-vimedcss",[10886,10888,10891,10893,10895,10898,10901,10903,10906],{"paper_id":10877,"author_seq":247,"given_name":10887,"surname":2395,"affiliation":63,"orcid":63},"Tung X.",{"paper_id":10877,"author_seq":232,"given_name":10889,"surname":10890,"affiliation":63,"orcid":63},"Nhu","Vo",{"paper_id":10877,"author_seq":218,"given_name":10892,"surname":2395,"affiliation":63,"orcid":63},"Giang Son",{"paper_id":10877,"author_seq":203,"given_name":10894,"surname":9801,"affiliation":63,"orcid":63},"Duy Mai",{"paper_id":10877,"author_seq":188,"given_name":10896,"surname":10897,"affiliation":63,"orcid":63},"Chien Dinh","Huynh",{"paper_id":10877,"author_seq":172,"given_name":10899,"surname":10900,"affiliation":63,"orcid":63},"Inigo 
Jauregi","Unanue",{"paper_id":10877,"author_seq":155,"given_name":4386,"surname":10902,"affiliation":63,"orcid":63},"Piccardi",{"paper_id":10877,"author_seq":138,"given_name":10904,"surname":10905,"affiliation":63,"orcid":63},"Wray","Buntine",{"paper_id":10877,"author_seq":121,"given_name":10907,"surname":6468,"affiliation":63,"orcid":63},"Dung D.","Code-switching (CS), which is when Vietnamese speech uses English words like drug names or procedures, is a common phenomenon in Vietnamese medical communication. This creates challenges for Automatic Speech Recognition (ASR) systems, especially in low-resource languages like Vietnamese. Current most ASR systems struggle to recognize correctly English medical terms within Vietnamese sentences, and no benchmark addresses this challenge. In this paper, we construct a 34-hour Vietnamese Medical Code-Switching Speech dataset (ViMedCSS) containing 16,576 utterances. Each utterance includes at least one English medical term drawn from a curated bilingual lexicon covering five medical topics. Using this dataset, we evaluate several state-of-the-art ASR models and examine different specific fine-tuning strategies for improving medical term recognition to investigate the best approach to solve in the dataset. Experimental results show that Vietnamese-optimized models perform better on general segments, while multilingual pretraining helps capture English insertions. The combination of both approaches yields the best balance between overall and code-switched accuracy. 
This work provides the first benchmark for Vietnamese medical code-switching and offers insights into effective domain adaptation for low-resource, multilingual ASR systems.",{"paper_id":10910,"title":10911,"year":7,"month":188,"day":63,"doi":10912,"resource_url":10913,"first_page":10914,"last_page":10915,"pdf_url":10916,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10917,"paper_type":860,"authors":10918,"abstract":10922},"lrec2026-main-446","Towards Privacy-Preserving Fine-Tuning: Anonymization of Aphasic Speech for Effective ASR","10.63317\u002F2g92dv8iohqz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-446","5666","5676","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.446.pdf","hofstetter-etal-2026-privacy",[10919,10921],{"paper_id":10910,"author_seq":247,"given_name":4763,"surname":10920,"affiliation":63,"orcid":63},"Hofstetter",{"paper_id":10910,"author_seq":232,"given_name":3655,"surname":3656,"affiliation":63,"orcid":63},"The scarcity of publicly available aphasic speech data, driven largely by privacy concerns, poses a significant barrier for fine-tuning Automatic Speech Recognition (ASR) systems in this domain. This study investigates the privacy–utility trade-off of speech anonymization as a strategy to increase data availability. A signal-based McAdams anonymization method is applied to a subset of the AphasiaBank corpus comprising approximately 132 hours of speech from 425 individuals. Privacy is evaluated using an ECAPA-TDNN based Automatic Speaker Verification system and the Equal Error Rate metric. Linguistic utility is assessed by the Word Error Rate using wav2vec2.0 ASR model, tested in multiple conditions, both pretrained and fine-tuned on unprotected and anonymized audio. Our results show that fine-tuning on anonymized aphasic speech data improves ASR performance by +18 % compared to the performance of generic models on non-anonymized speech. 
Crucially, this gain in utility is achieved alongside substantial privacy protection, with anonymization increasing the privacy by +440 % compared to sharing unprotected speech. This work thus provides a proof-of-concept, demonstrating that speech anonymization mitigates privacy risks to tackle data scarcity and support the development of more effective ASR systems for people with aphasia.",{"paper_id":10924,"title":10925,"year":7,"month":188,"day":63,"doi":10926,"resource_url":10927,"first_page":10928,"last_page":10929,"pdf_url":10930,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10931,"paper_type":860,"authors":10932,"abstract":10944},"lrec2026-main-447","ParlaSpeech 3.0: Richly Annotated Spoken Parliamentary Corpora of Croatian, Czech, Polish, and Serbian","10.63317\u002F3b7dnbjr75es","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-447","5677","5688","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.447.pdf","ljubei-etal-2026-parlaspeech",[10933,10936,10938,10941],{"paper_id":10924,"author_seq":247,"given_name":10934,"surname":10935,"affiliation":63,"orcid":63},"Nikola","Ljubešić",{"paper_id":10924,"author_seq":232,"given_name":1625,"surname":10937,"affiliation":63,"orcid":63},"Rupnik",{"paper_id":10924,"author_seq":218,"given_name":10939,"surname":10940,"affiliation":63,"orcid":63},"Ivan","Porupski",{"paper_id":10924,"author_seq":203,"given_name":10942,"surname":10943,"affiliation":63,"orcid":63},"Taja Kuzman","Pungeršek","ParlaSpeech is a collection of spoken parliamentary corpora currently spanning four Slavic languages – Croatian, Czech, Polish and Serbian – with a total size of more than 6 thousand hours. The corpora were built in an automatic fashion from the ParlaMint transcripts and their corresponding metadata, which were aligned to the speech recordings of each corresponding parliament. 
In this release of the dataset, each of the corpora has been significantly enriched with several automatic annotation layers. The textual modality of all four corpora has been enriched with linguistic annotations and sentiment predictions. Similarly, their spoken modality has been automatically enriched with occurrences of filled pauses, the most frequent type of disfluency in typical speech. Two languages have been additionally enriched with detailed word- and grapheme-level alignments, and the automatic annotation of the position of primary stress in multisyllabic words. With these enrichments, the usefulness of the corpora has been greatly increased for downstream research across multiple disciplines, which we showcase through an analysis of acoustic correlates of sentiment. All the corpora are made available for download in JSONL and TextGrid formats, as well as for search through a concordancer.",{"paper_id":10946,"title":10947,"year":7,"month":188,"day":63,"doi":10948,"resource_url":10949,"first_page":10950,"last_page":10951,"pdf_url":10952,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10953,"paper_type":860,"authors":10954,"abstract":10962},"lrec2026-main-448","LexiPhon: A Collection of Phonetically Transcribed Lexicons from Wikipedia","10.63317\u002F2ju5wvz6x3mw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-448","5689","5700","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.448.pdf","doucette-etal-2026-lexiphon",[10955,10957,10960],{"paper_id":10946,"author_seq":247,"given_name":3454,"surname":10956,"affiliation":63,"orcid":63},"Doucette",{"paper_id":10946,"author_seq":232,"given_name":10958,"surname":10959,"affiliation":63,"orcid":63},"Timothy J.","O'Donnell",{"paper_id":10946,"author_seq":218,"given_name":3540,"surname":10961,"affiliation":63,"orcid":63},"Sonderegger","We introduce LexiPhon, an open-source dataset of phonetically transcribed lexicons for 87 languages 
derived from Wikipedia data with automated grapheme-to-phoneme (G2P) transcription, along with the open-source software used to create it. Each lexicon provides transcriptions generated by up to three G2P methods, crowdsourced transcriptions from WikiPron (Lee et al., 2020) where available, word frequencies calculated from Wikipedia, along with word lengths and phonological neighborhood densities. We introduce an internal validation metric based on phonological feature edit distance to ensure transcriptions are consistent within languages, as manual validation is not possible. This dataset fills a gap in the existing space of phonetic lexicons, with a much larger set of words per language than existing multilingual word lists, and more languages than existing lexicon datasets. The dataset, along with the software used to create it, are freely available on OSF at https:\u002F\u002Fosf.io\u002Frd9ma\u002Foverview?view_only=398802df19ad488ab7da7e7798cd7aca.",{"paper_id":10964,"title":10965,"year":7,"month":188,"day":63,"doi":10966,"resource_url":10967,"first_page":10968,"last_page":10969,"pdf_url":10970,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10971,"paper_type":860,"authors":10972,"abstract":10980},"lrec2026-main-449","ROG: A Multi-Layer Manually Annotated Corpus of Spoken 
Slovenian","10.63317\u002F44np3dvumisj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-449","5701","5710","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.449.pdf","dobrovoljc-etal-2026-rog",[10973,10976,10977,10978,10979],{"paper_id":10964,"author_seq":247,"given_name":10974,"surname":10975,"affiliation":63,"orcid":63},"Kaja","Dobrovoljc",{"paper_id":10964,"author_seq":232,"given_name":7008,"surname":7009,"affiliation":63,"orcid":63},{"paper_id":10964,"author_seq":218,"given_name":9444,"surname":9445,"affiliation":63,"orcid":63},{"paper_id":10964,"author_seq":203,"given_name":1625,"surname":10937,"affiliation":63,"orcid":63},{"paper_id":10964,"author_seq":188,"given_name":10934,"surname":10935,"affiliation":63,"orcid":63},"We present ROG, the first manually annotated spoken corpus of Slovenian to integrate morphosyntactic, prosodic, and interactional layers in a unified framework. Building on the pre-existing Spoken Slovenian Treebank (SST) and newly available recordings from the GOS 2 reference corpus, the resource combines over 75,000 words (10 hours) of annotated speech. The entire corpus features lemmatization, MULTEXT-East morphosyntax, and Universal Dependencies annotations, while approximately half includes additional layers for prosodic units, disfluencies, and dialogue acts. All annotation layers are systematically aligned and cross-referenced, enabling detailed multi-dimensional analyses of spoken language. We describe the corpus design, annotation workflow, data release, and baseline modeling results, showcasing the resource’s value for both linguistic analysis and speech-aware NLP model development. 
All ROG transcriptions and annotations, along with half of the audio recordings, are freely available under CC-BY via (anonymized) repository.",{"paper_id":10982,"title":10983,"year":7,"month":188,"day":63,"doi":10984,"resource_url":10985,"first_page":10986,"last_page":10987,"pdf_url":10988,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":10989,"paper_type":860,"authors":10990,"abstract":10996},"lrec2026-main-450","Building a Dataset for French Accent Classification Evaluation: Are We There Yet?","10.63317\u002F5ayaxbnxjfen","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-450","5711","5721","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.450.pdf","fabre-etal-2026-building",[10991,10992,10994],{"paper_id":10982,"author_seq":247,"given_name":2317,"surname":2318,"affiliation":63,"orcid":63},{"paper_id":10982,"author_seq":232,"given_name":909,"surname":10993,"affiliation":63,"orcid":63},"Avanzi",{"paper_id":10982,"author_seq":218,"given_name":1219,"surname":10995,"affiliation":63,"orcid":63},"Portet","Current evaluation practices in speech processing systems often overlook the diversity of spoken accents, leading to significant performance disparities across speaker groups. This issue largely comes from biases and imbalances in training corpora, and is further compounded by the scarcity of open-source datasets suitable for evaluating accent variability in French. To address this gap, we extend the CFPR dataset with explicit accent labels, providing a new benchmark for assessing the robustness of speech technology systems across diverse French accents. We additionally conduct a perceptual study with 87 human participants to evaluate the reliability and interpretability of these labels. Using this resource, we evaluated an eight-class French accent classifier trained on Common Voice data. 
The first results highlight both the complexity of automatic French accent recognition in low-resource settings, and the difficulty for French-speakers to perceive all the linguistic variabilities in French-speaking countries.",{"paper_id":10998,"title":10999,"year":7,"month":188,"day":63,"doi":11000,"resource_url":11001,"first_page":11002,"last_page":11003,"pdf_url":11004,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11005,"paper_type":860,"authors":11006,"abstract":11016},"lrec2026-main-451","M3-SLU: Evaluating Speaker-Attributed Reasoning in Multimodal Large Language Models","10.63317\u002F5giy6mqijruy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-451","5722","5736","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.451.pdf","kwon-etal-2026-m3",[11007,11009,11011,11014],{"paper_id":10998,"author_seq":247,"given_name":11008,"surname":8456,"affiliation":63,"orcid":63},"Yejin",{"paper_id":10998,"author_seq":232,"given_name":11010,"surname":1336,"affiliation":63,"orcid":63},"Taewoo",{"paper_id":10998,"author_seq":218,"given_name":11012,"surname":11013,"affiliation":63,"orcid":63},"Hyunsoo","Yoon",{"paper_id":10998,"author_seq":203,"given_name":11015,"surname":5173,"affiliation":63,"orcid":63},"Chang Ouk","We present M3-SLU, a new multimodal large language model (MLLM) benchmark for evaluating multi-speaker, multi-turn spoken language understanding. While recent models show strong performance in speech and text comprehension, they still struggle with speaker-attributed reasoning, the ability to understand who said what and when in natural conversations. M3-SLU is built from four open corpora (CHiME-6, MELD, MultiDialog, and AMI) and comprises over 12,000 validated instances with paired audio, transcripts, and metadata. It includes two tasks: (1) Speaker-Attributed Question Answering and (2) Speaker Attribution via Utterance Matching. 
We provide baseline results for both cascaded pipelines and end-to-end MLLMs, evaluated using an LLM-as-Judge and accuracy metrics. Results show that while models can capture what was said, they often fail to identify who said it, revealing a key gap in speaker-aware dialogue understanding. M3-SLU offers as a challenging benchmark to advance research in speaker-aware multimodal understanding.",{"paper_id":11018,"title":11019,"year":7,"month":188,"day":63,"doi":11020,"resource_url":11021,"first_page":11022,"last_page":11023,"pdf_url":11024,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11025,"paper_type":860,"authors":11026,"abstract":11036},"lrec2026-main-452","Medispeech: A French Reading and Spontaneous Speech Corpus for Sleepiness Estimation","10.63317\u002F569a46bcz7ez","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-452","5737","5748","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.452.pdf","beaumard-etal-2026-medispeech",[11027,11028,11029,11032,11034,11035],{"paper_id":11018,"author_seq":247,"given_name":10739,"surname":10740,"affiliation":63,"orcid":63},{"paper_id":11018,"author_seq":232,"given_name":10734,"surname":3843,"affiliation":63,"orcid":63},{"paper_id":11018,"author_seq":218,"given_name":11030,"surname":11031,"affiliation":63,"orcid":63},"Charles","Brazier",{"paper_id":11018,"author_seq":203,"given_name":1179,"surname":11033,"affiliation":63,"orcid":63},"Coelho",{"paper_id":11018,"author_seq":188,"given_name":10736,"surname":10737,"affiliation":63,"orcid":63},{"paper_id":11018,"author_seq":172,"given_name":1159,"surname":10742,"affiliation":63,"orcid":63},"Excessive Daytime Sleepiness (EDS) is associated with several diseases and therefore negatively affects the daily life of impacted people. Its diagnosis and follow-up are difficult because they require testing at the hospital for one full day. 
Monitoring patients regularly in ecological conditions may be done through speech analysis. Although several corpora containing speech from sleepy subjects exist, they do not suit ecological requirements regarding either the device used for recording or the speech elicitation tasks. In this paper, we introduce the Medispeech corpus containing reading, daily-life semi-spontaneous, and medically-oriented spontaneous tasks. Fifty-nine French subjects were recorded with both a professional-quality microphone and a smartphone using a dedicated application, resulting in 1,729 recordings for a total duration of 21 hours. Their EDS diagnosis was assessed by both a physiological objective measurement (mean sleep latency measured during a clinical test) and a subjective questionnaire (Karolinska Sleepiness Scale). Phenotyping of subjects is assured by collecting socio-demographic and medical data related to diverse dimensions of sleepiness, comorbidities, and addictions. Finally, we analyse the validity of our data collection protocol by measuring the effective duration of speech (after discarding pauses) and assessing its links with the collected subjects’ characteristics.",{"paper_id":11038,"title":11039,"year":7,"month":188,"day":63,"doi":11040,"resource_url":11041,"first_page":11042,"last_page":11043,"pdf_url":11044,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11045,"paper_type":860,"authors":11046,"abstract":11069},"lrec2026-main-453","StarDrinks: An English and Korean Test Set for SLU Evaluation in a Drink Ordering Scenario","10.63317\u002F4rxgtayocdby","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-453","5749","5756","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.453.pdf","boito-etal-2026-stardrinks",[11047,11050,11053,11055,11058,11061,11064,11066],{"paper_id":11038,"author_seq":247,"given_name":11048,"surname":11049,"affiliation":63,"orcid":63},"Marcely 
Zanon","Boito",{"paper_id":11038,"author_seq":232,"given_name":11051,"surname":11052,"affiliation":63,"orcid":63},"Caroline","Brun",{"paper_id":11038,"author_seq":218,"given_name":11054,"surname":5173,"affiliation":63,"orcid":63},"Inyoung",{"paper_id":11038,"author_seq":203,"given_name":11056,"surname":11057,"affiliation":63,"orcid":63},"Denys M.","PROUX",{"paper_id":11038,"author_seq":188,"given_name":11059,"surname":11060,"affiliation":63,"orcid":63},"Salah","Ait-Mokhtar",{"paper_id":11038,"author_seq":172,"given_name":11062,"surname":11063,"affiliation":63,"orcid":63},"Nikolaos","Lagos",{"paper_id":11038,"author_seq":155,"given_name":10736,"surname":11065,"affiliation":63,"orcid":63},"Meunier",{"paper_id":11038,"author_seq":138,"given_name":11067,"surname":11068,"affiliation":63,"orcid":63},"Ioan","Calapodescu","LLMs and speech assistants are increasingly used for task-oriented interactions, yet their evaluation often relies on controlled scenarios that fail to capture the variability and complexity of real user requests. Drink ordering, for example, involves diverse named entities, drink types, sizes, customizations, and brand-specific terminology, as well as spontaneous speech phenomena such as hesitations and self-corrections. To address this gap, we introduce StarDrinks, a test set in English and Korean containing speech utterances features, transcriptions, and annotated slots. 
Our dataset supports speech-to-slots SLU, transcription-to-slots NLU, and speech-to-transcription ASR evaluation, providing a realistic benchmark for model robustness and generalization in a linguistically rich, real-world task.",{"paper_id":11071,"title":11072,"year":7,"month":188,"day":63,"doi":11073,"resource_url":11074,"first_page":11075,"last_page":11076,"pdf_url":11077,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11078,"paper_type":860,"authors":11079,"abstract":11100},"lrec2026-main-454","Audio-Lyrics Alignment Dataset for Italian Arias","10.63317\u002F4cpveetxxtmx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-454","5757","5766","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.454.pdf","jajoria-etal-2026-audio",[11080,11083,11085,11088,11089,11092,11094,11097,11099],{"paper_id":11071,"author_seq":247,"given_name":11081,"surname":11082,"affiliation":63,"orcid":63},"Pushkar","Jajoria",{"paper_id":11071,"author_seq":232,"given_name":3060,"surname":11084,"affiliation":63,"orcid":63},"Graciotti",{"paper_id":11071,"author_seq":218,"given_name":11086,"surname":11087,"affiliation":63,"orcid":63},"Giovanna","Casali",{"paper_id":11071,"author_seq":203,"given_name":3598,"surname":3599,"affiliation":63,"orcid":63},{"paper_id":11071,"author_seq":188,"given_name":11090,"surname":11091,"affiliation":63,"orcid":63},"Rodolfo","Delmonte",{"paper_id":11071,"author_seq":172,"given_name":4682,"surname":11093,"affiliation":63,"orcid":63},"Pompilio",{"paper_id":11071,"author_seq":155,"given_name":11095,"surname":11096,"affiliation":63,"orcid":63},"Rocco","Tripodi",{"paper_id":11071,"author_seq":138,"given_name":4091,"surname":11098,"affiliation":63,"orcid":63},"McDermott",{"paper_id":11071,"author_seq":121,"given_name":3626,"surname":3627,"affiliation":63,"orcid":63},"Aligning song lyrics with sung audio is challenging, especially for languages and music styles where annotated datasets are 
scarce. We address this gap by presenting the first dataset of Italian opera arias annotated with lyrics and time-stamps per word. The dataset comprises of 24 arias drawn from well-known operas of the 18th to 20th centuries with a total audio duration of nearly two hours. We benchmark both music alignment models and speech forced alignment models and show that existing methods face significant challenges on this dataset, with performance dropping by 45% compared to other datasets. Multilingual and speech-based models exhibit relatively better performance on this dataset. We also evaluate few-shot fine-tuning of these models on the new dataset and find that, while it yields only marginal overall improvement, it produces localized gains on specific arias, suggesting that limited exposure helps the model adapt to some patterns but cannot fully overcome differences in language or musical style.",{"paper_id":11102,"title":11103,"year":7,"month":188,"day":63,"doi":11104,"resource_url":11105,"first_page":11106,"last_page":11107,"pdf_url":11108,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11109,"paper_type":860,"authors":11110,"abstract":11126},"lrec2026-main-455","The Added Value of Metadata and Annotations: Evidence from Two Large-Scale, Naturalistic Corpus 
Studies","10.63317\u002F4c4triganae3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-455","5767","5775","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.455.pdf","popescu-etal-2026-added",[11111,11114,11116,11118,11120,11123],{"paper_id":11102,"author_seq":247,"given_name":11112,"surname":11113,"affiliation":63,"orcid":63},"Anisia","Popescu",{"paper_id":11102,"author_seq":232,"given_name":8211,"surname":11115,"affiliation":63,"orcid":63},"Cronenberg",{"paper_id":11102,"author_seq":218,"given_name":5576,"surname":11117,"affiliation":63,"orcid":63},"Vasilescu",{"paper_id":11102,"author_seq":203,"given_name":5576,"surname":11119,"affiliation":63,"orcid":63},"Chitoran",{"paper_id":11102,"author_seq":188,"given_name":11121,"surname":11122,"affiliation":63,"orcid":63},"Lori","Lamel",{"paper_id":11102,"author_seq":172,"given_name":11124,"surname":11125,"affiliation":63,"orcid":63},"Martine","Adda-Decker","This paper presents two case studies that highlight both the challenges and benefits of working with large-scale, naturalistic phonetic data. Our aim is to encourage researchers not to shy away from phonetic data found “in the wild”, even when such data are messy, noisy, or incomplete – because they can yield robust, novel insights beyond the reach of controlled laboratory studies. We focus on challenges that are endemic to large corpora, including degraded audio quality, sparse or inconsistent annotations, and missing speaker metadata. 
By comparing two corpus-based studies that diverge in methodology and statistical design, we show how different approaches can mitigate these limitations while still extracting meaningful patterns.",{"paper_id":11128,"title":11129,"year":7,"month":188,"day":63,"doi":11130,"resource_url":11131,"first_page":11132,"last_page":11133,"pdf_url":11134,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11135,"paper_type":860,"authors":11136,"abstract":11162},"lrec2026-main-456","CS-YODAS: A Mined Dataset of In-the-Wild Code-Switched Speech","10.63317\u002F3fir5kp98qyt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-456","5776","5784","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.456.pdf","yan-etal-2026-cs",[11137,11138,11140,11142,11145,11147,11150,11151,11153,11155,11157,11158,11160],{"paper_id":11128,"author_seq":247,"given_name":6381,"surname":10609,"affiliation":63,"orcid":63},{"paper_id":11128,"author_seq":232,"given_name":11139,"surname":3676,"affiliation":63,"orcid":63},"Qingzheng",{"paper_id":11128,"author_seq":218,"given_name":6059,"surname":11141,"affiliation":63,"orcid":63},"Wiesner",{"paper_id":11128,"author_seq":203,"given_name":11143,"surname":11144,"affiliation":63,"orcid":63},"Anuj","Diwan",{"paper_id":11128,"author_seq":188,"given_name":6250,"surname":11146,"affiliation":63,"orcid":63},"Iakovenko",{"paper_id":11128,"author_seq":172,"given_name":11148,"surname":11149,"affiliation":63,"orcid":63},"Alex","Polok",{"paper_id":11128,"author_seq":155,"given_name":4221,"surname":4222,"affiliation":63,"orcid":63},{"paper_id":11128,"author_seq":138,"given_name":11152,"surname":1449,"affiliation":63,"orcid":63},"Shuichiro",{"paper_id":11128,"author_seq":121,"given_name":9231,"surname":11154,"affiliation":63,"orcid":63},"Emerman",{"paper_id":11128,"author_seq":104,"given_name":1316,"surname":11156,"affiliation":63,"orcid":63},"Hain",{"paper_id":11128,"author_seq":87,"given_name":2686,"s
urname":2687,"affiliation":63,"orcid":63},{"paper_id":11128,"author_seq":73,"given_name":1625,"surname":11159,"affiliation":63,"orcid":63},"Viechnicki",{"paper_id":11128,"author_seq":55,"given_name":11161,"surname":4450,"affiliation":63,"orcid":63},"Shinji","We present CS-YODAS, a Creative Commons dataset of in-the-wild code-switched speech mined from multilingual YouTube data. Code-switching, or the alternation between languages within an utterance or conversation, is common in multilingual settings but remains underrepresented in existing CS speech resources, which are typically small, domain-specific, or artificially constructed. Building on the YODAS corpus, we develop a scalable, human-in-the-loop pipeline for identifying and validating naturally occurring code-switching. The resulting dataset, which totals 313 hrs and spans 7 matrix languages, provides diverse, real-world examples of spontaneous code-switched speech. We further analyze the distribution and characteristics of code-switching in the wild, examining language-pair frequencies and switching patterns, and report baseline results for spoken language identification. We hope that CS-YODAS will encourage broader and more comprehensive research on code-switched speech. 
Dataset link: https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002Fbyan\u002Fcs-yodas.",{"paper_id":11164,"title":11165,"year":7,"month":188,"day":63,"doi":11166,"resource_url":11167,"first_page":11168,"last_page":11169,"pdf_url":11170,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11171,"paper_type":860,"authors":11172,"abstract":11179},"lrec2026-main-457","The Limits of Data Scaling: Sub-token Utilization and Acoustic Saturation in Multilingual ASR","10.63317\u002F5pht8xx8yosb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-457","5785","5793","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.457.pdf","liang-etal-2026-limits",[11173,11174,11176,11177],{"paper_id":11164,"author_seq":247,"given_name":9375,"surname":9376,"affiliation":63,"orcid":63},{"paper_id":11164,"author_seq":232,"given_name":2331,"surname":11175,"affiliation":63,"orcid":63},"Ballier",{"paper_id":11164,"author_seq":218,"given_name":9381,"surname":9382,"affiliation":63,"orcid":63},{"paper_id":11164,"author_seq":203,"given_name":5132,"surname":11178,"affiliation":63,"orcid":63},"Wright","How much audio is needed to fully observe a multilingual ASR model’s learned sub-token inventory across languages, and does data disparity in multilingual pre-training affect how these tokens are utilized during inference? We address this question by analyzing Whisper’s decoding behavior during inference across 49 languages. By logging decoding candidate sub-tokens and tracking their cumulative discovery over time, we study the utilization pattern of the model’s sub-token space. Results show that the total number of discovered tokens remains largely independent of a language’s pre-training hours, indicating that data disparity does not strongly influence lexical diversity in the model’s hypothesis space. 
Sub-token discovery rates follow a consistent exponential saturation pattern across languages, suggesting a stable time window after which additional audio yields minimal new token activation. We refer to this convergence threshold as acoustic saturation time AST. Further analyses of rank–frequency distributions reveal Zipf-like patterns better modeled by a Zipf–Mandelbrot law, and mean sub-token length shows a positive correlation with resource level. Additionally, those metrics show more favorable patterns for languages in the Latin script than those in scripts such as Cyrillic, CJK and Semitic. Together, our study suggests that sub-token utilization during multilingual ASR inference is constrained more by the statistical, typological, and orthographical structure of the speech than by training data scale, providing an empirical basis for more equitable corpus construction and cross-lingual evaluation.",{"paper_id":11181,"title":11182,"year":7,"month":188,"day":63,"doi":11183,"resource_url":11184,"first_page":11185,"last_page":11186,"pdf_url":11187,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11188,"paper_type":860,"authors":11189,"abstract":11207},"lrec2026-main-458","AusKidTalk: Developing Transcription Guidelines for Continuous Australian English Child 
Speech","10.63317\u002F4j6otnjq8c3n","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-458","5794","5804","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.458.pdf","szalay-etal-2026-auskidtalk",[11190,11193,11195,11197,11199,11202,11205],{"paper_id":11181,"author_seq":247,"given_name":11191,"surname":11192,"affiliation":63,"orcid":63},"Tuende","Szalay",{"paper_id":11181,"author_seq":232,"given_name":11194,"surname":9753,"affiliation":63,"orcid":63},"Zheng",{"paper_id":11181,"author_seq":218,"given_name":11196,"surname":1837,"affiliation":63,"orcid":63},"Renata",{"paper_id":11181,"author_seq":203,"given_name":8839,"surname":11198,"affiliation":63,"orcid":63},"Shahin",{"paper_id":11181,"author_seq":188,"given_name":11200,"surname":11201,"affiliation":63,"orcid":63},"Sirojan","Tharmakulasingam",{"paper_id":11181,"author_seq":172,"given_name":11203,"surname":11204,"affiliation":63,"orcid":63},"Kirrie","Ballard",{"paper_id":11181,"author_seq":155,"given_name":11206,"surname":2400,"affiliation":63,"orcid":63},"Beena","Guidelines are required for accurate and consistent transcription of speech corpora, especially when they contain more challenging, e.g. spontaneous or under-resourced speech. This paper presents a workflow and guidelines for transcribing spontaneous and under-resourced child speech in AusKidTalk, the first Australian English child corpus. Speech samples were elicited using a story-telling task and are 3.5 minutes long per child on average. Orthographic transcriptions were generated using automatic speech recognition (ASR) tools and corrected manually. A novel hand-correction protocol consisting of guidelines, hand-correction interface, and ground truth transcriptions together with consistency metrics were developed. Nine annotators submitted hand-corrections for 261 children’s story-telling task, and 25 ground truth tasks. 
Manual correction was 11-fold of speech time with a 3.5-minute-long story-telling task corrected in approximately 40 minutes. Efficiency is attributed to the quality of automatic transcription with 23% word error rate. Manual correction was accurate with annotators achieving consistent results on 15\u002F25 ground truth submissions. Most inconsistent ground truth submissions were caused by a single, challenging ground truth task. These results show that our workflow yields efficient and accurate transcriptions, although transcriptions of potentially more challenging narrative tasks (e.g., elicited from younger children) might require further corrections.",{"paper_id":11209,"title":11210,"year":7,"month":188,"day":63,"doi":11211,"resource_url":11212,"first_page":11213,"last_page":11214,"pdf_url":11215,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11216,"paper_type":860,"authors":11217,"abstract":11242},"lrec2026-main-459","spINAch: A Diachronic Corpus of French Broadcast Speech Controlled for Speakers' Age and 
Gender","10.63317\u002F58hgwvgvkz6g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-459","5805","5820","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.459.pdf","devauchelle-etal-2026-spinach",[11218,11220,11222,11225,11228,11230,11233,11235,11237,11240],{"paper_id":11209,"author_seq":247,"given_name":2467,"surname":11219,"affiliation":63,"orcid":63},"Devauchelle",{"paper_id":11209,"author_seq":232,"given_name":1061,"surname":11221,"affiliation":63,"orcid":63},"Doukhan",{"paper_id":11209,"author_seq":218,"given_name":11223,"surname":11224,"affiliation":63,"orcid":63},"Remi","Uro",{"paper_id":11209,"author_seq":203,"given_name":11226,"surname":11227,"affiliation":63,"orcid":63},"Lucas","Ondel",{"paper_id":11209,"author_seq":188,"given_name":5784,"surname":11229,"affiliation":63,"orcid":63},"Pelloin",{"paper_id":11209,"author_seq":172,"given_name":11231,"surname":11232,"affiliation":63,"orcid":63},"Olympia","Imbert-Brégégère",{"paper_id":11209,"author_seq":155,"given_name":3263,"surname":11234,"affiliation":63,"orcid":63},"Lefort",{"paper_id":11209,"author_seq":138,"given_name":10710,"surname":11236,"affiliation":63,"orcid":63},"Picard",{"paper_id":11209,"author_seq":121,"given_name":11238,"surname":11239,"affiliation":63,"orcid":63},"Emeline","Seignobos",{"paper_id":11209,"author_seq":104,"given_name":9755,"surname":11241,"affiliation":63,"orcid":63},"Rilliard","We present spINAch, a large diachronic corpus of French speech from radio and television archives, balanced by speakers’ gender, age (20-95 years old), and spanning 60 years from 1955 to 2015. The dataset includes over 320 hours of recordings from more than two thousand speakers. The methodology for building the corpus is described, focusing on the quality of collected samples in acoustic terms. The data were automatically transcribed and phonetically aligned to allow studies at a phonemic level. 
More than 3 million oral vowels have been analyzed to propose their fundamental frequency and formants. The corpus, available to the community for research purposes, is valuable for describing the evolution of Parisian French through the representation of gender and age. The presented analyses also demonstrate that the diachronic nature of the corpus allows the observation of various phonetic phenomena, such as the evolution of voice pitch over time (which does not differ by gender in our data) and the neutralization of the \u002Fa\u002F-\u002Fɑ\u002F opposition in Parisian French during this period.",{"paper_id":11244,"title":11245,"year":7,"month":188,"day":63,"doi":11246,"resource_url":11247,"first_page":11248,"last_page":11249,"pdf_url":11250,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11251,"paper_type":860,"authors":11252,"abstract":11264},"lrec2026-main-460","SALAN: A Massive ASR Dataset for the Languages of Niger","10.63317\u002F2ibxrv25uwgo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-460","5821","5827","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.460.pdf","keita-etal-2026-salan",[11253,11256,11258,11259,11262],{"paper_id":11244,"author_seq":247,"given_name":11254,"surname":11255,"affiliation":63,"orcid":63},"Mamadou K","Keita",{"paper_id":11244,"author_seq":232,"given_name":8040,"surname":11257,"affiliation":63,"orcid":63},"Homan",{"paper_id":11244,"author_seq":218,"given_name":7225,"surname":7226,"affiliation":63,"orcid":63},{"paper_id":11244,"author_seq":203,"given_name":11260,"surname":11261,"affiliation":63,"orcid":63},"Abdoulaye","Sako",{"paper_id":11244,"author_seq":188,"given_name":11263,"surname":3190,"affiliation":63,"orcid":63},"Seydou","We introduce SALAN, a large-scale speech dataset covering eight of the major indigenous languages of Niger: Zarma, Hausa, Buduma, Gourmantchema, Tubu, Tamasheq, Fulfulde, and Kanuri. 
The final dataset exceeds 2,000 hours of audio, largely sourced from radio broadcasts and community recordings. We transcribed portions of the audio using the MMS model and conducted manual verification for 110 hours across Zarma and Hausa. We then used active learning to expand annotation to an additional 5 hours of high-uncertainty Zarma segments. To evaluate SALAN’s utility for ASR, We fine-tuned both Wav2vec2 XLS-R and Whisper on Zarma subsets and carried out additional pre-training with multilingual unlabeled data. Our best model achieved a word error rate of 25.3% and a character error rate of 6.2%. SALAN and the trained models will be made publicly available for use by researchers and speakers, with the potential to impact over 20 million individuals in Niger and neighboring countries.",{"paper_id":11266,"title":11267,"year":7,"month":188,"day":63,"doi":11268,"resource_url":11269,"first_page":11270,"last_page":11271,"pdf_url":11272,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11273,"paper_type":860,"authors":11274,"abstract":11281},"lrec2026-main-461","Listening for Ideology: Automatic Analysis of Character Speech in Historical Nazi Propaganda Films","10.63317\u002F4fxfeqysxtnz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-461","5828","5838","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.461.pdf","ruth-etal-2026-listening",[11275,11277,11279],{"paper_id":11266,"author_seq":247,"given_name":2331,"surname":11276,"affiliation":63,"orcid":63},"Ruth",{"paper_id":11266,"author_seq":232,"given_name":2152,"surname":11278,"affiliation":63,"orcid":63},"Burghardt",{"paper_id":11266,"author_seq":218,"given_name":4651,"surname":11280,"affiliation":63,"orcid":63},"Niekler","While the visual dimension of film has been widely explored in digital humanities through methods such as \"distant viewing\", the audio layer has received less attention despite its crucial role in 
meaning-making. We address this gap with a four-step pipeline combining speaker diarization, audio gender classification, automatic speech recognition (ASR), and LLM-based psycholinguistic analysis to infer character traits from film dialogues. Applying this method to a set of Nazi propaganda films, we find that despite challenges in speaker diarization due to noisy historical film audio, modern ASR and GPT-based analyses produce character profiles consistent with existing filmic research. Our proposed pipeline advances distant reading of film dialogue, complementing visual analyses and enabling scalable study of ideology in historical cinema. A case study of female characters in NS films identifies three recurring types, centered on the ideological figure of the mother in National Socialism.",{"paper_id":11283,"title":11284,"year":7,"month":188,"day":63,"doi":11285,"resource_url":11286,"first_page":11287,"last_page":11288,"pdf_url":11289,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11290,"paper_type":860,"authors":11291,"abstract":11305},"lrec2026-main-462","Supplementary Resources and Analysis for Automatic Speech Recognition Systems Trained on the Loquacious 
Dataset","10.63317\u002F4zsvhm25r7zf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-462","5839","5848","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.462.pdf","rossenbach-etal-2026-supplementary",[11292,11294,11296,11299,11300,11302],{"paper_id":11283,"author_seq":247,"given_name":6539,"surname":11293,"affiliation":63,"orcid":63},"Rossenbach",{"paper_id":11283,"author_seq":232,"given_name":11295,"surname":4660,"affiliation":63,"orcid":63},"Robin",{"paper_id":11283,"author_seq":218,"given_name":11297,"surname":11298,"affiliation":63,"orcid":63},"Tina","Raissi",{"paper_id":11283,"author_seq":203,"given_name":2467,"surname":4937,"affiliation":63,"orcid":63},{"paper_id":11283,"author_seq":188,"given_name":4026,"surname":11301,"affiliation":63,"orcid":63},"Kleppel",{"paper_id":11283,"author_seq":172,"given_name":11303,"surname":11304,"affiliation":63,"orcid":63},"Ralf","Schlüter","The recently published Loquacious dataset aims to be a replacement for established English automatic speech recognition (ASR) datasets such as LibriSpeech or TED-Lium. The main goal of Loquacious dataset is to provide properly defined training and test partitions across many acoustic and language domains, with an open license suitable for both academia and industry. To further promote the benchmarking and usability of this new dataset, we present additional resources in the form of n-gram language models (LMs), a grapheme-to-phoneme (G2P) model and pronunciation lexica, with open and public access. Utilizing those additional resources we show experimental results across a wide range of ASR architectures with different label units and topologies. 
Our initial experimental results indicate that the Loquacious dataset offers a valuable study case for a variety of common challenges in ASR.",{"paper_id":11307,"title":11308,"year":7,"month":188,"day":63,"doi":11309,"resource_url":11310,"first_page":11311,"last_page":11312,"pdf_url":11313,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11314,"paper_type":860,"authors":11315,"abstract":11319},"lrec2026-main-463","WhiteHouse: Translation of the Casablanca Corpus for Multi-dialectal Arabic Speech Translation","10.63317\u002F4zqn965acien","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-463","5849","5855","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.463.pdf","bougares-etal-2026-whitehouse",[11316,11317,11318],{"paper_id":11307,"author_seq":247,"given_name":3736,"surname":3737,"affiliation":63,"orcid":63},{"paper_id":11307,"author_seq":232,"given_name":3730,"surname":3731,"affiliation":63,"orcid":63},{"paper_id":11307,"author_seq":218,"given_name":3733,"surname":3734,"affiliation":63,"orcid":63},"Remarkable progress has been made recently in the speech processing of Arabic dialects. This is primarily due to the availability of large multilingual pre-trained models as well as the development of multiple well-annotated datasets that support training, fine-tuning, and evaluation of various speech models. However, most existing research on Arabic speech processing did not consider Automatic Speech Translation (AST) and focused mainly on Dialect Identification (DI) and Automatic Speech Recognition (ASR) tasks. To address this gap, we introduce WhiteHouse, the first multi-dialectal Arabic-English Speech Translation Corpus. WhiteHouse supplements the recently created Casablanca dataset with English translation for each utterance in the transcripts. This results in a three-way parallel speech-transcription-translation multi-dialectal Arabic dataset. 
WhiteHouse dataset is used to evaluate various SoTA speech translation models. Our experiments show that SoTA speech translation models performs poorly when evaluated on Arabic dialectal conditions. All the data used during training and testing are released for public use and further improvements",{"paper_id":11321,"title":11322,"year":7,"month":188,"day":63,"doi":11323,"resource_url":11324,"first_page":11325,"last_page":11326,"pdf_url":11327,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11328,"paper_type":860,"authors":11329,"abstract":11336},"lrec2026-main-464","ToneSwiper: Facilitating Manual ToDI-annotation of Dutch Prosody","10.63317\u002F5ctuktk6ffpi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-464","5856","5863","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.464.pdf","westera-etal-2026-toneswiper",[11330,11333],{"paper_id":11321,"author_seq":247,"given_name":11331,"surname":11332,"affiliation":63,"orcid":63},"Matthijs","Westera",{"paper_id":11321,"author_seq":232,"given_name":11334,"surname":11335,"affiliation":63,"orcid":63},"Ariëlle","Reitsema","Manual transcription of intonation by experts remains an essential part of research on the structure and meaning of intonation across languages, as well as for developing computational methods for automatic intonation transcription. We present ToneSwiper, a Python program with a graphical user interface that facilitates manual intonation transcription in the ToDI framework (Transcription of Dutch Intonation; Gussenhoven, 2005), with possible adaptation to similar (e.g., ToBI-like) frameworks for other languages. For the trained annotator, it enables efficient ToDI transcription of speech by integrating an audio-player, a spectrogram and pitch contour plot, auto-scroll, dynamic audio stretching, and an intuitive hotkey interface that maps key sequences to ToDI elements, e.g., pressing up-down for a high-to-low accent (H*L). 
In this way, transcription is conducted by ‘swiping’ over the arrow keys on the keyboard.
In this paper, we present IMaSC, a Malayalam text and speech corpus containing 49 hours and 37 minutes of recorded speech.
In this paper, we introduce a context-aware multilingual ASR framework that supports diverse languages and accents while preserving the modularity of pretrained models. Our approach combines a frozen speech encoder and a decoder-only language model via a lightweight projection module, allowing structured context prompts, including dialogue history and biasing words, to guide transcription. To improve interaction between speech and context, we employ a contrastive learning objective that aligns their representations in a shared embedding space. Evaluations on over 1,500 hours of real-world conversational speech across 11 languages and 5 English dialects show that contextual input consistently improves recognition quality. Contrastive alignment provides additional gains when applied to different context types, with an overall performance gain of over 5%. These results highlight the importance of both contextual modeling and cross-modal alignment in multilingual ASR.",{"paper_id":11383,"title":11384,"year":7,"month":188,"day":63,"doi":11385,"resource_url":11386,"first_page":11387,"last_page":11388,"pdf_url":11389,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":11390,"bibkey":11391,"paper_type":860,"authors":11392,"abstract":11401},"lrec2026-main-467","Task-Lens: Cross-Task Utility Based Speech Dataset Profiling for Low-Resource Indian Languages","10.63317\u002F43kj7ptiob6p","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-467","5883","5895","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.467.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.467_OptionalSupplementaryMaterial.zip","sharma-etal-2026-task",[11393,11396,11398],{"paper_id":11383,"author_seq":247,"given_name":11394,"surname":11395,"affiliation":63,"orcid":63},"Swati","Sharma",{"paper_id":11383,"author_seq":232,"given_name":11397,"surname":11395,"affiliation":63,"orcid":63},"Divya 
V.",{"paper_id":11383,"author_seq":218,"given_name":11399,"surname":11400,"affiliation":63,"orcid":63},"Anubha","Gupta","The rising demand for inclusive speech technologies amplifies the need for multilingual datasets for Natural Language Processing (NLP) research. However, limited awareness of existing task-specific resources in low-resource languages hinders research. This challenge is especially acute in linguistically diverse countries, such as India. Cross-task profiling of existing Indian speech datasets can alleviate the data scarcity challenge. This involves investigating the utility of datasets across multiple downstream tasks rather than focusing on a single task. Prior surveys typically catalogue datasets for a single task, leaving comprehensive cross-task profiling as an open opportunity. Therefore, we propose Task-Lens, a cross-task survey that assesses the readiness of 50 Indian speech datasets spanning 26 languages for nine downstream speech tasks. First, we analyze which datasets contain metadata and properties suitable for specific tasks. Next, we propose task-aligned enhancements to unlock datasets to their full downstream potential. Finally, we identify tasks and Indian languages that are critically underserved by current resources. Our findings reveal that many Indian speech datasets contain untapped metadata that can support multiple downstream tasks. 
By uncovering cross-task linkages and gaps, Task-Lens enables researchers to explore the broader applicability of existing datasets and to prioritize dataset creation for underserved tasks and languages.",{"paper_id":11403,"title":11404,"year":7,"month":188,"day":63,"doi":11405,"resource_url":11406,"first_page":11407,"last_page":11408,"pdf_url":11409,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11410,"paper_type":860,"authors":11411,"abstract":11417},"lrec2026-main-468","Introducing MELI: The Mandarin-English Language Interview Corpus","10.63317\u002F3umiyc4sxwhk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-468","5896","5904","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.468.pdf","liu-etal-2026-introducing",[11412,11414],{"paper_id":11403,"author_seq":247,"given_name":11413,"surname":3916,"affiliation":63,"orcid":63},"Suyuan",{"paper_id":11403,"author_seq":232,"given_name":11415,"surname":11416,"affiliation":63,"orcid":63},"Molly","Babel","We introduce the Mandarin–English Language Interview (MELI) Corpus, an open-source resource of 29.8 hours of speech from 51 Mandarin–English bilingual speakers. MELI combines matched sessions in Mandarin and English with two speaking styles: read sentences and spontaneous interviews about language varieties, standardness, and learning experiences. Audio was recorded at 44.1 kHz (16-bit, stereo). Interviews were fully transcribed, force-aligned at word and phone levels, and anonymized. Descriptively, the Mandarin component totals  14.7 hours (mean duration 17.3 minutes) and the English component  15.1 hours (mean duration 17.8 minutes). We report token\u002Ftype statistics for each language and document code-switching patterns (frequent in Mandarin sessions; more limited in English sessions). 
The corpus design supports within-\u002Fcross-speaker, within\u002Fcross-language acoustic comparison and links speech content to speakers’ stated language attitudes, enabling both quantitative and qualitative analyses. The MELI Corpus will be released with transcriptions, alignments, metadata, scans of labelled maps and documentation under a CC BY-NC 4.0 license.",{"paper_id":11419,"title":11420,"year":7,"month":188,"day":63,"doi":11421,"resource_url":11422,"first_page":11423,"last_page":11424,"pdf_url":11425,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11426,"paper_type":860,"authors":11427,"abstract":11440},"lrec2026-main-469","PhonemeDF: A Synthetic Speech Dataset for Audio Deepfake Detection and Naturalness Evaluation","10.63317\u002F5b4zfxnfqwm4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-469","5905","5915","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.469.pdf","nallaguntla-etal-2026-phonemedf",[11428,11431,11434,11437],{"paper_id":11419,"author_seq":247,"given_name":11429,"surname":11430,"affiliation":63,"orcid":63},"Vamshi","Nallaguntla",{"paper_id":11419,"author_seq":232,"given_name":11432,"surname":11433,"affiliation":63,"orcid":63},"Aishwarya R.","Fursule",{"paper_id":11419,"author_seq":218,"given_name":11435,"surname":11436,"affiliation":63,"orcid":63},"Shruti","Kshirsagar",{"paper_id":11419,"author_seq":203,"given_name":11438,"surname":11439,"affiliation":63,"orcid":63},"Anderson Raymundo","Avila","The growing sophistication of speech generated by Artificial Intelligence (AI) has introduced new challenges in audio deepfake detection. Text-to-speech (TTS) and voice conversion (VC) technologies can create highly convincing synthetic speech with naturalness and intelligibility. 
This poses serious threats to voice biometric security and to systems designed to combat the spread of spoken misinformation, where synthetic voices may be used to disseminate false or malicious content. While interest in AI-generated speech has increased, resources for evaluating naturalness at the phoneme level remain limited. In this work, we address this gap by presenting the Phoneme-Level DeepFake dataset (PhonemeDF), comprising parallel real and synthetic speech segmented at the phoneme level. Real speech samples are derived from a subset of LibriSpeech, while synthetic samples are generated using four TTS and three VC systems. For each system, phoneme-aligned TextGrid files are obtained using the Montreal Forced Aligner (MFA). We compute the Kullback–Leibler divergence (KLD) between real and synthetic phoneme distributions to quantify fidelity and establish a ranking based on similarity to natural speech. Our findings show a clear correlation between the KLD of real and synthetic phoneme distributions and the performance of classifiers trained to distinguish them, suggesting that KLD can serve as an indicator of the most discriminative phonemes for deepfake detection.",{"paper_id":11442,"title":11443,"year":7,"month":188,"day":63,"doi":11444,"resource_url":11445,"first_page":11446,"last_page":11447,"pdf_url":11448,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11449,"paper_type":860,"authors":11450,"abstract":11456},"lrec2026-main-470","How Much Data for Stable Formant Values? 
Pipeline for Convergence Detection Based on Read Speech","10.63317\u002F3mgdob6szndp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-470","5916","5925","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.470.pdf","sward-etal-2026-how",[11451,11454,11455],{"paper_id":11442,"author_seq":247,"given_name":11452,"surname":11453,"affiliation":63,"orcid":63},"Kayla","Sward",{"paper_id":11442,"author_seq":232,"given_name":1915,"surname":1916,"affiliation":63,"orcid":63},{"paper_id":11442,"author_seq":218,"given_name":1918,"surname":1919,"affiliation":63,"orcid":63},"This study investigates the stability and convergence of vowel formants (F1, F2, F3) in read speech through an extensive corpus of audiobook recordings. While most formant studies rely on brief, isolated utterances recorded in laboratory settings, this analysis draws on 3,384 chapters (about 942 hours) of continuous, stylistically varied speech from publicly available audiobooks. The data was processed using an automated pipeline that comprised transcription, phoneme alignment, and formant extraction. Several statistical techniques – First Token Within (FTW), Cumulative Sum (CUSUM), Two-Sample t-Test, Confidence Interval (CI) Shrinkage, Piecewise Linear Fitting (PWLF), and Binary Segmentation (BinSeg) – were compared for their effectiveness in identifying stabilization points. Findings indicate that formant means generally stabilize within 60 to 230 vowel tokens per phoneme, dependent on vowel type and speaker gender. Of the methods that were evaluated, CUSUM yielded the most consistent and informative results. 
The results provide practical guidelines for determining the quantity of non-laboratory speech required to obtain reliable vowel formant averages.",{"paper_id":11458,"title":11459,"year":7,"month":188,"day":63,"doi":11460,"resource_url":11461,"first_page":11462,"last_page":11463,"pdf_url":11464,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11465,"paper_type":860,"authors":11466,"abstract":11480},"lrec2026-main-471","MUSCAT: MUltilingual, SCientific ConversATion Benchmark","10.63317\u002F2q8inmh4zmpa","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-471","5926","5937","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.471.pdf","sinhamahapatra-etal-2026-muscat",[11467,11470,11472,11475,11478,11479],{"paper_id":11458,"author_seq":247,"given_name":11468,"surname":11469,"affiliation":63,"orcid":63},"Supriti","Sinhamahapatra",{"paper_id":11458,"author_seq":232,"given_name":11471,"surname":2395,"affiliation":63,"orcid":63},"Thai-Binh",{"paper_id":11458,"author_seq":218,"given_name":11473,"surname":11474,"affiliation":63,"orcid":63},"Yiğit","Oğuz",{"paper_id":11458,"author_seq":203,"given_name":11476,"surname":11477,"affiliation":63,"orcid":63},"Enes Yavuz","Ugan",{"paper_id":11458,"author_seq":188,"given_name":1380,"surname":1381,"affiliation":63,"orcid":63},{"paper_id":11458,"author_seq":172,"given_name":869,"surname":2112,"affiliation":63,"orcid":63},"The goal of multilingual speech technology is to facilitate seamless communication between individuals speaking different languages, creating the experience as though everyone were a multilingual speaker. To create this experience, speech technology needs to address several challenges: Handling mixed multilingual input, specific vocabulary, and code-switching. However, there is currently no dataset benchmarking this situation. 
We propose a new benchmark to evaluate whether current Automatic Speech Recognition (ASR) systems are able to handle these challenges.
We propose an evaluation methodology for sequence labeling tasks grounded on error analysis that provides both quantitative and qualitative information on where systems must be improved and predicts how models will perform on a different distribution. The key is to create test sets that, contrary to common practice, do not rely on gathering large amounts of real-world in-distribution scraped data, but consists in handcrafting a small set of linguistically motivated examples that exhaustively cover the range of span attributes (such as shape, length, casing, sentence position, etc.) a system may encounter in the wild. We demonstrate this methodology on a benchmark for anglicism identification in Spanish. Our methodology provides results that are diagnostic (because they help identify systematic weaknesses in performance), actionable (because they can inform which model is better suited for a given scenario) and predictive: our method predicts model performance on external datasets with a median correlation of 0.85.",{"paper_id":11498,"title":11499,"year":7,"month":188,"day":63,"doi":11500,"resource_url":11501,"first_page":11502,"last_page":11503,"pdf_url":11504,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11505,"paper_type":860,"authors":11506,"abstract":11519},"lrec2026-main-473","Memorization or Lucky Guesses: Detecting Short Sequences from Copyrighted Dutch News in LLM 
Output","10.63317\u002F3iqgydmfqiju","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-473","5960","5969","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.473.pdf","veerbeek-etal-2026-memorization",[11507,11510,11513,11516],{"paper_id":11498,"author_seq":247,"given_name":11508,"surname":11509,"affiliation":63,"orcid":63},"Joris","Veerbeek",{"paper_id":11498,"author_seq":232,"given_name":11511,"surname":11512,"affiliation":63,"orcid":63},"Kas","Berendsen",{"paper_id":11498,"author_seq":218,"given_name":11514,"surname":11515,"affiliation":63,"orcid":63},"Alessandra","Polimeno",{"paper_id":11498,"author_seq":203,"given_name":11517,"surname":11518,"affiliation":63,"orcid":63},"Antal van den","Bosch","Demonstrating that large language models have memorized copyrighted material is more feasible for high-volume publishers than for smaller outlets whose content appears less frequently online. This study explores how even short, repeated sequences–rather than full articles–can serve as evidence of memorization. Focusing on Dutch news sources included in the mC4 dataset, we test whether GPT-4 and mT5 reproduce excerpts from thousands of articles, including standardized editorial boilerplate. By comparing results to a post-training baseline and modeling memorization as a survival process, we find that repeated, publication-specific phrases are significantly more likely to be completed verbatim. 
The approach provides a means to detect empirical evidence of memorization in cases where full reproduction is unlikely.",{"paper_id":11521,"title":11522,"year":7,"month":188,"day":63,"doi":11523,"resource_url":11524,"first_page":11525,"last_page":11526,"pdf_url":11527,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":11528,"bibkey":11529,"paper_type":860,"authors":11530,"abstract":11542},"lrec2026-main-474","When Numbers Tell Half the Story: Human-Metric Alignment in Topic Model Evaluation","10.63317\u002F387fiwstpipw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-474","5970","5980","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.474.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.474_OptionalSupplementaryMaterial.zip","prouteau-etal-2026-when",[11531,11533,11535,11537,11540],{"paper_id":11521,"author_seq":247,"given_name":3251,"surname":11532,"affiliation":63,"orcid":63},"Prouteau",{"paper_id":11521,"author_seq":232,"given_name":7075,"surname":11534,"affiliation":63,"orcid":63},"Lareau",{"paper_id":11521,"author_seq":218,"given_name":2331,"surname":11536,"affiliation":63,"orcid":63},"Dugue",{"paper_id":11521,"author_seq":203,"given_name":11538,"surname":11539,"affiliation":63,"orcid":63},"Jean-Charles","Lamirel",{"paper_id":11521,"author_seq":188,"given_name":5419,"surname":11541,"affiliation":63,"orcid":63},"Malaterre","Topic models uncover latent thematic structures in text corpora, yet evaluating their quality remains challenging, particularly in specialized domains. Existing methods often rely on automated metrics like topic coherence and diversity, which may not fully align with human judgment. Human evaluation tasks, such as word intrusion, provide valuable insights but are costly and primarily validated on general-domain corpora. 
This paper introduces Topic Word Mixing (TWM), a novel human evaluation task assessing inter-topic distinctness by testing whether annotators can distinguish between word sets from single or mixed topics. TWM complements word intrusion’s focus on intra-topic coherence and provides a human-grounded counterpart to diversity metrics. We evaluate six topic models–both statistical and embedding-based (LDA, NMF, Top2Vec, BERTopic, CFMF, CFMF-emb)–comparing automated metrics with human evaluation methods based on nearly 4,000 annotations from a domain-specific corpus of philosophy of science publications. Our findings reveal that word intrusion and coherence metrics do not always align, particularly in specialized domains, and that TWM captures human-perceived distinctness while appearing to align with diversity metrics. We release the annotated dataset and task generation code. This work highlights the need for evaluation frameworks bridging automated and human assessments, particularly for domain-specific corpora.",{"paper_id":11544,"title":11545,"year":7,"month":188,"day":63,"doi":11546,"resource_url":11547,"first_page":11548,"last_page":11549,"pdf_url":11550,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11551,"paper_type":860,"authors":11552,"abstract":11561},"lrec2026-main-475","Detecting Hallucinations in Authentic LLM–Human Interactions","10.63317\u002F5pykihiz52tk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-475","5981","5995","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.475.pdf","ren-etal-2026-detecting",[11553,11556,11559],{"paper_id":11544,"author_seq":247,"given_name":11554,"surname":11555,"affiliation":63,"orcid":63},"Yujie","Ren",{"paper_id":11544,"author_seq":232,"given_name":11557,"surname":11558,"affiliation":63,"orcid":63},"Niklas","Gruhlke",{"paper_id":11544,"author_seq":218,"given_name":8037,"surname":11560,"affiliation":63,"orcid":63},"Lauscher","As large 
language models (LLMs) are increasingly applied in sensitive domains such as medicine and law, hallucination detection has become a critical task. Although numerous benchmarks have been proposed to advance research in this area, most of them are artificially constructed––either through deliberate hallucination induction or simulated interactions––rather than derived from genuine LLM–human dialogues. Consequently, these benchmarks fail to fully capture the characteristics of hallucinations that occur in real-world usage. To address this limitation, we introduce AuthenHallu, the first hallucination detection benchmark built entirely from authentic LLM–human interactions. For AuthenHallu, we select and annotate samples from genuine LLM–human dialogues, thereby providing a faithful reflection of how LLMs hallucinate in everyday user interactions. Statistical analysis shows that hallucinations occur in 31.4% of the query–response pairs in our benchmark, and this proportion increases dramatically to 60.0% in challenging domains such as ’Math & Number Problems’. Furthermore, we explore the potential of using vanilla LLMs themselves as hallucination detectors and find that, despite some promise, their current performance remains insufficient in real-world scenarios. 
The data and code are publicly available at https:\u002F\u002Fgithub.com\u002FTAI-HAMBURG\u002FAuthenHallu.",{"paper_id":11563,"title":11564,"year":7,"month":188,"day":63,"doi":11565,"resource_url":11566,"first_page":11567,"last_page":11568,"pdf_url":11569,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11570,"paper_type":860,"authors":11571,"abstract":11582},"lrec2026-main-476","Issue Detection and Category Classification in Domain-Specific Technical Logbooks","10.63317\u002F4q9gt37vbz6w","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-476","5996","6008","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.476.pdf","karimi-etal-2026-issue",[11572,11575,11578,11581],{"paper_id":11563,"author_seq":247,"given_name":11573,"surname":11574,"affiliation":63,"orcid":63},"Afshin","Karimi",{"paper_id":11563,"author_seq":232,"given_name":11576,"surname":11577,"affiliation":63,"orcid":63},"Ingmar","Hartl",{"paper_id":11563,"author_seq":218,"given_name":11579,"surname":11580,"affiliation":63,"orcid":63},"Henrik","Tuennermann",{"paper_id":11563,"author_seq":203,"given_name":8037,"surname":11560,"affiliation":63,"orcid":63},"Operating large-scale research infrastructures such as free-electron lasers produces vast amounts of operator-authored documentation that records daily observations, anomalies, and maintenance actions. These logbooks and incident reports contain valuable operational knowledge but often remain underexplored due to their unstructured, domain-specific language. While large language models (LLMs) show strong generalization in general domains, their effectiveness on such technical operator text has, to the best of our knowledge, not been systematically assessed. 
We introduce two new English datasets from real-world laser operations: (i) a logbook dataset annotated for binary issue detection (does an entry describe or report an actionable fault?), and (ii) an operator ticket dataset annotated for multi-class issue categorization assign each ticket to one of 13 technical categories). The corpora comprise 2,979 logbook entries and 758 tickets from 2022–2024; both are cleaned, anonymized, and suitable for benchmarking classification performance. We evaluate four open LLMs (LLaMA-3, Mistral-Small, Qwen-3-30B, GPT-OSS-120B) under zero-shot, few-shot, and chain-of-thought (CoT) prompting, using multiple semantically equivalent prompt variants per setting to assess robustness. Across both tasks, few-shot prompting is consistently strongest, with top systems reaching F1 approx 0.84 for logbook issue detection and Macro-F1 0.42 for operator ticket categorization. These results suggest that incorporating a handful of in-domain examples can substantially improve performance on operator-authored technical text, even without fine-tuning.",{"paper_id":11584,"title":11585,"year":7,"month":188,"day":63,"doi":11586,"resource_url":11587,"first_page":11588,"last_page":11589,"pdf_url":11590,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11591,"paper_type":860,"authors":11592,"abstract":11603},"lrec2026-main-477","Once upon a Kernel: Extracting Important Events from Narratives","10.63317\u002F3qpxoskno4hc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-477","6009","6021","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.477.pdf","sharma-etal-2026-once",[11593,11595,11597,11600],{"paper_id":11584,"author_seq":247,"given_name":11594,"surname":11395,"affiliation":63,"orcid":63},"Anshu 
Kiran",{"paper_id":11584,"author_seq":232,"given_name":7031,"surname":11596,"affiliation":63,"orcid":63},"Castiblanco-Melendez",{"paper_id":11584,"author_seq":218,"given_name":11598,"surname":11599,"affiliation":63,"orcid":63},"Alejandro","Morales",{"paper_id":11584,"author_seq":203,"given_name":11601,"surname":11602,"affiliation":63,"orcid":63},"Mark A.","Finlayson","Not all events in a narrative are created equal: some events are more important than others. Kernel events, a concept introduced in the field of narratology, are causally linked events that move the narrative forward, and cannot be removed without breaking the narrative’s logical coherence. While event detection and extraction tasks have been widely studied in natural language processing and information retrieval fields, the idea of kernel events has been largely unexplored. In this work, we introduce the first corpus and model for kernel event detection. Our contributions include: the refinement of the kernel event concept captured in detailed annotation guidelines grounded in narratological principles; an annotation study yielding a gold-standard dataset of kernel events in narrative texts; and a first-of-its-kind kernel event detection system. Annotation achieved an inter-annotator agreement of 0.61 Kappa, underscoring the reliability of the guidelines. Using these data, we trained several models in both fine-tuned and generative modes for kernel event detection, with a LoRA fine-tuned Llama3 achieving an F1 of 0.695. This work establishes a benchmark for kernel event detection, with potential applications in summarization, narrative similarity detection, and narrative understanding. 
We release our code and data for the benefit of other researchers.",{"paper_id":11605,"title":11606,"year":7,"month":188,"day":63,"doi":11607,"resource_url":11608,"first_page":11609,"last_page":11610,"pdf_url":11611,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11612,"paper_type":860,"authors":11613,"abstract":11618},"lrec2026-main-478","Temporal Expression Recognition in Legal Transcripts","10.63317\u002F5n7bd6gxobss","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-478","6022","6037","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.478.pdf","goldstein-etal-2026-temporal",[11614,11617],{"paper_id":11605,"author_seq":247,"given_name":11615,"surname":11616,"affiliation":63,"orcid":63},"Elizabeth J.","Goldstein",{"paper_id":11605,"author_seq":232,"given_name":2960,"surname":4937,"affiliation":63,"orcid":63},"Before working with clinical text data, it is critical and necessary to blind, remove or substitute any personal information in clinical reports. This information may contain named entities, contact details and biographical information, all of which could lead to direct conclusions about an individual. However, there are certain scenarios in which clinical documentation cannot be anonymized, such as when it concerns a rare disease. These records contain information such as mentions of genetic peculiarities or the name of the treating physician. At first glance, this information does not appear to allow conclusions to be drawn about individuals, but it can. In this paper, we address the task of predicting whether a medical report (or a sentence therein) refers to a rare disease or not. Records of rare diseases may contain references to relatives and certain indications that can help reveal whether a rare disease is present. 
We design a pattern-based approach and a TF-IDF-based predictor, as well as two supervised learning experiments (one at document level and one at sentence level), achieving an F1-score of up to 98%. Our research is the first step towards a larger endeavor in which we aim to support experts involved in documenting medical narratives of rare diseases with automated processes.",{"paper_id":11620,"title":11621,"year":7,"month":188,"day":63,"doi":11622,"resource_url":11623,"first_page":11624,"last_page":11625,"pdf_url":11626,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11627,"paper_type":860,"authors":11628,"abstract":11645},"lrec2026-main-479","Multilingual, Multimodal Pipeline for Creating Authentic and Structured Fact-Checked Claim Dataset","10.63317\u002F3p5czhw4hvv3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-479","6038","6055","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.479.pdf","hsnbeyi-etal-2026-multilingual",[11629,11632,11635,11638,11640,11642],{"paper_id":11620,"author_seq":247,"given_name":11630,"surname":11631,"affiliation":63,"orcid":63},"Z. Melce","Hüsünbeyi",{"paper_id":11620,"author_seq":232,"given_name":11633,"surname":11634,"affiliation":63,"orcid":63},"Virginie","Mouilleron",{"paper_id":11620,"author_seq":218,"given_name":11636,"surname":11637,"affiliation":63,"orcid":63},"Leonie","Uhling",{"paper_id":11620,"author_seq":203,"given_name":1668,"surname":11639,"affiliation":63,"orcid":63},"Foppe",{"paper_id":11620,"author_seq":188,"given_name":1559,"surname":11641,"affiliation":63,"orcid":63},"Scheffler",{"paper_id":11620,"author_seq":172,"given_name":11643,"surname":11644,"affiliation":63,"orcid":63},"Djamé","Seddah","The rapid proliferation of misinformation across online platforms underscores the urgent need for robust, up-to-date, explainable, and multilingual fact-checking resources. 
However, existing datasets are limited in scope, often lacking multimodal evidence, structured annotations, and detailed links between claims, evidence, and verdicts. This paper introduces a comprehensive data collection and processing pipeline that constructs multimodal fact-checking datasets in French and German languages by aggregating ClaimReview feeds, scraping full debunking articles, normalizing heterogeneous claim verdicts, and enriching them with structured metadata and aligned visual content. We used state-of-the-art large language models (LLMs) and multimodal LLMs for (i) evidence extraction under predefined evidence categories and (ii) justification generation that links evidence to verdicts. Evaluation with G-Eval and human assessment demonstrates that our pipeline enables fine-grained comparison of fact-checking practices across different organizations or media markets, facilitates the development of more interpretable and evidence-grounded fact-checking models, and lays the groundwork for future research on multilingual, multimodal misinformation verification.",{"paper_id":11647,"title":11648,"year":7,"month":188,"day":63,"doi":11649,"resource_url":11650,"first_page":11651,"last_page":11652,"pdf_url":11653,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11654,"paper_type":860,"authors":11655,"abstract":11664},"lrec2026-main-480","A Study on Building Efficient Zero-Shot Relation Extraction 
Models","10.63317\u002F5ndtgoxeph5j","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-480","6056","6067","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.480.pdf","thomas-etal-2026-study",[11656,11657,11660,11662],{"paper_id":11647,"author_seq":247,"given_name":7736,"surname":1316,"affiliation":63,"orcid":63},{"paper_id":11647,"author_seq":232,"given_name":11658,"surname":11659,"affiliation":63,"orcid":63},"Caio","Corro",{"paper_id":11647,"author_seq":218,"given_name":1150,"surname":11661,"affiliation":63,"orcid":63},"Gravier",{"paper_id":11647,"author_seq":203,"given_name":7494,"surname":11663,"affiliation":63,"orcid":63},"Sébillot","Zero-shot relation extraction aims to identify relations between entity mentions using textual descriptions of novel types (i.e., previously unseen) instead of labeled training examples. Previous works often rely on unrealistic assumptions: (1) pairs of mentions are often encoded directly in the input, which prevents offline pre-computation for large scale document database querying; (2) no rejection mechanism is introduced, biasing the evaluation when using these models in a retrieval scenario where some (and often most) inputs are irrelevant and must be ignored. In this work, we study the robustness of existing zero-shot relation extraction models when adapting them to a realistic extraction scenario. To this end, we introduce a typology of existing models, and propose several strategies to build single pass models and models with a rejection mechanism. 
We adapt several state-of-the-art tools, and compare them in this challenging setting, showing that no existing work is really robust to realistic assumptions, but overall AlignRE (Li et al., 2024) performs best along all criteria.",{"paper_id":11666,"title":11667,"year":7,"month":188,"day":63,"doi":11668,"resource_url":11669,"first_page":11670,"last_page":11671,"pdf_url":11672,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11673,"paper_type":860,"authors":11674,"abstract":11680},"lrec2026-main-481","Beyond Catalogue Counts: The Dataset Visibility Asymmetry in Low-Resource Multilingual NLP","10.63317\u002F3bep4yiomtp2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-481","6068","6079","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.481.pdf","tan-etal-2026-beyond",[11675,11677],{"paper_id":11666,"author_seq":247,"given_name":11676,"surname":916,"affiliation":63,"orcid":63},"Zhiyin",{"paper_id":11666,"author_seq":232,"given_name":11678,"surname":11679,"affiliation":63,"orcid":63},"Changxu","Duan","Multilingual NLP often relies on dataset counts from centralized catalogues to characterize which languages are resource-rich or resource-poor. However, these catalogues record only one layer of dataset visibility: what has been registered or institutionally distributed. They do not necessarily reflect which datasets are created, cited, or reused in the research literature. To examine this gap, we combine a catalogue-based baseline with literature-backed evidence of dataset circulation. We introduce the Resource Density Index (RDI), defined as the number of catalogued datasets per one million speakers, and compute it for the 200 most widely spoken languages in Ethnologue. 
Among them, 118 languages (59%) have an average RDI of zero across the LRE Map and the Linguistic Data Consortium (LDC), and another 23 fall below 0.1, corresponding to at most one catalogued dataset per ten million speakers. We then apply an LLM-assisted citation-mining pipeline over the Semantic Scholar corpus to these 141 low-visibility languages. After manual validation and consolidation, we identify 609 unique datasets across 53 languages, of which 356 remain openly accessible through working public links. These results reveal a substantial visibility gap: many large-speaker languages appear data-poor in catalogue records yet show clear evidence of dataset activity in the research literature. Our findings suggest that multilingual data scarcity should be understood not only as a production problem, but also as a question of documentation, discoverability, and long-term accessibility. Code and data are publicly available at https:\u002F\u002Fgithub.com\u002Fzhiyintan\u002Fdataset-visibility-asymmetry.",{"paper_id":11682,"title":11683,"year":7,"month":188,"day":63,"doi":11684,"resource_url":11685,"first_page":11686,"last_page":11687,"pdf_url":11688,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":11689,"bibkey":11690,"paper_type":860,"authors":11691,"abstract":11698},"lrec2026-main-482","BLooP: Zero-Shot Abstractive Summarization Using Large Language Models with Bigram Lookahead 
Promotion","10.63317\u002F53bjejyjyh8r","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-482","6080","6102","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.482.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.482_OptionalSupplementaryMaterial.zip","iyer-etal-2026-bloop",[11692,11695],{"paper_id":11682,"author_seq":247,"given_name":11693,"surname":11694,"affiliation":63,"orcid":63},"Varun","Iyer",{"paper_id":11682,"author_seq":232,"given_name":11696,"surname":11697,"affiliation":63,"orcid":63},"Cornelia","Caragea","Abstractive summarization requires models to generate summaries that convey information in the source document. While large language models can generate summaries without fine-tuning, they often miss key details and include extraneous information. We propose BLooP (Bigram Lookahead Promotion), a simple training-free decoding intervention that encourages large language models (LLMs) to generate tokens that form bigrams from the source document. BLooP operates through a hash table lookup at each decoding step, requiring no training, fine-tuning, or model modification. We demonstrate improvements in ROUGE and BARTScore for [Llama‑3.1‑8B‑Instruct](https:\u002F\u002Fhuggingface.co\u002Fmeta-llama\u002FLlama-3.1-8B-Instruct), [Mistral‑Nemo‑Instruct‑2407](https:\u002F\u002Fhuggingface.co\u002Fmistralai\u002FMistral-Nemo-Instruct-2407), and [Gemma‑2‑9B‑IT](https:\u002F\u002Fhuggingface.co\u002Fgoogle\u002Fgemma-2-9b-it) on CNN\u002FDM, CCSum, Multi-News, and SciTLDR. Human evaluation shows that BLooP significantly improves faithfulness without reducing readability. 
We make the code available [here](https:\u002F\u002Fgithub.com\u002Fvaruniyer\u002FBLooP).",{"paper_id":11700,"title":11701,"year":7,"month":188,"day":63,"doi":11702,"resource_url":11703,"first_page":11704,"last_page":11705,"pdf_url":11706,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":11707,"bibkey":11708,"paper_type":860,"authors":11709,"abstract":11735},"lrec2026-main-483","OasisSimp: An Open-source Asian-English Sentence Simplification Dataset","10.63317\u002F4ws5ja7xyadd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-483","6103","6115","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.483.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.483_OptionalSupplementaryMaterial.zip","liu-etal-2026-oasissimp",[11710,11712,11715,11717,11719,11721,11723,11726,11728,11731,11734],{"paper_id":11700,"author_seq":247,"given_name":11711,"surname":3916,"affiliation":63,"orcid":63},"Hannah",{"paper_id":11700,"author_seq":232,"given_name":11713,"surname":11714,"affiliation":63,"orcid":63},"Murphy","Tian",{"paper_id":11700,"author_seq":218,"given_name":11716,"surname":2207,"affiliation":63,"orcid":63},"Iqra",{"paper_id":11700,"author_seq":203,"given_name":11718,"surname":8151,"affiliation":63,"orcid":63},"Haonan",{"paper_id":11700,"author_seq":188,"given_name":11720,"surname":7319,"affiliation":63,"orcid":63},"Qiaoyiwen",{"paper_id":11700,"author_seq":172,"given_name":11722,"surname":6675,"affiliation":63,"orcid":63},"Blair",{"paper_id":11700,"author_seq":155,"given_name":11724,"surname":11725,"affiliation":63,"orcid":63},"Uthayasanker","Thayasivam",{"paper_id":11700,"author_seq":138,"given_name":11727,"surname":1359,"affiliation":63,"orcid":63},"Annie 
En-Shiun",{"paper_id":11700,"author_seq":121,"given_name":11729,"surname":11730,"affiliation":63,"orcid":63},"Pakawat","Nakwijit",{"paper_id":11700,"author_seq":104,"given_name":11732,"surname":11733,"affiliation":63,"orcid":63},"Surangika","Ranathunga",{"paper_id":11700,"author_seq":87,"given_name":11379,"surname":11380,"affiliation":63,"orcid":63},"Text simplification aims to make complex text more accessible by reducing linguistic complexity while preserving the original meaning. However, progress in this area remains limited for mid-resource and low-resource languages due to the scarcity of high-quality data. To address this gap, we introduce OasisSimp, a multilingual dataset for sentence-level text simplification covering five languages: English, Sinhala, Tamil, Pashto, and Thai. Among these, no prior sentence simplification datasets exist for Thai, Pashto, and Tamil, while limited data is available for Sinhala. Each language simplification dataset was created through direct human annotation, where trained annotators followed detailed guidelines to simplify sentences while maintaining meaning, fluency, and grammatical correctness. We evaluate eight open-weight multilingual Large Language Models (LLMs) on OasisSimp and observe substantial performance disparities between high-resource and low-resource languages, highlighting the simplification challenges in multilingual settings. OasisSimp thus provides both a valuable multilingual resource and a challenging benchmark, revealing the limitations of current LLM-based simplification methods and paving the way for future research in low-resource text simplification. 
The dataset will be open-sourced upon acceptance.",{"paper_id":11737,"title":11738,"year":7,"month":188,"day":63,"doi":11739,"resource_url":11740,"first_page":11741,"last_page":11742,"pdf_url":11743,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11744,"paper_type":860,"authors":11745,"abstract":11755},"lrec2026-main-484","Fully Automated Identification of Lexical Alignment and Preference-Stage Shifts in Large Language Models","10.63317\u002F4ut7ammh7z3h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-484","6116","6131","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.484.pdf","juzek-etal-2026-fully",[11746,11749,11752],{"paper_id":11737,"author_seq":247,"given_name":11747,"surname":11748,"affiliation":63,"orcid":63},"Thomas Stephan","Juzek",{"paper_id":11737,"author_seq":232,"given_name":11750,"surname":11751,"affiliation":63,"orcid":63},"Xiaoyang","Ming",{"paper_id":11737,"author_seq":218,"given_name":11753,"surname":11754,"affiliation":63,"orcid":63},"Jose A.","Hernandez","The language used by digital chat assistants such as ChatGPT can diverge from human expectations (misalignment). Research, mostly on Scientific English, has described both WHAT divergences occur and, to some extent, WHY, linking them to the training stage of human preference learning. Yet, existing approaches rely on manual curation. This paper introduces two curation-free, assumption-light evaluation metrics: the Lexical Alignment Score, which identifies lexical overuse, and the Triangulated Preference Shift, which quantifies how much of such shifts can be attributed to human preference learning. Using PubMed abstracts, continuations were generated and measured using windowed document prevalence across six model families (Falcon, Gemma, Llama, Mistral, OLMo, Yi). 
The procedure identifies, without manual intervention, overused items such as ’suggest’, ’additionally’, and ’strategy’, and estimates their link to preference learning. Our findings replicate prior work and remain stable across parameter settings, random seeds, and evaluation on further data. The approach scales readily and enables systematic study of lexical (mis)alignment beyond Scientific English and across languages, and as such, the metrics have the potential to contribute to improved alignment for future models and understanding of its origins.",{"paper_id":11757,"title":11758,"year":7,"month":188,"day":63,"doi":11759,"resource_url":11760,"first_page":11761,"last_page":11762,"pdf_url":11763,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11764,"paper_type":860,"authors":11765,"abstract":11768},"lrec2026-main-485","How Much Noise Can BERT Handle? Insights from Multilingual Sentence Difficulty Detection","10.63317\u002F3996hxsmfa2k","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-485","6132","6143","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.485.pdf","khallaf-etal-2026-how",[11766,11767],{"paper_id":11757,"author_seq":247,"given_name":1078,"surname":1079,"affiliation":63,"orcid":63},{"paper_id":11757,"author_seq":232,"given_name":1081,"surname":1082,"affiliation":63,"orcid":63},"Noisy training data can significantly degrade the performance of language-model-based classifiers, particularly in non-topical classification tasks. This study explores a range of denoising strategies for sentence-level difficulty detection, using training data derived from document-level difficulty annotations obtained through noisy crowdsourcing. Beyond monolingual settings, we also address cross-lingual transfer, where a multilingual language model is trained in one language and tested in another. 
We evaluate several noise reduction techniques, including Gaussian Mixture Models (GMM), Co-Teaching, Noise Transition Matrices, and Label Smoothing. Our results indicate that while BERT-based models exhibit inherent robustness to noise, incorporating explicit noise detection can further enhance performance. For our smaller dataset, GMM-based noise filtering proves particularly effective in improving prediction quality by raising the AUC score from 0.52 to 0.86, or to 0.92 when two de-noising methods are combined (GMM and Co-Teaching). However, for our larger dataset, the intrinsic regularisation of pre-trained language models provides a strong baseline, with denoising methods yielding only marginal gains (from 0.8948 to 0.8984, or to 0.9061 when two denoising methods are combined). Nonetheless, removing noisy sentences (about 20% of the dataset) helps in producing a cleaner corpus with fewer infelicities. As a result we have released the largest available multilingual corpus for sentence difficulty prediction.",{"paper_id":11770,"title":11771,"year":7,"month":188,"day":63,"doi":11772,"resource_url":11773,"first_page":11774,"last_page":11775,"pdf_url":11776,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11777,"paper_type":860,"authors":11778,"abstract":11784},"lrec2026-main-486","Comparing Reading Behavior across Reader Expertise and Text Complexity: Insights from the French Eye-Tracking Corpus (FETA)","10.63317\u002F2xr5zj2u6h7p","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-486","6144","6154","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.486.pdf","ivchenko-etal-2026-comparing",[11779,11782],{"paper_id":11770,"author_seq":247,"given_name":11780,"surname":11781,"affiliation":63,"orcid":63},"Oksana","Ivchenko",{"paper_id":11770,"author_seq":232,"given_name":8892,"surname":11783,"affiliation":63,"orcid":63},"Grabar","This study examines how readers process general and medical 
texts with varying levels of complexity and how text simplification affects reading behavior. Using eye-tracking data, we compared two participant groups – common population and speech therapy students – as they read French medical, clinical, and general-domain texts in both original and simplified versions. We applied unsupervised clustering to identify patterns in reading behavior and investigate whether these patterns differ across participant groups, text types and complexity. The analysis identified between two and four clusters per group and condition, revealing distinct reading strategies ranging from effortful re-reading behavior to fluent, streamlined processing. The results reveal that medical and clinical texts elicit longer fixations and more regressions, indicating greater processing effort, while simplification produces shorter fixations and more fluid reading. Speech therapy students generally exhibit more efficient and stable gaze patterns, reflecting greater metalinguistic awareness and familiarity with the field. 
The dataset is a novel resource for modelling cognitive aspects of text complexity in French.",{"paper_id":11786,"title":11787,"year":7,"month":188,"day":63,"doi":11788,"resource_url":11789,"first_page":11790,"last_page":11791,"pdf_url":11792,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11793,"paper_type":860,"authors":11794,"abstract":11805},"lrec2026-main-487","Scaling LLM Reasoning from Minimal Labels: A Semi-Supervised Framework with a Lightweight Verifier","10.63317\u002F36fhpois9sxp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-487","6155","6165","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.487.pdf","kato-etal-2026-scaling",[11795,11798,11801,11804],{"paper_id":11786,"author_seq":247,"given_name":11796,"surname":11797,"affiliation":63,"orcid":63},"Keizo","Kato",{"paper_id":11786,"author_seq":232,"given_name":11799,"surname":11800,"affiliation":63,"orcid":63},"Chenhui","Chu",{"paper_id":11786,"author_seq":218,"given_name":11802,"surname":11803,"affiliation":63,"orcid":63},"Yugo","Murawaki",{"paper_id":11786,"author_seq":203,"given_name":5009,"surname":5010,"affiliation":63,"orcid":63},"For the development of Large language models (LLMs), recent approaches to generating pseudo intermediate reasoning have shown remarkable progress. But they typically rely on large numbers of correctly annotated answers to assess reasoning quality. This paper presents a semi-supervised framework that scales reasoning learning from minimal supervision, turning reasoning verification itself into a data creation mechanism. We train a lightweight reasoning-correctness classifier on only a few labeled samples, which judges whether intermediate reasoning traces generated by an LLM are valid. Furthermore, an entropy-based confidence threshold filters out unreliable samples, and the remaining high-confidence reasoning traces are used to fine-tune the model. 
Experiments on Verifiable Math Problems (Orca-Math subset) and Question Answering on Image Scene Graphs (GQA) with Visual Programming show that our method achieves accuracy comparable to using 10–15× more labeled data. Ablation analyses confirm that both the classifier and entropy filtering are essential for scalable and noise-resistant pseudo-labeling. By replacing expensive answer-level supervision with lightweight reasoning verification, our method provides a practical path toward constructing large-scale reasoning resources and paves the way for future autonomous reasoning systems that learn from minimal human input.",{"paper_id":11807,"title":11808,"year":7,"month":188,"day":63,"doi":11809,"resource_url":11810,"first_page":11811,"last_page":11812,"pdf_url":11813,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11814,"paper_type":860,"authors":11815,"abstract":11821},"lrec2026-main-488","PARL: Prompt-based Agents for Reinforcement Learning","10.63317\u002F3z4zqifrngk9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-488","6166","6184","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.488.pdf","resendiz-etal-2026-parl",[11816,11819],{"paper_id":11807,"author_seq":247,"given_name":11817,"surname":11818,"affiliation":63,"orcid":63},"Yarik Menchaca","Resendiz",{"paper_id":11807,"author_seq":232,"given_name":1064,"surname":11820,"affiliation":63,"orcid":63},"Klinger","Large language models (LLMs) have demonstrated high performance on tasks expressed in natural language, particularly in zero- or few-shot settings. These are typically framed as supervised (e.g., classification) or unsupervised (e.g., clustering) problems. However, limited work evaluates LLMs as agents in reinforcement learning (RL) tasks (e.g., playing games), where learning occurs through interaction with an environment and a reward system. 
While prior work focused on representing tasks that rely on a language representation, we study structured, non-linguistic reasoning – such as interpreting positions in a grid world. We therefore introduce PARL (Prompt-based Agent for Reinforcement Learning), a method that uses LLMs as RL agents through prompting, without any fine-tuning. PARL encodes actions, states, and rewards in the prompt, enabling the model to learn through trial-and-error interaction. We evaluate PARL on three standard RL tasks that do not entirely rely on natural language. We show that it can match or outperform traditional RL agents in simple environments by leveraging pretrained knowledge. However, we identify performance limitations in tasks that require complex mathematical operations or decoding states and actions.",{"paper_id":11823,"title":11824,"year":7,"month":188,"day":63,"doi":11825,"resource_url":11826,"first_page":11827,"last_page":11828,"pdf_url":11829,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11830,"paper_type":860,"authors":11831,"abstract":11837},"lrec2026-main-489","SPQ: An Ensemble Technique for Large Language Model Compression","10.63317\u002F3t98siww4xf2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-489","6185","6195","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.489.pdf","yao-etal-2026-spq",[11832,11834],{"paper_id":11823,"author_seq":247,"given_name":11833,"surname":3225,"affiliation":63,"orcid":63},"Jiamin",{"paper_id":11823,"author_seq":232,"given_name":11835,"surname":11836,"affiliation":63,"orcid":63},"Eren","Gultepe","This study presents an ensemble technique, SPQ (SVD-Pruning-Quantization), for large language model (LLM) compression that combines variance-retained singular value decomposition (SVD), activation-based pruning, and post-training linear quantization. 
Each component targets a different source of inefficiency: i) pruning removes redundant neurons in MLP layers, ii) SVD reduces attention projections into compact low-rank factors, iii) and 8-bit quantization uniformly compresses all linear layers. At matched compression ratios, SPQ outperforms individual methods (SVD-only, pruning-only, or quantization-only) in perplexity, demonstrating the benefit of combining complementary techniques. Applied to LLaMA-2-7B, SPQ achieves up to 75% memory reduction while maintaining or improving perplexity (e.g., WikiText-2 reduced from 5.47 to 4.91) and preserving accuracy on downstream benchmarks such as C4, TruthfulQA, and GSM8K. Compared to strong baselines like GPTQ and SparseGPT, SPQ offers competitive perplexity and accuracy while using less memory (6.86 GB vs. 7.16 GB for GPTQ). Moreover, SPQ improves inference throughput over GPTQ, achieving up to a 1.9× speedup, which further enhances its practicality for real-world deployment. The effectiveness of SPQ’s robust compression through layer-aware and complementary compression techniques may provide practical deployment of LLMs in memory-constrained environments. 
Code is available at: https:\u002F\u002Fgithub.com\u002FJiaminYao\u002FSPQ_LLM_Compression\u002F",{"paper_id":11839,"title":11840,"year":7,"month":188,"day":63,"doi":11841,"resource_url":11842,"first_page":11843,"last_page":11844,"pdf_url":11845,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11846,"paper_type":860,"authors":11847,"abstract":11856},"lrec2026-main-490","FPSC: A Sustainable Pipeline for Building a Faroese Parliamentary Speech Corpus","10.63317\u002F5hk9vzqo8xi6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-490","6196","6205","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.490.pdf","lg-etal-2026-fpsc",[11848,11851,11852,11854],{"paper_id":11839,"author_seq":247,"given_name":11849,"surname":11850,"affiliation":63,"orcid":63},"Dávid í","Lág",{"paper_id":11839,"author_seq":232,"given_name":3175,"surname":8877,"affiliation":63,"orcid":63},{"paper_id":11839,"author_seq":218,"given_name":11853,"surname":7255,"affiliation":63,"orcid":63},"Carlos Daniel Hernandez",{"paper_id":11839,"author_seq":203,"given_name":1649,"surname":11855,"affiliation":63,"orcid":63},"Gudnason","This work addresses the lack of large-scale, natural speech data for Faroese automatic speech recognition. Existing resources, such as the 100-hour Ravnursson corpus, consist of read speech and do not capture the spontaneous variation, sociolinguistic aspects and prosody of real dialogue, limiting model performance. To overcome this, we present the Faroese Parliament Speech Corpus (FPSC)—a 1,600-hour collection of parliamentary recordings comprising 89,000 speeches with detailed speaker and linguistic metadata. The corpus includes weakly supervised transcriptions generated using an ensemble of four Faroese-adapted ASR models combined through a ROVER-based voting procedure. 
In creating FPSC, we trained several new state-of-the-art ASR models for Faroese—some built on large-scale pretrained backbones and others leveraging multilingual transfer—all outperforming previously published Faroese ASR systems. FPSC represents the first corpus of natural spoken Faroese and a major step toward realistic ASR modeling for Faroese, offering an open, reproducible, and scalable resource for future speech and language research.",{"paper_id":11858,"title":11859,"year":7,"month":188,"day":63,"doi":11860,"resource_url":11861,"first_page":11862,"last_page":11863,"pdf_url":11864,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11865,"paper_type":860,"authors":11866,"abstract":11878},"lrec2026-main-491","Efficient Dialect-Aware Modeling and Conditioning for Low-Resource Taiwanese Hakka Speech Processing","10.63317\u002F36umc2hxdiw5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-491","6206","6215","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.491.pdf","anci-etal-2026-efficient",[11867,11869,11871,11873,11875,11877],{"paper_id":11858,"author_seq":247,"given_name":3168,"surname":11868,"affiliation":63,"orcid":63},"An-Ci",{"paper_id":11858,"author_seq":232,"given_name":11870,"surname":1837,"affiliation":63,"orcid":63},"Kuan-Tang",{"paper_id":11858,"author_seq":218,"given_name":11872,"surname":9590,"affiliation":63,"orcid":63},"Tien-Hong",{"paper_id":11858,"author_seq":203,"given_name":11874,"surname":1359,"affiliation":63,"orcid":63},"Hung-Shin",{"paper_id":11858,"author_seq":188,"given_name":11876,"surname":3676,"affiliation":63,"orcid":63},"Hsin-Min",{"paper_id":11858,"author_seq":172,"given_name":10616,"surname":1840,"affiliation":63,"orcid":63},"Taiwanese Hakka is a low-resource, endangered language that poses significant challenges for automatic speech recognition (ASR), including high dialectal variability and the presence of two distinct writing systems (Hanzi and 
Pinyin). Traditional ASR models often encounter difficulties in this context, as they tend to conflate essential linguistic content with dialect-specific variations across both phonological and lexical dimensions. To address these challenges, we propose a unified framework grounded in the Recurrent Neural Network Transducers (RNN-T). Central to our approach is the introduction of dialect-aware modeling strategies designed to disentangle dialectal “style” from linguistic “content”, which enhances the model’s capacity to learn robust and generalized representations. Additionally, the framework employs parameter-efficient prediction networks to concurrently model ASR (Hanzi and Pinyin). We demonstrate that these tasks create a powerful synergy, wherein the cross-script objective serves as a mutual regularizer to improve the primary ASR tasks. Experiments conducted on the HAT corpus reveal that our model achieves 57.00% and 40.41% relative error rate reduction on Hanzi and Pinyin ASR, respectively. 
To our knowledge, this is the first systematic investigation into the impact of Hakka dialectal variations on ASR and the first single model capable of jointly addressing these tasks.",{"paper_id":11880,"title":11881,"year":7,"month":188,"day":63,"doi":11882,"resource_url":11883,"first_page":11884,"last_page":11885,"pdf_url":11886,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11887,"paper_type":860,"authors":11888,"abstract":11901},"lrec2026-main-492","Construction of Japanese Prefectural Assembly Minutes Datasets across Three Electoral Terms: Comparative Analysis of 2011, 2015, and 2019 Four-Year Periods","10.63317\u002F5hsgnayssvei","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-492","6216","6223","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.492.pdf","takamaru-etal-2026-construction",[11889,11892,11895,11898],{"paper_id":11880,"author_seq":247,"given_name":11890,"surname":11891,"affiliation":63,"orcid":63},"Keiichi","Takamaru",{"paper_id":11880,"author_seq":232,"given_name":11893,"surname":11894,"affiliation":63,"orcid":63},"Hokuto","Ototake",{"paper_id":11880,"author_seq":218,"given_name":11896,"surname":11897,"affiliation":63,"orcid":63},"Yuzu","Uchida",{"paper_id":11880,"author_seq":203,"given_name":11899,"surname":11900,"affiliation":63,"orcid":63},"Yasutomo","Kimura","The presented longitudinal cross-regional corpus of Japanese prefectural assembly minutes spans 12 years (2011-2023) across three electoral terms. The corpus comprises 12,236,974 records containing 743,147,226 characters (471,496,688 tokens) of transcribed remarks from the plenary sessions of all 47 prefectural assemblies in Japan. Each dataset is organized by speaker, with assembly members linked to their electoral information, including gender, age, and electoral district. Through a comparative analysis across the three terms, we documented significant temporal changes. 
The proportion of members aged 25-44 decreased, whereas female representation increased. Female members use 20-30% more characters per speech than male counterparts across all age groups. The proportion of members who never speak varies from under 2% for younger females to over 10% for males aged 65+. We demonstrate the utility of the corpus through three applications: a quantitative analysis of gender and age patterns in political discourse, AI-driven computational dialectology for extracting regional linguistic features, and a web-based search and visualization system. This longitudinal cross-regional corpus provides a valuable resource for interdisciplinary research on subnational politics, computational linguistics, dialectology, and political communication in non-Western democracies. The datasets are available for research purposes upon request, with public query access provided through a web-based interface.",{"paper_id":11903,"title":11904,"year":7,"month":188,"day":63,"doi":11905,"resource_url":11906,"first_page":11907,"last_page":11908,"pdf_url":11909,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11910,"paper_type":860,"authors":11911,"abstract":11921},"lrec2026-main-493","EDDA-Coordinata: An Annotated Dataset of Historical Geographic 
Coordinates","10.63317\u002F5guc63fgjocp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-493","6224","6234","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.493.pdf","moncla-etal-2026-edda",[11912,11915,11916,11918],{"paper_id":11903,"author_seq":247,"given_name":11913,"surname":11914,"affiliation":63,"orcid":63},"Ludovic","Moncla",{"paper_id":11903,"author_seq":232,"given_name":1159,"surname":7478,"affiliation":63,"orcid":63},{"paper_id":11903,"author_seq":218,"given_name":1270,"surname":11917,"affiliation":63,"orcid":63},"Joliveau",{"paper_id":11903,"author_seq":203,"given_name":11919,"surname":11920,"affiliation":63,"orcid":63},"Katherine","McDonough","This paper introduces a dataset of enriched geographic coordinates retrieved from Diderot and d’Alembert’s eighteenth-century Encyclopédie. Automatically recovering geographic coordinates from historical texts is a complex task, as they are expressed in a variety of ways and with varying levels of precision. To improve retrieval of coordinates from similar digitized early modern texts, we have created a gold standard dataset, trained models, published the resulting inferred and normalized coordinate data, and experimented applying these models to new texts. From 74,000 total articles in each of the digitized versions of the Encyclopédie from ARTFL and ENCCRE, we examined 15,278 geographical entries, manually identifying 4,798 containing coordinates, and 10,480 with descriptive but non-numerical references. Leveraging our gold standard annotations, we trained transformer-based models to retrieve and normalize coordinates. The pipeline presented here combines a classifier to identify coordinate-bearing entries and a second model for retrieval, tested across encoder–decoder and decoder architectures. Cross-validation yielded an 86% EM score. 
On an out-of-domain eighteenth-century Trévoux dictionary (also in French), our fine-tuned model had a 61% EM score, while for the nineteenth-century, 7th edition of the Encyclopædia Britannica in English, the EM was 77%. These findings highlight the gold standard dataset’s usefulness as training data, and our two-step method’s cross-lingual, cross-domain generalizability.",{"paper_id":11923,"title":11924,"year":7,"month":188,"day":63,"doi":11925,"resource_url":11926,"first_page":11927,"last_page":11928,"pdf_url":11929,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11930,"paper_type":860,"authors":11931,"abstract":11943},"lrec2026-main-494","Mental Health Disorder Detection beyond Social Media: A Systematic Review of Available Datasets","10.63317\u002F32pcux3ahpau","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-494","6235","6250","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.494.pdf","puspo-etal-2026-mental",[11932,11935,11936,11939,11942],{"paper_id":11923,"author_seq":247,"given_name":11933,"surname":11934,"affiliation":63,"orcid":63},"Sadiya Sayara Chowdhury","Puspo",{"paper_id":11923,"author_seq":232,"given_name":7181,"surname":7182,"affiliation":63,"orcid":63},{"paper_id":11923,"author_seq":218,"given_name":11937,"surname":11938,"affiliation":63,"orcid":63},"Stevie","Chancellor",{"paper_id":11923,"author_seq":203,"given_name":11940,"surname":11941,"affiliation":63,"orcid":63},"Özlem","Uzuner",{"paper_id":11923,"author_seq":188,"given_name":6445,"surname":6446,"affiliation":63,"orcid":63},"Detecting mental health disorders in a timely manner is an important societal challenge. NLP and machine learning (ML) methods used to assist with detection rely on data collected primarily from social media. However, such datasets often have sampling biases and inherent ethical and privacy issues. One avenue to overcome these limitations is non-social media data. 
We present the first comprehensive review of non-social media, free-text datasets for mental health research. We use the PRISMA methodology to conduct our survey and we review datasets available in multiple languages. We find that non-social media free-text based datasets are predominantly focused on English and on detecting depression. These datasets also vary in demographics, platforms, data types, annotation techniques, and methodologies. This systematic review also reveals key gaps and highlights opportunities to develop more diverse, reliable and clinically-relevant resources.",{"paper_id":11945,"title":11946,"year":7,"month":188,"day":63,"doi":11947,"resource_url":11948,"first_page":11949,"last_page":11950,"pdf_url":11951,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11952,"paper_type":860,"authors":11953,"abstract":11957},"lrec2026-main-495","German Counseling Grounding-Act Corpus (GRACO)","10.63317\u002F3cbc5jpg7spd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-495","6251","6261","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.495.pdf","belosevic-2026-german",[11954],{"paper_id":11945,"author_seq":247,"given_name":11955,"surname":11956,"affiliation":63,"orcid":63},"Milena","Belosevic","We present a corpus of 196 German counseling conversations (ca. 25k turns) between advice seekers and counselors from nine domains. A subset of 11.5k turns was double-annotated with grounding acts (e.g., acknowledgments, repairs), attempts to advance the conversation, success of advancing, and conversation phases. Baseline classification experiments with logistic regression and GBERT-base illustrate the impact of class imbalance in grounding-act classification. For logistic regression, train-only balancing improves Macro-F1 from 0.417 [0.377–0.434] to 0.444 [0.394–0.478]. 
For GBERT-base, performance remains competitive (Macro-F1 0.481), with balancing yielding comparable results under the same evaluation protocol. Given the scarcity of German corpora of naturally occurring conversations annotated for grounding phenomena, we provide a novel resource for both conversation analysis and natural language processing, facilitating the design of realistic human-language model interactions in German. Code and data are available at https:\u002F\u002Fosf.io\u002F6k275\u002Foverview.",{"paper_id":11959,"title":11960,"year":7,"month":188,"day":63,"doi":11961,"resource_url":11962,"first_page":11963,"last_page":11964,"pdf_url":11965,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11966,"paper_type":860,"authors":11967,"abstract":11972},"lrec2026-main-496","Presenting the Prague Discourse Treebank 4.0","10.63317\u002F3qyaqpgm8b25","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-496","6262","6276","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.496.pdf","mrovsk-etal-2026-presenting",[11968,11969],{"paper_id":11959,"author_seq":247,"given_name":1975,"surname":1976,"affiliation":63,"orcid":63},{"paper_id":11959,"author_seq":232,"given_name":11970,"surname":11971,"affiliation":63,"orcid":63},"Pavlína","Synková","The Prague Discourse Treebank 4.0 is a large genre-diversified language resource with annotation of discourse relations marked by explicit connectives in Czech texts. It consists of 175 thousand sentences with 82 thousand discourse relations. We present the treebank as well as the methods used during the annotation of its individual parts, some of which were annotated fully manually, others using cost-effective partially automatic methods, achieving a comparable quality. 
The discourse annotation is available in two formats and theoretical frameworks: the Prague discourse annotation on top of deep syntax dependency trees, and the Penn Discourse Treebank style on top of plain texts, using both discourse type\u002Fsense taxonomies in both formats. The corpus is publicly and freely available, offering a valuable resource for linguistic research and natural language processing tasks.",{"paper_id":11974,"title":11975,"year":7,"month":188,"day":63,"doi":11976,"resource_url":11977,"first_page":11978,"last_page":11979,"pdf_url":11980,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11981,"paper_type":860,"authors":11982,"abstract":11988},"lrec2026-main-497","Evaluation of Co-Speech Gesture Tracking Techniques in Naturalistic Interactions","10.63317\u002F2k233c5pnfsi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-497","6277","6288","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.497.pdf","ivanova-etal-2026-evaluation",[11983,11985],{"paper_id":11974,"author_seq":247,"given_name":2981,"surname":11984,"affiliation":63,"orcid":63},"Ivanova",{"paper_id":11974,"author_seq":232,"given_name":11986,"surname":11987,"affiliation":63,"orcid":63},"Naomi","Harte","Hand gestures convey a significant portion of communicative meaning, making multimodal datasets essential for interaction research. However, annotating gestures remains a time-consuming and challenging task. To speed up the process, semi-automatic methods have been developed that identify segments with hand movement for annotators to refine. These typically combine a pose estimation model with a rule-based or statistical movement detection algorithm. However, most are validated on idealised, non-naturalistic datasets with minimal hand occlusions. 
We benchmark combinations of four pose estimation methods (OpenPose, MediaPipe, DeepLabCut, and Kinect) and two rule-based movement detection algorithms on two naturalistic, conversational datasets. The best pipelines combine the SPUDNIG displacement algorithm with OpenPose on MULTISIMO and with DeepLabCut on ECOLANG. These pipelines achieved Tversky scores of 0.57 on MULTISIMO and 0.65 on ECOLANG, with recall scores of 0.73 and 0.78, respectively. While off-the-shelf gesture detection systems can support annotation, performance remains limited on naturalistic data, and careful camera setup minimizing occlusions is essential.",{"paper_id":11990,"title":11991,"year":7,"month":188,"day":63,"doi":11992,"resource_url":11993,"first_page":11994,"last_page":11995,"pdf_url":11996,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":11997,"paper_type":860,"authors":11998,"abstract":12002},"lrec2026-main-498","Voices across Decades: A Multimodal Diachronic Corpus of German Bundestag Debates (GerParlDia-MM)","10.63317\u002F3vgihkgnkg75","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-498","6289","6297","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.498.pdf","siegert-2026-voices",[11999],{"paper_id":11990,"author_seq":247,"given_name":12000,"surname":12001,"affiliation":63,"orcid":63},"Ingo","Siegert","This paper presents a multimodal diachronic corpus of German parliamentary debates spanning 1949 – 2025. The dataset focuses on speakers with exceptionally long political careers in the Bundestag, covering at least six parliamentary terms for female and eight for male members, comprising 75 individuals (43 men\u002F32 female) and 2,136 speeches. The corpus integrates audio, video (when available), and official transcripts, enriched with metadata on date, party affiliation, and legislative term. 
Transcripts were temporally aligned with parliamentary media recordings, and non-speech segments were automatically removed. The corpus enables research on voice aging, intra-speaker variability, and longitudinal political language, and supports benchmarking of ASR and speaker recognition across decades. Thus, this corpus bridges the gap between short-term speech corpora and single-speaker longitudinal datasets, offering a unique foundation for studying change in voice, style, and rhetoric over more than seventy years of German parliamentary history.",{"paper_id":12004,"title":12005,"year":7,"month":188,"day":63,"doi":12006,"resource_url":12007,"first_page":12008,"last_page":12009,"pdf_url":12010,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12011,"paper_type":860,"authors":12012,"abstract":12016},"lrec2026-main-499","MultiWikiQA: A Reading Comprehension Benchmark in 300+ Languages","10.63317\u002F2msrgsu9isrx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-499","6298","6311","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.499.pdf","smart-2026-multiwikiqa",[12013],{"paper_id":12004,"author_seq":247,"given_name":12014,"surname":12015,"affiliation":63,"orcid":63},"Dan Saattrup","Smart","We introduce a new reading comprehension dataset, dubbed MultiWikiQA, which covers 306 languages and has 1,220,757 samples in total. We start with Wikipedia articles, which also provide the context for the dataset samples, and use an LLM to generate question\u002Fanswer pairs related to the Wikipedia article, ensuring that the answer appears verbatim within the article. Next, the question is then rephrased to hinder simple word matching methods from performing well on the dataset. We conduct a crowdsourced human evaluation of the fluency of the generated questions, which included 156 respondents across 30 of the languages (both low- and high-resource). 
All 30 languages received a mean fluency rating above “mostly natural”, showing that the samples are of good quality. We evaluate 6 different language models, both decoder and encoder models of varying sizes, showing that the benchmark is sufficiently difficult and that there is a large performance discrepancy amongst the languages. Both the dataset and survey evaluations are publicly available.",{"paper_id":12018,"title":12019,"year":7,"month":188,"day":63,"doi":12020,"resource_url":12021,"first_page":12022,"last_page":12023,"pdf_url":12024,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12025,"paper_type":860,"authors":12026,"abstract":12029},"lrec2026-main-500","SALOMO: An Annotation Tool for Complex Annotation Tasks with a Large Number of Labels","10.63317\u002F4odk4p62uz9p","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-500","6312","6321","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.500.pdf","menzner-2026-salomo",[12027],{"paper_id":12018,"author_seq":247,"given_name":9249,"surname":12028,"affiliation":63,"orcid":63},"Menzner","Manual annotation of linguistic units such as sentences with labels drawn from a large inventory or taxonomy imposes an enormous cognitive load on human subjects. For our exemplary task, we devised a taxonomy of media bias with 37 categories. Selecting the appropriate category (or none) for thousands of news sentences is likely to be tiring and error-prone for humans. To address these types of annotation tasks involving large numbers of labels, we present SALOMO, an annotation tool that pre-selects labels by letting a committee of LLMs make decisions. Human annotators are then tasked mainly with resolving cases where the LLMs disagree. 
While our tool is independent of any particular task, we describe its design, present a short corpus annotated with a novel fine-grained taxonomy of news bias types as a concrete case study, and demonstrate experimentally both the significant time savings and workload reduction achieved with the pre-selection mechanism, as well as the strong bias it introduces toward the displayed selection. We also provide the mini-dataset of biased sentences and their associated bias types from our experiment.",{"paper_id":12031,"title":12032,"year":7,"month":188,"day":63,"doi":12033,"resource_url":12034,"first_page":12035,"last_page":12036,"pdf_url":12037,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12038,"paper_type":860,"authors":12039,"abstract":12049},"lrec2026-main-501","VietJobs: A Vietnamese Job Advertisement Dataset","10.63317\u002F2nkvn5bc8dou","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-501","6322","6331","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.501.pdf","dinh-etal-2026-vietjobs",[12040,12043,12046],{"paper_id":12031,"author_seq":247,"given_name":12041,"surname":12042,"affiliation":63,"orcid":63},"Hieu Pham","Dinh",{"paper_id":12031,"author_seq":232,"given_name":12044,"surname":12045,"affiliation":63,"orcid":63},"Hung Nguyen","Huy",{"paper_id":12031,"author_seq":218,"given_name":12047,"surname":12048,"affiliation":63,"orcid":63},"Mo","El-Haj","VietJobs is the first large-scale, publicly available corpus of Vietnamese job advertisements, comprising 48,092 postings and over 15 million words collected from all 34 provinces and municipalities across Vietnam. The dataset provides extensive linguistic and structured information, including job titles, categories, salaries, skills, and employment conditions, covering 16 occupational domains and multiple employment types (full-time, part-time, and internship). 
Designed to support research in natural language processing and labour market analytics, VietJobs captures substantial linguistic, regional, and socio-economic diversity. We benchmark several generative large language models (LLMs) on two core tasks: job category classification and salary estimation. Instruction-tuned models such as Qwen2.5-7B-Instruct and Llama-SEA-LION-v3-8B-IT demonstrate notable gains under few-shot and fine-tuned settings, while highlighting challenges in multilingual and Vietnamese-specific modelling for structured labour market prediction. VietJobs establishes a new benchmark for Vietnamese NLP and offers a valuable foundation for future research on recruitment language, socio-economic representation, and AI-driven labour market analysis. All code and resources are available at: https:\u002F\u002Fgithub.com\u002FVinNLP\u002FVietJobs.",{"paper_id":12051,"title":12052,"year":7,"month":188,"day":63,"doi":12053,"resource_url":12054,"first_page":12055,"last_page":12056,"pdf_url":12057,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12058,"paper_type":860,"authors":12059,"abstract":12067},"lrec2026-main-502","A Resource on Dialogical Moves in Native and Non-Native Academic Writers of English","10.63317\u002F3cbozhs8zty6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-502","6332","6339","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.502.pdf","dagostino-etal-2026-resource",[12060,12062,12065],{"paper_id":12051,"author_seq":247,"given_name":2891,"surname":12061,"affiliation":63,"orcid":63},"D'Agostino",{"paper_id":12051,"author_seq":232,"given_name":12063,"surname":12064,"affiliation":63,"orcid":63},"Narjes Sheikh","Asadi",{"paper_id":12051,"author_seq":218,"given_name":2968,"surname":12066,"affiliation":63,"orcid":63},"Musi","This paper provides a new approach to the study of linguistic differences in research articles written by native and non-native English 
writers, including a novel linguistic resource. Conceptually, we propose a functional definition of academic nativeness. Empirically, we operationalize this definition through a survey and the development of a reliable automatic method to distinguish academic natives from non-natives. We then release a corpus of 80 research articles in the field of Linguistics, with introductions manually and reliably annotated for dialogical moves. Preliminary experiments indicate that automatic annotation using large language models remains challenging. Furthermore, the linguistic features that differentiate academic natives and non-natives diverge from those typically associated with general native\u002Fnon-native English distinctions. These findings aim to inform and enhance academic writing pedagogy, while also offering insights relevant to broader language and corpus studies, as well as computational research.",{"paper_id":12069,"title":12070,"year":7,"month":188,"day":63,"doi":12071,"resource_url":12072,"first_page":12073,"last_page":12074,"pdf_url":12075,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12076,"paper_type":860,"authors":12077,"abstract":12081},"lrec2026-main-503","A Corpus-Based Profiling of Regional English Variants in Global Media: Insights from Olympic Journalism","10.63317\u002F3kzf8hoic5ht","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-503","6340","6348","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.503.pdf","mao-2026-corpus",[12078],{"paper_id":12069,"author_seq":247,"given_name":12079,"surname":12080,"affiliation":63,"orcid":63},"Felix","Mao","This paper investigates the distinctive linguistic characteristics of regional English variants through a quantitative analysis of global media coverage. 
The study applies advanced classification techniques, integrating GPT-based embeddings with Support Vector Machines, to a novel corpus, the Olympic Journalism English Variants Corpus. Comprising news articles related to Olympic Games covered by prominent news outlets in the United States, China, Spain, and Mexico between 2020 and 2023, this corpus enables a fine-grained analysis of 164 linguistic features across lexical, syntactic, readability, and sentiment dimensions. The findings reveal strong and interpretable distinctions in features such as verb ratio, nominality, and readability. This study not only demonstrated the enhanced classification capabilities of the model (optimized F1 score = 97.2), but also yielded deeper, data-driven stylistic analysis and insights of each English variant. This work provides a potential template that can be expanded to other World Englishes research.",{"paper_id":12083,"title":12084,"year":7,"month":188,"day":63,"doi":12085,"resource_url":12086,"first_page":12087,"last_page":12088,"pdf_url":12089,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12090,"paper_type":860,"authors":12091,"abstract":12106},"lrec2026-main-504","JFC-Recipe: A Dataset for Nutrient Estimation from Japanese User-Generated Cooking 
Recipes","10.63317\u002F42sopxjrdzsf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-504","6349","6360","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.504.pdf","shirai-etal-2026-jfc",[12092,12095,12098,12101,12103,12105],{"paper_id":12083,"author_seq":247,"given_name":12093,"surname":12094,"affiliation":63,"orcid":63},"Keisuke","Shirai",{"paper_id":12083,"author_seq":232,"given_name":12096,"surname":12097,"affiliation":63,"orcid":63},"Yoko","Yamakata",{"paper_id":12083,"author_seq":218,"given_name":12099,"surname":12100,"affiliation":63,"orcid":63},"Hirotaka","Kameko",{"paper_id":12083,"author_seq":203,"given_name":2043,"surname":12102,"affiliation":63,"orcid":63},"Sunto",{"paper_id":12083,"author_seq":188,"given_name":8532,"surname":12104,"affiliation":63,"orcid":63},"Harashima",{"paper_id":12083,"author_seq":172,"given_name":9870,"surname":9871,"affiliation":63,"orcid":63},"Estimating nutrients from recipes is essential for performing proper daily dietary control. The nutrients of the recipe could be roughly calculated by identifying the nutrients and weights of each ingredient in the recipe. However, no dataset with fully manual annotations of nutritional values and weights has been released so far, especially for Japanese recipes. In this work, we propose a novel dataset called the Japanese Food Composition Recipe Dataset (JFC-Recipe). The JFC-Recipe dataset consists of two types of annotations: (i) food item annotation that links ingredients in recipes to a database providing nutrients for foods and (ii) amount and unit annotation that are converted into weights in grams using a weight table. We describe a data collection procedure and annotation process, show statistics, and provide inter-annotator agreements to validate the quality of our annotations. In experiments, we tackle two tasks of food item estimation and quantity estimation. 
Experimental results show that pre-trained language models learn to estimate food items and quantities accurately.",{"paper_id":12108,"title":12109,"year":7,"month":188,"day":63,"doi":12110,"resource_url":12111,"first_page":12112,"last_page":12113,"pdf_url":12114,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12115,"paper_type":860,"authors":12116,"abstract":12127},"lrec2026-main-505","Annotating Conversational Phases and Communication Techniques: A Corpus of German Teacher-Parent Counseling Conversations","10.63317\u002F4ukmi5gkdygr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-505","6361","6372","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.505.pdf","hallmen-etal-2026-annotating",[12117,12119,12122,12124,12126],{"paper_id":12108,"author_seq":247,"given_name":995,"surname":12118,"affiliation":63,"orcid":63},"Hallmen",{"paper_id":12108,"author_seq":232,"given_name":12120,"surname":12121,"affiliation":63,"orcid":63},"Kathrin","Gietl",{"paper_id":12108,"author_seq":218,"given_name":2004,"surname":12123,"affiliation":63,"orcid":63},"Hillesheim",{"paper_id":12108,"author_seq":203,"given_name":12125,"surname":6138,"affiliation":63,"orcid":63},"Annemarie",{"paper_id":12108,"author_seq":188,"given_name":4360,"surname":6037,"affiliation":63,"orcid":63},"Teacher-parent conversations are critical for student success, yet teachers often lack structured training in counseling communication skills. We present the first annotated corpus of teacher-parent counseling conversations consisting of 59 German dialogues (approximately 6k sentences, 21k annotations) simulated by prospective elementary school teachers, peers, and professional actors. The corpus features theory-grounded annotations for conversational phases (Beginning, Informational, Argumentative, Decision-Making, Concluding) and communication techniques (Paraphrasing, Verbalizing, Structuring). 
We provide detailed annotation guidelines operationalizing established counseling pedagogy frameworks for computational analysis. Inter-annotator agreement analysis reveals substantial agreement (Fleiss’ κ = 0.669 to 0.724, Krippendorff’s α = 0.666 to 0.735). Our analysis reveals confusion patterns, providing insights into counseling discourse structure. Baseline experiments with BERT-based models and open-source LLMs achieve F1 scores of up to 71% depending on task and model. The corpus, guidelines, and baseline code are publicly available under CC BY-NC-SA 4.0 license, enabling research on automated dialogue analysis and AI-based training tools for teacher education.",{"paper_id":12129,"title":12130,"year":7,"month":188,"day":63,"doi":12131,"resource_url":12132,"first_page":12133,"last_page":12134,"pdf_url":12135,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12136,"paper_type":860,"authors":12137,"abstract":12148},"lrec2026-main-506","RO-ABSA: A Romanian Dataset and Baselines for Aspect-Based Sentiment Analysis","10.63317\u002F3y4i7u4c78n5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-506","6373","6382","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.506.pdf","alina-etal-2026-ro",[12138,12140,12141,12143,12145],{"paper_id":12129,"author_seq":247,"given_name":12139,"surname":5684,"affiliation":63,"orcid":63},"Gheorghe Andreea",{"paper_id":12129,"author_seq":232,"given_name":7739,"surname":4283,"affiliation":63,"orcid":63},{"paper_id":12129,"author_seq":218,"given_name":12142,"surname":2968,"affiliation":63,"orcid":63},"Ionescu",{"paper_id":12129,"author_seq":203,"given_name":12144,"surname":2813,"affiliation":63,"orcid":63},"Ruseti",{"paper_id":12129,"author_seq":188,"given_name":12146,"surname":12147,"affiliation":63,"orcid":63},"Dascalu","Mihai","Despite the increasing use and applicability of sentiment analysis tools, a significant lack of datasets exists for low-resource 
or limited-resource languages, such as Romanian, which adequately address this task while considering language-specific traits. To overcome this limitation, we introduce a new dataset suitable for Aspect-Based Sentiment Analysis (ABSA) in Romanian, encompassing aspect term categorisation (ATC) and aspect-level sentiment classification (ALSC). Our dataset comprises approximately 6,250 annotated reviews with over 10,600 attributes and their corresponding polarities. We establish comprehensive baselines for each component and for the entire ABSA task. For ABSA, we evaluate two complementary strategies: (1) an end-to-end generative model that produces aspect–sentiment pairs, and (2) a pipeline combining encoder-based ATC and ALSC models. We fine-tune encoder, encoder–decoder, and decoder-only architectures and additionally test transfer learning from English for ATC. Few-shot prompting with LLaMA-3.3 and GPT-4o is also explored for comparison. Fine-tuned models consistently outperform few-shot setups: the best end-to-end ABSA model achieves an F1 score of 0.81, while the ATC and ALSC components reach 0.81 and 0.93 F1, respectively. 
These results highlight both the challenge of the RO-ABSA dataset and the benefits of supervised fine-tuning for Romanian ABSA.",{"paper_id":12150,"title":12151,"year":7,"month":188,"day":63,"doi":12152,"resource_url":12153,"first_page":12154,"last_page":12155,"pdf_url":12156,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12157,"paper_type":860,"authors":12158,"abstract":12206},"lrec2026-main-507","The Moral Foundations Reddit Corpus","10.63317\u002F2b6xmbq3kphf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-507","6383","6407","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.507.pdf","trager-etal-2026-moral",[12159,12162,12165,12168,12171,12174,12177,12179,12181,12183,12185,12188,12191,12193,12195,12197,12200,12203],{"paper_id":12150,"author_seq":247,"given_name":12160,"surname":12161,"affiliation":63,"orcid":63},"Jackson P.","Trager",{"paper_id":12150,"author_seq":232,"given_name":12163,"surname":12164,"affiliation":63,"orcid":63},"Alireza S.","Ziabari",{"paper_id":12150,"author_seq":218,"given_name":12166,"surname":12167,"affiliation":63,"orcid":63},"Elnaz","Rahmati",{"paper_id":12150,"author_seq":203,"given_name":12169,"surname":12170,"affiliation":63,"orcid":63},"Aida 
Mostafazadeh","Davani",{"paper_id":12150,"author_seq":188,"given_name":12172,"surname":12173,"affiliation":63,"orcid":63},"Preni","Golazizian",{"paper_id":12150,"author_seq":172,"given_name":12175,"surname":12176,"affiliation":63,"orcid":63},"Farzan","Karimi-Malekabadi",{"paper_id":12150,"author_seq":155,"given_name":2207,"surname":12178,"affiliation":63,"orcid":63},"Omrani",{"paper_id":12150,"author_seq":138,"given_name":12180,"surname":3446,"affiliation":63,"orcid":63},"Zhihe",{"paper_id":12150,"author_seq":121,"given_name":2669,"surname":12182,"affiliation":63,"orcid":63},"Kennedy",{"paper_id":12150,"author_seq":104,"given_name":2565,"surname":12184,"affiliation":63,"orcid":63},"Chochlakis",{"paper_id":12150,"author_seq":87,"given_name":12186,"surname":12187,"affiliation":63,"orcid":63},"Nils Karl","Reimer",{"paper_id":12150,"author_seq":73,"given_name":12189,"surname":12190,"affiliation":63,"orcid":63},"Melissa","Reyes",{"paper_id":12150,"author_seq":55,"given_name":12192,"surname":4721,"affiliation":63,"orcid":63},"Kesley",{"paper_id":12150,"author_seq":38,"given_name":12194,"surname":3270,"affiliation":63,"orcid":63},"Mellow",{"paper_id":12150,"author_seq":17,"given_name":3799,"surname":12196,"affiliation":63,"orcid":63},"Merrifield",{"paper_id":12150,"author_seq":2971,"given_name":12198,"surname":12199,"affiliation":63,"orcid":63},"Arta","Khosravi",{"paper_id":12150,"author_seq":2974,"given_name":12201,"surname":12202,"affiliation":63,"orcid":63},"Evans","Alvarez",{"paper_id":12150,"author_seq":857,"given_name":12204,"surname":12205,"affiliation":63,"orcid":63},"Morteza","Dehghani","Moral framing and sentiment can affect a variety of online and offline behaviors, including donation, environmental action, political engagement, and protest. 
Various computational methods in Natural Language Processing (NLP) have been used to detect moral sentiment from textual data, but achieving strong performance in such subjective tasks requires large, hand-annotated datasets. Previous corpora annotated for moral sentiment have proven valuable and have generated new insights both within NLP and across the social sciences, but have been limited to Twitter. To facilitate improving our understanding of the role of moral rhetoric, we present the Moral Foundations Reddit Corpus, a collection of 16,123 English Reddit comments that have been curated from 12 distinct subreddits, hand-annotated by at least three trained annotators for 8 categories of moral sentiment (i.e., Care, Proportionality, Equality, Purity, Authority, Loyalty, Thin Morality, Implicit\u002FExplicit Morality) based on the updated Moral Foundations Theory (MFT) framework. We evaluate baselines using large language models (Llama3-8B, Ministral-8B) in zero-shot, few-shot, and PEFT (Parameter-Efficient Fine-Tuning) settings, comparing their performance to fine-tuned encoder-only models like BERT (Bidirectional Encoder Representations from Transformers). 
The results show that LLMs continue to lag behind fine-tuned encoders on this subjective task, underscoring the ongoing need for human-annotated moral corpora for AI alignment evaluation.",{"paper_id":12208,"title":12209,"year":7,"month":188,"day":63,"doi":12210,"resource_url":12211,"first_page":12212,"last_page":12213,"pdf_url":12214,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12215,"paper_type":860,"authors":12216,"abstract":12226},"lrec2026-main-508","A Semi-Automatic Workflow for Transcribing and Annotating Broadcast News","10.63317\u002F3r9divdrcjes","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-508","6408","6417","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.508.pdf","draxler-etal-2026-semi",[12217,12220,12222,12223],{"paper_id":12208,"author_seq":247,"given_name":12218,"surname":12219,"affiliation":63,"orcid":63},"Christoph","Draxler",{"paper_id":12208,"author_seq":232,"given_name":5230,"surname":12221,"affiliation":63,"orcid":63},"Grawunder",{"paper_id":12208,"author_seq":218,"given_name":10760,"surname":10761,"affiliation":63,"orcid":63},{"paper_id":12208,"author_seq":203,"given_name":12224,"surname":12225,"affiliation":63,"orcid":63},"Felicitas","Kleber","Audio data archived in radio broadcast stations represent a rich source for various research purposes from phonetic questions up to training and test data for speech modelling. We present an efficient semi-automatic workflow for pre-processing, transcribing and analysing large linguistic-phonetic audio corpora. As a pilot study, we process radio broadcast news from a German public radio station containing recordings from 1956 until 2017. The workflow consists of basic preprocessing, automatic speech recognition, manual word correction, automatic generation of pairs of audio chunks and transcripts, plus an automatic word-, syllable- and phoneme-level segmentation of these chunks. 
The workflow is organised using the Octra Backend management tool, manual validation and correction of transcripts and chunking are performed using the Octra editor, and the BAS web services perform the segmentation. In an example analysis we show with our specific radio corpus how to use it for comparative longitudinal structure analyses of broadcast news, and for text- and signal-based studies on changes of speech and articulation rate.",{"paper_id":12228,"title":12229,"year":7,"month":188,"day":63,"doi":12230,"resource_url":12231,"first_page":12232,"last_page":12233,"pdf_url":12234,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12235,"paper_type":860,"authors":12236,"abstract":12246},"lrec2026-main-509","From Rosetta to Match-Up: A Paired Corpus of Linguistic Puzzles with Human and LLM Benchmarks","10.63317\u002F3yq3khhygtya","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-509","6418","6427","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.509.pdf","majmudar-etal-2026-rosetta",[12237,12240,12241,12244],{"paper_id":12228,"author_seq":247,"given_name":12238,"surname":12239,"affiliation":63,"orcid":63},"Neh","Majmudar",{"paper_id":12228,"author_seq":232,"given_name":8037,"surname":1837,"affiliation":63,"orcid":63},{"paper_id":12228,"author_seq":218,"given_name":12242,"surname":12243,"affiliation":63,"orcid":63},"Jinfan Frank","Hu",{"paper_id":12228,"author_seq":203,"given_name":2968,"surname":12245,"affiliation":63,"orcid":63},"Filatova","In this paper, we examine linguistic puzzles used in high school linguistics competitions, focusing on two common formats: Rosetta Stone and Match-Up. We propose a systematic procedure for converting existing Rosetta Stone puzzles into corresponding Match-Up counterparts. Because linguistic puzzle creation is complex and time-consuming, our method provides an efficient way to accelerate the generation of new puzzles. 
We evaluate the resulting Rosetta Stone–Match-Up pairs with both human participants and large language models (LLMs). Our results show that both expert human solvers and LLMs display an all-or-nothing pattern on Match-Up puzzles, either solving them completely or failing entirely. This work contributes a new dataset of paired puzzles and provides a detailed evaluation of puzzle difficulty across formats, offering insights into both human and machine linguistic reasoning.",{"paper_id":12248,"title":12249,"year":7,"month":188,"day":63,"doi":12250,"resource_url":12251,"first_page":12252,"last_page":12253,"pdf_url":12254,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12255,"paper_type":860,"authors":12256,"abstract":12266},"lrec2026-main-510","Tracing How Annotators Think: Augmenting Preference Judgments with Reading Processes","10.63317\u002F3nrqhnhixp4j","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-510","6428","6438","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.510.pdf","langis-etal-2026-tracing",[12257,12260,12262,12264],{"paper_id":12248,"author_seq":247,"given_name":12258,"surname":12259,"affiliation":63,"orcid":63},"Karin Johanna Denton de","Langis",{"paper_id":12248,"author_seq":232,"given_name":1600,"surname":12261,"affiliation":63,"orcid":63},"Walker",{"paper_id":12248,"author_seq":218,"given_name":12263,"surname":6468,"affiliation":63,"orcid":63},"Khanh Chi",{"paper_id":12248,"author_seq":203,"given_name":12265,"surname":1336,"affiliation":63,"orcid":63},"Dongyeop","We propose an annotation approach that captures not only labels but also the reading process underlying annotators’ decisions, e.g., what parts of the text they focus on, re-read or skim. Using this approach, we conduct a case study on the preference annotation task and create a dataset PreferRead that contains fine-grained annotator reading behaviors obtained from mouse tracking. 
PreferRead enables detailed analysis of how annotators navigate between a prompt and two candidate responses before selecting their preference. We find that annotators re-read a response in roughly half of all trials, most often revisiting the option they ultimately choose, and rarely revisit the prompt. Reading behaviors are also significantly related to annotation outcomes: re-reading is associated with higher inter-annotator agreement, whereas long reading paths and times are associated with lower agreement. These results demonstrate that reading processes provide a complementary cognitive dimension for understanding annotator reliability, decision-making and disagreement in complex, subjective NLP tasks.",{"paper_id":12268,"title":12269,"year":7,"month":188,"day":63,"doi":12270,"resource_url":12271,"first_page":12272,"last_page":12273,"pdf_url":12274,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12275,"paper_type":860,"authors":12276,"abstract":12288},"lrec2026-main-511","CodeClarity: A Framework and Benchmark for Evaluating Multilingual Code Summarization","10.63317\u002F3cmt3ycig8a7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-511","6439","6451","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.511.pdf","chakraborty-etal-2026-codeclarity",[12277,12280,12282,12285],{"paper_id":12268,"author_seq":247,"given_name":12278,"surname":12279,"affiliation":63,"orcid":63},"Madhurima","Chakraborty",{"paper_id":12268,"author_seq":232,"given_name":12281,"surname":11395,"affiliation":63,"orcid":63},"Drishti",{"paper_id":12268,"author_seq":218,"given_name":12283,"surname":12284,"affiliation":63,"orcid":63},"Maryam","Sikander",{"paper_id":12268,"author_seq":203,"given_name":12286,"surname":12287,"affiliation":63,"orcid":63},"Eman","Nisar","Large Language Models (LLMs) are increasingly used to summarize and document code, yet most research and training data remain limited to English. 
This creates barriers for developers working in other languages and leaves the multilingual capabilities of LLMs largely unexplored. We present CodeClarity, a framework for evaluating multilingual code summarization across six programming and six natural languages. It combines reference-based metrics, LLM-judge ratings, and faithfulness checks (identifiers and script) to capture surface similarity, semantic adequacy, and code-aware fidelity. Our experiments reveal that lexical metrics penalize morphologically rich languages, while judge-based evaluations provide more stable, semantically aligned assessments. This work establishes the first reproducible foundation for studying multilingual code summarization and points toward fairer, more inclusive evaluation of code intelligence systems. CodeClarity-Bench and the full evaluation pipeline are publicly available at huggingface.co\u002FCodeClarity and github.com\u002FMadhuNimmo\u002FCodeClarity, enabling community-scale human validation and follow-up studies.",{"paper_id":12290,"title":12291,"year":7,"month":188,"day":63,"doi":12292,"resource_url":12293,"first_page":12294,"last_page":12295,"pdf_url":12296,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12297,"paper_type":860,"authors":12298,"abstract":12315},"lrec2026-main-512","A Longitudinal, Multinational, and Multilingual Corpus of News Coverage of the Russo-Ukrainian 
War","10.63317\u002F5ecxdux9s9ft","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-512","6452","6471","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.512.pdf","mohanty-etal-2026-longitudinal",[12299,12302,12305,12307,12309,12312],{"paper_id":12290,"author_seq":247,"given_name":12300,"surname":12301,"affiliation":63,"orcid":63},"Dikshya","Mohanty",{"paper_id":12290,"author_seq":232,"given_name":12303,"surname":12304,"affiliation":63,"orcid":63},"Taisiia","Sabadyn",{"paper_id":12290,"author_seq":218,"given_name":12306,"surname":4069,"affiliation":63,"orcid":63},"Jelwin",{"paper_id":12290,"author_seq":203,"given_name":12308,"surname":3676,"affiliation":63,"orcid":63},"Chenlu",{"paper_id":12290,"author_seq":188,"given_name":12310,"surname":12311,"affiliation":63,"orcid":63},"Abhishek","Kalugade",{"paper_id":12290,"author_seq":172,"given_name":12313,"surname":12314,"affiliation":63,"orcid":63},"Ritwik","Banerjee","We present DNIPRO, a corpus of 246K news articles from the Russo-Ukrainian war (Feb 2022 – Aug 2024) spanning eleven outlets across five nation-states (Russia, Ukraine, U.S., U.K., China) and three languages. The corpus features comprehensive metadata and human-evaluated annotations for stance, sentiment, and topical framing, enabling systematic analysis of competing geopolitical narratives. It is uniquely suited for empirical studies of narrative divergence, media framing, and information warfare. Our exploratory analyses reveal how media outlets construct incompatible realities through divergent attribution and topical selection without direct refutation of opposing narratives. 
DNIPRO empowers empirical research on narrative evolution, cross-lingual information flow, and computational detection of implicit contradictions in fragmented information ecosystems.",{"paper_id":12317,"title":12318,"year":7,"month":188,"day":63,"doi":12319,"resource_url":12320,"first_page":12321,"last_page":12322,"pdf_url":12323,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12324,"paper_type":860,"authors":12325,"abstract":12354},"lrec2026-main-513","SKILL-IR-Discourse: A Large, Annotated Corpus of Argumentation and Domain Discourse on International Relations","10.63317\u002F4i2ptc5x4ese","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-513","6472","6482","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.513.pdf","wolska-etal-2026-skill",[12326,12329,12332,12335,12338,12341,12343,12345,12347,12349,12351],{"paper_id":12317,"author_seq":247,"given_name":12327,"surname":12328,"affiliation":63,"orcid":63},"Magdalena","Wolska",{"paper_id":12317,"author_seq":232,"given_name":12330,"surname":12331,"affiliation":63,"orcid":63},"Matti","Wiegmann",{"paper_id":12317,"author_seq":218,"given_name":12333,"surname":12334,"affiliation":63,"orcid":63},"Sassan","Gholiagha",{"paper_id":12317,"author_seq":203,"given_name":12336,"surname":12337,"affiliation":63,"orcid":63},"Mitja","Sienknecht",{"paper_id":12317,"author_seq":188,"given_name":12339,"surname":12340,"affiliation":63,"orcid":63},"Dora","Kiesel",{"paper_id":12317,"author_seq":172,"given_name":12342,"surname":9018,"affiliation":63,"orcid":63},"Irene 
Lopez",{"paper_id":12317,"author_seq":155,"given_name":4377,"surname":12344,"affiliation":63,"orcid":63},"Riehmann",{"paper_id":12317,"author_seq":138,"given_name":10757,"surname":12346,"affiliation":63,"orcid":63},"Fröhlich",{"paper_id":12317,"author_seq":121,"given_name":2965,"surname":12348,"affiliation":63,"orcid":63},"Girgensohn",{"paper_id":12317,"author_seq":104,"given_name":10760,"surname":12350,"affiliation":63,"orcid":63},"Neyer",{"paper_id":12317,"author_seq":87,"given_name":12352,"surname":12353,"affiliation":63,"orcid":63},"Benno","Stein","We present a large annotated corpus of scholarly discourse in the domain of International Relations, a subfield of political science. The corpus comprises 190 articles (over 1500K tokens) annotated at the argumentation, basic rhetorical, and domain level. Five of the included articles (ca. 62K tokens) constitute a Gold-standard, coded by domain experts. The remaining articles were coded by annotators trained on the Gold-standard and monitored for annotation quality. We describe our corpus creation methodology, the annotation process and quality assurance, the corpus itself, and present insights into the data: Most argumentative structures in the data are simple premise-conclusion structures, fewer than half of the claims have explicit supporting evidence. Counter-arguments to claims are rare. The claim-to-support ratio varies widely between articles; possibly to some extent due to the topics covered (with clear common ground) or to the differences between authors’ styles. The distribution of theoretical vs. 
evaluative statements varies strongly between articles; this can be attributed to such factors as different methodological approaches between the articles and the methodological focus of the publishing journal.",{"paper_id":12356,"title":12357,"year":7,"month":188,"day":63,"doi":12358,"resource_url":12359,"first_page":12360,"last_page":12361,"pdf_url":12362,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12363,"paper_type":860,"authors":12364,"abstract":12383},"lrec2026-main-514","Building Multimodal Corpora Using Microtask Pipelines and Local Annotators","10.63317\u002F434uxg6yj2aj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-514","6483","6495","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.514.pdf","hotti-etal-2026-building",[12365,12368,12369,12372,12374,12377,12380],{"paper_id":12356,"author_seq":247,"given_name":12366,"surname":12367,"affiliation":63,"orcid":63},"Helmiina","Hotti",{"paper_id":12356,"author_seq":232,"given_name":5547,"surname":5548,"affiliation":63,"orcid":63},{"paper_id":12356,"author_seq":218,"given_name":12370,"surname":12371,"affiliation":63,"orcid":63},"Anna-Kaisa","Jokipohja",{"paper_id":12356,"author_seq":203,"given_name":3655,"surname":12373,"affiliation":63,"orcid":63},"Kalliokoski",{"paper_id":12356,"author_seq":188,"given_name":12375,"surname":12376,"affiliation":63,"orcid":63},"Henna","Paakki",{"paper_id":12356,"author_seq":172,"given_name":12378,"surname":12379,"affiliation":63,"orcid":63},"Rosa","Suviranta",{"paper_id":12356,"author_seq":155,"given_name":12381,"surname":12382,"affiliation":63,"orcid":63},"Tuomo","Hiippala","Multimodality, or how human communication and interaction combine multiple forms of expression, is studied across diverse fields of research. Many of these fields have underlined the need for large, richly annotated multimodal corpora to support empirical research. 
While language resources are increasingly annotated using microtask crowdsourcing, multimodal corpora remain largely reliant on expert annotators, which creates a bottleneck for scalability and broad applicability. This paper presents a novel hybrid approach to multimodal corpus annotation, leveraging the efficiency of microtask pipelines while preserving theoretical rigour. Our approach decomposes the annotation process into sequences of simple, well-instructed tasks, which are then performed by locally recruited non-expert annotators. We demonstrate the feasibility of this approach by presenting a pipeline for annotating the multimodal structure of school textbooks.",{"paper_id":12385,"title":12386,"year":7,"month":188,"day":63,"doi":12387,"resource_url":12388,"first_page":12389,"last_page":12390,"pdf_url":12391,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12392,"paper_type":860,"authors":12393,"abstract":12447},"lrec2026-main-515","Beyond Fake News Detection: A Community-based Study of the Multicultural Nature of Information Disorder","10.63317\u002F4iyhqziwo6ri","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-515","6496","6508","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.515.pdf","gemelli-etal-2026-beyond",[12394,12396,12398,12400,12403,12406,12409,12410,12411,12414,12417,12420,12423,12426,12429,12430,12432,12435,12438,12441,12444],{"paper_id":12385,"author_seq":247,"given_name":2548,"surname":12395,"affiliation":63,"orcid":63},"Gemelli",{"paper_id":12385,"author_seq":232,"given_name":12397,"surname":5732,"affiliation":63,"orcid":63},"Giulia Di",{"paper_id":12385,"author_seq":218,"given_name":12399,"surname":1519,"affiliation":63,"orcid":63},"Yiran",{"paper_id":12385,"author_seq":203,"given_name":12401,"surname":12402,"affiliation":63,"orcid":63},"Md Azizul","Hoque",{"paper_id":12385,"author_seq":188,"given_name":12404,"surname":12405,"affiliation":63,"orcid":63},"Alberto 
De La Torre","Solís",{"paper_id":12385,"author_seq":172,"given_name":12407,"surname":12408,"affiliation":63,"orcid":63},"Mohamad Mojtaba Behboudi","Eshkiki",{"paper_id":12385,"author_seq":155,"given_name":6933,"surname":6934,"affiliation":63,"orcid":63},{"paper_id":12385,"author_seq":138,"given_name":3425,"surname":6931,"affiliation":63,"orcid":63},{"paper_id":12385,"author_seq":121,"given_name":12412,"surname":12413,"affiliation":63,"orcid":63},"Caterina Maria","Cappello",{"paper_id":12385,"author_seq":104,"given_name":12415,"surname":12416,"affiliation":63,"orcid":63},"Maziar Kianimoghadam","Jouneghani",{"paper_id":12385,"author_seq":87,"given_name":12418,"surname":12419,"affiliation":63,"orcid":63},"Payam","Latifi",{"paper_id":12385,"author_seq":73,"given_name":12421,"surname":12422,"affiliation":63,"orcid":63},"Yashar","Mahboudi",{"paper_id":12385,"author_seq":55,"given_name":12424,"surname":12425,"affiliation":63,"orcid":63},"Farzaneh","Mohseni",{"paper_id":12385,"author_seq":38,"given_name":12427,"surname":12428,"affiliation":63,"orcid":63},"Dario","Placenti",{"paper_id":12385,"author_seq":17,"given_name":2072,"surname":2073,"affiliation":63,"orcid":63},{"paper_id":12385,"author_seq":2971,"given_name":7898,"surname":12431,"affiliation":63,"orcid":63},"Sanguinetti",{"paper_id":12385,"author_seq":2974,"given_name":12433,"surname":12434,"affiliation":63,"orcid":63},"Aurora","Scarpellini",{"paper_id":12385,"author_seq":857,"given_name":12436,"surname":12437,"affiliation":63,"orcid":63},"Chiara","Zanchi",{"paper_id":12385,"author_seq":877,"given_name":12439,"surname":12440,"affiliation":63,"orcid":63},"Usman","Naseem",{"paper_id":12385,"author_seq":2984,"given_name":12442,"surname":12443,"affiliation":63,"orcid":63},"Marco Antonio","Stranisci",{"paper_id":12385,"author_seq":2988,"given_name":12445,"surname":12446,"affiliation":63,"orcid":63},"Simona","Frenda","Recognizing disinformation is a challenging task for humans and AI systems. 
News can be false, misleading, or harmful, and its interpretation often depends on the cultural context of the audience. However, existing datasets rarely account for these contextual and cultural differences, as they are typically not designed from the perspective of news consumers. To address this gap, in this paper, we present the Information Disorder (InDor) corpus, a multilingual dataset of news articles in English, Farsi, Italian, and Russian, annotated for information disorder detection and explanation. The corpus was developed through a participatory process involving contributors from diverse cultural and professional backgrounds, who engaged in data collection, annotation, and evaluation of Large Language Model (LLM) performance on the task. Our findings highlight that false and manipulated news manifest differently across cultural settings, and that current LLMs fail to adequately capture this complexity. This underscores the need for culturally aware computational approaches in the study of information disorder.",{"paper_id":12449,"title":12450,"year":7,"month":188,"day":63,"doi":12451,"resource_url":12452,"first_page":12453,"last_page":12454,"pdf_url":12455,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12456,"paper_type":860,"authors":12457,"abstract":12464},"lrec2026-main-516","FreeTxt-Vi: A Benchmarked Vietnamese-English Toolkit for Segmentation, Sentiment, and Summarisation","10.63317\u002F58migi9wn3u6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-516","6509","6518","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.516.pdf","nguyen-etal-2026-freetxt",[12458,12460,12461,12463],{"paper_id":12449,"author_seq":247,"given_name":12459,"surname":2395,"affiliation":63,"orcid":63},"Hung 
Huy",{"paper_id":12449,"author_seq":232,"given_name":12047,"surname":12048,"affiliation":63,"orcid":63},{"paper_id":12449,"author_seq":218,"given_name":1216,"surname":12462,"affiliation":63,"orcid":63},"Rayson",{"paper_id":12449,"author_seq":203,"given_name":2843,"surname":2844,"affiliation":63,"orcid":63},"FreeTxt-Vi is a free and open-source web-based toolkit for creating and analysing bilingual Vietnamese–English text collections. Positioned at the intersection of corpus linguistics and natural language processing (NLP), it enables users to build, explore, and interpret free-text data without requiring programming expertise. The system combines established corpus analysis features such as concordancing, keyword analysis, word relation exploration, and interactive visualisation with modern transformer-based NLP components for sentiment analysis and summarisation. A key contribution of this work is the design of a unified bilingual NLP pipeline that integrates a hybrid VnCoreNLP + Byte Pair Encoding (BPE) segmentation strategy, a fine-tuned TabularisAI sentiment classifier, and a fine-tuned Qwen2.5 model for abstractive summarisation. Unlike existing text analysis platforms, FreeTxt-Vi is evaluated as a set of language processing components. We conduct a three-part evaluation covering segmentation, sentiment analysis, and summarisation, and demonstrate that our approach achieves competitive or superior performance compared to widely used baselines in both Vietnamese and English. By reducing technical barriers to multilingual text analysis, FreeTxt-Vi supports reproducible research and promotes the development of language resources for Vietnamese, a widely spoken but underrepresented language in NLP. 
The toolkit is applicable to a wide range of domains, including education, digital humanities, cultural heritage, and the social sciences, where qualitative text data are common but often difficult to process at scale.",{"paper_id":12466,"title":12467,"year":7,"month":188,"day":63,"doi":12468,"resource_url":12469,"first_page":12470,"last_page":12471,"pdf_url":12472,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12473,"paper_type":860,"authors":12474,"abstract":12481},"lrec2026-main-517","The Patrologia Graeca Corpus: OCR, Annotation, and Open Release of Noisy Nineteenth-Century Polytonic Greek Editions","10.63317\u002F2gvtaxwvjtc7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-517","6519","6527","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.517.pdf","vidalgorne-etal-2026-patrologia",[12475,12478],{"paper_id":12466,"author_seq":247,"given_name":12476,"surname":12477,"affiliation":63,"orcid":63},"Chahan","Vidal-Gorène",{"paper_id":12466,"author_seq":232,"given_name":12479,"surname":12480,"affiliation":63,"orcid":63},"Bastien","Kindt","We present the Patrologia Graeca Corpus, the first large-scale open OCR and linguistic resource for nineteenth-century editions of Ancient Greek. The collection covers the remaining undigitized volumes of the Patrologia Graeca (PG), printed in complex bilingual (Greek–Latin) layouts and characterized by highly degraded polytonic Greek typography. Through a dedicated pipeline combining YOLO-based layout detection and CRNN-based text recognition, we achieve a character error rate (CER) of 1.05% and a word error rate (WER) of 4.69%, largely outperforming existing OCR systems for polytonic Greek. The resulting corpus contains around six million lemmatized and part-of-speech tagged tokens, aligned with full OCR and layout annotations. 
Beyond its philological value, this corpus establishes a new benchmark for OCR on noisy polytonic Greek and provides training material for future models, including LLMs.",{"paper_id":12483,"title":12484,"year":7,"month":188,"day":63,"doi":12485,"resource_url":12486,"first_page":12487,"last_page":12488,"pdf_url":12489,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12490,"paper_type":860,"authors":12491,"abstract":12502},"lrec2026-main-518","National Library as Corpus: DeLiKo-2025@DNB – a Very Large Corpus of German-language Contemporary Literature","10.63317\u002F59wsms6588ys","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-518","6528","6535","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.518.pdf","kupietz-etal-2026-national",[12492,12495,12498,12500],{"paper_id":12483,"author_seq":247,"given_name":12493,"surname":12494,"affiliation":63,"orcid":63},"Marc","Kupietz",{"paper_id":12483,"author_seq":232,"given_name":12496,"surname":12497,"affiliation":63,"orcid":63},"Nils","Diewald",{"paper_id":12483,"author_seq":218,"given_name":1041,"surname":12499,"affiliation":63,"orcid":63},"Genêt",{"paper_id":12483,"author_seq":203,"given_name":4651,"surname":12501,"affiliation":63,"orcid":63},"Witt","This paper introduces DeLiKo-2025@DNB, a very large, linguistically annotated corpus of German-language contemporary literature, freely accessible via https:\u002F\u002Fkorap.dnb.de\u002F. The corpus currently comprises 21 billion words from over 287,000 books published between 2005 and the present, spanning pulp and genre fiction as well as literary award-winning works. It covers the entire holdings of EPUB-format fiction ebooks deposited with the German National Library (DNB). We provide a detailed account of the corpus composition, metadata, and key features. 
Additionally, we explain our strategy for enabling lawful and effective access through the deployment of the open‑source corpus analysis platform KorAP at the DNB, and we discuss both the transferability of our approach and work to other national libraries and our ongoing and planned extensions and enhancements.",{"paper_id":12504,"title":12505,"year":7,"month":188,"day":63,"doi":12506,"resource_url":12507,"first_page":12508,"last_page":12509,"pdf_url":12510,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12511,"paper_type":860,"authors":12512,"abstract":12538},"lrec2026-main-519","Multi-party Conversational Corpus of L1 and L2 for Speech Alignment Research (Teams-SK): Methodological Approach","10.63317\u002F3qzsztq7ab7m","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-519","6536","6546","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.519.pdf","benus-etal-2026-multi",[12513,12515,12517,12519,12522,12524,12527,12530,12533,12536],{"paper_id":12504,"author_seq":247,"given_name":2813,"surname":12514,"affiliation":63,"orcid":63},"Benus",{"paper_id":12504,"author_seq":232,"given_name":8796,"surname":12516,"affiliation":63,"orcid":63},"Gatial",{"paper_id":12504,"author_seq":218,"given_name":2022,"surname":12518,"affiliation":63,"orcid":63},"György",{"paper_id":12504,"author_seq":203,"given_name":12520,"surname":12521,"affiliation":63,"orcid":63},"Mária","Hricková",{"paper_id":12504,"author_seq":188,"given_name":3843,"surname":12523,"affiliation":63,"orcid":63},"Kažimír",{"paper_id":12504,"author_seq":172,"given_name":12525,"surname":12526,"affiliation":63,"orcid":63},"Zuzana","Kozáčiková",{"paper_id":12504,"author_seq":155,"given_name":12528,"surname":12529,"affiliation":63,"orcid":63},"Lucia","Mareková",{"paper_id":12504,"author_seq":138,"given_name":12531,"surname":12532,"affiliation":63,"orcid":63},"Róbert","Sabo",{"paper_id":12504,"author_seq":121,"given_name":12534,"surname":12535,"aff
iliation":63,"orcid":63},"Marian","Trnka",{"paper_id":12504,"author_seq":104,"given_name":2022,"surname":12537,"affiliation":63,"orcid":63},"Vráb","The tendency for speakers to align or accommodate their verbal and non-verbal behaviour to their interlocutors is a fundamental mechanism in spoken interaction, strongly associated with successful communication and social bonding. Despite its ubiquity and documentation across various modalities and linguistic levels (e.g., lexical, prosodic), a lack of comparable, multi-layered linguistic resources and methodological agreement prevents a deeper understanding of its cognitive mechanisms. Multidimensional view of speech alignment might enhance its application in areas like language training or human-machine interaction. This paper addresses these gaps by presenting the development of a multilingual corpus of L1 Slovak and L2 English speech, extending a comparable corpus in L1 English. The corpus utilizes a modified cooperative board game, Forbidden Island, to elicit semi-spontaneous, multi-party conversation and introduces a complementary pair game to specifically target and prime syntactic alignment. The resource includes psychological metadata (e.g., personality, anxiety, perceived dominance) and enables a reproducible methodology for investigating the relationship between entrainment patterns and individual characteristics. 
By providing a non-Germanic language perspective and a direct L1–L2 comparison framework at prosodic, lexical, pragmatic and syntactic levels, this corpus offers a rich resource for advancing the theoretical understanding, replication, and practical application of speech alignment.",{"paper_id":12540,"title":12541,"year":7,"month":188,"day":63,"doi":12542,"resource_url":12543,"first_page":12544,"last_page":12545,"pdf_url":12546,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12547,"paper_type":860,"authors":12548,"abstract":12563},"lrec2026-main-520","Is Semi-Automatic Transcription Useful in Corpus Creation? Preliminary Considerations on the KIParla Corpus","10.63317\u002F3gw6i7cbs5rc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-520","6547","6560","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.520.pdf","simonotti-etal-2026-is",[12549,12552,12555,12557,12560],{"paper_id":12540,"author_seq":247,"given_name":12550,"surname":12551,"affiliation":63,"orcid":63},"Martina","Simonotti",{"paper_id":12540,"author_seq":232,"given_name":12553,"surname":12554,"affiliation":63,"orcid":63},"Ludovica","Pannitto",{"paper_id":12540,"author_seq":218,"given_name":7656,"surname":12556,"affiliation":63,"orcid":63},"Zucchini",{"paper_id":12540,"author_seq":203,"given_name":12558,"surname":12559,"affiliation":63,"orcid":63},"Silvia","Ballarè",{"paper_id":12540,"author_seq":188,"given_name":12561,"surname":12562,"affiliation":63,"orcid":63},"Caterina","Mauri","This paper analyses the implementation of Automatic Speech Recognition (ASR) into the transcription workflow of the KIParla corpus, a resource of spoken Italian. 
Through a two-phase experiment, 11 expert and novice transcribers produced both manual and ASR-assisted transcriptions of identical audio segments across three different types of conversation, which were subsequently analyzed through a combination of statistical modeling, word-level alignment and a series of annotation-based metrics. Results show that ASR-assisted workflows can increase transcription speed but do not systemically improve accuracy or prosodic annotation quality. Improvements appear to depend on multiple factors, including workflow configuration, conversation type and annotator experience. These findings are therefore yet not generalizable and highlight the complex interplay between transcription expertise, data type and workflow design. Despite current limitations, ASR-assisted transcription, potentially when supported by task-specific fine-tuning, could be integrated into the KIParla transcription workflow to accelerate corpus creation without compromising linguistic and annotation quality. 
More broadly, this work underscores the potential of semi-automatic transcription for corpus building, especially in complex settings involving multiple speakers and spontaneous, conversational data.",{"paper_id":12565,"title":12566,"year":7,"month":188,"day":63,"doi":12567,"resource_url":12568,"first_page":12569,"last_page":12570,"pdf_url":12571,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12572,"paper_type":860,"authors":12573,"abstract":12593},"lrec2026-main-521","Open Korean Historical Corpus: A Millennia-Scale Diachronic Collection of Public Domain Texts","10.63317\u002F45k9ywz4c59t","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-521","6561","6572","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.521.pdf","song-etal-2026-open",[12574,12576,12578,12581,12583,12585,12588,12591],{"paper_id":12565,"author_seq":247,"given_name":12575,"surname":1913,"affiliation":63,"orcid":63},"Seyoung",{"paper_id":12565,"author_seq":232,"given_name":12577,"surname":5173,"affiliation":63,"orcid":63},"Nawon",{"paper_id":12565,"author_seq":218,"given_name":12579,"surname":12580,"affiliation":63,"orcid":63},"Songeun","Chae",{"paper_id":12565,"author_seq":203,"given_name":12582,"surname":6876,"affiliation":63,"orcid":63},"Kiwoong",{"paper_id":12565,"author_seq":188,"given_name":12584,"surname":9985,"affiliation":63,"orcid":63},"Jiho",{"paper_id":12565,"author_seq":172,"given_name":12586,"surname":12587,"affiliation":63,"orcid":63},"Haneul","Yoo",{"paper_id":12565,"author_seq":155,"given_name":12589,"surname":12590,"affiliation":63,"orcid":63},"Kyunghyun","Cho",{"paper_id":12565,"author_seq":138,"given_name":3543,"surname":12592,"affiliation":63,"orcid":63},"Oh","The history of the Korean language is characterized by a discrepancy between its spoken and written forms and a pivotal shift from Chinese characters to the Hangul alphabet. 
However, this linguistic evolution has remained largely unexplored in NLP due to a lack of accessible historical corpora. To address this gap, we introduce the Open Korean Historical Corpus, a large-scale, openly licensed dataset spanning 1,300 years and 6 languages, as well as under-represented writing systems like Korean-style Sinitic (Idu) and Hanja-Hangul mixed script. This corpus contains 17.7 million documents and 5.1 billion tokens from 19 sources, ranging from the 7th century to 2025. We leverage this resource to quantitatively analyze major linguistic shifts: (1) Idu usage peaked in the 1860s before declining sharply; (2) the transition from Hanja to Hangul was a rapid transformation starting around 1890; and (3) North Korea’s lexical divergence causes modern tokenizers to produce up to 51 times higher out-of-vocabulary rates. This work provides a foundational resource for quantitative diachronic analysis by capturing the history of the Korean language. Moreover, it can serve as a pre-training corpus for large language models, potentially improving their understanding of Sino-Korean vocabulary in modern Hangul as well as archaic writing systems.",{"paper_id":12595,"title":12596,"year":7,"month":188,"day":63,"doi":12597,"resource_url":12598,"first_page":12599,"last_page":12600,"pdf_url":12601,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12602,"paper_type":860,"authors":12603,"abstract":12616},"lrec2026-main-522","NAIST LIFE STORY: A Seven-Year Crowdsourced Dataset of Japanese Emotion-related 
Episodes","10.63317\u002F3coh2p848v7q","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-522","6573","6584","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.522.pdf","ito-etal-2026-naist",[12604,12607,12610,12613,12614],{"paper_id":12595,"author_seq":247,"given_name":12605,"surname":12606,"affiliation":63,"orcid":63},"Kazuhiro","Ito",{"paper_id":12595,"author_seq":232,"given_name":12608,"surname":12609,"affiliation":63,"orcid":63},"Junko","Hayashi",{"paper_id":12595,"author_seq":218,"given_name":12611,"surname":12612,"affiliation":63,"orcid":63},"Hiroyuki","Nagai",{"paper_id":12595,"author_seq":203,"given_name":1460,"surname":1461,"affiliation":63,"orcid":63},{"paper_id":12595,"author_seq":188,"given_name":1472,"surname":12615,"affiliation":63,"orcid":63},"ARAMAKI","Existing emotion datasets have supported a wide range of NLP tasks, but most are static resources that capture language use only at the time of their creation. As a result, they cannot represent how emotional meanings shift in response to cultural and social change. To address this limitation, we present NAIST LIFE STORY, a seven-year collection of Japanese emotion-related episodes that reflect contemporary topics across multiple years. Since 2017, 1,000 crowdsourced participants per quarter have written short texts describing personal experiences associated with seven emotions: anger, anxiety, disgust, trust, joy, sadness, and surprise. The dataset currently spans 28 periods and includes gender and age information for each participant. Analyses reveal systematic differences in text length and lexical diversity across emotions, as well as clear temporal trends linked to major events such as the COVID-19 pandemic. 
A preliminary experiment with a large language model shows that using this dataset as contextual evidence improves time-aware emotion inference, demonstrating its value for studying the evolving relationship between emotion and language.",{"paper_id":12618,"title":12619,"year":7,"month":188,"day":63,"doi":12620,"resource_url":12621,"first_page":12622,"last_page":12623,"pdf_url":12624,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12625,"paper_type":860,"authors":12626,"abstract":12637},"lrec2026-main-523","Audience Engagement with Arabic Women's Social Empowerment and Wellbeing: A Decadal Corpus","10.63317\u002F3hkcoy9egx54","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-523","6585","6596","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.523.pdf","zaghouani-etal-2026-audience",[12627,12628,12631,12634],{"paper_id":12618,"author_seq":247,"given_name":2365,"surname":2366,"affiliation":63,"orcid":63},{"paper_id":12618,"author_seq":232,"given_name":12629,"surname":12630,"affiliation":63,"orcid":63},"Mabrouka","Bessghaier",{"paper_id":12618,"author_seq":218,"given_name":12632,"surname":12633,"affiliation":63,"orcid":63},"Md. Rafiul","Biswas",{"paper_id":12618,"author_seq":203,"given_name":12635,"surname":12636,"affiliation":63,"orcid":63},"Shimaa Amer","Ibrahim","This paper presents the Arabic Women and Society Corpus, a ten-year collection of 252,487 public Arabic Facebook posts related to women’s empowerment and social wellbeing. The corpus was collected from 51,660 pages across 77 countries between 2014 and 2024, resulting in more than 267 million user interactions. Each post includes engagement metrics such as shares, comments, and emotional reactions, providing a unique view of audience sentiment and social attention. The data were processed using an automated pipeline with language identification, normalization, and metadata cleaning to ensure reliability and reproducibility. 
The corpus enables large-scale analysis of gender discourse, social reform, and emotional engagement across Arabic dialects. It supports research in Arabic natural language processing, computational social science, and digital communication studies. The dataset and accompanying documentation will be released publicly for research use under an open license.",{"paper_id":12639,"title":12640,"year":7,"month":188,"day":63,"doi":12641,"resource_url":12642,"first_page":12643,"last_page":12644,"pdf_url":12645,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12646,"paper_type":860,"authors":12647,"abstract":12653},"lrec2026-main-524","ArPoMeme: An Annotated Arabic Multimodal Dataset for Political Ideology and Polarization","10.63317\u002F5ghumwhwumkz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-524","6597","6608","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.524.pdf","zaghouani-etal-2026-arpomeme",[12648,12649,12651,12652],{"paper_id":12639,"author_seq":247,"given_name":2365,"surname":2366,"affiliation":63,"orcid":63},{"paper_id":12639,"author_seq":232,"given_name":6841,"surname":12650,"affiliation":63,"orcid":63},"Attia",{"paper_id":12639,"author_seq":218,"given_name":12632,"surname":12633,"affiliation":63,"orcid":63},{"paper_id":12639,"author_seq":203,"given_name":4224,"surname":4225,"affiliation":63,"orcid":63},"Memes have become a prominent medium of political communication in the Arab world, reflecting how humor, imagery, and text interact to express ideological and cultural positions. Despite the centrality of memes to online political discourse, there is a lack of systematically curated resources for analyzing their multimodal and ideological dimensions in Arabic. This paper presents ArPoMeme, a large-scale dataset of approximately 7,300 Arabic political memes categorized by ideological orientation, including Leftist, Islamist, Pan-Arabist, and Satirical perspectives. 
The dataset captures the diversity of Arabic meme ecosystems by grounding classification in the self-identification of public Facebook pages and groups that produce and disseminate these memes. To ensure both scale and accuracy, we designed a semi-automated data collection pipeline combining Playwright-based Facebook scraping with Google Drive synchronization, followed by text extraction using the Qwen2.5-VL-7B vision–language model. The extracted text was manually verified and annotated for three polarization dimensions: Us vs. Them framing, Hostility toward out-groups, and Calls to action. Annotation was conducted through a custom Streamlit-based interface supporting distributed labeling, real-time tracking, and version control. The resulting dataset links visual content, textual messages, and ideological orientation, enabling fine-grained analysis of political antagonism, mobilization, and humor. Quantitative analysis of the annotated corpus reveals strong asymmetries in antagonistic framing across ideological groups, with Islamist and satirical memes exhibiting the highest levels of hostility and mobilization cues. 
The dataset and the annotation tool offer a reproducible and publicly available resource for studying Arabic political discourse, multimodal ideology detection, and polarization dynamics.",{"paper_id":12655,"title":12656,"year":7,"month":188,"day":63,"doi":12657,"resource_url":12658,"first_page":12659,"last_page":12660,"pdf_url":12661,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12662,"paper_type":860,"authors":12663,"abstract":12695},"lrec2026-main-525","Universal NER v2: Towards a Massively Multilingual Named Entity Recognition Benchmark","10.63317\u002F4qhcjikvgeda","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-525","6609","6618","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.525.pdf","blevins-etal-2026-universal",[12664,12667,12670,12671,12674,12677,12678,12679,12680,12681,12684,12686,12689,12692],{"paper_id":12655,"author_seq":247,"given_name":12665,"surname":12666,"affiliation":63,"orcid":63},"Terra","Blevins",{"paper_id":12655,"author_seq":232,"given_name":12668,"surname":12669,"affiliation":63,"orcid":63},"Stephen","Mayhew",{"paper_id":12655,"author_seq":218,"given_name":7398,"surname":7399,"affiliation":63,"orcid":63},{"paper_id":12655,"author_seq":203,"given_name":12672,"surname":12673,"affiliation":63,"orcid":63},"Hila","Gonen",{"paper_id":12655,"author_seq":188,"given_name":12675,"surname":12676,"affiliation":63,"orcid":63},"Shachar","Mirkin",{"paper_id":12655,"author_seq":172,"given_name":9434,"surname":9435,"affiliation":63,"orcid":63},{"paper_id":12655,"author_seq":155,"given_name":10974,"surname":10975,"affiliation":63,"orcid":63},{"paper_id":12655,"author_seq":138,"given_name":9428,"surname":9429,"affiliation":63,"orcid":63},{"paper_id":12655,"author_seq":121,"given_name":8532,"surname":4481,"affiliation":63,"orcid":63},{"paper_id":12655,"author_seq":104,"given_name":12682,"surname":12683,"affiliation":63,"orcid":63},"Eugene","Jang",{"paper_id":12655,"author_se
q":87,"given_name":12685,"surname":5173,"affiliation":63,"orcid":63},"Eungseo",{"paper_id":12655,"author_seq":73,"given_name":12687,"surname":12688,"affiliation":63,"orcid":63},"Jeongyeon","Seo",{"paper_id":12655,"author_seq":55,"given_name":12690,"surname":12691,"affiliation":63,"orcid":63},"Xenophon","Gialis",{"paper_id":12655,"author_seq":38,"given_name":12693,"surname":12694,"affiliation":63,"orcid":63},"Yuval","Pinter","We present Universal NER (UNER) v2, a significant extension of the initial version released in 2024. UNER is a collaborative dataset for multilingual named-entity annotations, built to support research on NER methods in a cross-linguistic setting. UNER v2 adds 11 new datasets in 10 typologically varied languages to the resource, including multiple parallel evaluation benchmarks aligned with each other and other datasets in UNER v1, while maintaining the same annotation guidelines and high standards for inter-annotator agreement. We report detailed statistics for the dataset and benchmark UNER v2 using both encoder-based model architectures and LLMs.",{"paper_id":12697,"title":12698,"year":7,"month":188,"day":63,"doi":12699,"resource_url":12700,"first_page":12701,"last_page":12702,"pdf_url":12703,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12704,"paper_type":860,"authors":12705,"abstract":12712},"lrec2026-main-526","JobArabi: An Arabic Corpus and Analysis of Job Announcements from Social 
Media","10.63317\u002F59fr3tst4j4z","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-526","6619","6629","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.526.pdf","zaghouani-etal-2026-jobarabi",[12706,12707,12708,12709],{"paper_id":12697,"author_seq":247,"given_name":2365,"surname":2366,"affiliation":63,"orcid":63},{"paper_id":12697,"author_seq":232,"given_name":12635,"surname":12636,"affiliation":63,"orcid":63},{"paper_id":12697,"author_seq":218,"given_name":12629,"surname":12630,"affiliation":63,"orcid":63},{"paper_id":12697,"author_seq":203,"given_name":12710,"surname":12711,"affiliation":63,"orcid":63},"Houda","Bouamor","This paper introduces JobArabi, a large-scale corpus of Arabic job announcements collected from social media between January 2024 and October 2025. The dataset contains 20,528 public posts from X and captures more than two years of employment-related discourse across Arabic-speaking online communities. The corpus was compiled using a linguistically informed query framework covering 21 Arabic keyword families that reflect gendered, plural, formal, and dialectal expressions of recruitment language. The resulting dataset includes posts from institutional, commercial, and individual accounts and provides metadata such as timestamps, engagement indicators, and geolocation when available, enabling temporal and regional analysis of employment discourse.Quantitative analysis reveals several sociolinguistic patterns in online recruitment, including the persistence of gendered hiring language, regional variation in occupational demand, and the emotional framing of recruitment messages. 
These findings highlight the potential of Arabic social media as a resource for studying labor market communication and linguistic change.The JobArabi corpus, together with documentation and collection scripts, will be released to support research in Arabic NLP, computational social science, and digital labor studies.",{"paper_id":12714,"title":12715,"year":7,"month":188,"day":63,"doi":12716,"resource_url":12717,"first_page":12718,"last_page":12719,"pdf_url":12720,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12721,"paper_type":860,"authors":12722,"abstract":12732},"lrec2026-main-527","ParaCLEAN: Improving Translation Quality through Systematic Parallel Data Cleaning","10.63317\u002F36e3vfurjna4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-527","6630","6640","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.527.pdf","mash-etal-2026-paraclean",[12723,12726,12729],{"paper_id":12714,"author_seq":247,"given_name":12724,"surname":12725,"affiliation":63,"orcid":63},"Audrey","Mash",{"paper_id":12714,"author_seq":232,"given_name":12727,"surname":12728,"affiliation":63,"orcid":63},"Ella Paulina","Bohman",{"paper_id":12714,"author_seq":218,"given_name":12730,"surname":12731,"affiliation":63,"orcid":63},"Maite","Melero","Parallel corpora often contain significant noise, particularly in low-resource settings where both collected and synthetic data are combined. We present ParaCLEAN, a modular pipeline for cleaning parallel data that integrates embeddings-based filtering, language identification, deduplication, and normalisation. Experiments on Catalan to Japanese translation demonstrate that ParaCLEAN improves data quality and downstream MT performance. Ablation studies highlight the contribution of each step. 
ParaCLEAN is lightweight, reproducible, and extensible for diverse language pairs.",{"paper_id":12734,"title":12735,"year":7,"month":188,"day":63,"doi":12736,"resource_url":12737,"first_page":12738,"last_page":12739,"pdf_url":12740,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12741,"paper_type":860,"authors":12742,"abstract":12745},"lrec2026-main-528","DReUD: Discourse Relations in Universal Dependencies","10.63317\u002F3x39iuhcg3s3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-528","6641","6646","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.528.pdf","mrovsk-etal-2026-dreud",[12743,12744],{"paper_id":12734,"author_seq":247,"given_name":1975,"surname":1976,"affiliation":63,"orcid":63},{"paper_id":12734,"author_seq":232,"given_name":11970,"surname":11971,"affiliation":63,"orcid":63},"We present a proposal for an annotation scheme and data representation of shallow discourse relations annotation in the Universal Dependencies (UD) framework, as a theoretically appropriate and also practically oriented extension of the established morphosyntactic analysis. We outline the design requirements for the annotation scheme, encompassing simplicity, comprehensibility, theoretical grounding, practical applicability and technical robustness, while accommodating the specific constraints of shallow discourse analysis. 
At the same time, we present a work-in-progress baseline version of DReUD (Discourse Relations in Universal Dependencies), a modular shallow discourse parser for Universal Dependencies as a command-line program, a web client and a REST API service for Czech and English, designed for a seamless and rapid integration of discourse relations analysis both in the theoretical research and in NLP applications.",{"paper_id":12747,"title":12748,"year":7,"month":188,"day":63,"doi":12749,"resource_url":12750,"first_page":12751,"last_page":12752,"pdf_url":12753,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12754,"paper_type":860,"authors":12755,"abstract":12772},"lrec2026-main-529","MultiGraSCCo: A Multilingual Anonymization Benchmark with Annotations of Personal Identifiers","10.63317\u002F4bzj7bdw86tn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-529","6647","6660","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.529.pdf","baroud-etal-2026-multigrascco",[12756,12758,12760,12762,12765,12768,12770],{"paper_id":12747,"author_seq":247,"given_name":12636,"surname":12757,"affiliation":63,"orcid":63},"Baroud",{"paper_id":12747,"author_seq":232,"given_name":12218,"surname":12759,"affiliation":63,"orcid":63},"Otto",{"paper_id":12747,"author_seq":218,"given_name":2545,"surname":12761,"affiliation":63,"orcid":63},"Czehmann",{"paper_id":12747,"author_seq":203,"given_name":12763,"surname":12764,"affiliation":63,"orcid":63},"Christine","Hovhannisyan",{"paper_id":12747,"author_seq":188,"given_name":12766,"surname":12767,"affiliation":63,"orcid":63},"Lisa","Raithel",{"paper_id":12747,"author_seq":172,"given_name":4763,"surname":12769,"affiliation":63,"orcid":63},"Möller",{"paper_id":12747,"author_seq":155,"given_name":5621,"surname":12771,"affiliation":63,"orcid":63},"Roller","Accessing sensitive patient data for machine learning is challenging due to privacy concerns. 
Datasets with annotations of personally identifiable information are crucial for developing and testing anonymization systems, which would enable safe data sharing that complies with privacy regulations. Since accessing real patient data is a bottleneck, synthetic data offers an efficient solution for data scarcity, bypassing privacy regulations that apply to real data. Moreover, neural machine translation can help to create high-quality data for low-resource languages by translating validated real or synthetic data from a high-resource language. In this work, we create a multilingual anonymization benchmark in ten languages, using a machine translation methodology that preserves the original annotations and renders city and people names in a culturally and contextually appropriate form in each target language. Our evaluation study with medical professionals confirms the quality of the translations, both in general and with respect to the translation and adaptation of personal information. Our benchmark with over 2,500 annotations of personal information can be used in many applications, including training annotators, validating annotations across institutions without legal complications, and helping improve the performance of automatic personal information detection. 
We make our benchmark and annotation guidelines available for further research.",{"paper_id":12774,"title":12775,"year":7,"month":188,"day":63,"doi":12776,"resource_url":12777,"first_page":12778,"last_page":12779,"pdf_url":12780,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12781,"paper_type":860,"authors":12782,"abstract":12796},"lrec2026-main-530","Structured Legal Document Generation in India: A Model-Agnostic Wrapper Approach with VidhikDastaavej","10.63317\u002F2wswvwyqxt4k","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-530","6661","6673","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.530.pdf","nigam-etal-2026-structured",[12783,12786,12789,12792,12793],{"paper_id":12774,"author_seq":247,"given_name":12784,"surname":12785,"affiliation":63,"orcid":63},"Shubham Kumar","Nigam",{"paper_id":12774,"author_seq":232,"given_name":12787,"surname":12788,"affiliation":63,"orcid":63},"Deepak Patnaik","Balaramamahanthi",{"paper_id":12774,"author_seq":218,"given_name":12790,"surname":12791,"affiliation":63,"orcid":63},"Noel","Shallum",{"paper_id":12774,"author_seq":203,"given_name":9947,"surname":9939,"affiliation":63,"orcid":63},{"paper_id":12774,"author_seq":188,"given_name":12794,"surname":12795,"affiliation":63,"orcid":63},"Arnab","Bhattacharya","Automating legal document drafting can improve efficiency and reduce the burden of manual legal work. Yet, the structured generation of private legal documents remains underexplored, particularly in the Indian context, due to the scarcity of public datasets and the complexity of adapting models for long-form legal drafting. To address this gap, we introduce VidhikDastaavej, a large-scale, anonymized dataset of private legal documents curated in collaboration with an Indian law firm. 
Covering 133 diverse categories, this dataset is the first resource of its kind and provides a foundation for research in structured legal text generation and Legal AI more broadly. We further propose a Model-Agnostic Wrapper (MAW), a two-stage generation framework that first plans the section structure of a legal draft and then generates each section with retrieval-based prompts. MAW is independent of any specific LLM, making it adaptable across both open- and closed-source models. Comprehensive evaluation, including lexical, semantic, LLM-based, and expert-driven assessments with inter-annotator agreement, shows that the wrapper substantially improves factual accuracy, coherence, and completeness compared to fine-tuned baselines. This work establishes both a new benchmark dataset and a generalizable generation framework, paving the way for future research in AI-assisted legal drafting.",{"paper_id":12798,"title":12799,"year":7,"month":188,"day":63,"doi":12800,"resource_url":12801,"first_page":12802,"last_page":12803,"pdf_url":12804,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12805,"paper_type":860,"authors":12806,"abstract":12811},"lrec2026-main-531","PolyglotQL: A Pipeline for Multilingual Text-to-SPARQL Dataset Generation","10.63317\u002F5ow3k3fbz296","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-531","6674","6684","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.531.pdf","perez-etal-2026-polyglotql",[12807,12808,12810],{"paper_id":12798,"author_seq":247,"given_name":11494,"surname":1180,"affiliation":63,"orcid":63},{"paper_id":12798,"author_seq":232,"given_name":5741,"surname":12809,"affiliation":63,"orcid":63},"Barth",{"paper_id":12798,"author_seq":218,"given_name":3009,"surname":3010,"affiliation":63,"orcid":63},"We present PolyglotQL, an open-source ETL (Extract, Transform, Load) pipeline for systematically creating multilingual text-to-SPARQL datasets, along with an 
accompanying framework for evaluating text-to-SPARQL generation models. PolyglotQL provides an extensible and modular architecture that aggregates, normalizes, and augments heterogeneous question–SPARQL pairs from established text-to-SPARQL datasets. With this pipeline, we automatically construct a bilingual English–German dataset featuring contextualized entity and relationship mappings as well as automatically translated and aligned question pairs. We also conduct an empirical evaluation using two multilingual open large language models under two distinct contextualization settings. The results show consistent performance improvements when explicit grounding information is provided, highlighting the benefits of structured context in multilingual semantic parsing.",{"paper_id":12813,"title":12814,"year":7,"month":188,"day":63,"doi":12815,"resource_url":12816,"first_page":12817,"last_page":12818,"pdf_url":12819,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12820,"paper_type":860,"authors":12821,"abstract":12842},"lrec2026-main-532","Building and Annotating a Large Comparable Corpus for Studying Semantic Quantification - Chinese, French, Japanese, 
Korean","10.63317\u002F3rn8zokrdunu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-532","6685","6694","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.532.pdf","blin-etal-2026-building",[12822,12825,12827,12830,12832,12834,12837,12839,12841],{"paper_id":12813,"author_seq":247,"given_name":12823,"surname":12824,"affiliation":63,"orcid":63},"raoul","blin",{"paper_id":12813,"author_seq":232,"given_name":12826,"surname":8691,"affiliation":63,"orcid":63},"Jinnam",{"paper_id":12813,"author_seq":218,"given_name":12828,"surname":12829,"affiliation":63,"orcid":63},"WU","qishen",{"paper_id":12813,"author_seq":203,"given_name":12831,"surname":1519,"affiliation":63,"orcid":63},"Yuxin",{"paper_id":12813,"author_seq":188,"given_name":12833,"surname":8705,"affiliation":63,"orcid":63},"Soonhee",{"paper_id":12813,"author_seq":172,"given_name":12835,"surname":12836,"affiliation":63,"orcid":63},"Takahiro","Morita",{"paper_id":12813,"author_seq":155,"given_name":869,"surname":12838,"affiliation":63,"orcid":63},"Delaporte",{"paper_id":12813,"author_seq":138,"given_name":12840,"surname":3676,"affiliation":63,"orcid":63},"Ilaine",{"paper_id":12813,"author_seq":121,"given_name":864,"surname":3916,"affiliation":63,"orcid":63},"Quantifiers and noun quantification are well-studied topics in linguistics, but, to the best of our knowledge, there are still no dedicated multilingual resources for the study of quantification. To address this gap, we compiled a large multilingual comparable corpus (Chinese, French, Japanese, Korean) and propose to enrich it with both syntactic and “quantificational annotation” (semantic information relevant to the study of quantification). 
In this paper, we present both the corpus and the annotation project, and report on our initial attempt at quantificational annotation, the challenges encountered, and the linguistic observations drawn from it.",{"paper_id":12844,"title":12845,"year":7,"month":188,"day":63,"doi":12846,"resource_url":12847,"first_page":12848,"last_page":12849,"pdf_url":12850,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12851,"paper_type":860,"authors":12852,"abstract":12863},"lrec2026-main-533","Towards the Generation and Application of Dynamic Web-Based Visualization of UIMA-based Annotations for Big-Data Corpora with the Help of Unified Dynamic Annotation Visualizer","10.63317\u002F5ce2aaity4yz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-533","6695","6705","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.533.pdf","dahmann-etal-2026-generation",[12853,12856,12858,12860,12862],{"paper_id":12844,"author_seq":247,"given_name":12854,"surname":12855,"affiliation":63,"orcid":63},"Thiemo","Dahmann",{"paper_id":12844,"author_seq":232,"given_name":1296,"surname":12857,"affiliation":63,"orcid":63},"Schneider",{"paper_id":12844,"author_seq":218,"given_name":12859,"surname":3402,"affiliation":63,"orcid":63},"Philipp",{"paper_id":12844,"author_seq":203,"given_name":2709,"surname":12861,"affiliation":63,"orcid":63},"Abrami",{"paper_id":12844,"author_seq":188,"given_name":869,"surname":2349,"affiliation":63,"orcid":63},"The automatic and manual annotation of unstructured corpora is a routine task in many scientific fields and is supported by a variety of existing software solutions. Despite this variety, few solutions currently support annotation visualization, especially for dynamic generation and interaction. To bridge this gap and visualize annotated corpora based on user-, project-, or corpus-specific aspects, we developed Unified Dynamic Annotation Visualizer (UDAV). 
UDAV is a web-based solution that implements features not supported by comparable tools, enabling a customizable and extensible toolbox for interacting with annotations and allowing integration into existing big-data frameworks. We exemplify UDAV through a range of visualizations and also provide an evaluation of corpus import and processing performance.",{"paper_id":12865,"title":12866,"year":7,"month":188,"day":63,"doi":12867,"resource_url":12868,"first_page":12869,"last_page":12870,"pdf_url":12871,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":12872,"paper_type":860,"authors":12873,"abstract":13226},"lrec2026-main-534","The MultiplEYE Text Corpus: Towards a Diverse and Ever-Expanding Multilingual Text Corpus","10.63317\u002F42gkpf6a6x2x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-534","6706","6721","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.534.pdf","kasper-etal-2026-multipleye",[12874,12877,12879,12882,12884,12887,12890,12893,12896,12898,12901,12904,12907,12910,12913,12916,12918,12921,12924,12927,12929,12931,12934,12937,12940,12942,12944,12947,12950,12953,12957,12960,12964,12968,12970,12972,12975,12979,12983,12987,12990,12993,12997,13001,13004,13008,13012,13015,13019,13022,13025,13029,13033,13037,13040,13044,13048,13052,13056,13060,13064,13067,13070,13073,13076,13080,13082,13085,13089,13093,13097,13101,13105,13109,13113,13117,13120,13123,13127,13131,13134,13137,13141,13145,13148,13151,13155,13159,13162,13166,13170,13174,13177,13180,13184,13188,13192,13195,13199,13203,13207,13211,13214,13218,13220,13223],{"paper_id":12865,"author_seq":247,"given_name":12875,"surname":12876,"affiliation":63,"orcid":63},"Ramunė","Kasperė",{"paper_id":12865,"author_seq":232,"given_name":2742,"surname":12878,"affiliation":63,"orcid":63},"Bondar",{"paper_id":12865,"author_seq":218,"given_name":12880,"surname":12881,"affiliation":63,"orcid":63},"Sergiu","Nisioi",{"paper_id":12865,"author_seq"
:203,"given_name":3414,"surname":12883,"affiliation":63,"orcid":63},"Stegenwallner-Schütz",{"paper_id":12865,"author_seq":188,"given_name":12885,"surname":12886,"affiliation":63,"orcid":63},"Hanne B. Søndergaard","Knudsen",{"paper_id":12865,"author_seq":172,"given_name":12888,"surname":12889,"affiliation":63,"orcid":63},"Ana","Matić",{"paper_id":12865,"author_seq":155,"given_name":12891,"surname":12892,"affiliation":63,"orcid":63},"Eva Pavlinušić","Vilus",{"paper_id":12865,"author_seq":138,"given_name":12894,"surname":12895,"affiliation":63,"orcid":63},"Dorota","Klimek-Jankowska",{"paper_id":12865,"author_seq":121,"given_name":12436,"surname":12897,"affiliation":63,"orcid":63},"Tschirner",{"paper_id":12865,"author_seq":104,"given_name":12899,"surname":12900,"affiliation":63,"orcid":63},"Not Battesta","Soliva",{"paper_id":12865,"author_seq":87,"given_name":12902,"surname":12903,"affiliation":63,"orcid":63},"Deborah N.","Jakobi",{"paper_id":12865,"author_seq":73,"given_name":12905,"surname":12906,"affiliation":63,"orcid":63},"Cui","Ding",{"paper_id":12865,"author_seq":55,"given_name":12908,"surname":12909,"affiliation":63,"orcid":63},"Dima Abu","Romi",{"paper_id":12865,"author_seq":38,"given_name":12911,"surname":12912,"affiliation":63,"orcid":63},"Cengiz","Acarturk",{"paper_id":12865,"author_seq":17,"given_name":12914,"surname":12915,"affiliation":63,"orcid":63},"Matilda","Agdler",{"paper_id":12865,"author_seq":2971,"given_name":12917,"surname":6297,"affiliation":63,"orcid":63},"Anton Marius",{"paper_id":12865,"author_seq":2974,"given_name":12919,"surname":12920,"affiliation":63,"orcid":63},"Mohd Faizan","Ansari",{"paper_id":12865,"author_seq":857,"given_name":12922,"surname":12923,"affiliation":63,"orcid":63},"Annalisa","Arcidiacono",{"paper_id":12865,"author_seq":877,"given_name":12925,"surname":12926,"affiliation":63,"orcid":63},"Elizabete Ausma 
Velta","Barisa",{"paper_id":12865,"author_seq":2984,"given_name":12888,"surname":12928,"affiliation":63,"orcid":63},"Bautista",{"paper_id":12865,"author_seq":2988,"given_name":12766,"surname":12930,"affiliation":63,"orcid":63},"Beinborn",{"paper_id":12865,"author_seq":2992,"given_name":12932,"surname":12933,"affiliation":63,"orcid":63},"Yevgeni","Berzak",{"paper_id":12865,"author_seq":2996,"given_name":12935,"surname":12936,"affiliation":63,"orcid":63},"Nedeljka","Bjelanović",{"paper_id":12865,"author_seq":3000,"given_name":12938,"surname":12939,"affiliation":63,"orcid":63},"Anna Isabelle","Bothmann",{"paper_id":12865,"author_seq":3004,"given_name":1380,"surname":12941,"affiliation":63,"orcid":63},"Brasser",{"paper_id":12865,"author_seq":3008,"given_name":12561,"surname":12943,"affiliation":63,"orcid":63},"Cacioli",{"paper_id":12865,"author_seq":3478,"given_name":12945,"surname":12946,"affiliation":63,"orcid":63},"Anila","Çepani",{"paper_id":12865,"author_seq":3482,"given_name":12948,"surname":12949,"affiliation":63,"orcid":63},"Ilze","Ceple",{"paper_id":12865,"author_seq":3486,"given_name":12951,"surname":12952,"affiliation":63,"orcid":63},"Adelina","Cerpja",{"paper_id":12865,"author_seq":12954,"given_name":12955,"surname":12956,"affiliation":63,"orcid":63},"30","Dalí","Chirino",{"paper_id":12865,"author_seq":12958,"given_name":1380,"surname":12959,"affiliation":63,"orcid":63},"31","Chromý",{"paper_id":12865,"author_seq":12961,"given_name":12962,"surname":12963,"affiliation":63,"orcid":63},"32","Alessandro Corona","Mendozza",{"paper_id":12865,"author_seq":12965,"given_name":12966,"surname":12967,"affiliation":63,"orcid":63},"33","Iria","de-Dios-Flores",{"paper_id":12865,"author_seq":878,"given_name":12969,"surname":3057,"affiliation":63,"orcid":63},"Nazik 
Dinçtopal",{"paper_id":12865,"author_seq":900,"given_name":12888,"surname":12971,"affiliation":63,"orcid":63},"Došen",{"paper_id":12865,"author_seq":12973,"given_name":1628,"surname":12974,"affiliation":63,"orcid":63},"36","Elersič",{"paper_id":12865,"author_seq":12976,"given_name":12977,"surname":12978,"affiliation":63,"orcid":63},"37","Inmaculada","Fajardo",{"paper_id":12865,"author_seq":12980,"given_name":12981,"surname":12982,"affiliation":63,"orcid":63},"38","Zigmunds","Freibergs",{"paper_id":12865,"author_seq":12984,"given_name":12985,"surname":12986,"affiliation":63,"orcid":63},"39","Angelina","Ganebnaya",{"paper_id":12865,"author_seq":12988,"given_name":12989,"surname":8151,"affiliation":63,"orcid":63},"40","Shan",{"paper_id":12865,"author_seq":12991,"given_name":12992,"surname":4418,"affiliation":63,"orcid":63},"41","Jéssica",{"paper_id":12865,"author_seq":12994,"given_name":12995,"surname":12996,"affiliation":63,"orcid":63},"42","Annjo Klungervik","Greenall",{"paper_id":12865,"author_seq":12998,"given_name":12999,"surname":13000,"affiliation":63,"orcid":63},"43","Alba","Haveriku",{"paper_id":12865,"author_seq":13002,"given_name":13003,"surname":4962,"affiliation":63,"orcid":63},"44","Miao",{"paper_id":12865,"author_seq":13005,"given_name":13006,"surname":13007,"affiliation":63,"orcid":63},"45","Anamaria","Hodivoianu",{"paper_id":12865,"author_seq":13009,"given_name":13010,"surname":13011,"affiliation":63,"orcid":63},"46","Yu-Yin","Hsu",{"paper_id":12865,"author_seq":13013,"given_name":3454,"surname":13014,"affiliation":63,"orcid":63},"47","Isaksen",{"paper_id":12865,"author_seq":13016,"given_name":13017,"surname":13018,"affiliation":63,"orcid":63},"48","Andreia","Janeiro",{"paper_id":12865,"author_seq":901,"given_name":13020,"surname":13021,"affiliation":63,"orcid":63},"Kristine Jensen 
de","López",{"paper_id":12865,"author_seq":929,"given_name":13023,"surname":13024,"affiliation":63,"orcid":63},"Aleksandar","Jevremovic",{"paper_id":12865,"author_seq":13026,"given_name":13027,"surname":13028,"affiliation":63,"orcid":63},"51","Vojislav","Jovanovic",{"paper_id":12865,"author_seq":13030,"given_name":13031,"surname":13032,"affiliation":63,"orcid":63},"52","Hanna","Kędzierska",{"paper_id":12865,"author_seq":13034,"given_name":13035,"surname":13036,"affiliation":63,"orcid":63},"53","Nik","Kharlamov",{"paper_id":12865,"author_seq":13038,"given_name":2548,"surname":13039,"affiliation":63,"orcid":63},"54","Kosutar",{"paper_id":12865,"author_seq":13041,"given_name":13042,"surname":13043,"affiliation":63,"orcid":63},"55","Nelda","Kote",{"paper_id":12865,"author_seq":13045,"given_name":13046,"surname":13047,"affiliation":63,"orcid":63},"56","Vanja","Kovic",{"paper_id":12865,"author_seq":13049,"given_name":13050,"surname":13051,"affiliation":63,"orcid":63},"57","Izabela","Krejtz",{"paper_id":12865,"author_seq":13053,"given_name":13054,"surname":13055,"affiliation":63,"orcid":63},"58","Thyra","Krosness",{"paper_id":12865,"author_seq":13057,"given_name":13058,"surname":13059,"affiliation":63,"orcid":63},"59","Oleksandra","Kuvshynova",{"paper_id":12865,"author_seq":13061,"given_name":13062,"surname":13063,"affiliation":63,"orcid":63},"60","Eilam","Lavy",{"paper_id":12865,"author_seq":13065,"given_name":10518,"surname":13066,"affiliation":63,"orcid":63},"61","Lion",{"paper_id":12865,"author_seq":13068,"given_name":3411,"surname":13069,"affiliation":63,"orcid":63},"62","Łockiewicz",{"paper_id":12865,"author_seq":930,"given_name":13071,"surname":13072,"affiliation":63,"orcid":63},"Kaidi","Lõo",{"paper_id":12865,"author_seq":949,"given_name":13074,"surname":13075,"affiliation":63,"orcid":63},"Paula","Luegi",{"paper_id":12865,"author_seq":13077,"given_name":13078,"surname":13079,"affiliation":63,"orcid":63},"65","Mircea 
Mihai","Marin",{"paper_id":12865,"author_seq":13081,"given_name":10521,"surname":3843,"affiliation":63,"orcid":63},"66",{"paper_id":12865,"author_seq":13083,"given_name":7895,"surname":13084,"affiliation":63,"orcid":63},"67","Matvieieva",{"paper_id":12865,"author_seq":13086,"given_name":13087,"surname":13088,"affiliation":63,"orcid":63},"68","Diane C.","Mézière",{"paper_id":12865,"author_seq":13090,"given_name":13091,"surname":13092,"affiliation":63,"orcid":63},"69","Xavier","Mínguez-López",{"paper_id":12865,"author_seq":13094,"given_name":13095,"surname":13096,"affiliation":63,"orcid":63},"70","Valeriia","Modina",{"paper_id":12865,"author_seq":13098,"given_name":13099,"surname":13100,"affiliation":63,"orcid":63},"71","Jurgita","Motiejūnienė",{"paper_id":12865,"author_seq":13102,"given_name":13103,"surname":13104,"affiliation":63,"orcid":63},"72","Marie-Luise","Müller",{"paper_id":12865,"author_seq":13106,"given_name":13107,"surname":13108,"affiliation":63,"orcid":63},"73","Tolgonai Nasipbek","kyzy",{"paper_id":12865,"author_seq":13110,"given_name":13111,"surname":13112,"affiliation":63,"orcid":63},"74","Jamal Abdul","Nasir",{"paper_id":12865,"author_seq":13114,"given_name":13115,"surname":13116,"affiliation":63,"orcid":63},"75","Johanne S. 
K.","Nedergård",{"paper_id":12865,"author_seq":950,"given_name":13118,"surname":13119,"affiliation":63,"orcid":63},"Ayşegül","Özkan",{"paper_id":12865,"author_seq":972,"given_name":13121,"surname":13122,"affiliation":63,"orcid":63},"Patrizia","Paggio",{"paper_id":12865,"author_seq":13124,"given_name":13125,"surname":13126,"affiliation":63,"orcid":63},"78","Marijan","Palmović",{"paper_id":12865,"author_seq":13128,"given_name":13129,"surname":13130,"affiliation":63,"orcid":63},"79","Maria Christina","Panagiotopoulou",{"paper_id":12865,"author_seq":13132,"given_name":5238,"surname":13133,"affiliation":63,"orcid":63},"80","Parola",{"paper_id":12865,"author_seq":13135,"given_name":4074,"surname":13136,"affiliation":63,"orcid":63},"81","Pérez",{"paper_id":12865,"author_seq":13138,"given_name":13139,"surname":13140,"affiliation":63,"orcid":63},"82","Klaudia","Petersen",{"paper_id":12865,"author_seq":13142,"given_name":13143,"surname":13144,"affiliation":63,"orcid":63},"83","Anja","Podlesek",{"paper_id":12865,"author_seq":13146,"given_name":2161,"surname":13147,"affiliation":63,"orcid":63},"84","Pospíšilová",{"paper_id":12865,"author_seq":13149,"given_name":3411,"surname":13150,"affiliation":63,"orcid":63},"85","Praulina",{"paper_id":12865,"author_seq":13152,"given_name":13153,"surname":13154,"affiliation":63,"orcid":63},"86","Mikuláš","Preininger",{"paper_id":12865,"author_seq":13156,"given_name":13157,"surname":13158,"affiliation":63,"orcid":63},"87","Loredana","Pungă",{"paper_id":12865,"author_seq":13160,"given_name":5321,"surname":13161,"affiliation":63,"orcid":63},"88","Rossini",{"paper_id":12865,"author_seq":13163,"given_name":13164,"surname":13165,"affiliation":63,"orcid":63},"89","Špela","Rot",{"paper_id":12865,"author_seq":13167,"given_name":13168,"surname":13169,"affiliation":63,"orcid":63},"90","Habib Sani","Yahaya",{"paper_id":12865,"author_seq":13171,"given_name":13172,"surname":13173,"affiliation":63,"orcid":63},"91","Irina 
A.","Sekerina",{"paper_id":12865,"author_seq":973,"given_name":13175,"surname":13176,"affiliation":63,"orcid":63},"Anne Gabija","Skadina",{"paper_id":12865,"author_seq":989,"given_name":13178,"surname":13179,"affiliation":63,"orcid":63},"Jordi","Solé-Casals",{"paper_id":12865,"author_seq":13181,"given_name":13182,"surname":13183,"affiliation":63,"orcid":63},"94","Lonneke van der","Plas",{"paper_id":12865,"author_seq":13185,"given_name":13186,"surname":13187,"affiliation":63,"orcid":63},"95","Saara M.","Varjopuro",{"paper_id":12865,"author_seq":13189,"given_name":13190,"surname":13191,"affiliation":63,"orcid":63},"96","Spyridoula","Varlokosta",{"paper_id":12865,"author_seq":13193,"given_name":9328,"surname":13194,"affiliation":63,"orcid":63},"97","Veríssimo",{"paper_id":12865,"author_seq":13196,"given_name":13197,"surname":13198,"affiliation":63,"orcid":63},"98","Oskari Juhapekka","Virtanen",{"paper_id":12865,"author_seq":13200,"given_name":13201,"surname":13202,"affiliation":63,"orcid":63},"99","Nemanja","Vračar",{"paper_id":12865,"author_seq":13204,"given_name":13205,"surname":13206,"affiliation":63,"orcid":63},"100","Mila","Vulchanova",{"paper_id":12865,"author_seq":13208,"given_name":13209,"surname":13210,"affiliation":63,"orcid":63},"101","Ahmad Mustapha","Wali",{"paper_id":12865,"author_seq":13212,"given_name":13213,"surname":7319,"affiliation":63,"orcid":63},"102","Peizheng",{"paper_id":12865,"author_seq":13215,"given_name":13216,"surname":13217,"affiliation":63,"orcid":63},"103","Nilgün","Yücel",{"paper_id":12865,"author_seq":13219,"given_name":2813,"surname":2510,"affiliation":63,"orcid":63},"104",{"paper_id":12865,"author_seq":13221,"given_name":2131,"surname":13222,"affiliation":63,"orcid":63},"105","Hollenstein",{"paper_id":12865,"author_seq":990,"given_name":13224,"surname":13225,"affiliation":63,"orcid":63},"Lena","Jäger","We present the MultiplEYE Text Corpus, a large-scale, document-level, multi-parallel resource designed to advance cross-linguistic 
research on reading and language processing. The corpus provides paragraph-level alignment for texts in 39 languages spanning seven language families and seven scripts. Unlike many existing multilingual corpora, a substantial number of documents were originally written in languages other than English, reducing English-centric bias and supporting more typologically diverse investigations. The texts are carefully selected to balance linguistic richness with experimental feasibility, particularly for eye-tracking-while-reading studies. Developed within a multi-lab initiative, the MultiplEYE Text Corpus follows unified translation, alignment, and experimental design guidelines to ensure cross-linguistic comparability. Its inclusion of texts varying in type and difficulty enables research on discourselevel processing, genre effects, and individual differences across a wide range of languages. The text corpus and accompanying metadata provide a robust foundation for multilingual psycholinguistic and computational modeling research. 
Data and materials are publicly available at https:\u002F\u002Fdoi.org\u002F10.23668\u002Fpsycharchives.21750.",{"paper_id":13228,"title":13229,"year":7,"month":188,"day":63,"doi":13230,"resource_url":13231,"first_page":13232,"last_page":13233,"pdf_url":13234,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13235,"paper_type":860,"authors":13236,"abstract":13244},"lrec2026-main-535","Sanskrit Travelogue: A Large-Scale Unified and Annotated Corpus of Sanskrit Texts","10.63317\u002F5ibfaw5j9br6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-535","6722","6730","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.535.pdf","luca-etal-2026-sanskrit",[13237,13239,13242],{"paper_id":13228,"author_seq":247,"given_name":13238,"surname":1107,"affiliation":63,"orcid":63},"Giacomo De",{"paper_id":13228,"author_seq":232,"given_name":13240,"surname":13241,"affiliation":63,"orcid":63},"Danilo","Croce",{"paper_id":13228,"author_seq":218,"given_name":9117,"surname":13243,"affiliation":63,"orcid":63},"Basili","We present Sanskrit Travelogue, to our knowledge the largest open, unified and richly annotated Sanskrit corpus. Aggregating eight digital libraries, it comprises 12,394 texts, 73.1M tokens and 9M segments after de-duplication. A reproducible pipeline standardizes transliteration to IAST, reconciles heterogeneous metadata, preserves structural semantics (verse markers, chapter hierarchies, textual apparatus) and adds automatic annotations. We provide corpus-scale morphosyntactic annotation combining two systems: the BYT-5 Sanskrit model for compound and sandhi splitting, and the process-sanskrit library for inflection removal and morphological tagging through a hybrid deterministic-statistical cascade. For each segment we materialize synchronized representations: cleaned, analyzed (sandhi\u002Fcompound split), stemmed, diacritic-normalized and morphologically tagged. 
These representations are indexed jointly for retrieval. Both approaches achieve high accuracy (84.61% sentence-level exact matches for BYT-5 segmentation, 92.37% correct root extraction for compounds, 95.94% on the Yoga Sūtra). Manual evaluation on the Yoga Sūtra showed 98% correct root extraction when combining both methods, outperforming individual approaches. These annotations enable searching across orthographic sandhi and within compounds, robust lemma-level retrieval despite rich inflectional variation, and provide training material for segmentation and lemmatization while maintaining ambiguity for downstream modeling. We release the annotated corpus as TSV shards, code for corpus acquisition, processing and annotation, a query normalizer, all under a Creative Commons non-commercial license.",{"paper_id":13246,"title":13247,"year":7,"month":188,"day":63,"doi":13248,"resource_url":13249,"first_page":13250,"last_page":13251,"pdf_url":13252,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13253,"paper_type":860,"authors":13254,"abstract":13257},"lrec2026-main-536","The Foggia Occupator Corpus: Digitisation, Annotation, and Computational Analysis of an Occupation‑Era Newspaper (1945-1946)","10.63317\u002F2xbscmzsjers","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-536","6731","6739","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.536.pdf","ciletti-2026-foggia",[13255],{"paper_id":13246,"author_seq":247,"given_name":2897,"surname":13256,"affiliation":63,"orcid":63},"Ciletti","Historical newspapers are crucial sources yet often remain undigitised or lack machine-readable text. We present the Foggia Occupator corpus, a linguistically enriched, openly licensed resource built from twenty-two issues (Dec 1945–Aug 1946) of a weekly newspaper produced by U.S. personnel in occupied Foggia, Italy. 
High-resolution scans were processed via OCR with LLM‑assisted correction (GPT‑4o) and full human verification, then segmented into 874 articles ( 216k tokens). We annotate topics, named entities and typed relations via a semi‑automatic pipeline with manual reconciliation, and perform argument mining on civics‑ and conflict‑related content, yielding 1,735 arguments. The entity–relation layer supports network analyses that reveal sparse, modular structures linking military units, civic bodies, and social life. We release TEI‑XML with entity spans, JSON article files with metadata, CSVs of entities\u002Frelations with temporal counts, and an arguments JSON, all under a Creative Commons 4.0 licence. Beyond documenting an in‑between moment of reconstruction, the resource enables benchmarking for OCR‑robust NER\u002FRE and studies of framing, stance, and community structure in post‑war local media.",{"paper_id":13259,"title":13260,"year":7,"month":188,"day":63,"doi":13261,"resource_url":13262,"first_page":13263,"last_page":13264,"pdf_url":13265,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13266,"paper_type":860,"authors":13267,"abstract":13285},"lrec2026-main-537","SiDiaC-v.2.0: Sinhala Diachronic Corpus Version 2.0","10.63317\u002F2ybff4o55vrq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-537","6740","6763","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.537.pdf","jayatilleke-etal-2026-sidiac",[13268,13271,13273,13276,13279,13282],{"paper_id":13259,"author_seq":247,"given_name":13269,"surname":13270,"affiliation":63,"orcid":63},"Nevidu","Jayatilleke",{"paper_id":13259,"author_seq":232,"given_name":13272,"surname":4040,"affiliation":63,"orcid":63},"Nisansa de",{"paper_id":13259,"author_seq":218,"given_name":13274,"surname":13275,"affiliation":63,"orcid":63},"Uthpala 
Nimanthi","Sooriya-Arachchi",{"paper_id":13259,"author_seq":203,"given_name":13277,"surname":13278,"affiliation":63,"orcid":63},"Gagani Kasundhi","Kulathilaka",{"paper_id":13259,"author_seq":188,"given_name":13280,"surname":13281,"affiliation":63,"orcid":63},"Azra","Safrullah",{"paper_id":13259,"author_seq":172,"given_name":13283,"surname":13284,"affiliation":63,"orcid":63},"Johan Nevin","Sofalas","SiDiaC-v.2.0 is the largest comprehensive Sinhala Diachronic Corpus to date, covering a period from 1800 CE to 1955 CE in terms of publication dates, and a historical span from the 5th to the 20th century CE in terms of written dates. The corpus consists of 244k words across 185 literary works that underwent thorough filtering, preprocessing, and copyright compliance checks, followed by extensive post-processing. Additionally, a subset of 59 documents totalling 70k words was annotated based on their written dates. Texts from the National Library of Sri Lanka were selected from the SiDiaC-v.1.0 non-filtered list, which was digitised using Google Document AI OCR. This was followed by post-processing to correct formatting issues, address code-mixing, include special tokens, and fix malformed tokens. The construction of SiDiaC-v.2.0 was informed by practices from other corpora, such as FarPaHC, SiDiaC-v.1.0, and CCOHA. This was particularly relevant for syntactic annotation and text normalisation strategies, given the shared characteristics of low-resource language status between Faroese and the similar cleaning strategies utilised in CCOHA. This corpus is categorised into two layers based on genres: primary and secondary. The primary categorisation is binary, assigning each book to either Non-Fiction or Fiction. The secondary categorisation is more detailed, grouping texts under specific genres such as Religious, History, Poetry, Language, and Medical. 
Despite facing challenges due to limited resources, SiDiaC-v.2.0 serves as a comprehensive resource for Sinhala NLP, building upon the work previously done in SiDiaC-v.1.0.",{"paper_id":13287,"title":13288,"year":7,"month":188,"day":63,"doi":13289,"resource_url":13290,"first_page":13291,"last_page":13292,"pdf_url":13293,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13294,"paper_type":860,"authors":13295,"abstract":13301},"lrec2026-main-538","ShAnEL-2: A Multilingual Benchmarking Dataset for Short-Answer Language Learning Exercises","10.63317\u002F3cvfqh22muoo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-538","6764","6771","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.538.pdf","degraeuwe-etal-2026-shanel",[13296,13299],{"paper_id":13287,"author_seq":247,"given_name":13297,"surname":13298,"affiliation":63,"orcid":63},"Jasper","Degraeuwe",{"paper_id":13287,"author_seq":232,"given_name":1316,"surname":13300,"affiliation":63,"orcid":63},"Moerman","Before using GenAI models as EdTech tools, their pedagogical suitability should be corroborated. In this paper, we present ShAnEL-2, a novel multilingual dataset comprising 1,185 student responses to short-answer language learning exercises corrected by teachers. We use ShAnEL-2 to establish an initial benchmark of (1) \"off-the-shelf\" GenAI models and (2) retrieval-augmented generation (RAG) techniques for the automated correction of this exercise type. With an overall accuracy of 90% and recall of 95%, few-shot RAG (which adds previously corrected responses to the prompt) outperforms the off-the-shelf baseline and textbook RAG setup (which adds coursebook materials) by up to 7 (accuracy) and 5 (recall) percentage points. 
These results confirm that LLMs learn better from examples than from analysing context and highlight GenAI’s particular potential as a correction assistant for teachers.",{"paper_id":13303,"title":13304,"year":7,"month":188,"day":63,"doi":13305,"resource_url":13306,"first_page":13307,"last_page":13308,"pdf_url":13309,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13310,"paper_type":860,"authors":13311,"abstract":13330},"lrec2026-main-539","The Swedish Parliamentary Motions Corpus 1867-2024","10.63317\u002F26e6jkauk53v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-539","6772","6782","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.539.pdf","borges-etal-2026-swedish",[13312,13314,13317,13320,13323,13325,13327],{"paper_id":13303,"author_seq":247,"given_name":3172,"surname":13313,"affiliation":63,"orcid":63},"Borges",{"paper_id":13303,"author_seq":232,"given_name":13315,"surname":13316,"affiliation":63,"orcid":63},"Fredrik Mohammadi","Norén",{"paper_id":13303,"author_seq":218,"given_name":13318,"surname":13319,"affiliation":63,"orcid":63},"Lotta Åberg","Brorsson",{"paper_id":13303,"author_seq":203,"given_name":13321,"surname":13322,"affiliation":63,"orcid":63},"Väinö","Yrjänäinen",{"paper_id":13303,"author_seq":188,"given_name":13031,"surname":13324,"affiliation":63,"orcid":63},"Bäck",{"paper_id":13303,"author_seq":172,"given_name":3172,"surname":13326,"affiliation":63,"orcid":63},"Klemmensen",{"paper_id":13303,"author_seq":155,"given_name":13328,"surname":13329,"affiliation":63,"orcid":63},"Måns","Magnusson","Motions submitted to the Swedish Parliament are important data for social science and humanities researchers. We introduce a new research corpus, the Swedish Parliamentary Motions Corpus, which is larger and more developed than previously available research corpora for the Swedish motions. 
The corpus contains annotated and structured parliamentary motions over more than 150 years, through the bicameral parliament (1867–1970) and Sweden’s current unicameral parliament (1971–). Along with the corpus, we describe procedures to measure and ensure transparency around issues related to the data quality of the corpus. In addition, we link motions’ authors to a rich metadata set, ensuring the corpus’s utility in various research applications.",{"paper_id":13332,"title":13333,"year":7,"month":188,"day":63,"doi":13334,"resource_url":13335,"first_page":13336,"last_page":13337,"pdf_url":13338,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13339,"paper_type":860,"authors":13340,"abstract":13347},"lrec2026-main-540","The Swedish Benchmark of Linguistic Minimal Pairs","10.63317\u002F33cfy28hybv5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-540","6783","6794","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.540.pdf","sjons-etal-2026-swedish",[13341,13342,13344],{"paper_id":13332,"author_seq":247,"given_name":1915,"surname":1916,"affiliation":63,"orcid":63},{"paper_id":13332,"author_seq":232,"given_name":7475,"surname":13343,"affiliation":63,"orcid":63},"Heinat",{"paper_id":13332,"author_seq":218,"given_name":13345,"surname":13346,"affiliation":63,"orcid":63},"Murathan","Kurfali","We introduce the Swedish Benchmark of Linguistic Minimal Pairs, a dataset for evaluating syntactic performance in language models. It includes 2,500 minimal pairs organized into 25 syntactic phenomena, with 100 pairs per phenomenon. Each pair contrasts a well-formed and an ill-formed sentence that differ minimally. For each phenomenon, we manually constructed ten pairs from scratch. We semi-automatically generated the remaining 90 pairs and manually adjusted them. A random sample was assessed by 40 participants, who selected the well-formed sentence in 98.05% of cases. 
We evaluate eleven state-of-the-art models. Results generally show that models handle local agreement well but struggle with certain long-distance dependencies and word order phenomena. Model size seems to matter less than the training domain. Prompt-based evaluation generally lowers performance. We show that model performance is stable across handcrafted and generated subsets and across sample sizes, suggesting that 100 pairs per phenomenon suffice for reliable evaluation. Future work will expand the number of phenomena.",{"paper_id":13349,"title":13350,"year":7,"month":188,"day":63,"doi":13351,"resource_url":13352,"first_page":13353,"last_page":13354,"pdf_url":13355,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13356,"paper_type":860,"authors":13357,"abstract":13364},"lrec2026-main-541","Exploring the Transfer of Irony Explanation Generation from English to Dutch","10.63317\u002F2yfyo57bytbv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-541","6795","6807","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.541.pdf","maladry-etal-2026-exploring",[13358,13359,13360,13363],{"paper_id":13349,"author_seq":247,"given_name":10445,"surname":10446,"affiliation":63,"orcid":63},{"paper_id":13349,"author_seq":232,"given_name":2294,"surname":2295,"affiliation":63,"orcid":63},{"paper_id":13349,"author_seq":218,"given_name":13361,"surname":13362,"affiliation":63,"orcid":63},"Cynthia Van","Hee",{"paper_id":13349,"author_seq":203,"given_name":2182,"surname":2183,"affiliation":63,"orcid":63},"Explanation generation has gained increasing attention in the field of NLP because it makes the output of classification models more intuitively understandable for humans. This is particularly relevant for complex semantic tasks such as irony detection, where there may not be any explicit linguistic markers. 
Generative models have shown great potential for irony explanation in earlier work, but most studies have been limited to English. Since this is the highest-resourced language, these capabilities may not be available in languages other than English. To address this gap, this paper analyses the performance of generative models for explanation generation in Dutch, a lower-resourced but closely related language to English. Our work shows that larger proprietary models, like GPT-4, can generate meaningful explanations based on relevant world knowledge, whereas smaller open-source models still struggle to perform this task. Besides quality evaluation, we also analyse the limitations of these models, showing that GPT models struggle most with verbosity and that both open-source and proprietary models exhibit circular reasoning (\"this text is ironic because the person expresses this in an ironic way”). Finally, open-source models struggle in particular for Dutch because they fail to produce the relevant world knowledge that is required to understand the irony. 
All models and data used for the experiments are available at iRONNIE on Hugging Face.",{"paper_id":13366,"title":13367,"year":7,"month":188,"day":63,"doi":13368,"resource_url":13369,"first_page":13370,"last_page":13371,"pdf_url":13372,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":13373,"bibkey":13374,"paper_type":860,"authors":13375,"abstract":13387},"lrec2026-main-542","DIDECO: An Annotated Dataset for Intent Detection in Digital Communications","10.63317\u002F3xsu5rdnw8hi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-542","6808","6822","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.542.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.542_OptionalSupplementaryMaterial.zip","popovic-etal-2026-dideco",[13376,13379,13381,13382,13385],{"paper_id":13366,"author_seq":247,"given_name":13377,"surname":13378,"affiliation":63,"orcid":63},"Senaid","Popovic",{"paper_id":13366,"author_seq":232,"given_name":10717,"surname":13380,"affiliation":63,"orcid":63},"Riquet",{"paper_id":13366,"author_seq":218,"given_name":5346,"surname":3387,"affiliation":63,"orcid":63},{"paper_id":13366,"author_seq":203,"given_name":13383,"surname":13384,"affiliation":63,"orcid":63},"Fabien","Lauer",{"paper_id":13366,"author_seq":188,"given_name":3733,"surname":13386,"affiliation":63,"orcid":63},"Parmentier","This paper presents DIDECO, the first annotated dataset specifically designed for detecting both explicit and implicit intents in digital communications. We address a critical gap in cybersecurity research by developing a comprehensive taxonomy that distinguishes between explicit communicative goals (what is requested) and implicit persuasion mechanisms (how compliance is engineered). Grounded in Speech Act Theory and persuasion psychology principles, our taxonomy encompasses 20 distinct intent categories across explicit and implicit intents. 
We annotated 220 LLM-generated spear-phishing emails using a multi-label protocol with six trained annotators, yielding 2,162 intent annotations that reveal the layered complexity of malicious communications. Our analysis demonstrates that sophisticated attacks employ multiple concurrent intents, combining explicit communicative goals with implicit persuasion strategies. This dataset provides resources for developing intent-aware detection systems capable of identifying sophisticated social engineering attacks through semantic analysis.",{"paper_id":13389,"title":13390,"year":7,"month":188,"day":63,"doi":13391,"resource_url":13392,"first_page":13393,"last_page":13394,"pdf_url":13395,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":13396,"bibkey":13397,"paper_type":860,"authors":13398,"abstract":13405},"lrec2026-main-543","GUMBridge: A Corpus for Varieties of Bridging Anaphora","10.63317\u002F3sf73k63vuww","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-543","6823","6837","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.543.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.543_OptionalSupplementaryMaterial.zip","levine-etal-2026-gumbridge",[13399,13402],{"paper_id":13389,"author_seq":247,"given_name":13400,"surname":13401,"affiliation":63,"orcid":63},"Lauren","Levine",{"paper_id":13389,"author_seq":232,"given_name":13403,"surname":13404,"affiliation":63,"orcid":63},"Amir","Zeldes","Bridging is an anaphoric phenomenon where the referent of an entity in a discourse is dependent on a previous, non-identical entity for interpretation, such as in \"There is a house. The door is red,\" where the door is specifically understood to be the door of the aforementioned house. 
While there are several existing resources in English for bridging anaphora, most are small, provide limited coverage of the phenomenon, and\u002For provide limited genre coverage. In this paper, we introduce GUMBridge, a new resource for bridging, which includes 24 diverse genres of English, providing both broad coverage for the phenomenon, and granular annotations for the multi-subtype categorization of bridging varieties. We also present an evaluation of annotation quality and report on baseline performance using open and closed source contemporary LLMs on three tasks underlying our data, showing that bridging resolution and subtype classification remain difficult NLP tasks in the age of LLMs.",{"paper_id":13407,"title":13408,"year":7,"month":188,"day":63,"doi":13409,"resource_url":13410,"first_page":13411,"last_page":13412,"pdf_url":13413,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13414,"paper_type":860,"authors":13415,"abstract":13428},"lrec2026-main-544","Beyond Transcripts: Iterative Peer-Editing with Audio Unlocks High-Quality Human Summaries of Conversational Speech","10.63317\u002F4d596vd4x2xr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-544","6838","6855","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.544.pdf","chaparala-etal-2026-beyond",[13416,13419,13420,13423,13426,13427],{"paper_id":13407,"author_seq":247,"given_name":13417,"surname":13418,"affiliation":63,"orcid":63},"Kaavya","Chaparala",{"paper_id":13407,"author_seq":232,"given_name":1316,"surname":8314,"affiliation":63,"orcid":63},{"paper_id":13407,"author_seq":218,"given_name":13421,"surname":13422,"affiliation":63,"orcid":63},"Jesus 
Villalba","Lopez",{"paper_id":13407,"author_seq":203,"given_name":13424,"surname":13425,"affiliation":63,"orcid":63},"Laureano","Moro-Velazquez",{"paper_id":13407,"author_seq":188,"given_name":1625,"surname":11159,"affiliation":63,"orcid":63},{"paper_id":13407,"author_seq":172,"given_name":8327,"surname":8328,"affiliation":63,"orcid":63},"There are not enough established benchmarks for the task of speech summarization. Creating new benchmarks demands human annotation, as LLMs could embed systemic errors and bias into datasets. We test ten annotation workflows varying input modality (audio, transcript, or both) and the inclusion of editing (self or peer-editing) to investigate potential quality tradeoffs from using human annotators to summarize audio. We compare human audio-based summaries to human transcript-based summaries to track the impact of the different information modalities on summary quality. We also compare the human outputs against four LLM benchmarks (three text, one audio) to examine whether human-written summaries are less informative than highly fluent automated outputs. We find that audio-based summaries are less informative and more compressed than transcript summaries. However, iterative peer-editing with audio mitigates this difference, enabling audio-based summaries to be as informative as their transcript counterparts and LLM summaries. These findings validate iterative peer-editing among human annotators for the creation of benchmarks informed by both lexical and prosodic information. 
This enables crucial dataset collection even in settings where transcripts are unavailable.",{"paper_id":13430,"title":13431,"year":7,"month":188,"day":63,"doi":13432,"resource_url":13433,"first_page":13434,"last_page":13435,"pdf_url":13436,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13437,"paper_type":860,"authors":13438,"abstract":13447},"lrec2026-main-545","SEEM-CZ: Annotation and Classification of Epistemic Markers in Czech","10.63317\u002F2ewzmb73r3h8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-545","6856","6869","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.545.pdf","tpnkov-etal-2026-seem",[13439,13441,13443,13446],{"paper_id":13430,"author_seq":247,"given_name":1978,"surname":13440,"affiliation":63,"orcid":63},"Štěpánková",{"paper_id":13430,"author_seq":232,"given_name":1631,"surname":13442,"affiliation":63,"orcid":63},"Novák",{"paper_id":13430,"author_seq":218,"given_name":13444,"surname":13445,"affiliation":63,"orcid":63},"Tomáš","Musil",{"paper_id":13430,"author_seq":203,"given_name":3460,"surname":3461,"affiliation":63,"orcid":63},"We present a project focused on linguistic description, annotation and automatic classification of the so-called epistemic markers in Czech. These expressions, such as pravděpodobně ‘probably’, zřejmě ‘apparently’ and určitě ‘certainly’, typically operate within the pragmatic domain of language. We introduce a dataset containing manual annotations of the 40 most frequent epistemic markers in Czech, totalling almost 4,000 uses. This annotation was created using parallel InterCorp data (in Czech and English) and the TEITOK tool. We describe the annotation scheme used, the annotation process and data handling. The dataset forms the core of the emerging lexical database of these expressions (SEEMLex). 
Thanks to the comprehensive manual annotation, the dataset can also serve as a source of further pragmatic information and can be used as a basis for further linguistic research. The proposed annotation scheme can also be used for other languages. To demonstrate the dataset’s utility for automatic classification, we trained XLM-RoBERTa classifiers using 10-fold cross-validation, achieving 72.6% accuracy for type of use classification (6 classes) and 54.2% accuracy for degree of certainty classification (4 classes).",{"paper_id":13449,"title":13450,"year":7,"month":188,"day":63,"doi":13451,"resource_url":13452,"first_page":13453,"last_page":13454,"pdf_url":13455,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":13456,"bibkey":13457,"paper_type":860,"authors":13458,"abstract":13477},"lrec2026-main-546","When Words Don't Mean What They Say: Figurative Understanding in Bengali Idioms","10.63317\u002F546w2cys6m6t","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-546","6870","6879","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.546.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.546_OptionalSupplementaryMaterial.zip","sakhawat-etal-2026-when",[13459,13462,13465,13468,13471,13474],{"paper_id":13449,"author_seq":247,"given_name":13460,"surname":13461,"affiliation":63,"orcid":63},"Adib","Sakhawat",{"paper_id":13449,"author_seq":232,"given_name":13463,"surname":13464,"affiliation":63,"orcid":63},"Shamim Ara","Parveen",{"paper_id":13449,"author_seq":218,"given_name":13466,"surname":13467,"affiliation":63,"orcid":63},"Md Ruhul","Amin",{"paper_id":13449,"author_seq":203,"given_name":13469,"surname":13470,"affiliation":63,"orcid":63},"Tahera","Khatun",{"paper_id":13449,"author_seq":188,"given_name":13472,"surname":13473,"affiliation":63,"orcid":63},"Shamim 
Al","Mahmud",{"paper_id":13449,"author_seq":172,"given_name":13475,"surname":13476,"affiliation":63,"orcid":63},"Md Saiful","Islam","Figurative language understanding remains a significant challenge for Large Language Models (LLMs), especially for low-resource languages. To address this, we introduce the *Bangla Bagdhara* dataset, a large-scale, culturally grounded corpus of 10,361 Bengali idioms. Each idiom is annotated under a comprehensive 19-field schema, established and refined through a deliberative expert consensus process that captures its semantic, syntactic, cultural, and religious dimensions, providing a rich and structured resource for computational linguistics. To establish a robust benchmark for Bangla figurative language understanding, we evaluate 30 state-of-the-art multilingual and instruction-tuned LLMs on the task of inferring figurative meaning. Our results reveal a critical performance gap, with no model surpassing 50% accuracy, in stark contrast to significantly higher human performance (83.4%). This finding underscores the limitations of existing models in cross-linguistic and cultural reasoning. 
By releasing the *Bangla Bagdhara* dataset and benchmark, we provide foundational infrastructure for advancing figurative language understanding and cultural grounding in LLMs for Bengali and other low-resource languages.",{"paper_id":13479,"title":13480,"year":7,"month":188,"day":63,"doi":13481,"resource_url":13482,"first_page":13483,"last_page":13484,"pdf_url":13485,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":13486,"bibkey":13487,"paper_type":860,"authors":13488,"abstract":13495},"lrec2026-main-547","Human vs LLM in Conversational Repair Annotation: A New Resource and Comparative Study","10.63317\u002F32stceuq67kb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-547","6880","6892","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.547.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.547_OptionalSupplementaryMaterial.zip","ngo-etal-2026-human",[13489,13490,13492,13494],{"paper_id":13479,"author_seq":247,"given_name":4152,"surname":7585,"affiliation":63,"orcid":63},{"paper_id":13479,"author_seq":232,"given_name":2331,"surname":13491,"affiliation":63,"orcid":63},"Rollet",{"paper_id":13479,"author_seq":218,"given_name":3860,"surname":13493,"affiliation":63,"orcid":63},"Pelachaud",{"paper_id":13479,"author_seq":203,"given_name":5369,"surname":5370,"affiliation":63,"orcid":63},"Addressing the scarcity of annotated data for Other-Initiated Repair (OIR), when recipients interrupt conversation progressivity to signal trouble, prompting speakers to provide repair, this work introduces OIR annotations for the NOXI corpus, achieving considerable reliability. 
We evaluate whether LLMs can reliably annotate OIR sequences using structured Chain-of-Thought prompting and conduct comparative analysis across two corpora: NOXI (natural dialogue) and CABB-S (Dutch, task-oriented), finding weak alignment between LLMs and human annotations, particularly in recognizing trouble-signaling. Analyzing human-LLM disagreement using the LLM-generated explanations revealed limitations: models rely on lexical patterns rather than conversational context, construct reasonable-sounding but misleading narratives, highlighting crucial limitations for both automated annotation of complex interactional phenomena.",{"paper_id":13497,"title":13498,"year":7,"month":188,"day":63,"doi":13499,"resource_url":13500,"first_page":13501,"last_page":13502,"pdf_url":13503,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13504,"paper_type":860,"authors":13505,"abstract":13520},"lrec2026-main-548","GPT-NL Public Corpus: A Permissively Licensed, Dutch-First Dataset for LLM Pre-training","10.63317\u002F5fbtc336wwx2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-548","6893","6903","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.548.pdf","oort-etal-2026-gpt",[13506,13509,13511,13514,13517],{"paper_id":13497,"author_seq":247,"given_name":13507,"surname":13508,"affiliation":63,"orcid":63},"Jesse J. Van","Oort",{"paper_id":13497,"author_seq":232,"given_name":2510,"surname":13510,"affiliation":63,"orcid":63},"Brinkkemper",{"paper_id":13497,"author_seq":218,"given_name":13512,"surname":13513,"affiliation":63,"orcid":63},"Erik de","Graaf",{"paper_id":13497,"author_seq":203,"given_name":13515,"surname":13516,"affiliation":63,"orcid":63},"Bram","Vanroy",{"paper_id":13497,"author_seq":188,"given_name":13518,"surname":13519,"affiliation":63,"orcid":63},"Saskia","Lensink","We present the GPT-NL Public Corpus, the biggest permissively licensed corpus of Dutch language resources. 
The GPT-NL Public Corpus contains 21 Dutch-only collections totalling 36B preprocessed Dutch tokens not present in any other LLM pretraining corpus. Additionally, the corpus includes roughly 207B English, 232B Code, and 48B German\u002FDanish tokens taken from existing sets which we further curated for compliance. This corpus includes curated data from large existing corpora like Common Corpus and Common Crawl, as well as newly created Dutch-specific collections. Most newly created Dutch collections consist of content collected in collaboration with organisations or synthetically augmented content. All data is collected and evaluated with the aim of facilitating the creation of (commercial) language models that are lawful, useful and non-harmful. All data included in the GPT-NL Public Corpus is sourced from datasets with permissive licensing and is curated and redistributed under a CC-BY license. The full dataset is publicly available on the Hugging Face Hub.",{"paper_id":13522,"title":13523,"year":7,"month":188,"day":63,"doi":13524,"resource_url":13525,"first_page":13526,"last_page":13527,"pdf_url":13528,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13529,"paper_type":860,"authors":13530,"abstract":13549},"lrec2026-main-549","Estonian WinoGrande Dataset: Comparative Analysis of LLM Performance on Human and Machine 
Translation","10.63317\u002F27gw7fzp2jbx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-549","6904","6918","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.549.pdf","ojastu-etal-2026-estonian",[13531,13534,13537,13540,13543,13546],{"paper_id":13522,"author_seq":247,"given_name":13532,"surname":13533,"affiliation":63,"orcid":63},"Marii","Ojastu",{"paper_id":13522,"author_seq":232,"given_name":13535,"surname":13536,"affiliation":63,"orcid":63},"Hele-Andra","Kuulmets",{"paper_id":13522,"author_seq":218,"given_name":13538,"surname":13539,"affiliation":63,"orcid":63},"Aleksei","Dorkin",{"paper_id":13522,"author_seq":203,"given_name":13541,"surname":13542,"affiliation":63,"orcid":63},"Marika","Borovikova",{"paper_id":13522,"author_seq":188,"given_name":13544,"surname":13545,"affiliation":63,"orcid":63},"Dage","Särg",{"paper_id":13522,"author_seq":172,"given_name":13547,"surname":13548,"affiliation":63,"orcid":63},"Kairit","Sirts","In this paper, we present a localized and culturally adapted Estonian translation of the test set from the widely used commonsense reasoning benchmark, WinoGrande. We detail the translation and adaptation process carried out by translation specialists and evaluate the performance of both proprietary and open source models on the human translated benchmark. Additionally, we explore the feasibility of achieving high-quality machine translation by incorporating insights from the manual translation process into the design of a detailed prompt. This prompt is specifically tailored to address both the linguistic characteristics of Estonian and the unique translation challenges posed by the WinoGrande dataset. Our findings show that model performance on the human translated Estonian dataset is slightly lower than on the original English test set, while performance on machine-translated data is notably worse. 
Additionally, our experiments indicate that prompt engineering offers limited improvement in translation quality or model accuracy, and highlight the importance of involving language specialists in dataset translation and adaptation to ensure reliable and interpretable evaluations of language competency and reasoning in large language models.",{"paper_id":13551,"title":13552,"year":7,"month":188,"day":63,"doi":13553,"resource_url":13554,"first_page":13555,"last_page":13556,"pdf_url":13557,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13558,"paper_type":860,"authors":13559,"abstract":13572},"lrec2026-main-550","GENIUS Keylog Corpus - a German High School Student Corpus with Keystroke Logging Data","10.63317\u002F4zdv5j2yj6vg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-550","6919","6928","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.550.pdf","schaller-etal-2026-genius",[13560,13563,13566,13568,13570],{"paper_id":13551,"author_seq":247,"given_name":13561,"surname":13562,"affiliation":63,"orcid":63},"Nils-Jonathan","Schaller",{"paper_id":13551,"author_seq":232,"given_name":13564,"surname":13565,"affiliation":63,"orcid":63},"Thorben","Jansen",{"paper_id":13551,"author_seq":218,"given_name":4323,"surname":13567,"affiliation":63,"orcid":63},"Höft",{"paper_id":13551,"author_seq":203,"given_name":11711,"surname":13569,"affiliation":63,"orcid":63},"Pünjer",{"paper_id":13551,"author_seq":188,"given_name":1104,"surname":13571,"affiliation":63,"orcid":63},"Horbach","Student writing has been studied either as a final product (arguments in an already written text) or as a writing process (keystroke data), but not in an integrated manner. We present Anonymised Keylog Corpus, the first publicly available dataset (as far as we know) that combines both comprehensive argumentative annotations with keystroke logging (259 German argumentative essays written by high school students). 
Our analysis reveals that 96% of students wrote linearly without recursion and 88% omitted the conclusion section. Writing was mainly characterised by fluent writing without extensive pauses, mainly due to the time limit for completing the task. Additionally, we suggest a methodology on how to combine annotations with keystroke events and carry out an explorative analysis of writer profiles.",{"paper_id":13574,"title":13575,"year":7,"month":188,"day":63,"doi":13576,"resource_url":13577,"first_page":13578,"last_page":13579,"pdf_url":13580,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13581,"paper_type":860,"authors":13582,"abstract":13598},"lrec2026-main-551","OTA-BOUN: A Historical Turkish Dependency Treebank","10.63317\u002F3d985kzhy84r","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-551","6929","6938","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.551.pdf","tra-etal-2026-ota",[13583,13586,13589,13591,13594,13597],{"paper_id":13574,"author_seq":247,"given_name":13584,"surname":13585,"affiliation":63,"orcid":63},"Tarık Emre","Tıraş",{"paper_id":13574,"author_seq":232,"given_name":13587,"surname":13588,"affiliation":63,"orcid":63},"Nureddin Cüneyd","Ünal",{"paper_id":13574,"author_seq":218,"given_name":13590,"surname":12911,"affiliation":63,"orcid":63},"Ada",{"paper_id":13574,"author_seq":203,"given_name":13592,"surname":13593,"affiliation":63,"orcid":63},"Ece","Yurtseven",{"paper_id":13574,"author_seq":188,"given_name":13595,"surname":13596,"affiliation":63,"orcid":63},"Esma F. Bilgin","Taşdemir",{"paper_id":13574,"author_seq":172,"given_name":3939,"surname":3940,"affiliation":63,"orcid":63},"We present OTA-BOUN v2.0, the largest Universal Dependencies treebank for historical Turkish, consisting of 1,742 manually verified sentences sampled from late Ottoman texts. 
The annotation process followed a semi-automatic methodology: initial pre-annotation by the UDPipe 2.0 pipeline was refined through manual annotation of dependency relations, part-of-speech tags, and lemmas. A distinctive feature of OTA-BOUN is its dual-script representation: each sentence is provided both in the original Perso-Arabic script and its Latinized transcription, while tokens include aligned forms in both scripts. This dual-layer design enables research on script conversion, cross-lingual transfer, and historical–modern Turkish comparisons. Through detailed analyses on the aforementioned treebank, this study presents a unique and scalable resource, advancing computational studies of historical Turkish and supporting broader efforts in multilingual and diachronic NLP.",{"paper_id":13600,"title":13601,"year":7,"month":188,"day":63,"doi":13602,"resource_url":13603,"first_page":13604,"last_page":13605,"pdf_url":13606,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13607,"paper_type":860,"authors":13608,"abstract":13621},"lrec2026-main-552","TCMPHal: A Large-scale Dataset for Hallucination Detection in Traditional Chinese Medicine 
Pharmacy","10.63317\u002F2889b9sdvrsm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-552","6939","6948","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.552.pdf","han-etal-2026-tcmphal",[13609,13610,13611,13613,13614,13617,13620],{"paper_id":13600,"author_seq":247,"given_name":4142,"surname":4143,"affiliation":63,"orcid":63},{"paper_id":13600,"author_seq":232,"given_name":4130,"surname":3676,"affiliation":63,"orcid":63},{"paper_id":13600,"author_seq":218,"given_name":13612,"surname":1899,"affiliation":63,"orcid":63},"Ziwen",{"paper_id":13600,"author_seq":203,"given_name":3270,"surname":3676,"affiliation":63,"orcid":63},{"paper_id":13600,"author_seq":188,"given_name":13615,"surname":13616,"affiliation":63,"orcid":63},"Jia","Meng",{"paper_id":13600,"author_seq":172,"given_name":13618,"surname":13619,"affiliation":63,"orcid":63},"John","Moraros",{"paper_id":13600,"author_seq":155,"given_name":4154,"surname":3676,"affiliation":63,"orcid":63},"The rapid proliferation of large language models (LLMs) in medicine highlights their potential to revolutionize research in Traditional Chinese Medicine (TCM). While these models have shown great promise in assisting TCM practitioners by answering herb-related questions, generating syndrome-differentiation reports, and recommending classical formulas, a persistent challenge that arises is the issue of hallucination, where LLMs might produce content that appears plausible yet inaccurate. This issue has received limited attention within the context of TCM research, leaving a significant gap in understanding how hallucination manifests within the unique theoretical frameworks and diagnostic principles. Motivated by this phenomenon, we present TCMPHal, the first dataset specifically curated for hallucination detection in TCM pharmacy, comprising 10,000 high-quality question-answer pairs with hallucination annotations. 
Our experimental results across diverse LLMs, under standard, knowledge-based, and search engine-augmented conditions, demonstrate the capabilities and limitations of these models. A notable observation is that, for thinking LLMs, incorporating search engine results yields minimal improvement over their intrinsic reasoning abilities. We further conduct an in-depth error analysis, paving the way for future research directions in this domain. We release the TCMPHal dataset at https:\u002F\u002Fgithub.com\u002Fhanninaa\u002FTCMP.",{"paper_id":13623,"title":13624,"year":7,"month":188,"day":63,"doi":13625,"resource_url":13626,"first_page":13627,"last_page":13628,"pdf_url":13629,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13630,"paper_type":860,"authors":13631,"abstract":13643},"lrec2026-main-553","AraREQ: A Dataset and End-to-End System for Conflict Detection and Resolution in Software Requirements","10.63317\u002F357spa5iu5zq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-553","6949","6961","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.553.pdf","hammouda-etal-2026-arareq",[13632,13635,13638,13641],{"paper_id":13623,"author_seq":247,"given_name":13633,"surname":13634,"affiliation":63,"orcid":63},"Tymaa Hasanain","Hammouda",{"paper_id":13623,"author_seq":232,"given_name":13636,"surname":13637,"affiliation":63,"orcid":63},"Alaa","Aljabari",{"paper_id":13623,"author_seq":218,"given_name":13639,"surname":13640,"affiliation":63,"orcid":63},"Nagham Fahim","Hamad",{"paper_id":13623,"author_seq":203,"given_name":3960,"surname":13642,"affiliation":63,"orcid":63},"Jarrar","Conflict detection in software requirements is essential for ensuring specification consistency, improving project efficiency, and ensuring overall software quality. Despite its importance, research on this task, particularly for Arabic, remains limited due to the scarcity of annotated data and linguistic challenges. 
To address this gap, we introduce AraREQ, a large-scale Arabic dataset for requirement-level conflict detection and resolution. The dataset is constructed through a semi-automated Arabization process using Large Language Models (LLMs), followed by manual augmentation to address class imbalance. The final dataset comprises 27K Arabic requirement pairs. We benchmark four state-of-the-art LLMs under zero-shot and few-shot settings, establishing the first comprehensive evaluation for Arabic requirements conflict detection. Experimental results show that few-shot prompting consistently improves performance, particularly on the minority conflict class, demonstrating the effectiveness of example-based prompting. Finally, we introduce an end-to-end system that automatically detects potential conflicts in Arabic software requirements and generates resolution suggestions. All datasets, codes, and the end-to-end system are open-source and available at: https:\u002F\u002Fsina.birzeit.edu\u002FArReqConflicts\u002F",{"paper_id":13645,"title":13646,"year":7,"month":188,"day":63,"doi":13647,"resource_url":13648,"first_page":13649,"last_page":13650,"pdf_url":13651,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13652,"paper_type":860,"authors":13653,"abstract":13664},"lrec2026-main-554","MAD: A Corpus of Multilingual Argumentative 
Deliberation","10.63317\u002F3z4jk3uj7pfe","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-554","6962","6978","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.554.pdf","maguire-etal-2026-mad",[13654,13657,13658,13661,13662],{"paper_id":13645,"author_seq":247,"given_name":13655,"surname":13656,"affiliation":63,"orcid":63},"Eimear","Maguire",{"paper_id":13645,"author_seq":232,"given_name":10518,"surname":10519,"affiliation":63,"orcid":63},{"paper_id":13645,"author_seq":218,"given_name":13659,"surname":13660,"affiliation":63,"orcid":63},"Jacky","Visser",{"paper_id":13645,"author_seq":203,"given_name":10524,"surname":10525,"affiliation":63,"orcid":63},{"paper_id":13645,"author_seq":188,"given_name":13618,"surname":13663,"affiliation":63,"orcid":63},"Lawrence","We present a corpus of Multilingual Argumentative Deliberation (MAD), a manually annotated corpus of deliberative dialogues in English, German, Polish and Italian. Four groups each completed two variants of a ranking task, the NASA Survival Scenario; once in their native language and once in English. The corpus is annotated using Inference Anchoring Theory (IAT), a framework developed for analysing argument in dialogical settings, and widely used in argument mining. As an argument mining resource, MAD is distinct in offering equivalent instances of spontaneous argumentation across languages. In addition to use in argument mining, the annotation captures both argument relations and dialogue acts, enabling deeper analysis of argument and dialogue structure than typical of argument-only corpora. The design of the corpus enables studies of second-language effects in English-medium interaction, cross-linguistic argument comparisons for German, Polish and Italian, and speaker dialogue strategy consistency, amongst others. 
The primary annotated MAD corpus is freely available at https:\u002F\u002Fcorpora.aifdb.org\u002Fmad, while we additionally release the unannotated transcripts to facilitate repurposing of the material.",{"paper_id":13666,"title":13667,"year":7,"month":188,"day":63,"doi":13668,"resource_url":13669,"first_page":13670,"last_page":13671,"pdf_url":13672,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13673,"paper_type":860,"authors":13674,"abstract":13688},"lrec2026-main-555","Infox-QC: A Quebec-Focused French Corpus for Misinformation Detection and AI Robustness Assessment","10.63317\u002F2n9wqyfvm35z","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-555","6979","6989","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.555.pdf","doghmane-etal-2026-infox",[13675,13678,13681,13684,13687],{"paper_id":13666,"author_seq":247,"given_name":13676,"surname":13677,"affiliation":63,"orcid":63},"Moetaz","Doghmane",{"paper_id":13666,"author_seq":232,"given_name":13679,"surname":13680,"affiliation":63,"orcid":63},"Hazem","Amamou",{"paper_id":13666,"author_seq":218,"given_name":13682,"surname":13683,"affiliation":63,"orcid":63},"Thiziri","Sefsaf",{"paper_id":13666,"author_seq":203,"given_name":13685,"surname":13686,"affiliation":63,"orcid":63},"Alan","Davoust",{"paper_id":13666,"author_seq":188,"given_name":11438,"surname":11439,"affiliation":63,"orcid":63},"The pervasive spread of online misinformation, often through social media and political campaigns, makes detecting false claims a crucial task for mitigating societal risks. While the vast majority of fake news datasets are developed in English, a critical gap remains for low-resource languages, such as French. To address this, we introduce Infox-QC, a novel French-language corpus focused on misinformation relevant to the Quebec region. 
Beyond containing real true and fake news, Infox-QC includes two unique subsets of AI-generated fake news: one created by prompting an AI to paraphrase existing fake news, and a second generated by prompting an AI to fabricate fake news from real true reports. This innovative approach allows us to verify the robustness of detection systems against fabricated content, which modern LLMs can generate with convincing efficacy. We establish comprehensive baselines using traditional machine learning methods, BERT-based models, and Large Language Models, both with and without Retrieval-Augmented Generation (RAG). Our results demonstrate that RAG-augmented LLMs offer the strongest contextual understanding, while traditional models provide valuable interpretable baselines. We further provide an exploratory human–LLM thematic agreement analysis to assess annotation consistency. The Infox-QC resource fills a critical void in French-language NLP research, supporting future efforts to explore the regional and cultural dimensions of misinformation through cross-linguistic comparison.",{"paper_id":13690,"title":13691,"year":7,"month":188,"day":63,"doi":13692,"resource_url":13693,"first_page":13694,"last_page":13695,"pdf_url":13696,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13697,"paper_type":860,"authors":13698,"abstract":13702},"lrec2026-main-556","unarXive 2024: A Large-Scale Scientific Corpus for Citation-Aware Retrieval and Generation","10.63317\u002F2nqzwzhq3j3t","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-556","6990","6997","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.556.pdf","besrour-etal-2026-unarxive",[13699,13701],{"paper_id":13690,"author_seq":247,"given_name":1290,"surname":13700,"affiliation":63,"orcid":63},"Besrour",{"paper_id":13690,"author_seq":232,"given_name":1732,"surname":9252,"affiliation":63,"orcid":63},"Full-text collections of scientific papers are essential 
for NLP research and the training of language models. However, existing resources remain incomplete: they often lag behind the fast-paced growth of scientific publishing, lack comprehensive citation networks, and discard essential structural elements. In this work, we introduce unarXive 2024, a large-scale, richly structured corpus containing every arXiv submission from January 1991 to December 2024 – over 2.28 million documents across physics, mathematics, computer science, and other fields. Our release enhances each paper with detailed metadata, reconstructs a substantially more complete citation network than existing datasets, and preserves fine-grained structural information, including section boundaries, mathematical notation, and non-textual elements. Beyond the corpus itself, we provide dense and sparse indexes optimized for retrieval-augmented generation (RAG) over the full arXiv archive. All resources, including code and data, are publicly available: https:\u002F\u002Fgithub.com\u002Ffaerber-lab\u002FunarXive-2024",{"paper_id":13704,"title":13705,"year":7,"month":188,"day":63,"doi":13706,"resource_url":13707,"first_page":13708,"last_page":13709,"pdf_url":13710,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13711,"paper_type":860,"authors":13712,"abstract":13717},"lrec2026-main-557","EPIC-EuroParl-UdS: Information-Theoretic Perspectives on Translation and Interpreting","10.63317\u002F3txs6tgs4wsu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-557","6998","7013","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.557.pdf","kunilovskaya-etal-2026-epic",[13713,13715],{"paper_id":13704,"author_seq":247,"given_name":2960,"surname":13714,"affiliation":63,"orcid":63},"Kunilovskaya",{"paper_id":13704,"author_seq":232,"given_name":3799,"surname":13716,"affiliation":63,"orcid":63},"Pollklaesener","This paper introduces an updated and combined version of the bidirectional English–German 
EPIC-UdS (spoken) and EuroParl-UdS (written) corpora containing original European Parliament speeches as well as their translations and interpretations. The new version corrects metadata and text errors identified through previous use, refines the content, updates linguistic annotations, and adds new layers, including word alignment and word-level surprisal indices. The combined resource is designed to support research using information-theoretic approaches to language variation, particularly studies comparing written and spoken modes, and examining disfluencies in speech, as well as traditional translationese studies, including parallel (source vs. target) and comparable (original vs. translated) analyses. The paper outlines the updates introduced in this release, summarises previous results based on the corpus, and presents a new illustrative study. The study validates the integrity of the rebuilt spoken data and evaluates probabilistic measures derived from base and fine-tuned GPT-2 and machine translation models on the task of filler particles prediction in interpreting.",{"paper_id":13719,"title":13720,"year":7,"month":188,"day":63,"doi":13721,"resource_url":13722,"first_page":13723,"last_page":13724,"pdf_url":13725,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13726,"paper_type":860,"authors":13727,"abstract":13734},"lrec2026-main-558","FeedFetcher: A Resilient Web Feed Downloader for Corpus 
Construction","10.63317\u002F49txz3zreas2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-558","7014","7022","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.558.pdf","herman-etal-2026-feedfetcher",[13728,13730,13731],{"paper_id":13719,"author_seq":247,"given_name":1651,"surname":13729,"affiliation":63,"orcid":63},"Herman",{"paper_id":13719,"author_seq":232,"given_name":1380,"surname":962,"affiliation":63,"orcid":63},{"paper_id":13719,"author_seq":218,"given_name":13732,"surname":13733,"affiliation":63,"orcid":63},"Vit","Suchomel","Building large-scale, timestamped monitor corpora requires robust and efficient tools for continuous web data acquisition. We present FeedFetcher, an open-source, lightweight yet resilient downloader designed to collect linguistic data from RSS\u002FAtom web feeds. The tool enables continuous corpus updates by harvesting newly published web content with minimal downtime and high data integrity. Implemented in Rust for performance, memory safety, and scalable concurrency, FeedFetcher supports thousands of simultaneous connections while maintaining server politeness. The software is available under the GPL-3.0 license on https:\u002F\u002Fgithub.com\u002Fondra\u002Ffeed_fetcher. In our setup, the entire workflow integrates FeedFetcher with downstream text-processing pipelines for tokenization, lemmatization, corpus compilation and deployment. The system is currently used to update monitor corpora in 64 languages, producing approximately two billion tokens per month. These corpora are available in Sketch Engine. We also describe methods for discovering new web feeds, combining manual exploration with automated extraction from large-scale web crawls to expand linguistic coverage. 
We demonstrate the system’s applicability through a time-based analysis of word-frequency change, showing how long-term accumulation of timestamped data supports the study of lexical dynamics and language evolution.",{"paper_id":13736,"title":13737,"year":7,"month":188,"day":63,"doi":13738,"resource_url":13739,"first_page":13740,"last_page":13741,"pdf_url":13742,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13743,"paper_type":860,"authors":13744,"abstract":13749},"lrec2026-main-559","Human-in-the-Loop Mass Transcription and Ground Truth Annotation for Challenging Historical Documents","10.63317\u002F2m45sutdznvy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-559","7023","7033","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.559.pdf","fischer-etal-2026-human",[13745,13747],{"paper_id":13736,"author_seq":247,"given_name":13746,"surname":5324,"affiliation":63,"orcid":63},"Norbert",{"paper_id":13736,"author_seq":232,"given_name":2510,"surname":13748,"affiliation":63,"orcid":63},"Puppe","Challenging historical documents still pose significant difficulties for fully automatic layout detection and text recognition, requiring lengthy, demanding correction. We describe our experiences with complex layouts and present our workflow with AdaptOCR, a web-based annotation tool designed to facilitate the efficient transcription and ground-truth annotation of demanding historical documents. Addressing the limitations of existing solutions, AdaptOCR prioritizes a streamlined workflow with an integrated trainable layout and OCR pipeline. The tool uses the PAGE standard to represent document structure and enables the annotation of baselines, regions, text lines and the correction of their transcriptions providing automatic OCR invocation and dictionary-based error detection. 
Furthermore, it supports flexible annotations with custom element types and attributes to cater to different project requirements. We demonstrate the effectiveness of the workflow and tool in two demanding applications: The transcription of a large corpus of historical printings and the detection \u002F annotation of handwritten artifacts within the private library of the Grimm brothers. In addition, we evaluate the dictionary-based correction and assess the efficiency improvements using AdaptOCR in a pilot study.",{"paper_id":13751,"title":13752,"year":7,"month":188,"day":63,"doi":13753,"resource_url":13754,"first_page":13755,"last_page":13756,"pdf_url":13757,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13758,"paper_type":860,"authors":13759,"abstract":13770},"lrec2026-main-560","CoMMA, a Large-scale Corpus of Multilingual Medieval Archives","10.63317\u002F5pjzh8ma5v76","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-560","7034","7045","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.560.pdf","clrice-etal-2026-comma",[13760,13761,13763,13766,13769],{"paper_id":13751,"author_seq":247,"given_name":3251,"surname":6798,"affiliation":63,"orcid":63},{"paper_id":13751,"author_seq":232,"given_name":2467,"surname":13762,"affiliation":63,"orcid":63},"Gabay",{"paper_id":13751,"author_seq":218,"given_name":13764,"surname":13765,"affiliation":63,"orcid":63},"Malamatenia","Vlachou-Efsthatiou",{"paper_id":13751,"author_seq":203,"given_name":13767,"surname":13768,"affiliation":63,"orcid":63},"Ariane","Pinche",{"paper_id":13751,"author_seq":188,"given_name":4363,"surname":6800,"affiliation":63,"orcid":63},"We present CoMMA, a large-scale corpus of medieval manuscripts produced through automatic text recognition. The corpus contains around 2.5b tokens drawn from more than 23,000 digitized manuscripts in Latin and Old French, harvested via IIIF. 
Unlike other resources, it is made of raw, non-normalized text enriched with layout analysis in various formats. We describe the pipeline used for large-scale acquisition and processing, and report quantitative and qualitative evaluations (average CER 9.7%). The resulting resource supports multiple use cases, from pretraining language models to corpus linguistics on historical languages and digital humanities applications.",{"paper_id":13772,"title":13773,"year":7,"month":188,"day":63,"doi":13774,"resource_url":13775,"first_page":13776,"last_page":13777,"pdf_url":13778,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13779,"paper_type":860,"authors":13780,"abstract":13790},"lrec2026-main-561","Conversion of the Clark Hall Dictionary of Old English to TEI with RDF: An End-to-end Pipeline for Lexicographic Resource Retrodigitization","10.63317\u002F3b2x67e8vd8g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-561","7046","7055","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.561.pdf","stoliarov-etal-2026-conversion",[13781,13783,13786,13787,13789],{"paper_id":13772,"author_seq":247,"given_name":5318,"surname":13782,"affiliation":63,"orcid":63},"Stoliarov",{"paper_id":13772,"author_seq":232,"given_name":13784,"surname":13785,"affiliation":63,"orcid":63},"Maxim","Ionov",{"paper_id":13772,"author_seq":218,"given_name":2908,"surname":2909,"affiliation":63,"orcid":63},{"paper_id":13772,"author_seq":203,"given_name":4854,"surname":13788,"affiliation":63,"orcid":63},"Buzzoni",{"paper_id":13772,"author_seq":188,"given_name":1110,"surname":2903,"affiliation":63,"orcid":63},"In this submission we introduce a workflow\u002Fpipeline for creating TEI editions of legacy dictionaries using a parser based on a context-free grammar (CFG). 
We do this by describing a project which we are currently carrying out and which aims to create a digital edition of an Old English dictionary, Clark-Hall’s \"A Concise Anglo-Saxon Dictionary\" using this approach. We begin the article by motivating our CFG-based approach, discussing its advantages and disadvantages, and comparing it to other approaches. We argue that this approach is suitable to certain kinds of dictionaries, such as Clark Hall’s. We then describe the microstructure of the dictionary itself with a view both to justifying the kinds of rules which we subsequently describe and to outlining the kinds of resources to which we believe our approach is best suited. We then describe the CFG parser itself and give an account of our experiments in parsing the dictionary. Finally, we outline the enrichment of the parsed dictionary with RDFa and the benefits it has for the published data.",{"paper_id":13792,"title":13793,"year":7,"month":188,"day":63,"doi":13794,"resource_url":13795,"first_page":13796,"last_page":13797,"pdf_url":13798,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13799,"paper_type":860,"authors":13800,"abstract":13811},"lrec2026-main-562","AMORES: A Spanish Language Resource for an Extended Set of Moral Foundations","10.63317\u002F25owsh7mdw6i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-562","7056","7068","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.562.pdf","araque-etal-2026-amores",[13801,13803,13805,13808],{"paper_id":13792,"author_seq":247,"given_name":9043,"surname":13802,"affiliation":63,"orcid":63},"Araque",{"paper_id":13792,"author_seq":232,"given_name":1668,"surname":13804,"affiliation":63,"orcid":63},"Molina",{"paper_id":13792,"author_seq":218,"given_name":13806,"surname":13807,"affiliation":63,"orcid":63},"Anny D. 
Alvarez","Nogales",{"paper_id":13792,"author_seq":203,"given_name":13809,"surname":13810,"affiliation":63,"orcid":63},"Carlos A.","Iglesias","This work addresses the need for linguistic resources that enable language models to understand and adapt to subjective and abstract concepts in the domain of moral values within texts. In light of the growing interest in the study of moral values and its limited exploration in Spanish-speaking contexts, this work addresses this gap by developing a novel Spanish-language corpus. Furthermore, the corpus’s development process ensures that the annotations capture a wide range of perspectives, resulting in a resource that reflects the diversity of moral interpretations in real-world contexts. Specifically, there are two main contributions. 1 The creation of the first large-scale Spanish corpus annotated according to Moral Foundations Theory. 2 We introduce an experimental framework that investigates how annotators’ religious orientations could shape moral annotation patterns and propagate to model behavior. To do so, we employ a prompt-based alignment method that improves moral detection regardless of religious alignment for which the model was trained. 
In this scenario, we explore whether language models can align moral interpretations across divergent belief orientations.",{"paper_id":13813,"title":13814,"year":7,"month":188,"day":63,"doi":13815,"resource_url":13816,"first_page":13817,"last_page":13818,"pdf_url":13819,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13820,"paper_type":860,"authors":13821,"abstract":13834},"lrec2026-main-563","The Moralization Corpus: Frame-Based Annotation and Analysis of Moralizing Speech Acts across Diverse Text Genres","10.63317\u002F28h9saps9vhr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-563","7069","7091","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.563.pdf","becker-etal-2026-moralization",[13822,13824,13827,13829,13832],{"paper_id":13813,"author_seq":247,"given_name":2960,"surname":13823,"affiliation":63,"orcid":63},"Becker",{"paper_id":13813,"author_seq":232,"given_name":13825,"surname":13826,"affiliation":63,"orcid":63},"Mirko","Sommer",{"paper_id":13813,"author_seq":218,"given_name":4323,"surname":13828,"affiliation":63,"orcid":63},"Tapken",{"paper_id":13813,"author_seq":203,"given_name":13830,"surname":13831,"affiliation":63,"orcid":63},"Yi Wan","Teh",{"paper_id":13813,"author_seq":188,"given_name":3696,"surname":13833,"affiliation":63,"orcid":63},"Brocai","Moralizations – arguments that invoke moral values to justify demands or positions – are a yet underexplored form of persuasive communication. We present the Moralization Corpus, a novel multi-genre dataset designed to analyze how moral values are strategically used in argumentative discourse. Moralizations are pragmatically complex and often implicit, posing significant challenges for both human annotators and NLP systems. 
We develop a frame-based annotation scheme that captures the constitutive elements of moralizations – moral values, demands, and discourse protagonists – and apply it to a diverse set of German texts, including political debates, news articles, and online discussions. The corpus enables fine-grained analysis of moralizing language across communicative formats and domains. We further evaluate several large language models (LLMs) under varied prompting conditions for the task of moralization detection and moralization component extraction and compare it to human annotations in order to investigate the challenges of automatic and manual analysis of moralizations. Results show that detailed prompt instructions have a greater effect than few-shot or explanation-based prompting, and that moralization remains a highly subjective and context-sensitive task. We release all data, annotation guidelines, and code to foster future interdisciplinary research on moral discourse and moral reasoning in NLP.",{"paper_id":13836,"title":13837,"year":7,"month":188,"day":63,"doi":13838,"resource_url":13839,"first_page":13840,"last_page":13841,"pdf_url":13842,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13843,"paper_type":860,"authors":13844,"abstract":13850},"lrec2026-main-564","Targum — a Multilingual New Testament Translation Corpus","10.63317\u002F2yiotxcyovir","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-564","7092","7105","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.564.pdf","rapacz-etal-2026-targum",[13845,13847],{"paper_id":13836,"author_seq":247,"given_name":4412,"surname":13846,"affiliation":63,"orcid":63},"Rapacz",{"paper_id":13836,"author_seq":232,"given_name":13848,"surname":13849,"affiliation":63,"orcid":63},"Aleksander","Smywiński-Pohl","Many European languages possess rich biblical translation histories, yet existing corpora — in prioritizing linguistic breadth — often fail to 
capture this depth. To address this gap, we introduce a multilingual corpus of 651 New Testament translations, of which 334 are unique, spanning five languages with 2.4–5.0× more translations per language than any prior corpus: English (194 unique versions from 390 total), French (41 from 78), Italian (17 from 33), Polish (29 from 48), and Spanish (53 from 102). Aggregated from 12 online biblical libraries and one preexisting corpus, each translation is annotated with metadata that maps the text to a standardized identifier for the work, its specific edition, and its year of revision. This canonicalization allows researchers to define \"uniqueness\" for their own needs: they can perform micro-level analyses on translation families, such as the KJV lineage, or conduct macro-level studies by deduplicating closely related texts. By providing the first multilingual resource with sufficient depth per language for flexible, multilevel analysis, the corpus fills a gap in the quantitative study of translation history.",{"paper_id":13852,"title":13853,"year":7,"month":188,"day":63,"doi":13854,"resource_url":13855,"first_page":13856,"last_page":13857,"pdf_url":13858,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13859,"paper_type":860,"authors":13860,"abstract":13868},"lrec2026-main-565","Trigger Warnings Are Grounded in a Shared Vocabulary: A Corpus Analysis with User-Generated 
Labels","10.63317\u002F5gaazphzhjgf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-565","7106","7125","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.565.pdf","heineking-etal-2026-trigger",[13861,13863,13864,13865,13866],{"paper_id":13852,"author_seq":247,"given_name":4763,"surname":13862,"affiliation":63,"orcid":63},"Heineking",{"paper_id":13852,"author_seq":232,"given_name":12330,"surname":12331,"affiliation":63,"orcid":63},{"paper_id":13852,"author_seq":218,"given_name":12327,"surname":12328,"affiliation":63,"orcid":63},{"paper_id":13852,"author_seq":203,"given_name":12352,"surname":12353,"affiliation":63,"orcid":63},{"paper_id":13852,"author_seq":188,"given_name":3843,"surname":13867,"affiliation":63,"orcid":63},"Potthast","Trigger warnings advise of potentially disturbing content. On that note: This document discusses abuse. But can we trust trigger warnings? For a warning to be credible, independent authors must have a shared understanding of the type of content that advises caution. We investigate for the first time whether trigger warnings are aligned with the vocabulary of texts written by uncoordinated authors. To quantify the lexical alignment of trigger warnings, we conduct a series of statistical tests on the texts of fan fiction authors who used warnings relating to emotional, physical, or sexual abuse. We find that the vocabulary of texts with these warnings is aligned with a curated dictionary of terms related to abuse. 
However, a high frequency of a term in texts with a warning does not necessarily indicate a semantic relation.",{"paper_id":13870,"title":13871,"year":7,"month":188,"day":63,"doi":13872,"resource_url":13873,"first_page":13874,"last_page":13875,"pdf_url":13876,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13877,"paper_type":860,"authors":13878,"abstract":13893},"lrec2026-main-566","ENEIDE: A High Quality Silver Standard Dataset for Named Entity Recognition and Linking in Historical Italian","10.63317\u002F3hwbiiv85eub","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-566","7126","7136","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.566.pdf","santini-etal-2026-eneide",[13879,13882,13884,13886,13889,13892],{"paper_id":13870,"author_seq":247,"given_name":13880,"surname":13881,"affiliation":63,"orcid":63},"Cristian","Santini",{"paper_id":13870,"author_seq":232,"given_name":4763,"surname":13883,"affiliation":63,"orcid":63},"Barzaghi",{"paper_id":13870,"author_seq":218,"given_name":5735,"surname":13885,"affiliation":63,"orcid":63},"Sernani",{"paper_id":13870,"author_seq":203,"given_name":13887,"surname":13888,"affiliation":63,"orcid":63},"Emanuele","Frontoni",{"paper_id":13870,"author_seq":188,"given_name":13890,"surname":13891,"affiliation":63,"orcid":63},"Laura","Melosi",{"paper_id":13870,"author_seq":172,"given_name":5461,"surname":5462,"affiliation":63,"orcid":63},"This paper introduces ENEIDE (Extracting Named Entities from Italian Digital Editions), a silver standard dataset for Named Entity Recognition and Linking (NERL) in historical Italian texts. The corpus comprises 2,111 documents with over 8,000 entity annotations semi-automatically extracted from two scholarly digital editions: Digital Zibaldone, the philosophical diary of the Italian poet Giacomo Leopardi (1798–1837), and Aldo Moro Digitale, the complete works of the Italian politician Aldo Moro (1916–1978). 
Annotations cover multiple entity types (person, location, organization, literary work) linked to Wikidata identifiers, including NIL entities that cannot be mapped to the knowledge graph. To the best of our knowledge, ENEIDE represents the first multi-domain, publicly available NERL dataset for historical Italian with training, development, and test splits. We present a methodology for semi-automatic annotations extraction from manually curated scholarly digital editions, including quality control and annotation enhancement procedures. Baseline experiments using state-of-the-art models demonstrate the dataset’s challenge for NERL and the gap between zero-shot approaches and fine-tuned models. The dataset’s diachronic coverage spanning two centuries makes it particularly suitable for temporal entity disambiguation and cross-domain evaluation. ENEIDE is released under a CC BY-NC-SA 4.0 license.",{"paper_id":13895,"title":13896,"year":7,"month":188,"day":63,"doi":13897,"resource_url":13898,"first_page":13899,"last_page":13900,"pdf_url":13901,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13902,"paper_type":860,"authors":13903,"abstract":13917},"lrec2026-main-567","YoNER: A New Yorùbá Multi-domain Named Entity Recognition Dataset","10.63317\u002F4o6m2hzpy7gs","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-567","7137","7148","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.567.pdf","falola-etal-2026-yoner",[13904,13907,13908,13911,13914,13916],{"paper_id":13895,"author_seq":247,"given_name":13905,"surname":13906,"affiliation":63,"orcid":63},"Peace Busola","Falola",{"paper_id":13895,"author_seq":232,"given_name":3598,"surname":3599,"affiliation":63,"orcid":63},{"paper_id":13895,"author_seq":218,"given_name":13909,"surname":13910,"affiliation":63,"orcid":63},"Solomon 
O.","Akinola",{"paper_id":13895,"author_seq":203,"given_name":13912,"surname":13913,"affiliation":63,"orcid":63},"Folashade T.","Ogunajo",{"paper_id":13895,"author_seq":188,"given_name":13915,"surname":3599,"affiliation":63,"orcid":63},"Emmanuel Oluwadunsin",{"paper_id":13895,"author_seq":172,"given_name":3629,"surname":3630,"affiliation":63,"orcid":63},"Named Entity Recognition (NER) is a foundational NLP task, yet research in Yorùbá has been constrained by limited and domain-specific resources. Existing resources, such as MasakhaNER (a manually annotated news-domain corpus) and WikiAnn (automatically created from Wikipedia), are valuable but restricted in domain coverage. To address this gap, we present YoNER, a new multidomain Yorùbá NER dataset that extends entity coverage beyond news and Wikipedia. The dataset comprises about 5,000 sentences and 100,000 tokens collected from five domains including Bible, Blogs, Movies, Radio broadcast and Wikipedia, and annotated with three entity types: Person (PER), Organization (ORG) and Location (LOC), following CoNLL-style guidelines. Annotation was conducted manually by three native Yorùbá speakers, with an inter-annotator agreement of over 0.70, ensuring high quality and consistency. We benchmark several transformer encoder models using cross-domain experiments with MasakhaNER 2.0, and we also assess the effect of few-shot in-domain data using YoNER and cross-lingual setups with English datasets. Our results show that African-centric models outperform general multilingual models for Yorùbá, but cross-domain performance drops substantially, particularly for blogs and movie domains. Furthermore, we observed that closely related formal domains, such as news and Wikipedia, transfer more effectively. In addition, we introduce a new Yorùbá-specific language model (OyoBERT) that outperforms multilingual models in in-domain evaluation. 
We publicly release the YoNER dataset and pretrained OyoBERT models to support future research on Yorùbá natural language processing.",{"paper_id":13919,"title":13920,"year":7,"month":188,"day":63,"doi":13921,"resource_url":13922,"first_page":13923,"last_page":13924,"pdf_url":13925,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13926,"paper_type":860,"authors":13927,"abstract":13932},"lrec2026-main-568","Linking Rationale to Decision on Internet Standards: A Retrieval-Based Approach Using Synthetic Data","10.63317\u002F3szh4omfcsxb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-568","7149","7162","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.568.pdf","bian-etal-2026-linking",[13928,13930],{"paper_id":13919,"author_seq":247,"given_name":13929,"surname":2772,"affiliation":63,"orcid":63},"Jie",{"paper_id":13919,"author_seq":232,"given_name":1732,"surname":13931,"affiliation":63,"orcid":63},"Welzl","The Internet Engineering Task Force (IETF) develops Internet-Drafts (I-Ds) and Requests for Comments (RFCs) as formal specifications for Internet Protocols. While these documents capture finalized technical standards, the rich design rationales and deliberations that shape them are often buried in informal discussions across mailing lists. These discussions are rarely linked explicitly to the specifications they inform, making it difficult to trace the origins of specific design decisions. We address this gap by generating synthetic data that explicitly links discussion threads to their corresponding RFC\u002FI‑D sections, producing roughly 350 000 such aligned instances. This data enables training a semantic embedding-based information retrieval (IR) system that, given an email discussion, retrieves the most relevant specification content. 
Our experiments show that this synthetic supervision helps models learn associations between informal discourse and formal documentation, though the task remains challenging due to the implicit and context-dependent nature of the links.",{"paper_id":13934,"title":13935,"year":7,"month":188,"day":63,"doi":13936,"resource_url":13937,"first_page":13938,"last_page":13939,"pdf_url":13940,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13941,"paper_type":860,"authors":13942,"abstract":13948},"lrec2026-main-569","The GELATO Dataset for Legislative NER","10.63317\u002F3axxkz9oh5th","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-569","7163","7177","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.569.pdf","flynn-etal-2026-gelato",[13943,13945,13946],{"paper_id":13934,"author_seq":247,"given_name":6059,"surname":13944,"affiliation":63,"orcid":63},"Flynn",{"paper_id":13934,"author_seq":232,"given_name":9732,"surname":9733,"affiliation":63,"orcid":63},{"paper_id":13934,"author_seq":218,"given_name":10798,"surname":13947,"affiliation":63,"orcid":63},"Newman","This paper introduces GELATO (Government, Executive, Legislative, and Treaty Ontology), a dataset of U.S. House and Senate bills from the 118th Congress annotated using a novel two-level named entity recognition ontology designed for U.S. legislative texts. We fine-tune transformer-based models (BERT, RoBERTa) of different architectures and sizes on this dataset for first-level prediction. We then use LLMs with optimized prompts to complete the second level prediction. 
The strong performance of RoBERTa and relatively weak performance of BERT models, as well as the application of LLMs as second-level predictors, support future research in legislative NER or downstream tasks using these model combinations as extraction tools.",{"paper_id":13950,"title":13951,"year":7,"month":188,"day":63,"doi":13952,"resource_url":13953,"first_page":13954,"last_page":13955,"pdf_url":13956,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13957,"paper_type":860,"authors":13958,"abstract":13966},"lrec2026-main-570","Controllable Sentence Simplification in Italian: Fine-Tuning Large Language Models on Automatically Generated Resources","10.63317\u002F5fgm358dfxt5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-570","7178","7191","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.570.pdf","papucci-etal-2026-controllable",[13959,13961,13963],{"paper_id":13950,"author_seq":247,"given_name":2897,"surname":13960,"affiliation":63,"orcid":63},"Papucci",{"paper_id":13950,"author_seq":232,"given_name":2891,"surname":13962,"affiliation":63,"orcid":63},"Venturi",{"paper_id":13950,"author_seq":218,"given_name":13964,"surname":13965,"affiliation":63,"orcid":63},"Felice","Dell'Orletta","This paper presents a study on readability-controlled Sentence Simplification for Italian, addressing the scarcity of annotated resources for low-resource languages. We introduce IMPaCTS (Italian Multilevel Parallel Corpus for Text Simplification), the first fully automatically created corpus of 1,444,160 original–simple sentence pairs automatically annotated with readability levels and linguistic features. It was generated using an Italian LLM prompted in zero-shot to produce multiple simplifications per input sentence. Increasing portions of the resource are used to fine-tune mono- and multilingual open-weight LLMs, conditioning them to generate simplifications at a target readability level. 
Results from automatic and human evaluations show that fine-tuning on IMPaCTS improves performance both in terms of task completion and adherence to the targeted readability levels compared to few-shot baselines.",{"paper_id":13968,"title":13969,"year":7,"month":188,"day":63,"doi":13970,"resource_url":13971,"first_page":13972,"last_page":13973,"pdf_url":13974,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13975,"paper_type":860,"authors":13976,"abstract":13986},"lrec2026-main-571","Evaluating LLM-based Text Simplification for German: Effects on Post-Editing Effort, Quality Ratings, and User Comprehension","10.63317\u002F3688akbpcnjn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-571","7192","7208","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.571.pdf","carrer-etal-2026-evaluating",[13977,13979,13981,13983,13984],{"paper_id":13968,"author_seq":247,"given_name":5050,"surname":13978,"affiliation":63,"orcid":63},"Carrer",{"paper_id":13968,"author_seq":232,"given_name":4651,"surname":13980,"affiliation":63,"orcid":63},"Säuberli",{"paper_id":13968,"author_seq":218,"given_name":3843,"surname":13982,"affiliation":63,"orcid":63},"Kappus",{"paper_id":13968,"author_seq":203,"given_name":4380,"surname":5324,"affiliation":63,"orcid":63},{"paper_id":13968,"author_seq":188,"given_name":3818,"surname":13985,"affiliation":63,"orcid":63},"Ebling","Automatic text simplification (ATS) seeks to automate the process of rewording within the same language to enhance readability and comprehension. Current evaluation practices for ATS systems predominantly rely on automatic metrics or assessments by experts and crowdworkers, often excluding the intended end users and other stakeholders, and thus limiting insights into the actual effectiveness of ATS models. 
In this study, we address this gap by conducting a multi-faceted, mixed-method evaluation of two LLM-based ATS systems for German (capito.ai and GPT-4o) and by involving end users, post-editors, and Easy Language experts. The findings highlight the effectiveness of the LLM-based ATS systems examined across several dimensions, including post-editing efficiency, expert quality assessments, and, in the case of GPT-4o-generated simplifications, user comprehension. Post-editing effort metrics, in particular, show an increase in productivity of around 30% compared to full manual simplification. Moreover, the results reveal substantial differences in perception and understanding among participant groups. These outcomes clearly indicate that ATS for German has recently made considerable progress and, crucially, underscore the importance of incorporating multiple stakeholders into ATS evaluation to better align system performance with accessibility goals.",{"paper_id":13988,"title":13989,"year":7,"month":188,"day":63,"doi":13990,"resource_url":13991,"first_page":13992,"last_page":13993,"pdf_url":13994,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":13995,"paper_type":860,"authors":13996,"abstract":14002},"lrec2026-main-572","Reading Time in the Wild: An Assessment of Readability Predictors Based on Naturally-Observed Reading Times","10.63317\u002F56xa82ywv9us","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-572","7209","7224","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.572.pdf","vaals-etal-2026-reading",[13997,14000,14001],{"paper_id":13988,"author_seq":247,"given_name":13998,"surname":13999,"affiliation":63,"orcid":63},"Sijbren van","Vaals",{"paper_id":13988,"author_seq":232,"given_name":10819,"surname":10820,"affiliation":63,"orcid":63},{"paper_id":13988,"author_seq":218,"given_name":2075,"surname":2076,"affiliation":63,"orcid":63},"Reading time has surfaced as a viable proxy for 
readability and comprehension. However, most studies used reading times obtained in controlled experimental settings with eye-tracking or self-paced reading tasks, which differs from uncontrolled, more naturalistic reading behaviour in the wild. Through a collaboration with a newspaper, we have access to a dataset of Dutch news articles with corresponding clickstream reading times averaged across thousands of readers. To address the issue, we evaluate how well common proxies for readability and comprehension hold on data from online readers. We first group the proxies in four dimensions and compute the correlation between the proxies and the average reading time per token for each dimension. Then we assess if the proxies can meaningfully predict reading time per token. The results are surprising: we find no meaningful correlation between any proxy and the average reading time per token, nor can any proxy be used for reliable prediction. Additionally, we rerun the prediction on corresponding, automatically simplified texts and surprisingly find increased predicted reading times per token. 
These results imply that clickstream reading time must be considered with caution as a proxy for readability or comprehension.",{"paper_id":14004,"title":14005,"year":7,"month":188,"day":63,"doi":14006,"resource_url":14007,"first_page":14008,"last_page":14009,"pdf_url":14010,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14011,"paper_type":860,"authors":14012,"abstract":14019},"lrec2026-main-573","Document-Level Text Simplification in Estonian Using Large Language Models","10.63317\u002F3ndqrcjckj32","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-573","7225","7235","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.573.pdf","muru-etal-2026-document",[14013,14016],{"paper_id":14004,"author_seq":247,"given_name":14014,"surname":14015,"affiliation":63,"orcid":63},"Meeri-Ly","Muru",{"paper_id":14004,"author_seq":232,"given_name":14017,"surname":14018,"affiliation":63,"orcid":63},"Eduard","Barbu","Document-level text simplification involves transformations that go beyond sentence-internal edits, addressing discourse coherence, anaphora resolution, and cross-paragraph consistency. Despite advances in sentence-level simplification for high-resource languages, document-level simplification in morphologically rich, low-resource languages such as Estonian remains largely unexplored. This study presents a comprehensive evaluation of five state-of-the-art multilingual large language models (LLMs) for document-level simplification in Estonian. Three prompting strategies are examined: single-pass generation, pipeline-based modular agents, and guideline-augmented pipelines. The evaluation framework integrates automatic metrics assessing readability, semantic preservation, and discourse coherence, alongside a structured manual annotation protocol. 
The findings indicate that Gemini-2.0 and LLaMA-3.3 produce outputs with near-native fluency and strong meaning preservation, whereas other models display notable grammatical and semantic limitations. This work contributes novel document-level coherence metrics, evidence-based prompting strategies, and publicly available resources for reproducibility.",{"paper_id":14021,"title":14022,"year":7,"month":188,"day":63,"doi":14023,"resource_url":14024,"first_page":14025,"last_page":14026,"pdf_url":14027,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14028,"paper_type":860,"authors":14029,"abstract":14034},"lrec2026-main-574","A Human-in\u002Fon-the-Loop Framework for Accessible Text Generation","10.63317\u002F2gtngp2nmx63","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-574","7236","7247","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.574.pdf","moreno-etal-2026-human",[14030,14032],{"paper_id":14021,"author_seq":247,"given_name":14031,"surname":10277,"affiliation":63,"orcid":63},"Lourdes",{"paper_id":14021,"author_seq":232,"given_name":8604,"surname":14033,"affiliation":63,"orcid":63},"Martínez","Plain Language and Easy-to-Read formats in text simplification are essential for cognitive accessibility. Yet current automatic simplification and evaluation pipelines remain largely automated, metric-driven, and fail to reflect user comprehension or normative standards. This paper introduces a hybrid framework that explicitly integrates human participation into LLM-based accessible text generation. Human-in-the-Loop (HiTL) contributions guide adjustments during generation, while Human-on-the-Loop (HoTL) supervision ensures systematic post-generation review. 
Empirical evidence from user studies and annotated resources is operationalized into (i) checklists aligned with standards, (ii) Event-Condition-Action trigger rules for activating expert oversight, and (iii) accessibility Key Performance Indicators (KPIs). The framework shows how human-centered mechanisms can be encoded for evaluation and reused to provide structured feedback that improves model adaptation. By embedding the human role in both generation and supervision, it establishes a traceable, reproducible, and auditable process for creating and evaluating accessible texts. In doing so, it integrates explainability and ethical accountability as core design principles, contributing to more transparent and inclusive NLP systems.",{"paper_id":14036,"title":14037,"year":7,"month":188,"day":63,"doi":14038,"resource_url":14039,"first_page":14040,"last_page":14041,"pdf_url":14042,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14043,"paper_type":860,"authors":14044,"abstract":14050},"lrec2026-main-575","Automatic Analysis of Collaboration through Human Conversational Data Resources: A Review","10.63317\u002F24ppzvsvikh2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-575","7248","7261","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.575.pdf","yu-etal-2026-automatic",[14045,14047,14049],{"paper_id":14036,"author_seq":247,"given_name":14046,"surname":2998,"affiliation":63,"orcid":63},"Yi",{"paper_id":14036,"author_seq":232,"given_name":2960,"surname":14048,"affiliation":63,"orcid":63},"Boritchev",{"paper_id":14036,"author_seq":218,"given_name":5369,"surname":5370,"affiliation":63,"orcid":63},"Collaboration is a task-oriented, high-level human behavior. In most cases, conversation serves as the primary medium for information exchange and coordination, making conversational data a valuable resource for the automatic analysis of collaborative processes. 
In this paper, we focus on verbal aspects of collaboration and conduct a review of collaboration analysis using task-oriented conversation resources, encompassing related theories, coding schemes, tasks, and modeling approaches. We aim to address the question of how to utilize task-oriented human-human conversational data for collaboration analysis. We hope our review will serve as a practical resource and illuminate unexplored areas for future collaboration analysis.",{"paper_id":14052,"title":14053,"year":7,"month":188,"day":63,"doi":14054,"resource_url":14055,"first_page":14056,"last_page":14057,"pdf_url":14058,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14059,"paper_type":860,"authors":14060,"abstract":14067},"lrec2026-main-576","Benchmarking Arabic Authorship Attribution and Style Transfer with Large Language Models","10.63317\u002F22xohncktzx8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-576","7262","7278","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.576.pdf","hamed-etal-2026-benchmarking",[14061,14062,14063,14064],{"paper_id":14052,"author_seq":247,"given_name":4221,"surname":4222,"affiliation":63,"orcid":63},{"paper_id":14052,"author_seq":232,"given_name":4218,"surname":4219,"affiliation":63,"orcid":63},{"paper_id":14052,"author_seq":218,"given_name":4229,"surname":4230,"affiliation":63,"orcid":63},{"paper_id":14052,"author_seq":203,"given_name":14065,"surname":14066,"affiliation":63,"orcid":63},"Thamar","Solorio","Writing style is a fundamental component of natural language. However, significant research gaps remain in two key style-centric tasks: authorship attribution (AA) and authorship style transfer, particularly for Arabic. In this work, we revisit both tasks in that context. We introduce a new AA dataset comprising texts in Modern Standard and Dialectal Arabic. 
We train transformer-based AA models using dual cross-entropy and contrastive learning loss objectives, and validate model performance through human evaluation. We then utilize the trained AA model to benchmark a range of large language models (LLMs) on style recognition and generation tasks, providing new insights into their capabilities in modeling Arabic writing styles. Our work reveals limitations of current models and provides resources to advance research in this direction.",{"paper_id":14069,"title":14070,"year":7,"month":188,"day":63,"doi":14071,"resource_url":14072,"first_page":14073,"last_page":14074,"pdf_url":14075,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14076,"paper_type":860,"authors":14077,"abstract":14087},"lrec2026-main-577","ADHD-Lang: A Large-Scale Social Media Dataset for Verbal Behavior and Digital Phenotyping in Adult ADHD","10.63317\u002F3uw9c6ux3f7j","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-577","7279","7291","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.577.pdf","wiechmann-etal-2026-adhd",[14078,14080,14083,14085],{"paper_id":14069,"author_seq":247,"given_name":1668,"surname":14079,"affiliation":63,"orcid":63},"Wiechmann",{"paper_id":14069,"author_seq":232,"given_name":14081,"surname":14082,"affiliation":63,"orcid":63},"Elma","Kerz",{"paper_id":14069,"author_seq":218,"given_name":10323,"surname":14084,"affiliation":63,"orcid":63},"Kempa",{"paper_id":14069,"author_seq":203,"given_name":2998,"surname":14086,"affiliation":63,"orcid":63},"Qiao","We introduce ADHD-Lang, a large-scale language resource derived from Reddit to advance computational phenotyping of adult ADHD. The corpus is constructed using a high-precision self-disclosure pattern to confirm ADHD diagnoses and a matched control cohort, comprising 12,070 ADHD users (317,073 posts; 2.83M sentences) and 12,070 controls (174,765 posts; 1.27M sentences). 
In releasing ADHD-Lang to the research community, we also provide the first comprehensive baseline results, systematically examining the accuracy–transparency trade-off across three model families: (1) interpretable shallow machine learning models trained on clinically meaningful, expert-engineered language biomarkers; (2) a deep BiLSTM network trained on the same feature representations to capture temporal dynamics across users’ posts; and (3) black-box transformer-based models (BERT, RoBERTa, MentalRoBERTa) leveraging contextual embeddings—non-interpretable, high-dimensional representations. ADHD-Lang is released as a standardized benchmark to promote reproducible research and accelerate progress toward digital verbal-behavior phenotyping for adult ADHD.",{"paper_id":14089,"title":14090,"year":7,"month":188,"day":63,"doi":14091,"resource_url":14092,"first_page":14093,"last_page":14094,"pdf_url":14095,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14096,"paper_type":860,"authors":14097,"abstract":14113},"lrec2026-main-578","SynBullying: A Multi-LLM Synthetic Conversational Dataset for Cyberbullying Detection","10.63317\u002F4np8biner769","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-578","7292","7306","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.578.pdf","kazemi-etal-2026-synbullying",[14098,14101,14104,14106,14108,14111],{"paper_id":14089,"author_seq":247,"given_name":14099,"surname":14100,"affiliation":63,"orcid":63},"Arefeh","Kazemi",{"paper_id":14089,"author_seq":232,"given_name":14102,"surname":14103,"affiliation":63,"orcid":63},"Hamza","Qadeer",{"paper_id":14089,"author_seq":218,"given_name":14105,"surname":4284,"affiliation":63,"orcid":63},"Joachim",{"paper_id":14089,"author_seq":203,"given_name":14107,"surname":6564,"affiliation":63,"orcid":63},"Hossein",{"paper_id":14089,"author_seq":188,"given_name":14109,"surname":14110,"affiliation":63,"orcid":63},"Sri Balaaji 
Natarajan","Kalaivendan",{"paper_id":14089,"author_seq":172,"given_name":6381,"surname":14112,"affiliation":63,"orcid":63},"Davis","We introduce SynBullying, a synthetic multi-LLM conversational dataset for studying and detecting cyberbullying (CB). SynBullying provides a scalable and ethically safe alternative to human data collection by leveraging large language models (LLMs) to simulate realistic bullying interactions. The dataset offers (i) conversational structure, capturing multi-turn exchanges rather than isolated posts; (ii) context-aware annotations, where harmfulness is assessed within the conversational flow considering context, intent, and discourse dynamics; and (iii) fine-grained labeling, covering various CB categories for detailed linguistic and behavioral analysis. We evaluate SynBullying across five dimensions, including conversational structure, lexical patterns, sentiment\u002Ftoxicity, role dynamics, harm intensity, and CB-type distribution. We further examine its utility by testing its performance as standalone training data and as an augmentation source for CB classification.",{"paper_id":14115,"title":14116,"year":7,"month":188,"day":63,"doi":14117,"resource_url":14118,"first_page":14119,"last_page":14120,"pdf_url":14121,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14122,"paper_type":860,"authors":14123,"abstract":14142},"lrec2026-main-579","The Multilingual Euphemism Benchmark: Datasets and Baselines for Pragmatic Language 
Understanding","10.63317\u002F5im6zovfyg3t","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-579","7307","7319","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.579.pdf","poh-etal-2026-multilingual",[14124,14127,14129,14131,14134,14136,14139,14140,14141],{"paper_id":14115,"author_seq":247,"given_name":14125,"surname":14126,"affiliation":63,"orcid":63},"Whitney","Poh",{"paper_id":14115,"author_seq":232,"given_name":5233,"surname":14128,"affiliation":63,"orcid":63},"Sammartino",{"paper_id":14115,"author_seq":218,"given_name":13297,"surname":14130,"affiliation":63,"orcid":63},"Andrew",{"paper_id":14115,"author_seq":203,"given_name":14132,"surname":14133,"affiliation":63,"orcid":63},"Witold","Kieraś",{"paper_id":14115,"author_seq":188,"given_name":8892,"surname":14135,"affiliation":63,"orcid":63},"Zawadzka-Paluektau",{"paper_id":14115,"author_seq":172,"given_name":14137,"surname":14138,"affiliation":63,"orcid":63},"Iryna","Dilai",{"paper_id":14115,"author_seq":155,"given_name":4920,"surname":4921,"affiliation":63,"orcid":63},{"paper_id":14115,"author_seq":138,"given_name":4923,"surname":3168,"affiliation":63,"orcid":63},{"paper_id":14115,"author_seq":121,"given_name":2742,"surname":4918,"affiliation":63,"orcid":63},"Euphemisms are words or phrases used to soften or indirectly refer to taboo or sensitive topics. They pose interpretation challenges because the same expression may appear in different senses depending on context: literal, figurative but non-euphemistic, or euphemistic. For example, pull the plug may refer euphemistically to ending a patient’s life support, figuratively to canceling a project or funding, or literally to unplugging a device. Euphemisms also vary across languages and cultures in both their surface forms and the contexts in which they are conventionally used. Previous work introduced datasets for the computational study of euphemisms in five languages. 
We extend this line of work by introducing two new annotated datasets for euphemism detection in Polish and Ukrainian and by standardizing resources for all seven languages into a unified benchmark format that supports cross-lingual evaluation. Finally, we provide zero-shot and few-shot baselines using GPT-5-nano. We ran each configuration five times and report the average score, establishing reference scores for multilingual pragmatic understanding. We also performed pilot tests using Qwen3-4B on the English and Chinese datasets.",{"paper_id":14144,"title":14145,"year":7,"month":188,"day":63,"doi":14146,"resource_url":14147,"first_page":14148,"last_page":14149,"pdf_url":14150,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14151,"paper_type":860,"authors":14152,"abstract":14163},"lrec2026-main-580","Advancing Retrieval-Augmented Generation for Persian: Development of Language Models, Comprehensive Benchmarks, and Best Practices for Optimization","10.63317\u002F2ev2hvzqjmwh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-580","7320","7330","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.580.pdf","bourbourhosseinbeigi-etal-2026-advancing",[14153,14155,14156,14158,14161],{"paper_id":14144,"author_seq":247,"given_name":2548,"surname":14154,"affiliation":63,"orcid":63},"Bourbour Hosseinbeigi",{"paper_id":14144,"author_seq":232,"given_name":8836,"surname":8837,"affiliation":63,"orcid":63},{"paper_id":14144,"author_seq":218,"given_name":7147,"surname":14157,"affiliation":63,"orcid":63},"Asghari",{"paper_id":14144,"author_seq":203,"given_name":14159,"surname":14160,"affiliation":63,"orcid":63},"Mohammad Ali","Seif Kashani",{"paper_id":14144,"author_seq":188,"given_name":1749,"surname":14162,"affiliation":63,"orcid":63},"Abbasi","This paper examines the specific obstacles of constructing Retrieval-Augmented Generation (RAG) systems in low resource languages, with a focus on Persian’s 
complicated morphology and versatile syntax. The research aims to improve retrieval and generation accuracy by introducing Persian-specific models, namely MatinaRoberta (a masked language model) and MatinaSRoberta (a fine-tuned Sentence-BERT), along with a comprehensive benchmarking framework. Three datasets—general knowledge (PQuad), scientifically specialized texts, and organizational reports—were used to assess these models after they were trained on a varied corpus of 73.11 billion Persian tokens. The methodology involved extensive pretraining, fine-tuning with tailored loss functions, and systematic evaluations using both traditional metrics and the Retrieval-Augmented Generation Assessment (RAGAS) framework. The results show that MatinaSRoberta outperformed previous embeddings, achieving superior contextual relevance and retrieval accuracy across datasets. Temperature tweaking, chunk size modifications, and document summary indexing were explored to enhance RAG setups. Larger models like Llama-3.1 (70B) consistently demonstrated the highest generation accuracy, while smaller models faced challenges with domain-specific and formal contexts. 
The findings underscore the potential for developing RAG systems in Persian through customized embeddings and retrieval-generation settings and highlight the enhancement of NLP applications such as search engines and legal document analysis in low-resource languages.",{"paper_id":14165,"title":14166,"year":7,"month":188,"day":63,"doi":14167,"resource_url":14168,"first_page":14169,"last_page":14170,"pdf_url":14171,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14172,"paper_type":860,"authors":14173,"abstract":14182},"lrec2026-main-581","Corpus and Baselines for Distinguishing Authentic, AI-Generated, and AI-Enhanced Resumes","10.63317\u002F4o69wu63q5hq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-581","7331","7344","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.581.pdf","loizidou-etal-2026-corpus",[14174,14176,14177,14179,14180],{"paper_id":14165,"author_seq":247,"given_name":1104,"surname":14175,"affiliation":63,"orcid":63},"Loizidou",{"paper_id":14165,"author_seq":232,"given_name":11594,"surname":11395,"affiliation":63,"orcid":63},{"paper_id":14165,"author_seq":218,"given_name":6373,"surname":14178,"affiliation":63,"orcid":63},"Esquivel",{"paper_id":14165,"author_seq":203,"given_name":11601,"surname":11602,"affiliation":63,"orcid":63},{"paper_id":14165,"author_seq":188,"given_name":3960,"surname":14181,"affiliation":63,"orcid":63},"Ocal","Job applicants are increasingly turning to generative AI to create or enhance their resumes, leading to challenges in fairness, integrity, and efficiency of modern recruitment processes. We present the first curated corpus of resumes annotated as to whether they are authentic, AI-enhanced, or fully AI-generated. The corpus is balanced across the three classes, comprising 420 resumes spanning five job descriptions in the Information Technology (IT) sector, with the authentic resumes anonymized. 
We establish strong baselines for this task using traditional and neural supervised machine learning approaches, including Logistic Regression, SVM, Random Forest, XGBoost, BERT, and Longformer. For the featurized approaches, we pair sparse TF-IDF (word\u002Fcharacter n-grams) with style features capturing length, punctuation, casing, contractions, lexical diversity (type-token ratio [TTR], number of hapax legomena), n-gram uniqueness, readability indices, and sentiment. Our analysis reveals systematic differences between the classes: AI-generated text features shorter, more uniform sentences, and fewer contractions; AI-enhanced text has the highest uniqueness and TTR; and authentic text has the widest variance across all features. XGBoost is the best performing method, achieving 95.29% accuracy and an F1 of 0.953. We make the corpus available for other researchers to build upon our work. We also benchmark two leading off-the-shelf AI–text detectors on our 420-resume corpus. Despite strong reports in other domains, Originality attains only 55.7% accuracy overall (71\u002F140 authentic, 81\u002F140 AI-generated, 82\u002F140 AI-enhanced correct), and Writer attains 25.0%, with the largest failures on AI-enhanced resumes, highlighting domain shift and cautioning against uncalibrated deployment.",{"paper_id":14184,"title":14185,"year":7,"month":188,"day":63,"doi":14186,"resource_url":14187,"first_page":14188,"last_page":14189,"pdf_url":14190,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14191,"paper_type":860,"authors":14192,"abstract":14208},"lrec2026-main-582","Mute Cods: A Multilingual Telegram Dataset with Benchmark Models for Conspiracy Theory 
Detection","10.63317\u002F48xmoewrde3v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-582","7345","7358","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.582.pdf","laken-etal-2026-mute",[14193,14196,14199,14200,14201,14204,14205,14206,14207],{"paper_id":14184,"author_seq":247,"given_name":14194,"surname":14195,"affiliation":63,"orcid":63},"Katarina","Laken",{"paper_id":14184,"author_seq":232,"given_name":14197,"surname":14198,"affiliation":63,"orcid":63},"Erik Bran","Marino",{"paper_id":14184,"author_seq":218,"given_name":8604,"surname":8605,"affiliation":63,"orcid":63},{"paper_id":14184,"author_seq":203,"given_name":1709,"surname":4680,"affiliation":63,"orcid":63},{"paper_id":14184,"author_seq":188,"given_name":14202,"surname":14203,"affiliation":63,"orcid":63},"Søren Kirkegaard","Fomsgaard",{"paper_id":14184,"author_seq":172,"given_name":4677,"surname":4678,"affiliation":63,"orcid":63},{"paper_id":14184,"author_seq":155,"given_name":11196,"surname":9187,"affiliation":63,"orcid":63},{"paper_id":14184,"author_seq":138,"given_name":6445,"surname":9018,"affiliation":63,"orcid":63},{"paper_id":14184,"author_seq":121,"given_name":2548,"surname":8234,"affiliation":63,"orcid":63},"The proliferation of conspiracy theories and hateful messages on social media poses significant challenges for content moderation and public discourse. Despite their societal impact, existing datasets for automated conspiracy detection remain limited in scope and language coverage. We present a multilingual dataset of conspiracy content on Telegram comprising 5750 messages across English, Dutch, Italian, Spanish and Portuguese from 87 channels documented as disseminating conspiracist and extremist content. Domain experts annotated messages for conspiracist tone, population replacement conspiracy theories, vaccine conspiracies, and hate speech. 
We extensively report on difficulties and caveats when creating and annotating this type of dataset. We establish classification baselines by evaluating six models in zero-shot fashion and fine-tuning three encoder models, achieving F1 scores up to 0.800 for conspiracist tone, 0.846 for PRCT, 0.843 for vaccine-related conspiracy theories, and 0.734 for hate speech. Inter-annotator agreement was moderate, consistent with the complexity documented in similar annotation tasks.",{"paper_id":14210,"title":14211,"year":7,"month":188,"day":63,"doi":14212,"resource_url":14213,"first_page":14214,"last_page":14215,"pdf_url":14216,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14217,"paper_type":860,"authors":14218,"abstract":14225},"lrec2026-main-583","Push and Pull: Training Sentence Encoders with Contrastive Losses for Distance-Based Multi-Label Text Classification","10.63317\u002F4fiaod5mcdsr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-583","7359","7379","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.583.pdf","nooten-etal-2026-push",[14219,14222],{"paper_id":14210,"author_seq":247,"given_name":14220,"surname":14221,"affiliation":63,"orcid":63},"Jens Van","Nooten",{"paper_id":14210,"author_seq":232,"given_name":14223,"surname":14224,"affiliation":63,"orcid":63},"Andriy","Kosar","Despite the potential of Distance-Based Classification (DBC), a method that assigns labels to text by measuring semantic similarity between the text and the label representations, it has received very little attention for Multi-Label Text Classification (MLTC). Previous studies have focused on determining optimal thresholds, reaching promising results with contextual sentence encoders. 
We demonstrate that the performance of these models can be further improved by training them with contrastive losses, i.e., by bringing text representations closer to the corresponding true label representations in an embedding space. Using three supervised contrastive losses and three sentence encoders (Stella, GIST-Large, and BGE), we evaluated our approach on five English datasets (SemEval, BioTech, Reuters, AAPD, and LitCovid) and one Dutch dataset (EventDNA). The results show consistent substantial improvements over base sentence encoders, thereby narrowing the gap between DBC methods and fine-tuned or zero-shot approaches.",{"paper_id":14227,"title":14228,"year":7,"month":188,"day":63,"doi":14229,"resource_url":14230,"first_page":14231,"last_page":14232,"pdf_url":14233,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14234,"paper_type":860,"authors":14235,"abstract":14245},"lrec2026-main-584","PRIVaThe: An Annotated Dataset of Multi-Objectives Web Search Sessions","10.63317\u002F2n7axdztewkz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-584","7380","7390","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.584.pdf","ibarboure-etal-2026-privathe",[14236,14238,14240,14242],{"paper_id":14227,"author_seq":247,"given_name":3219,"surname":14237,"affiliation":63,"orcid":63},"Ibarboure",{"paper_id":14227,"author_seq":232,"given_name":11913,"surname":14239,"affiliation":63,"orcid":63},"Tanguy",{"paper_id":14227,"author_seq":218,"given_name":9906,"surname":14241,"affiliation":63,"orcid":63},"Amadieu",{"paper_id":14227,"author_seq":203,"given_name":14243,"surname":14244,"affiliation":63,"orcid":63},"Josiane","Mothe","This paper presents PRIVaThe, a new French-language dataset, consisting of 200 web search sessions from 100 participants performing two multi-objective, multi-hop tasks, designed to enable cross-user comparison of session-level search strategies. 
Unlike existing datasets that capture only query sequences or final answers, PRIVaThe provides explicit sub-objective decomposition traces for each session. We automatically annotate 3,162 queries with their addressed sub-objective(s) using validated open-weight LLMs (Mistral, LLama3, and Gemma) against human gold annotations. This annotation enables systematic analyses of how users distribute and sequence sub-objectives throughout their sessions, revealing distinct search strategies such as logical, global, and exploratory approaches.",{"paper_id":14247,"title":14248,"year":7,"month":188,"day":63,"doi":14249,"resource_url":14250,"first_page":14251,"last_page":14252,"pdf_url":14253,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14254,"paper_type":860,"authors":14255,"abstract":14272},"lrec2026-main-585","Towards Safer Calls for Everyone: Designing a Benchmark Dataset for Evaluating Voice Phishing Detection Models","10.63317\u002F5m4m2viovf4r","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-585","7391","7404","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.585.pdf","kang-etal-2026-safer",[14256,14258,14260,14262,14264,14267,14270],{"paper_id":14247,"author_seq":247,"given_name":14257,"surname":1336,"affiliation":63,"orcid":63},"Joeun",{"paper_id":14247,"author_seq":232,"given_name":14259,"surname":8691,"affiliation":63,"orcid":63},"Gyuri",{"paper_id":14247,"author_seq":218,"given_name":14261,"surname":11013,"affiliation":63,"orcid":63},"Chanhyuk",{"paper_id":14247,"author_seq":203,"given_name":14263,"surname":6008,"affiliation":63,"orcid":63},"Yongbin",{"paper_id":14247,"author_seq":188,"given_name":14265,"surname":14266,"affiliation":63,"orcid":63},"Younggyun","Hahm",{"paper_id":14247,"author_seq":172,"given_name":14268,"surname":14269,"affiliation":63,"orcid":63},"Shea","Husband",{"paper_id":14247,"author_seq":155,"given_name":14271,"surname":5173,"affiliation":63,"orcid":63},"Hans
contributing to safer and more trustworthy communication environments.
Ahmed","Abro",{"paper_id":14274,"author_seq":232,"given_name":14287,"surname":14288,"affiliation":63,"orcid":63},"Naïm","Es-Sebbani",{"paper_id":14274,"author_seq":218,"given_name":14290,"surname":14291,"affiliation":63,"orcid":63},"Zied","Bouraoui","Learning faithful embeddings for long documents remains challenging, especially in domains like law and medicine where inputs are long, structured, and semantically heterogeneous. We introduce the Chunk Prediction Encoder (CPE), a self-supervised framework that treats chunk–context compatibility as an unsupervised NLI problem. Given a document, CPE masks a chunk and learns (i) a contrastive objective that aligns the masked document with its held-out chunk against in-batch negatives, and (ii) a binary entailment head that predicts whether a candidate chunk belongs to the document. This joint objective encourages both geometric smoothness and directional semantic consistency, yielding robust document-level embeddings. We evaluate CPE with hierarchical and sparse-attention backbones on five benchmarks spanning legal and biomedical domains under frozen-embedding and end-to-end fine-tuning protocols. CPE consistently outperforms baselines, and is more compute-efficient than prompt-only LLM baselines under matched token budgets. 
We introduce a novel, publicly available dataset of scientific publications specifically designed to focus on the structural and semantic analysis of their full texts. This collection comprises 4,896 scholarly articles processed using GROBID and self-defined parsers for their segmentation and section parsing.
All source documents were ethically and legally sourced via OpenAIRE, and the corpus is restricted exclusively to content available under open licenses. License verification was performed through cross-referencing publisher metadata, landing pages, and the Unpaywall database. This curated dataset provides a robust and domain-diverse resource, ideal for developing and evaluating NLP models that require training on hierarchical structure of scientific literature.",{"paper_id":14314,"title":14315,"year":7,"month":188,"day":63,"doi":14316,"resource_url":14317,"first_page":14318,"last_page":14319,"pdf_url":14320,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14321,"paper_type":860,"authors":14322,"abstract":14334},"lrec2026-main-588","JMTEB and JMTEB-lite: Japanese Massive Text Embedding Benchmark and Its Lightweight Version","10.63317\u002F5ouzpv2f2f6k","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-588","7423","7434","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.588.pdf","li-etal-2026-jmteb",[14323,14325,14326,14329,14332,14333],{"paper_id":14314,"author_seq":247,"given_name":14324,"surname":3446,"affiliation":63,"orcid":63},"Shengzhe",{"paper_id":14314,"author_seq":232,"given_name":6175,"surname":6176,"affiliation":63,"orcid":63},{"paper_id":14314,"author_seq":218,"given_name":14327,"surname":14328,"affiliation":63,"orcid":63},"Ryokan","Ri",{"paper_id":14314,"author_seq":203,"given_name":14330,"surname":14331,"affiliation":63,"orcid":63},"Akihiko","Fukuchi",{"paper_id":14314,"author_seq":188,"given_name":8916,"surname":8917,"affiliation":63,"orcid":63},{"paper_id":14314,"author_seq":172,"given_name":2790,"surname":3527,"affiliation":63,"orcid":63},"We present JMTEB, a large-scale evaluation suite for Japanese text embedding models, designed to provide comprehensive coverage across multiple task types. 
The benchmark integrates 28 datasets across 5 tasks, enabling broad and challenging evaluation of model performance in diverse scenarios. While the full benchmark delivers thorough assessment, its scale poses practical challenges in terms of computation time and resource requirements. To address this, we construct JMTEB-lite, a lightweight version of JMTEB, by substantially reducing corpus size in retrieval-related tasks. JMTEB-lite significantly accelerates evaluation while maintaining high fidelity to the full benchmark. Together, JMTEB and JMTEB-lite form a flexible evaluation framework: the full version serves as a comprehensive standard for exhaustive benchmarking, while the lightweight version enables rapid iteration and efficient model selection. This dual approach facilitates both rigorous evaluation and practical development workflows, supporting the advancement of Japanese text embedding research.",{"paper_id":14336,"title":14337,"year":7,"month":188,"day":63,"doi":14338,"resource_url":14339,"first_page":14340,"last_page":14341,"pdf_url":14342,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14343,"paper_type":860,"authors":14344,"abstract":14352},"lrec2026-main-589","Construction of a Japanese RAG Benchmark Using Synthetic Documents on Non-existent Entities and 
Events","10.63317\u002F443h4s9sm3gy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-589","7435","7445","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.589.pdf","li-etal-2026-construction",[14345,14346,14347,14349,14350,14351],{"paper_id":14336,"author_seq":247,"given_name":14324,"surname":3446,"affiliation":63,"orcid":63},{"paper_id":14336,"author_seq":232,"given_name":6175,"surname":6176,"affiliation":63,"orcid":63},{"paper_id":14336,"author_seq":218,"given_name":14348,"surname":7197,"affiliation":63,"orcid":63},"Hayato",{"paper_id":14336,"author_seq":203,"given_name":14330,"surname":14331,"affiliation":63,"orcid":63},{"paper_id":14336,"author_seq":188,"given_name":8916,"surname":8917,"affiliation":63,"orcid":63},{"paper_id":14336,"author_seq":172,"given_name":2790,"surname":3527,"affiliation":63,"orcid":63},"Retrieval-augmented generation (RAG) is a technique in which a large language model (LLM) generates answers based on relevant documents retrieved from an external document collection. Existing RAG evaluation benchmarks often use public data, such as Wikipedia and news articles, as the external document collection. However, these data are highly likely to be already included in the LLM’s pre-training corpus, which may prevent an accurate evaluation of the model’s ability to generate answers based on the retrieved documents. In this study, we construct a Japanese RAG benchmark by having an LLM synthesize documents about non-existent entities and events and use this collection of synthetic documents as the search target. Since these synthetic documents are not included in the LLM’s training data, the ability to generate answers based on retrieved documents can be evaluated more accurately. In addition to the synthetic documents, the benchmark is composed of questions and correct answers, which are created using a combination of LLMs and human effort. 
We then evaluated and analyzed the RAG performance of existing LLMs using the constructed benchmark.",{"paper_id":14354,"title":14355,"year":7,"month":188,"day":63,"doi":14356,"resource_url":14357,"first_page":14358,"last_page":14359,"pdf_url":14360,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14361,"paper_type":860,"authors":14362,"abstract":14370},"lrec2026-main-590","C4: A Multilingual Benchmark for Retrieval-Augmented Generation Based on the Catechism of the Catholic Church and Its Compendium","10.63317\u002F5a8nuzcc3cq3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-590","7446","7456","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.590.pdf","dniken-etal-2026-c4",[14363,14366,14368],{"paper_id":14354,"author_seq":247,"given_name":14364,"surname":14365,"affiliation":63,"orcid":63},"Pius von","Däniken",{"paper_id":14354,"author_seq":232,"given_name":8134,"surname":14367,"affiliation":63,"orcid":63},"Cieliebak",{"paper_id":14354,"author_seq":218,"given_name":1380,"surname":14369,"affiliation":63,"orcid":63},"Deriu","We introduce a new multilingual case study for evaluating retrieval augmented generation (RAG) systems, based on the Catechism of the Catholic Church and its Compendium. The Catechism is a structured document with numbered paragraphs, officially translated into many languages under strict editorial alignment. The Compendium reformulates this material into a question-answer format with explicit citations to the corresponding paragraphs. Together, they form a set of parallel monolingual corpora that share identical semantic structure, enabling direct, controlled comparison of RAG performance across languages. 
Beyond its theological origin, this text pair closely mirrors real-world applications of RAG in institutional contexts, such as querying internal policy documents with associated FAQ-style summaries, making it a practical testbed for multilingual retrieval and grounded answer generation. We release our data collection scripts and baseline results for further research.",{"paper_id":14372,"title":14373,"year":7,"month":188,"day":63,"doi":14374,"resource_url":14375,"first_page":14376,"last_page":14377,"pdf_url":14378,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14379,"paper_type":860,"authors":14380,"abstract":14385},"lrec2026-main-591","Contrastively Pre-trained Event Embeddings with Schema-free LLM Annotations","10.63317\u002F3sezhi63dcqv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-591","7457","7478","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.591.pdf","mtumbuka-etal-2026-contrastively",[14381,14383],{"paper_id":14372,"author_seq":247,"given_name":2510,"surname":14382,"affiliation":63,"orcid":63},"Mtumbuka",{"paper_id":14372,"author_seq":232,"given_name":3337,"surname":14384,"affiliation":63,"orcid":63},"Schockaert","Event extraction is a notoriously challenging problem, among others due to the scarcity of suitable training data. Moreover, event-centric knowledge bases are not available for most domains, making traditional distant supervision strategies difficult to implement. In this paper, we evaluate the potential of using LLM-generated annotations as an alternative distant supervision signal. Specifically, we create a synthetically labelled event extraction corpus, using an LLM to identify event triggers and arguments, and to provide corresponding free-text descriptions. We then pre-train event embedding models on this corpus using a contrastive loss, before fine-tuning them in the usual way. 
We empirically show the effectiveness of this approach.",{"paper_id":14387,"title":14388,"year":7,"month":188,"day":63,"doi":14389,"resource_url":14390,"first_page":14391,"last_page":14392,"pdf_url":14393,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14394,"paper_type":860,"authors":14395,"abstract":14415},"lrec2026-main-592","A Dataset of Psychiatric Hospital Notes with Temporal Information Annotations","10.63317\u002F4pj6fbqovg3f","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-592","7479","7484","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.592.pdf","miller-etal-2026-dataset",[14396,14399,14401,14403,14405,14407,14409,14412],{"paper_id":14387,"author_seq":247,"given_name":14397,"surname":14398,"affiliation":63,"orcid":63},"Timothy A.","Miller",{"paper_id":14387,"author_seq":232,"given_name":14400,"surname":12042,"affiliation":63,"orcid":63},"Gaby",{"paper_id":14387,"author_seq":218,"given_name":1061,"surname":14402,"affiliation":63,"orcid":63},"Harris",{"paper_id":14387,"author_seq":203,"given_name":14404,"surname":11013,"affiliation":63,"orcid":63},"Wonjin",{"paper_id":14387,"author_seq":188,"given_name":14406,"surname":1316,"affiliation":63,"orcid":63},"Spencer",{"paper_id":14387,"author_seq":172,"given_name":14408,"surname":11555,"affiliation":63,"orcid":63},"Boyu",{"paper_id":14387,"author_seq":155,"given_name":14410,"surname":14411,"affiliation":63,"orcid":63},"Meihua","Hall",{"paper_id":14387,"author_seq":138,"given_name":14413,"surname":14414,"affiliation":63,"orcid":63},"Guergana","Savova","Temporal information extraction is the task of identifying temporal entities in a text and relating them to each other. In medicine, electronic health records (EHRs) contain text that documents the sequence of events during an encounter with a patient, and sometimes the events prior to the encounter (e.g., social history). 
Temporality is especially important for the specialty of psychiatry. In this work, we describe the updates to the guidelines that allowed us to create a corpus of temporally-annotated psychiatric discharge summaries and progress notes. These updated guidelines were used to create a corpus of over 18000 events, 2200 time expressions, and 13,000 temporal relations. Temporal information extraction performance with a baseline system trained on non-psychiatric data obtains an F1 score of 0.152 on relation extraction, indicating the importance of this new dataset for making progress on temporal information extraction in the psychiatric domain.",{"paper_id":14417,"title":14418,"year":7,"month":188,"day":63,"doi":14419,"resource_url":14420,"first_page":14421,"last_page":14422,"pdf_url":14423,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14424,"paper_type":860,"authors":14425,"abstract":14434},"lrec2026-main-593","Format Matters: A Critical Evaluation of Output Formats for Prompting LLMs in SLU and NER","10.63317\u002F3osjjdr778fh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-593","7485","7497","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.593.pdf","lepagnol-etal-2026-format",[14426,14428,14429,14430,14432],{"paper_id":14417,"author_seq":247,"given_name":1159,"surname":14427,"affiliation":63,"orcid":63},"Lepagnol",{"paper_id":14417,"author_seq":232,"given_name":3956,"surname":10540,"affiliation":63,"orcid":63},{"paper_id":14417,"author_seq":218,"given_name":1316,"surname":10538,"affiliation":63,"orcid":63},{"paper_id":14417,"author_seq":203,"given_name":5419,"surname":14431,"affiliation":63,"orcid":63},"Servan",{"paper_id":14417,"author_seq":188,"given_name":5149,"surname":14433,"affiliation":63,"orcid":63},"Rosset","Output format is often an unreported factor in LLM evaluations for structured NLP tasks such as Slot Filling or Named Entity Recognition. 
This work proposes to explore the impact of the output structured format generated by LLMs. We show that measured performance and reliability depend on the requested format (JSON, XML or inline Key-Values). A study is performed across four SLU and three NER benchmarks and considering 13 instruction-tuned open-weight LLMs, using standardized and open-source prompts and parsers. This format-specific evaluation reveals statistically significant swings of 2-46 F1 points depending on model and dataset. Additionally, we propose a lightweight selection procedure to determine the best format per model-dataset combination using only a small development slice; thus reducing trial-and-error in practice.",{"paper_id":14436,"title":14437,"year":7,"month":188,"day":63,"doi":14438,"resource_url":14439,"first_page":14440,"last_page":14441,"pdf_url":14442,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14443,"paper_type":860,"authors":14444,"abstract":14457},"lrec2026-main-594","Identifying Imaging Follow-Up in Radiology Reports: A Comparative Analysis of Traditional ML and LLM Approaches","10.63317\u002F5eve2chrhdy5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-594","7498","7510","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.594.pdf","park-etal-2026-identifying",[14445,14447,14450,14452,14453,14454,14456],{"paper_id":14436,"author_seq":247,"given_name":14446,"surname":6876,"affiliation":63,"orcid":63},"Namu",{"paper_id":14436,"author_seq":232,"given_name":14448,"surname":14449,"affiliation":63,"orcid":63},"Giridhar 
Kaushik","Ramachandran",{"paper_id":14436,"author_seq":218,"given_name":4481,"surname":14451,"affiliation":63,"orcid":63},"Lybarger",{"paper_id":14436,"author_seq":203,"given_name":9846,"surname":9847,"affiliation":63,"orcid":63},{"paper_id":14436,"author_seq":188,"given_name":11940,"surname":11941,"affiliation":63,"orcid":63},{"paper_id":14436,"author_seq":172,"given_name":3843,"surname":14455,"affiliation":63,"orcid":63},"Gunn",{"paper_id":14436,"author_seq":155,"given_name":9849,"surname":9850,"affiliation":63,"orcid":63},"Large language models (LLMs) have shown considerable promise in clinical natural language processing, yet few domain-specific datasets exist to rigorously evaluate their performance on radiology tasks. In this work, we introduce an annotated corpus of 6,393 radiology reports from 586 patients, each labeled for follow-up imaging status, to support the development and benchmarking of follow-up adherence detection systems. Using this corpus, we systematically compared traditional machine-learning classifiers—logistic regression (LR), support vector machines (SVM), Longformer, and a fully fine-tuned Llama3-8B-Instruct—with recent generative LLMs. To evaluate generative LLMs, we tested GPT-4o and the open-source GPT-OSS-20B under two configurations: a baseline (Base) and a task-optimized (Advanced) setting that focused inputs on metadata, recommendation sentences, and their surrounding context. A refined prompt for GPT-OSS-20B further improved reasoning accuracy. Performance was assessed using precision, recall, and F1 scores with 95% confidence intervals estimated via non-parametric bootstrapping. Inter-annotator agreement was high (F1 = 0.846). GPT-4o (Advanced) achieved the best performance (F1 = 0.832), followed closely by GPT-OSS-20B (Advanced; F1 = 0.828). 
LR and SVM also performed strongly (F1 = 0.776 and 0.775), underscoring that while LLMs approach human-level agreement through prompt optimization, interpretable and resource-efficient models remain valuable baselines.",{"paper_id":14459,"title":14460,"year":7,"month":188,"day":63,"doi":14461,"resource_url":14462,"first_page":14463,"last_page":14464,"pdf_url":14465,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14466,"paper_type":860,"authors":14467,"abstract":14486},"lrec2026-main-595","Efficient Topic Extraction via Graph-Based Labeling: A Lightweight Alternative to Deep Models","10.63317\u002F5q74w4fjj6sw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-595","7511","7522","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.595.pdf","mekaoui-etal-2026-efficient",[14468,14471,14474,14477,14479,14482,14485],{"paper_id":14459,"author_seq":247,"given_name":14469,"surname":14470,"affiliation":63,"orcid":63},"Salma","Mekaoui",{"paper_id":14459,"author_seq":232,"given_name":14472,"surname":14473,"affiliation":63,"orcid":63},"Hiba","Sofyan",{"paper_id":14459,"author_seq":218,"given_name":14475,"surname":14476,"affiliation":63,"orcid":63},"Imane","Benchrif",{"paper_id":14459,"author_seq":203,"given_name":14475,"surname":14478,"affiliation":63,"orcid":63},"Amaaz",{"paper_id":14459,"author_seq":188,"given_name":14480,"surname":14481,"affiliation":63,"orcid":63},"Ilham","Chaker",{"paper_id":14459,"author_seq":172,"given_name":14483,"surname":14484,"affiliation":63,"orcid":63},"Arsalane","Zarghili",{"paper_id":14459,"author_seq":155,"given_name":8991,"surname":8992,"affiliation":63,"orcid":63},"Extracting topics from text has become an essential task, especially with the rapid growth of unstructured textual data. Most existing works rely on highly computational methods to address this challenge. 
In this paper, we argue that probabilistic and statistical approaches, such as topic modeling (TM), can offer effective alternatives that require fewer computational resources. TM is a statistical method that automatically discovers topics in large collections of unlabeled text; however, it produces topics as distributions of representative words, which often lack clear interpretability. Our objective is to perform topic labeling by assigning meaningful labels to these sets of words. To achieve this without relying on computationally expensive models, we propose a graph-based approach that not only enriches topic words with semantically related terms but also explores the relationships among them. By analyzing these connections within the graph, we derive suitable labels that accurately capture each topic’s meaning. We present a comparative study between our proposed method and several benchmarks, including ChatGPT-3.5 , across two different datasets. Our method achieved consistently better results than traditional benchmarks in terms of BERTScore and cosine similarity and produced results comparable to ChatGPT-3.5, while remaining computationally efficient. 
Finally, we discuss future directions for topic labeling and highlight potential research avenues for enhancing interpretability and automation.",{"paper_id":14488,"title":14489,"year":7,"month":188,"day":63,"doi":14490,"resource_url":14491,"first_page":14492,"last_page":14493,"pdf_url":14494,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14495,"paper_type":860,"authors":14496,"abstract":14508},"lrec2026-main-596","From Noise to Signal: When Outliers Seed New Topics","10.63317\u002F5c6zvq4nbjdq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-596","7523","7533","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.596.pdf","zve-etal-2026-noise",[14497,14500,14503,14505],{"paper_id":14488,"author_seq":247,"given_name":14498,"surname":14499,"affiliation":63,"orcid":63},"Evangelia","Zve",{"paper_id":14488,"author_seq":232,"given_name":14501,"surname":14502,"affiliation":63,"orcid":63},"Gauvain","Bourgne",{"paper_id":14488,"author_seq":218,"given_name":4797,"surname":14504,"affiliation":63,"orcid":63},"Icard",{"paper_id":14488,"author_seq":203,"given_name":14506,"surname":14507,"affiliation":63,"orcid":63},"Jean-Gabriel","Ganascia","Outliers in dynamic topic modeling are often discarded as noise, yet some act as early signals of emerging topics. We introduce a temporal taxonomy of news document trajectories that distinguishes anticipatory outliers, documents that appear before a topic forms but later integrate into it, from those that reinforce existing topics or remain isolated. This taxonomy bridges weak-signal detection and dynamic topic modeling, clarifying how individual articles anticipate, initiate, or drift within evolving clusters. We implement it within a cumulative clustering framework using document- embeddings from eleven state-of-the-art language models and apply it retrospectively to HydroNewsFr, a French news corpus on the hydrogen economy curated for this study. 
Inter-model agreement on anticipatory outliers indicates that a small high-agreement subset yields robust confidence estimates. Complementary qualitative case studies further demonstrate their potential value as early indicators of emerging narratives. All reproducibility materials and results are available at https:\u002F\u002Fanonymous.4open.science\u002Fstatus\u002Flrec_from_noise_to_signal-B721.",{"paper_id":14510,"title":14511,"year":7,"month":188,"day":63,"doi":14512,"resource_url":14513,"first_page":14514,"last_page":14515,"pdf_url":14516,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14517,"paper_type":860,"authors":14518,"abstract":14524},"lrec2026-main-597","Explore Political Discourse with Transformers. Emergent Paradigmatic and Syntagmatic Representations.","10.63317\u002F3tgpydvmvpeu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-597","7534","7544","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.597.pdf","vanni-etal-2026-explore",[14519,14521],{"paper_id":14510,"author_seq":247,"given_name":4364,"surname":14520,"affiliation":63,"orcid":63},"Vanni",{"paper_id":14510,"author_seq":232,"given_name":14522,"surname":14523,"affiliation":63,"orcid":63},"Damon","Mayaffre","Textual data analysis lies at the heart of inductive reasoning in corpus linguistics. Corpus-driven approaches place the corpus at the center of working hypotheses and use statistical processing as an exploratory tool. With deep neural networks, the training corpus is also crucial, but the objectives are less exploratory. Nevertheless, the performance of Transformers in automatic language processing suggests that self-attention is an effective means of extracting structural information from corpora. In this article, we present interdisciplinary work that uses Transformers descriptively to shed light on linguistic phenomena present in a learning corpus. 
We propose using two feature-based interpretation methods in a case study of political speeches applied to a text generation task. The first method is a global approach that uses attention scores to analyse the training corpus. The second is a local approach that uses gradient-based features to analyse predictions. These methods are compared to standard statistical techniques, providing empirical confirmation of the observed phenomena. We conclude on the potential of Transformers as a heuristic tool for corpus linguistics.",{"paper_id":14526,"title":14527,"year":7,"month":188,"day":63,"doi":14528,"resource_url":14529,"first_page":14530,"last_page":14531,"pdf_url":14532,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14533,"paper_type":860,"authors":14534,"abstract":14539},"lrec2026-main-598","The Growing Gains and Pains of Iterative Web Corpora Crawling: Insights from South Slavic CLASSLA-web 2.0 Corpora","10.63317\u002F2p8hh2c5z2wv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-598","7545","7555","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.598.pdf","pungerek-etal-2026-growing",[14535,14536,14537,14538],{"paper_id":14526,"author_seq":247,"given_name":10942,"surname":10943,"affiliation":63,"orcid":63},{"paper_id":14526,"author_seq":232,"given_name":1625,"surname":10937,"affiliation":63,"orcid":63},{"paper_id":14526,"author_seq":218,"given_name":13732,"surname":13733,"affiliation":63,"orcid":63},{"paper_id":14526,"author_seq":203,"given_name":10934,"surname":10935,"affiliation":63,"orcid":63},"Crawling national top-level domains has proven to be highly effective for collecting texts in less-resourced languages. This approach has been recently used for South Slavic languages and resulted in the largest general corpora for this language group: the CLASSLA-web 1.0 corpora. 
Building on this success, we established a continuous crawling infrastructure for iterative national top-level domain crawling across South Slavic and related webs. We present the first outcome of this crawling infrastructure - the CLASSLA-web 2.0 corpus collection, with substantially larger web corpora containing 17.0 billion words in 38.1 million texts in seven languages: Bosnian, Bulgarian, Croatian, Macedonian, Montenegrin, Serbian, and Slovenian. In addition to genre categories, the new version is also automatically annotated with topic labels. Comparing CLASSLA-web 2.0 with its predecessor reveals that only one-fifth of the texts overlap, showing that re-crawling after just two years yields largely new content. However, while the new web crawls bring growing gains, we also notice growing pains - a manual inspection of top domains reveals a visible degradation of web content, as machine-generated sites now contribute a significant portion of texts.",{"paper_id":14541,"title":14542,"year":7,"month":188,"day":63,"doi":14543,"resource_url":14544,"first_page":14545,"last_page":14546,"pdf_url":14547,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14548,"paper_type":860,"authors":14549,"abstract":14553},"lrec2026-main-599","MaritimEmails: A Synthetic Dataset for Maritime Chartering Correspondence","10.63317\u002F4ewa9vv654ty","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-599","7556","7567","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.599.pdf","bruendler-etal-2026-maritimemails",[14550,14552],{"paper_id":14541,"author_seq":247,"given_name":4481,"surname":14551,"affiliation":63,"orcid":63},"Bruendler",{"paper_id":14541,"author_seq":232,"given_name":2467,"surname":2468,"affiliation":63,"orcid":63},"We introduce MaritimEmails, a large-scale synthetic corpus of 19,817 English-language email threads simulating maritime chartering negotiations between brokers and charterers. 
Email remains a dominant medium for business communication, yet no public corpora exist for this highly specialized domain due to confidentiality constraints. To address this gap, we generate domain-plausible negotiation exchanges using five contemporary language models under multiple prompting strategies, including Attribute Prompting and Base–Refine (BARE) approaches. Each thread includes structured annotations for vessels, ports, commodities, and Incoterms, enabling supervised training for information extraction and related tasks. Our comparative evaluation covering lexical and semantic diversity, sentiment balance, and verbosity shows that BARE generation increases linguistic variation while maintaining coherence. However, all models exhibit a systematic positivity bias, yielding less negative sentiment than is observed in the Enron reference corpus and likely also in many real negotiation settings. Baseline information extraction experiments with GLiNER and generative Qwen models yield up to 0.86 macro F1 on entity extraction, supporting the dataset’s usefulness. 
MaritimEmails, together with prompts, scripts, and documentation, is released for research use.",{"paper_id":14555,"title":14556,"year":7,"month":188,"day":63,"doi":14557,"resource_url":14558,"first_page":14559,"last_page":14560,"pdf_url":14561,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14562,"paper_type":860,"authors":14563,"abstract":14570},"lrec2026-main-600","eSciBench: An Extensible Scientific PDF Extraction Benchmark","10.63317\u002F4sxku4i2piqq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-600","7568","7580","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.600.pdf","taillon-etal-2026-escibench",[14564,14567],{"paper_id":14555,"author_seq":247,"given_name":14565,"surname":14566,"affiliation":63,"orcid":63},"Noah Tremblay","Taillon",{"paper_id":14555,"author_seq":232,"given_name":14568,"surname":14569,"affiliation":63,"orcid":63},"Phillippe","Langlais","Automatically extracting information from PDF documents (such as authors, affiliations, references, tables, equations) may be transformative in Digital Humanities where meta-data accompanying a document is typically manually collected, a cumbersome process. In this work, we conduct a systematic benchmarking of PDF extractors on a set of 100 scientific articles (1949 pages) of the STEM domain that have been processed automatically, then carefully curated. Our benchmark, named eSciBench is openly accessible. 
Putting to the test 13 extractors on it reveals that although some extractors perform well overall, extracting information from scientific articles is far from a solved problem.",{"paper_id":14572,"title":14573,"year":7,"month":188,"day":63,"doi":14574,"resource_url":14575,"first_page":14576,"last_page":14577,"pdf_url":14578,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14579,"paper_type":860,"authors":14580,"abstract":14593},"lrec2026-main-601","Vrittanta-AS: Dataset Development and Benchmarking for Event Trigger Detection and Classification in Assamese","10.63317\u002F5ieyiqjkvgxt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-601","7581","7591","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.601.pdf","kirti-etal-2026-vrittanta",[14581,14584,14587,14590],{"paper_id":14572,"author_seq":247,"given_name":14582,"surname":14583,"affiliation":63,"orcid":63},"Chaitanya","Kirti",{"paper_id":14572,"author_seq":232,"given_name":14585,"surname":14586,"affiliation":63,"orcid":63},"Dhrubajyoti","Pathak",{"paper_id":14572,"author_seq":218,"given_name":14588,"surname":14589,"affiliation":63,"orcid":63},"Ashish","Anand",{"paper_id":14572,"author_seq":203,"given_name":14591,"surname":14592,"affiliation":63,"orcid":63},"Prithwijit","Guha","Event trigger detection and classification aim to identify and categorize events within unstructured text. While prior research has primarily focused on news or biomedical corpora, the literary domain, especially short stories, remains largely underexplored. This gap is particularly pronounced for low-resource languages such as Assamese, where limited annotated data and complex narrative structures hinder progress. To address this challenge, we introduce Vrittanta-AS, a manually curated Assamese event trigger detection and classification dataset comprising 13,171 annotated events extracted from short stories. 
The dataset is designed to advance research in information extraction and narrative understanding for low-resource Indian languages. We conduct a comprehensive evaluation using classical machine learning methods, neural sequential architectures, pre-trained transformer models, and large language models (LLMs) on the proposed dataset. Experimental results demonstrate that IndicBERT v2 achieves the highest performance for both event trigger detection (85.86% micro-F1) and classification (65.21% macro-F1). Vrittanta-AS serves as an important step toward developing benchmark resources for event trigger detection and classification in Assamese literary text.",{"paper_id":14595,"title":14596,"year":7,"month":188,"day":63,"doi":14597,"resource_url":14598,"first_page":14599,"last_page":14600,"pdf_url":14601,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14602,"paper_type":860,"authors":14603,"abstract":14620},"lrec2026-main-602","From Facts to Hypotheses: Joint Detection of Biomedical Relations and Epistemic Commitment Using LLMs","10.63317\u002F5prxfaguzng8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-602","7592","7605","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.602.pdf","gabryszak-etal-2026-facts",[14604,14606,14608,14611,14613,14616,14619],{"paper_id":14595,"author_seq":247,"given_name":1313,"surname":14605,"affiliation":63,"orcid":63},"Gabryszak",{"paper_id":14595,"author_seq":232,"given_name":14607,"surname":6462,"affiliation":63,"orcid":63},"Phuc 
Tran",{"paper_id":14595,"author_seq":218,"given_name":14609,"surname":14610,"affiliation":63,"orcid":63},"Arne","Binder",{"paper_id":14595,"author_seq":203,"given_name":10934,"surname":14612,"affiliation":63,"orcid":63},"Milosevic",{"paper_id":14595,"author_seq":188,"given_name":14614,"surname":14615,"affiliation":63,"orcid":63},"Felix-Sebastian","Keese",{"paper_id":14595,"author_seq":172,"given_name":14617,"surname":14618,"affiliation":63,"orcid":63},"Astrid","Rheinländer",{"paper_id":14595,"author_seq":155,"given_name":1041,"surname":1316,"affiliation":63,"orcid":63},"Determining the factual status of biomedical statements, whether affirmed, negated, or uncertain, is essential for accurate understanding. To support research in this area, we introduce BioRelFact, a publicly available, expert-annotated dataset of 1,767 English biomedical sentences labeled with nine relation types and five levels of epistemic commitment. Using this dataset, we evaluate eight large language models (LLMs) from the GPT, Qwen, and Gemma families for joint relation extraction and epistemic classification. Among the evaluated models, GPT-OSS-20B performs best in both tasks (F1 77.3 for relation, 65.3 for commitment), followed by GPT-4o (75.9 and 60.2), while Qwen3-8B (Thinking) shows strong performance despite its smaller size (74.6 and 57.2). Domain adaptation has mixed effects: relative to their general-purpose counterparts, MedGemma-27B improves (+3.6 F1 for relation, +4.4 for factuality), whereas Qwen2.5-Aloe-Beta-7B declines (–4.3 and –3.5, respectively). 
Moreover, definition-based few-shot prompts consistently yield the best results for most models, and an explorative analysis of prediction errors suggests which specific linguistic features may drive model confusions.",{"paper_id":14622,"title":14623,"year":7,"month":188,"day":63,"doi":14624,"resource_url":14625,"first_page":14626,"last_page":14627,"pdf_url":14628,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14629,"paper_type":860,"authors":14630,"abstract":14649},"lrec2026-main-603","SciLaD: A Large-Scale, Transparent, Reproducible Dataset for Natural Scientific Language Processing","10.63317\u002F4f2awjiigkbr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-603","7606","7618","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.603.pdf","foppiano-etal-2026-scilad",[14631,14633,14636,14639,14641,14643,14646,14647,14648],{"paper_id":14622,"author_seq":247,"given_name":1107,"surname":14632,"affiliation":63,"orcid":63},"Foppiano",{"paper_id":14622,"author_seq":232,"given_name":14634,"surname":14635,"affiliation":63,"orcid":63},"Sotaro","Takeshita",{"paper_id":14622,"author_seq":218,"given_name":14637,"surname":14638,"affiliation":63,"orcid":63},"Pedro Ortiz","Suarez",{"paper_id":14622,"author_seq":203,"given_name":6029,"surname":14640,"affiliation":63,"orcid":63},"Borisova",{"paper_id":14622,"author_seq":188,"given_name":14642,"surname":3647,"affiliation":63,"orcid":63},"Raia Abu",{"paper_id":14622,"author_seq":172,"given_name":14644,"surname":14645,"affiliation":63,"orcid":63},"Malte","Ostendorff",{"paper_id":14622,"author_seq":155,"given_name":5741,"surname":12809,"affiliation":63,"orcid":63},{"paper_id":14622,"author_seq":138,"given_name":1296,"surname":14307,"affiliation":63,"orcid":63},{"paper_id":14622,"author_seq":121,"given_name":3009,"surname":3010,"affiliation":63,"orcid":63},"SciLaD is a novel, large-scale dataset of scientific language constructed entirely using open-source 
frameworks and publicly available data sources. It comprises a curated English split containing over 10 million scientific publications and a multilingual, unfiltered TEI XML split including more than 35 million publications. We also publish the extensible pipeline for generating SciLaD. The dataset construction and processing workflow demonstrates how open-source tools can enable large-scale, scientific data curation while maintaining high data quality. Finally, we pre-train a RoBERTa model on our dataset and evaluate it across a comprehensive set of benchmarks, achieving performance comparable to other scientific language models of similar size, validating the quality and utility of SciLaD. We publish the dataset and evaluation pipeline to promote reproducibility, transparency, and further research in natural scientific language processing and understanding including scholarly document processing.",{"paper_id":14651,"title":14652,"year":7,"month":188,"day":63,"doi":14653,"resource_url":14654,"first_page":14655,"last_page":14656,"pdf_url":14657,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14658,"paper_type":860,"authors":14659,"abstract":14667},"lrec2026-main-604","CausalSense: Leveraging Common Sense Knowledge and LLMs for Joint Event Extraction and Relation Classification","10.63317\u002F4uiusu5nfowx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-604","7619","7630","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.604.pdf","rebboud-etal-2026-causalsense",[14660,14663,14666],{"paper_id":14651,"author_seq":247,"given_name":14661,"surname":14662,"affiliation":63,"orcid":63},"Youssra","Rebboud",{"paper_id":14651,"author_seq":232,"given_name":14664,"surname":14665,"affiliation":63,"orcid":63},"Pasquale","Lisena",{"paper_id":14651,"author_seq":218,"given_name":3266,"surname":3267,"affiliation":63,"orcid":63},"Event Relation Extraction (ERE) aims to identify and classify semantic 
relationships between events expressed in text. While existing work has mainly addressed temporal or simple causal links, fine-grained causal relations such as enable, prevent, and intend remain insufficiently explored, partly due to limited and imbalanced labeled datasets. We present a novel framework that leverages large language models (LLMs) and common-sense knowledge to jointly perform event extraction and relation classification. Our contribution includes (1) the creation of the CausalSense large-scale dataset containing more than 500k sentences from news data and commonsense knowledge extracted from ATOMIC, and enriched synthetically; and (2) the evaluation of multiple architectures, including transformer-based models and end-to-end multitask systems for extracting fine-grained causal relationships. Experimental results show that our best-performing model achieves a 32.3% improvement in average F1-score over the current state of the art. The integration of commonsense knowledge substantially enhances fine-grained causal relation detection. 
The CausalSense dataset, our code and models are released as open source to support future research on causal event relationship extraction.",{"paper_id":14669,"title":14670,"year":7,"month":188,"day":63,"doi":14671,"resource_url":14672,"first_page":14673,"last_page":14674,"pdf_url":14675,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14676,"paper_type":860,"authors":14677,"abstract":14681},"lrec2026-main-605","Large Language Models Are Good Term Extractors: A Systematic Evaluation","10.63317\u002F49gta5itne3q","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-605","7631","7643","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.605.pdf","terryn-2026-large",[14678],{"paper_id":14669,"author_seq":247,"given_name":14679,"surname":14680,"affiliation":63,"orcid":63},"Ayla Rigouts","Terryn","This paper systematically evaluates modern large language models for automatic term extraction (ATE), examining GPT-5 and Mistral across four domains and three languages using the ACTER corpus. The study compares model sizes, evaluates reasoning-enhanced variants, and tests prompting strategies aligned with human annotation guidelines. Beyond extracting term lists, models provide term labels, confidence scores, and terminology management remarks. Current large language models achieve F1 scores of .36-.72; while seemingly low, this is competitive with supervised approaches and approaches the human inter-annotator agreement ceiling of  0.59. Larger models outperform smaller variants, with reasoning-enhanced models showing modest improvements. Qualitative error analysis reveals that evaluation methodology partly misrepresents model capabilities: many extractions classified as errors represent defensible boundary judgements, and apparent hallucinations are predominantly (though not exclusively) valid normalisations. Limitations remain in fine-grained categorisation and handling overly general expressions. 
However, the convergence of model scores with each other and with human inter-annotator agreement suggests that, for high-resource languages, basic ATE may no longer be the bottleneck in terminology management pipelines, and research should shift toward downstream tasks such as definition generation and ontology construction.",{"paper_id":14683,"title":14684,"year":7,"month":188,"day":63,"doi":14685,"resource_url":14686,"first_page":14687,"last_page":14688,"pdf_url":14689,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14690,"paper_type":860,"authors":14691,"abstract":14709},"lrec2026-main-606","A Large-Scale Dataset for Linking-Based Geocoding","10.63317\u002F2pv6oidqzqs9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-606","7644","7654","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.606.pdf","nakatani-etal-2026-large",[14692,14695,14698,14700,14702,14705,14708],{"paper_id":14683,"author_seq":247,"given_name":14693,"surname":14694,"affiliation":63,"orcid":63},"Hibiki","Nakatani",{"paper_id":14683,"author_seq":232,"given_name":14696,"surname":14697,"affiliation":63,"orcid":63},"Yuichiro","Yasui",{"paper_id":14683,"author_seq":218,"given_name":8412,"surname":14699,"affiliation":63,"orcid":63},"Wakamoto",{"paper_id":14683,"author_seq":203,"given_name":8585,"surname":14701,"affiliation":63,"orcid":63},"Ishii",{"paper_id":14683,"author_seq":188,"given_name":14703,"surname":14704,"affiliation":63,"orcid":63},"Tetsuhisa","Suizu",{"paper_id":14683,"author_seq":172,"given_name":14706,"surname":14707,"affiliation":63,"orcid":63},"Hiroki","Ouchi",{"paper_id":14683,"author_seq":155,"given_name":4449,"surname":4450,"affiliation":63,"orcid":63},"Linking-based geocoding is the task of linking location mentions in text to their corresponding entries in a geographic database (Geo-DB) and assigning precise coordinates. 
Although the task and its technology are essential for spatial information extraction, existing datasets are manually curated and lack sufficient data for training accurate models. To address this limitation, we automatically construct a large-scale dataset for linking-based geocoding by leveraging publicly available resources to generate data efficiently at scale. Specifically, we align location mentions in the first paragraphs of Japanese Wikipedia articles with their associated Wikidata entries containing geographic attributes. Wikipedia provides natural textual contexts, while Wikidata offers structured data such as coordinates, place types, and administrative divisions, which can serve as rich metadata for future extensions. Our experiments show that models trained on our dataset achieve strong performance not only on in-domain data, i.e., Wikipedia, but also on out-of-domain newspaper articles, and further confirm that hard negative mining substantially improves disambiguation among confusable candidates. 
Although the dataset focuses on Japanese, the construction method is language-agnostic and can be extended to other languages with sufficient Wikipedia and Wikidata coverage.",{"paper_id":14711,"title":14712,"year":7,"month":188,"day":63,"doi":14713,"resource_url":14714,"first_page":14715,"last_page":14716,"pdf_url":14717,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14718,"paper_type":860,"authors":14719,"abstract":14724},"lrec2026-main-607","FiNERVINER: Fine-grained Named Entity Recognition for Vulnerable Languages of India's North Eastern Region","10.63317\u002F3rs5mcedzvss","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-607","7655","7667","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.607.pdf","kaushik-etal-2026-finerviner",[14720,14723],{"paper_id":14711,"author_seq":247,"given_name":14721,"surname":14722,"affiliation":63,"orcid":63},"Prachuryya","Kaushik",{"paper_id":14711,"author_seq":232,"given_name":14588,"surname":14589,"affiliation":63,"orcid":63},"Named entity recognition (NER), particularly fine-grained NER (FgNER), extracts domain-specific entity information for Natural Language Processing (NLP) applications such as knowledge base construction and relation extraction. While manual annotation for creating relevant data is expensive, distant supervision often produces noisy data. Moreover, resources for coarse-grained and fine-grained NER in Indian languages, particularly in the vulnerable languages of India’s North Eastern Region, remain scarce. This work aims at creating such a resource for three vulnerable languages: \u003Ci>Bodo\u002FBoro (brx)\u003C\u002Fi>, \u003Ci>Manipuri\u002FMeitei (mni)\u003C\u002Fi>, and \u003Ci>Mizo\u002FLushai (lus)\u003C\u002Fi>, which are regarded as official languages in three Indian states and spoken by more than six million people across five countries in South and Southeast Asia. 
We use annotation projection from high-resource FgNER datasets using source-to-target parallel corpora and a projection tool built on a multilingual encoder. The dataset comprises over 198k sentences, 282k entities, and 2.8M tokens in each low-resource language. Our thorough analyses validate the dataset’s high quality. We further explore zero-shot and cross-lingual settings, examining the impact of script similarity and multilingualism in cross-lingual FgNER performance. The dataset, expert detector models, the agentic tool, and the interactive web application are available as open-source resources at: \u003Curl>https:\u002F\u002Fhf.co\u002Fcollections\u002FprachuryyaIITG\u002Ffinerviner\u003C\u002Furl>.",{"paper_id":14726,"title":14727,"year":7,"month":188,"day":63,"doi":14728,"resource_url":14729,"first_page":14730,"last_page":14731,"pdf_url":14732,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14733,"paper_type":860,"authors":14734,"abstract":14747},"lrec2026-main-608","APTFiNER: Annotation Preserving Translation for Fine-grained Named Entity Recognition","10.63317\u002F3w7rv4rg7nty","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-608","7668","7680","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.608.pdf","kaushik-etal-2026-aptfiner",[14735,14736,14738,14741,14743,14746],{"paper_id":14726,"author_seq":247,"given_name":14721,"surname":14722,"affiliation":63,"orcid":63},{"paper_id":14726,"author_seq":232,"given_name":14737,"surname":11400,"affiliation":63,"orcid":63},"Adittya",{"paper_id":14726,"author_seq":218,"given_name":14739,"surname":14740,"affiliation":63,"orcid":63},"Ajanta","Maurya",{"paper_id":14726,"author_seq":203,"given_name":14742,"surname":11395,"affiliation":63,"orcid":63},"Gautam",{"paper_id":14726,"author_seq":188,"given_name":14744,"surname":14745,"affiliation":63,"orcid":63},"V. 
V.","Saradhi",{"paper_id":14726,"author_seq":172,"given_name":14588,"surname":14589,"affiliation":63,"orcid":63},"We present APTFiNER, a novel fine-grained named entity recognition (FgNER) dataset covering six low-resource Indian languages spoken by over 400 million people across various nations. While creating FgNER resources through manual annotation is typically expensive and labor-intensive, distant supervision has emerged as a workable alternative. Yet, such FgNER datasets are often noisy, as each entity mentions are often assigned multiple entity types, which necessitates computationally demanding noise-aware models. Furthermore, resources for both coarse-grained and fine-grained NER tasks remain scarce for low-resource languages. To overcome this scarcity, we utilized the superior reasoning and translation capability of Gemini through the proposed annotation-preserving translation method and created a large-scale FgNER dataset comprising over 411 thousand sentences, 697 thousand entity mentions, and 5.8 million tokens in total. We translated the MultiCoNER2 English FgNER dataset to the target languages: \u003Ci>Assamese (as)\u003C\u002Fi>, \u003Ci>Marathi (mr)\u003C\u002Fi>, \u003Ci>Nepali (ne)\u003C\u002Fi>, \u003Ci>Tamil (ta)\u003C\u002Fi>, \u003Ci>Telugu (te)\u003C\u002Fi>, and a vulnerable language, \u003Ci>Bodo (brx)\u003C\u002Fi>. Through rigorous analyses and human evaluations, the effectiveness of our method and the high quality of the resulting dataset are ascertained with F1 score improvements of 8% in both Tamil and Telugu, and 25% in Marathi over the current state-of-the-art. 
The dataset, expert detector models, the agentic tool, and the interactive web application are available as open-source resources at: \u003Curl>https:\u002F\u002Fhf.co\u002Fcollections\u002FprachuryyaIITG\u002Faptfiner\u003C\u002Furl>.",{"paper_id":14749,"title":14750,"year":7,"month":188,"day":63,"doi":14751,"resource_url":14752,"first_page":14753,"last_page":14754,"pdf_url":14755,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14756,"paper_type":860,"authors":14757,"abstract":14763},"lrec2026-main-609","RelEx-PT: A Portuguese Sentence-Level Relation Extraction Dataset","10.63317\u002F473a5ncxrap4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-609","7681","7691","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.609.pdf","pinto-etal-2026-relex",[14758,14761,14762],{"paper_id":14749,"author_seq":247,"given_name":14759,"surname":14760,"affiliation":63,"orcid":63},"Tomás","Pinto",{"paper_id":14749,"author_seq":232,"given_name":10657,"surname":4040,"affiliation":63,"orcid":63},{"paper_id":14749,"author_seq":218,"given_name":3713,"surname":3714,"affiliation":63,"orcid":63},"We introduce RelEx-PT, a new sentence-level Relation Extraction dataset for Portuguese. Addressing the scarcity of high-quality, controlled resources for the language, RelEx-PT provides a balanced benchmark comprising 18 Wikidata-derived relation types across diverse domains. The dataset is built through a distant supervision pipeline that links Wikidata triples with Portuguese Wikipedia sentences and enhanced by a Natural Language Inference (NLI)-based filtering process, combining scalability with quality assurance. Additionally, we conduct baseline experiments to evaluate the dataset’s applicability across diverse extraction settings, including Relation Classification (RC), Relation Triple Extraction, and Open Information Extraction. 
These experiments leverage both prompting and fine-tuning strategies using Large Language Models. The results show that RelEx-PT effectively supports a range of extraction paradigms, yielding high performance in RC and competitive results in structured triple generation, while also highlighting key challenges in open-ended extraction.",{"paper_id":14765,"title":14766,"year":7,"month":188,"day":63,"doi":14767,"resource_url":14768,"first_page":14769,"last_page":14770,"pdf_url":14771,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14772,"paper_type":860,"authors":14773,"abstract":14780},"lrec2026-main-610","Benchmarking Portuguese Open Information Extraction","10.63317\u002F2wuxjj5bo7ax","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-610","7692","7700","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.610.pdf","silva-etal-2026-benchmarking",[14774,14775,14777,14778],{"paper_id":14765,"author_seq":247,"given_name":10347,"surname":4040,"affiliation":63,"orcid":63},{"paper_id":14765,"author_seq":232,"given_name":14776,"surname":4069,"affiliation":63,"orcid":63},"Mário",{"paper_id":14765,"author_seq":218,"given_name":4409,"surname":10650,"affiliation":63,"orcid":63},{"paper_id":14765,"author_seq":203,"given_name":14779,"surname":4424,"affiliation":63,"orcid":63},"Marlene","Open Information Extraction (OIE) has seen significant advancements for English, but progress in Portuguese has been hindered by a lack of resources such as datasets and standardized evaluation benchmarks. This work addresses this critical gap by establishing a systematic and reproducible benchmark for Portuguese OIE systems. We conduct a comprehensive evaluation of eight systems, spanning a decade of research and encompassing both rule-based and neural architectures. 
The performance of these systems is measured against three distinct Portuguese corpora (WIKI200, CETEN200, and Gamalho) using the established CaRB methodology. Our results reveal that no single system excels across all three datasets. Rule-based models perform strongly on general text (WIKI200, CETEN200) but falter on specialized corpora (Gamalho), while neural systems demonstrate more consistent but not superior performance. With overall F1 scores averaging around 40%, our findings confirm that Portuguese OIE remains a largely unsolved task. This benchmark provides a baseline for future research and highlights the need for a high-quality, manually annotated gold-standard dataset to drive meaningful progress in the field. The evaluation benchmark\u002Fframework is made publicly available at https:\u002F\u002Fgithub.com\u002Fgabrielrsilva11\u002FPT-OIE-Benchmark.",{"paper_id":14782,"title":14783,"year":7,"month":188,"day":63,"doi":14784,"resource_url":14785,"first_page":14786,"last_page":14787,"pdf_url":14788,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14789,"paper_type":860,"authors":14790,"abstract":14795},"lrec2026-main-611","A Scalable Pipeline for Novelty Detection in Skill Extraction Using Large Language Models","10.63317\u002F3twvjs5vmuwt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-611","7701","7706","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.611.pdf","seifert-etal-2026-scalable",[14791,14794],{"paper_id":14782,"author_seq":247,"given_name":14792,"surname":14793,"affiliation":63,"orcid":63},"Gian","Seifert",{"paper_id":14782,"author_seq":232,"given_name":2467,"surname":2468,"affiliation":63,"orcid":63},"The rapid evolution of the labor market requires skill ontologies to be continuously updated, but manually identifying emerging skills in job advertisements is highly labor-intensive. 
This paper presents a scalable, multi-stage pipeline for automated novelty detection in skill extraction. The system combines Large Language Models (LLMs) for candidate generation, a re-matching and threshold-based filtering module (\"Turbo\"), that compares candidates against the existing ontology, and a two-step aggregation process that merges string-based and embedding-based clustering. Experiments on Swiss job advertisement datasets using GPT-4o, Gemini-2.0-flash, and DeepSeek-V3 show that the pipeline effectively reduces noise and manual curation effort: Turbo filtering lowered false positives by 82%, and aggregation reduced the number of items requiring review by 97%. Among the tested models, Gemini-2.0-flash achieved the highest precision, reaching a novelty detection ratio of up to 73% in the qualitative evaluation. These findings demonstrate the pipeline’s potential as an efficient tool for maintaining dynamic skill ontologies.",{"paper_id":14797,"title":14798,"year":7,"month":188,"day":63,"doi":14799,"resource_url":14800,"first_page":14801,"last_page":14802,"pdf_url":14803,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14804,"paper_type":860,"authors":14805,"abstract":14815},"lrec2026-main-612","Do LLMs Judge Distantly Supervised Named Entity Labels Well? 
Constructing the JudgeWEL Dataset","10.63317\u002F2s4piwjugsds","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-612","7707","7718","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.612.pdf","plum-etal-2026-do",[14806,14809,14812],{"paper_id":14797,"author_seq":247,"given_name":14807,"surname":14808,"affiliation":63,"orcid":63},"Alistair","Plum",{"paper_id":14797,"author_seq":232,"given_name":14810,"surname":14811,"affiliation":63,"orcid":63},"Laura Maria","Bernardy",{"paper_id":14797,"author_seq":218,"given_name":14813,"surname":14814,"affiliation":63,"orcid":63},"Tharindu","Ranasinghe","We present judgeWEL, a dataset for named entity recognition (NER) in Luxembourgish, automatically labelled and subsequently verified using large language models (LLM) in a novel pipeline. Building datasets for under-represented languages remains one of the major bottlenecks in natural language processing, where the scarcity of resources and linguistic particularities make large-scale annotation costly and potentially inconsistent. To address these challenges, we propose and evaluate a novel approach that leverages Wikipedia and Wikidata as structured sources of weak supervision. By exploiting internal links within Wikipedia articles, we infer entity types based on their corresponding Wikidata entries, thereby generating initial annotations with minimal human intervention. Because such links are not uniformly reliable, we mitigate noise by employing and comparing several LLMs to identify and retain only high-quality labelled sentences. 
The resulting corpus is approximately five times larger than the currently available Luxembourgish NER dataset and offers broader and more balanced coverage across entity categories, providing a substantial new resource for multilingual and low-resource NER research.",{"paper_id":14817,"title":14818,"year":7,"month":188,"day":63,"doi":14819,"resource_url":14820,"first_page":14821,"last_page":14822,"pdf_url":14823,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14824,"paper_type":860,"authors":14825,"abstract":14848},"lrec2026-main-613","From Articles to Premises: Building PrimeFacts, an Extraction Methodology and Resource for Fact-Checking Evidence","10.63317\u002F453datkp6z9s","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-613","7719","7731","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.613.pdf","sahitaj-etal-2026-articles",[14826,14829,14832,14834,14836,14838,14840,14843,14846,14847],{"paper_id":14817,"author_seq":247,"given_name":14827,"surname":14828,"affiliation":63,"orcid":63},"Premtim","Sahitaj",{"paper_id":14817,"author_seq":232,"given_name":14830,"surname":14831,"affiliation":63,"orcid":63},"Jawan","Kolanowski",{"paper_id":14817,"author_seq":218,"given_name":14833,"surname":14828,"affiliation":63,"orcid":63},"Ariana",{"paper_id":14817,"author_seq":203,"given_name":3442,"surname":14835,"affiliation":63,"orcid":63},"Solopova",{"paper_id":14817,"author_seq":188,"given_name":2525,"surname":14837,"affiliation":63,"orcid":63},"Upravitelev",{"paper_id":14817,"author_seq":172,"given_name":1668,"surname":14839,"affiliation":63,"orcid":63},"Röder",{"paper_id":14817,"author_seq":155,"given_name":14841,"surname":14842,"affiliation":63,"orcid":63},"Iffat","Maab",{"paper_id":14817,"author_seq":138,"given_name":14844,"surname":14845,"affiliation":63,"orcid":63},"Junichi","Yamagishi",{"paper_id":14817,"author_seq":121,"given_name":4763,"surname":12769,"affiliation":63,"orcid":63},{"paper
_id":14817,"author_seq":104,"given_name":2545,"surname":4660,"affiliation":63,"orcid":63},"Fact-checking articles encode rich supporting evidence and reasoning, yet this evidence remains largely inaccessible to automated verification systems due to unstructured presentation. We introduce PrimeFacts, a methodology and resource for extracting fine-grained evidence from full fact-checking articles. We compile 13,106 PolitiFact articles with claims, verdicts, and all referenced sources, and we identify 49,718 in-article hyperlinks as natural anchors to pinpoint key evidence. Our framework leverages large language models (LLMs) to rewrite these anchor sentences into stand-alone, context-independent premises and investigates the extraction of additional implicit evidence. In evaluations on cross-article evidence retrieval and claim verification, the extracted premises substantially improve performance. Decontextualized evidence yields higher retrievability, achieving up to a 30% relative gain in Mean Reciprocal Rank over verbatim sentences, and using the evidence for verdict prediction raises Macro-F₁ by 10-20 points over the baseline. These gains are consistent across different verdict granularities (2-class vs. 5-class) and model architectures. A qualitative analysis indicates that the decontextualized premises remain faithful to the original sources. 
Our work highlights the promise of reusing fact-checkers’ evidence for automation and provides a large-scale resource of structured evidence from real-world fact-checks.",{"paper_id":14850,"title":14851,"year":7,"month":188,"day":63,"doi":14852,"resource_url":14853,"first_page":14854,"last_page":14855,"pdf_url":14856,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14857,"paper_type":860,"authors":14858,"abstract":14871},"lrec2026-main-614","EpiGator: An Event-based Surveillance System for Infectious Disease Outbreaks","10.63317\u002F5jrha624xs52","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-614","7732","7743","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.614.pdf","wu-etal-2026-epigator",[14859,14861,14864,14867,14870],{"paper_id":14850,"author_seq":247,"given_name":14860,"surname":7319,"affiliation":63,"orcid":63},"Yiheng",{"paper_id":14850,"author_seq":232,"given_name":14862,"surname":14863,"affiliation":63,"orcid":63},"Jue","Hou",{"paper_id":14850,"author_seq":218,"given_name":14865,"surname":14866,"affiliation":63,"orcid":63},"Trangcasanchai","Sathianpong",{"paper_id":14850,"author_seq":203,"given_name":14868,"surname":14869,"affiliation":63,"orcid":63},"Lidia","Pivovarova",{"paper_id":14850,"author_seq":188,"given_name":1064,"surname":7550,"affiliation":63,"orcid":63},"We present EpiGator, a novel event-based system for global surveillance of outbreaks of infectious epidemics that automatically processes streams of news articles and generates reports about the outbreaks, which is crucial for medical authorities. The goal of our work is to combine our experience in outbreak surveillance with state-of-the-art large language models (LLM), which allows us to reduce the overall cost of system development and maintenance. The EpiGator pipeline combines keyword filtering, relevance classification, event-based clustering, and multi-document summarization. 
A key novelty lies in using a fine-tuned LLM to identify articles relevant to ongoing outbreaks, followed by a zero-shot information extraction pipeline that normalizes the event features and clusters the related articles. For each cluster, we generate an outbreak summary using instruction-tuned LLMs. We evaluate EpiGator output against disease outbreak reports written by medical specialists.",{"paper_id":14873,"title":14874,"year":7,"month":188,"day":63,"doi":14875,"resource_url":14876,"first_page":14877,"last_page":14878,"pdf_url":14879,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14880,"paper_type":860,"authors":14881,"abstract":14891},"lrec2026-main-615","Relation Extraction across Entire Books to Reconstruct Community Networks: The AffilKG Datasets","10.63317\u002F4ddnnhq4dpfn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-615","7744","7754","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.615.pdf","cai-etal-2026-relation",[14882,14885,14888,14890],{"paper_id":14873,"author_seq":247,"given_name":14883,"surname":14884,"affiliation":63,"orcid":63},"Erica","Cai",{"paper_id":14873,"author_seq":232,"given_name":14886,"surname":14887,"affiliation":63,"orcid":63},"Sean","Mcquade",{"paper_id":14873,"author_seq":218,"given_name":4481,"surname":14889,"affiliation":63,"orcid":63},"Young",{"paper_id":14873,"author_seq":203,"given_name":2669,"surname":2670,"affiliation":63,"orcid":63},"When knowledge graphs (KGs) are automatically extracted from text, are they accurate enough for downstream analysis? Unfortunately, current annotated datasets cannot be used to evaluate this question, since the knowledge graphs they correspond to, constructed by mapping entities in the text to nodes and relations to edges, are typically highly disconnected, too small, or overly complex. 
To address this gap, we introduce AFFILKG, which is a collection of six datasets that are the first to pair complete book scans with large, labeled knowledge graphs. Each dataset features affiliation graphs, which are simple KGs that capture MEMBER relationships between PERSON and ORGANIZATION entities—useful in studies of migration, community interactions, and other social phenomena. In addition, three datasets include expanded KGs with a wider variety of relation types. Our preliminary experiments demonstrate significant variability in model performance across datasets, underscoring AFFILKG’s ability to enable two critical advances: (1) benchmarking how extraction errors propagate to graph-level analyses (e.g., community structure), and (2) validating KG extraction methods for real-world social science research.",{"paper_id":14893,"title":14894,"year":7,"month":188,"day":63,"doi":14895,"resource_url":14896,"first_page":14897,"last_page":14898,"pdf_url":14899,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14579,"paper_type":860,"authors":14900,"abstract":14904},"lrec2026-main-616","Vrittanta-EN: A Benchmark Dataset for Event Trigger Detection and Classification Advancing Event Understanding in English Narrative Discourse","10.63317\u002F25s8ijb2orwm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-616","7755","7765","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.616.pdf",[14901,14902,14903],{"paper_id":14893,"author_seq":247,"given_name":14582,"surname":14583,"affiliation":63,"orcid":63},{"paper_id":14893,"author_seq":232,"given_name":14588,"surname":14589,"affiliation":63,"orcid":63},{"paper_id":14893,"author_seq":218,"given_name":14591,"surname":14592,"affiliation":63,"orcid":63},"Event trigger detection and classification involve identifying meaningful occurrences and categorizing them into predefined event types within narrative text. 
Despite extensive research on English event extraction in factual domains like news and biomedical text, narrative prose, such as short stories, has received comparatively little attention. To bridge this gap, Vrittanta-EN introduces a manually annotated English corpus comprising 11,272 event instances extracted from diverse short stories. The dataset captures a wide range of communicative, cognitive, and physical actions typical of narrative discourse. A comprehensive evaluation is conducted across a wide range of models, including classical machine learning baselines (SVM, Naive Bayes), neural sequential models (LSTM, BiLSTM, BiLSTM-CRF), encoder-only transformers (BERT, RoBERTa, ALBERT, DistilBERT, DeBERTa, ELECTRA), and encoder-decoder models (T5, BART), along with large language models (GPT-4.1, DeepSeek-V3.2-Exp, Claude Sonnet 4) under both zero-shot and five-shot settings. Experimental results show that ELECTRA achieved the highest overall performance for event trigger detection with an F1-score of 90.61%, while RoBERTa demonstrated superior performance for event classification with a macro F1 of 74.71%. These findings highlight the robustness of contextual transformer-based architectures for modeling narrative event structures in English short stories. 
The dataset, code, and annotation guidelines will be publicly released upon paper acceptance.",{"paper_id":14906,"title":14907,"year":7,"month":188,"day":63,"doi":14908,"resource_url":14909,"first_page":14910,"last_page":14911,"pdf_url":14912,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14913,"paper_type":860,"authors":14914,"abstract":14923},"lrec2026-main-617","MUC-4 Revisited: Document-level Event Analysis beyond Span-based Arguments","10.63317\u002F2kaxd8nu44bx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-617","7766","7780","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.617.pdf","olsen-etal-2026-muc",[14915,14918,14920],{"paper_id":14906,"author_seq":247,"given_name":14916,"surname":14917,"affiliation":63,"orcid":63},"Helene Bøsei","Olsen",{"paper_id":14906,"author_seq":232,"given_name":2022,"surname":14919,"affiliation":63,"orcid":63},"Velldal",{"paper_id":14906,"author_seq":218,"given_name":14921,"surname":14922,"affiliation":63,"orcid":63},"Lilja","Øvrelid","Automatically predicting structured representations of events has long been a central goal in information extraction, yet most contemporary work remains limited to identifying contiguous text spans as event arguments. This span-centric formulation fails to capture higher-level aspects of real-world events, such as actor identities, temporal scope, and aggregated outcomes, that many event-centred applications depend on. While commonly treated as a standard extractive benchmark, MUC-4 originally combined span-based arguments with normalised, inferred, and categorical fields, reflecting a richer, application-driven design. In this paper, we revisit MUC-4 in its full original formulation, casting it as an abstractive event analysis task that connects traditional event extraction goals with modern generative and document-level paradigms. 
We provide the first systematic evaluation of fine-tuned generative models in this extended formulation on MUC-4, examining how post-training stages and model size affect performance across both span-based and higher-level, semantically grounded event information. An extensive error analysis highlights practical challenges and directions for future work.",{"paper_id":14925,"title":14926,"year":7,"month":188,"day":63,"doi":14927,"resource_url":14928,"first_page":14929,"last_page":14930,"pdf_url":14931,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14932,"paper_type":860,"authors":14933,"abstract":14940},"lrec2026-main-618","Historical Medical Knowledge Graphs and Ontologies from the Medical History of British India Corpus (1850-1950)","10.63317\u002F4pisentrkhfh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-618","7781","7790","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.618.pdf","almasi-etal-2026-historical",[14934,14937],{"paper_id":14925,"author_seq":247,"given_name":14935,"surname":14936,"affiliation":63,"orcid":63},"Mehrdad","Almasi",{"paper_id":14925,"author_seq":232,"given_name":14938,"surname":14939,"affiliation":63,"orcid":63},"Tugce","Karatas","This research presents a reproducible framework for constructing biomedical knowledge graphs and ontologies from digitized historical archives. Focusing on the Medical History of British India corpus (468 reports; ∼22.5M words; 1850–1950), our pipeline combines BioBERT-based entity recognition, LLM-guided relation extraction with LLM-based filtering, and clustering-based ontology induction. Reliability is strengthened through canonicalization, schema mapping to standardized biomedical relation types, and multi-metric edge scoring with temporal decay; a manual evaluation of 500 validated triples yields 0.892 precision. 
The resulting resources comprise 282,882 extracted relations, consolidated into 22,360 unique surface forms and organized into 71 thematic clusters. Frequent categories include After Treatment (∼1,242 mentions), Date of Inoculation (∼540), and diverse causal relations, while the induced ontology highlights six epidemic diseases: plague, cholera, malaria, kala azar, leprosy, and smallpox together with their characteristic interventions (e.g., quinine therapy, vaccination campaigns, hospital disinfection). Temporal analyses capture historically plausible trajectories: plague interventions peaking in the 1890s, cholera’s long-run decline, and tuberculosis departments rising after 1910. All code, relation inventories, ontologies, and visualizations are released in a GitHub Repository, enabling reproducibility and supporting research in historical NLP, biomedical informatics, and digital humanities.",{"paper_id":14942,"title":14943,"year":7,"month":188,"day":63,"doi":14944,"resource_url":14945,"first_page":14946,"last_page":14947,"pdf_url":14948,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14949,"paper_type":860,"authors":14950,"abstract":14956},"lrec2026-main-619","Graph-TempCZ: A Graph Representation of Software Mentions for Predicting Software Usage in Scientific Publications","10.63317\u002F2jopizgg4dzo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-619","7791","7803","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.619.pdf","cao-etal-2026-graph",[14951,14953,14955],{"paper_id":14942,"author_seq":247,"given_name":14952,"surname":10237,"affiliation":63,"orcid":63},"Congfeng",{"paper_id":14942,"author_seq":232,"given_name":14954,"surname":1519,"affiliation":63,"orcid":63},"Pengyu",{"paper_id":14942,"author_seq":218,"given_name":981,"surname":982,"affiliation":63,"orcid":63},"Predicting how software is used, shared, and evolves across publications is essential to studying scientific 
progress. Existing methods for representing software usage in publications rely mainly on tabular or textual formats, which limit their structural expressiveness and consequently their ability to predict software usage. We address these gaps by representing software mentions and citations as a graph and formulating software usage prediction as a link prediction task. To support this study, we construct the first large-scale graph dataset of publication and software mentions, Graph-TempCZ, covering 1959-2022 with over six million mention relationships. Experiments using both traditional machine learning and Graph Neural Network (GNN) show that graph-based models substantially outperform feature-based baselines, achieving a 5.98% improvement in test accuracy. Temporal experiments further reveal that models trained on one year generalize effectively to nearby years but show gradual performance decay as the temporal gap increases. This work provides the first comprehensive foundation for analyzing software usage through a temporal graph representation.",{"paper_id":14958,"title":14959,"year":7,"month":188,"day":63,"doi":14960,"resource_url":14961,"first_page":14962,"last_page":14963,"pdf_url":14964,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14965,"paper_type":860,"authors":14966,"abstract":14976},"lrec2026-main-620","Automatic Suggestions Help Extending Eventive Ontology: A Case Study on 
SynSemClass","10.63317\u002F4suozbd35x5o","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-620","7804","7815","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.620.pdf","strakov-etal-2026-automatic",[14967,14970,14972,14975],{"paper_id":14958,"author_seq":247,"given_name":14968,"surname":14969,"affiliation":63,"orcid":63},"Jana","Straková",{"paper_id":14958,"author_seq":232,"given_name":2161,"surname":14971,"affiliation":63,"orcid":63},"Fučíková",{"paper_id":14958,"author_seq":218,"given_name":14973,"surname":14974,"affiliation":63,"orcid":63},"Zdenka","Uresova",{"paper_id":14958,"author_seq":203,"given_name":1380,"surname":3434,"affiliation":63,"orcid":63},"Despite substantial recent progress in many areas of NLP, semantic tasks remain particularly challenging. One such task is the creation (extension, or annotation) of semantic ontologies. In this work, we present a case study on the eventive SynSemClass ontology, focusing on the challenges of semantic annotation – that is extending the ontology with new lexical units and\u002For new concepts – both with and without automatic support. We consider two strategies for generating annotation suggestions: (i) a knowledge-driven approach based on a small, carefully curated corpus of verbal valency frames, and (ii) a corpus-driven approach using lemma-based suggestions from a large raw text collection, disregarding semantic homonymy. Our findings show that ontology annotation is inherently difficult, and that automatic annotations statistically significantly reduce this difficulty both in terms of inter-annotator agreement and when compared with gold expert annotations. 
We discuss the implications for semantic resource creation and extension, as well as the limits of automation in ontology annotation.",{"paper_id":14978,"title":14979,"year":7,"month":188,"day":63,"doi":14980,"resource_url":14981,"first_page":14982,"last_page":14983,"pdf_url":14984,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":14985,"paper_type":860,"authors":14986,"abstract":14993},"lrec2026-main-621","JPPB: Automatic Construction of a Soft-Labeled Japanese Patient Phrase Bank for Symptom Normalization","10.63317\u002F345uq5t7y98h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-621","7816","7828","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.621.pdf","nishiyama-etal-2026-jppb",[14987,14988,14991,14992],{"paper_id":14978,"author_seq":247,"given_name":1451,"surname":1452,"affiliation":63,"orcid":63},{"paper_id":14978,"author_seq":232,"given_name":14989,"surname":14990,"affiliation":63,"orcid":63},"Mana","Kuramoto",{"paper_id":14978,"author_seq":218,"given_name":1460,"surname":1461,"affiliation":63,"orcid":63},{"paper_id":14978,"author_seq":203,"given_name":1472,"surname":1473,"affiliation":63,"orcid":63},"Patient-generated symptom expressions are linguistically diverse, often deviating from standardized medical terminology. This paper introduces the Japanese Patient Phrase Bank (JPPB), the first automatically constructed phrase-level normalization resource for Japanese patient language. JPPB introduces an embedding-based soft labeling framework that transforms traditional one-to-one dictionary mappings into graded and ambiguity-aware associations. This framework represents a shift from word-level to phrase-level normalization in Japanese. The resource covers 7,035 phrase–term pairs across 412 symptoms. 
Evaluation on the KEEPHA and MedNLP-SC datasets shows that soft labels consistently improve Top-1 accuracy and better approximate gold label distributions compared with hard labels. While LLM-based normalization achieved the highest scores, JPPB provides a lightweight and transparent alternative suitable for local deployment. This work demonstrates that large-scale, automatically generated phrase banks can achieve competitive performance relative to manually curated resources and serve as practical, scalable resources for medical natural language processing in Japanese.",{"paper_id":14995,"title":14996,"year":7,"month":188,"day":63,"doi":14997,"resource_url":14998,"first_page":14999,"last_page":15000,"pdf_url":15001,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15002,"paper_type":860,"authors":15003,"abstract":15010},"lrec2026-main-622","How I Met Your Snowclone: Unsupervised Discovery of Snowclone Patterns in Large Datasets","10.63317\u002F5iuorx8jxpiw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-622","7829","7844","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.622.pdf","bezanon-etal-2026-how",[15004,15006,15008],{"paper_id":14995,"author_seq":247,"given_name":1179,"surname":15005,"affiliation":63,"orcid":63},"Bezançon",{"paper_id":14995,"author_seq":232,"given_name":4685,"surname":15007,"affiliation":63,"orcid":63},"Lejeune",{"paper_id":14995,"author_seq":218,"given_name":15009,"surname":11754,"affiliation":63,"orcid":63},"Marceau","Snowclones are a type of Multiword Expression (MWE) pattern that includes open slots, i.e. positions that can be filled with various words. For example, in the phrase \"May the X be with you,\" the slot X can be replaced with virtually any noun. A key feature of snowclones is that the original MWE remains recognizable, carrying its meaning into the new form. 
However, previous work has not shown whether such substitutions are limited to fixed positions. In practice, variations such as \"May the force bee with you\" are also possible. In this paper, we propose to use Locality Sensitive Hashing (LSH) to automatically extract snowclone patterns from the non-commercial IMDb dataset. This process results in the creation of the FROST lexicon, comprising 29,011 pattern candidates and 991,626 snowclone candidates distributed in 29 languages. We then annotate 1,500 discovered patterns and 1,000 snowclones from the FROST lexicon to assess its quality. Our findings suggest that (i) most substitutions in snowclones occur at consistent positions and (ii) snowclones can be reliably discovered at scale using LSH and similarity-based metrics. This work provides the first large-scale lexicon of snowclone-based MWEs and a method that can support future research on MWEs and snowclones discovery.",{"paper_id":15012,"title":15013,"year":7,"month":188,"day":63,"doi":15014,"resource_url":15015,"first_page":15016,"last_page":15017,"pdf_url":15018,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15019,"paper_type":860,"authors":15020,"abstract":15035},"lrec2026-main-623","HOME-KGQA: A Benchmark Dataset for Multimodal Knowledge Graph Question Answering on Household Daily 
Activities","10.63317\u002F25xhew5rnydb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-623","7845","7856","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.623.pdf","egami-etal-2026-home",[15021,15024,15027,15029,15030,15031,15032,15034],{"paper_id":15012,"author_seq":247,"given_name":15022,"surname":15023,"affiliation":63,"orcid":63},"Shusaku","Egami",{"paper_id":15012,"author_seq":232,"given_name":15025,"surname":15026,"affiliation":63,"orcid":63},"Aoi","Ohta",{"paper_id":15012,"author_seq":218,"given_name":4246,"surname":15028,"affiliation":63,"orcid":63},"Tsujimura",{"paper_id":15012,"author_seq":203,"given_name":2044,"surname":5819,"affiliation":63,"orcid":63},{"paper_id":15012,"author_seq":188,"given_name":3526,"surname":4604,"affiliation":63,"orcid":63},{"paper_id":15012,"author_seq":172,"given_name":5824,"surname":5825,"affiliation":63,"orcid":63},{"paper_id":15012,"author_seq":155,"given_name":5846,"surname":15033,"affiliation":63,"orcid":63},"Hamasaki",{"paper_id":15012,"author_seq":138,"given_name":4609,"surname":4610,"affiliation":63,"orcid":63},"Large Language Models (LLMs) provide flexible natural language processing capabilities, while knowledge graphs (KGs) offer explicit and structured knowledge. Integrating these two in a complementary manner enables the development of reliable and verifiable AI systems. In particular, knowledge graph question answering (KGQA) has attracted attention as a means to reduce LLM hallucinations and to leverage knowledge beyond the training data. However, existing KGQA benchmark datasets are biased toward encyclopedic knowledge, limited to a single modality, and lack fine-grained spatiotemporal data, which limits their applicability to real-world scenarios targeted by Embodied AI. We introduce HOME-KGQA, a novel KGQA benchmark dataset built on a multimodal KG of daily household activities. 
HOME-KGQA consists of complex, multi-hop natural language questions paired with graph database query languages. Compared to existing benchmarks, it includes more challenging questions that involve multi-level spatiotemporal reasoning, multimodal grounding, and aggregate functions. Experimental results show that the LLM-based KGQA methods fail to achieve performance comparable to that on existing datasets when evaluated on HOME-KGQA. This highlights significant challenges that should be addressed for the real-world deployment of KGQA systems. Our dataset is available at https:\u002F\u002Fgithub.com\u002Faistairc\u002Fhome-kgqa.",{"paper_id":15037,"title":15038,"year":7,"month":188,"day":63,"doi":15039,"resource_url":15040,"first_page":15041,"last_page":15042,"pdf_url":15043,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15044,"paper_type":860,"authors":15045,"abstract":15055},"lrec2026-main-624","Extending the Semantic Layer of the CompL-it Italian Lexicon: Traits, Semantic Types, and Definitions","10.63317\u002F3rvf2vbt4ier","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-624","7857","7866","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.624.pdf","giovannetti-etal-2026-extending",[15046,15048,15050,15052],{"paper_id":15037,"author_seq":247,"given_name":7590,"surname":15047,"affiliation":63,"orcid":63},"Giovannetti",{"paper_id":15037,"author_seq":232,"given_name":1104,"surname":15049,"affiliation":63,"orcid":63},"Bellandi",{"paper_id":15037,"author_seq":218,"given_name":1795,"surname":15051,"affiliation":63,"orcid":63},"Marchi",{"paper_id":15037,"author_seq":203,"given_name":15053,"surname":15054,"affiliation":63,"orcid":63},"Mafalda","Papini","The growing impact of Large Language Models has highlighted the need for explicit, interpretable linguistic knowledge. 
Lexical resources respond to this need by offering structured representations that complement and constrain the implicit semantics of neural models. This paper presents an extension of CompL-it, currently the most comprehensive open computational lexicon of Italian. Building on the semantic layer inherited from LexicO—itself derived from the PAROLE-SIMPLE-CLIPS resource—the work enriches CompL-it with semantic traits and references to semantic types. Moreover, an experiment was conducted to generate missing definitions through an automatic process supported by LLMs. The resulting resource thus combines human-curated and machine-extended knowledge, ensuring both linguistic precision and scalability. This enriched semantic layer enhances CompL-it’s interoperability within the Linguistic Linked Data framework and strengthens its usability for NLP tasks such as word sense disambiguation, semantic role labelling, and knowledge grounding.",{"paper_id":15057,"title":15058,"year":7,"month":188,"day":63,"doi":15059,"resource_url":15060,"first_page":15061,"last_page":15062,"pdf_url":15063,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15064,"paper_type":860,"authors":15065,"abstract":15072},"lrec2026-main-625","Integrating Knowledge Graph with Large Language Models for Multi-hop Question Generation","10.63317\u002F4fz4our5amwv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-625","7867","7882","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.625.pdf","chali-etal-2026-integrating",[15066,15069],{"paper_id":15057,"author_seq":247,"given_name":15067,"surname":15068,"affiliation":63,"orcid":63},"Yllias","Chali",{"paper_id":15057,"author_seq":232,"given_name":15070,"surname":15071,"affiliation":63,"orcid":63},"Al Hasib","Mahamud","Question generation (QG) is a fundamental task in natural language processing that involves generating fluent and grammatically correct questions from a given input 
context, optionally conditioned on an answer. Multi-hop question generation (MHQG), a more complex variant, requires reasoning over multiple pieces of information across diverse contexts to formulate coherent questions. In this work, we propose Knowledge Graph for Question Generation (KG4QG), a novel framework that integrates knowledge graphs with large language models to address the challenges of MHQG. Our approach constructs knowledge graphs from input contexts, encodes them using Graph Attention Networks (GAT), and leverages Sentence Transformers for contextual text embeddings. These enriched representations are then fed into large language models—specifically BART and T5—for multi-hop question generation. We evaluate KG4QG on the HotpotQA dataset, demonstrating that our method achieves superior performance compared to existing state-of-the-art approaches, highlighting the effectiveness of combining structured knowledge and pre-trained language models for complex question generation tasks.",{"paper_id":15074,"title":15075,"year":7,"month":188,"day":63,"doi":15076,"resource_url":15077,"first_page":15078,"last_page":15079,"pdf_url":15080,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15081,"paper_type":860,"authors":15082,"abstract":15087},"lrec2026-main-626","LocalGovPL: A Corpus of Speaker-Attributed Polish Local Government Transcripts","10.63317\u002F23722jebsz2d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-626","7883","7893","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.626.pdf","czerski-etal-2026-localgovpl",[15083,15086],{"paper_id":15074,"author_seq":247,"given_name":15084,"surname":15085,"affiliation":63,"orcid":63},"Dariusz","Czerski",{"paper_id":15074,"author_seq":232,"given_name":4412,"surname":4413,"affiliation":63,"orcid":63},"We present LocalGovPL, a large-scale, speaker-annotated corpus of Polish local government meeting transcripts processed using an 
automatic two-stage LLM pipeline. The corpus consists of 31,900 sessions from 749 councils recorded between 2018–2025 (approximately 391M words). It is released in TEI P5 format with explicit links between utterances and registered participants. We collect transcripts from official local government portals using a dedicated crawler, normalize the text, and apply: (1) LLM-assisted extraction of person names and administrative roles; and (2) attribution of utterances to identified speakers using discourse cues. To evaluate attribution quality, we manually annotate 30 sessions and evaluate five LLM configurations using three evaluation protocols with speaker-aware word error rate (sWER). The strongest system, Gemini-2.5-pro, achieves 3.9% sWER for abstract speaker identification, 4.6% for known participants, and 5.9% for end-to-end processing with relaxed name matching. LocalGovPL enables large-scale analysis of local deliberative discourse and supports research on dialogue modeling, summarization, and political text analysis.",{"paper_id":15089,"title":15090,"year":7,"month":188,"day":63,"doi":15091,"resource_url":15092,"first_page":15093,"last_page":15094,"pdf_url":15095,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15096,"paper_type":860,"authors":15097,"abstract":15117},"lrec2026-main-627","Amharic DBpedia Chapter: A Knowledge Graph for a Low-Resource Language","10.63317\u002F536jn6g8rjnx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-627","7894","7904","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.627.pdf","alemayehu-etal-2026-amharic",[15098,15101,15104,15107,15110,15113,15114],{"paper_id":15089,"author_seq":247,"given_name":15099,"surname":15100,"affiliation":63,"orcid":63},"Hizkiel Mitiku","Alemayehu",{"paper_id":15089,"author_seq":232,"given_name":15102,"surname":15103,"affiliation":63,"orcid":63},"Tilahun 
Abedissa","Taffa",{"paper_id":15089,"author_seq":218,"given_name":15105,"surname":15106,"affiliation":63,"orcid":63},"Meti Adane","Bayissa",{"paper_id":15089,"author_seq":203,"given_name":15108,"surname":15109,"affiliation":63,"orcid":63},"Andargachew Asfaw","Zewge",{"paper_id":15089,"author_seq":188,"given_name":15111,"surname":15112,"affiliation":63,"orcid":63},"Hamada","Zahera",{"paper_id":15089,"author_seq":172,"given_name":2531,"surname":2532,"affiliation":63,"orcid":63},{"paper_id":15089,"author_seq":155,"given_name":15115,"surname":15116,"affiliation":63,"orcid":63},"Axel-Cyrille Ngonga","Ngomo","DBpedia is a community-driven project that extracts structured knowledge from Wikipedia via language-specific chapters. We present the first steps toward the Amharic DBpedia chapter by extending the DBpedia Extraction Framework (DEF) to support Amharic Wikipedia, including language-specific components such as Ethiopian date parsers, an Ethiopian–Gregorian calendar converter, an Arabic–Ge’ez number converter, and Amharic template mappings, together with automated extraction pipelines and the publication of the resulting knowledge graph through a live website, DBpedia Databus collection, and query endpoints. For mapping, we evaluate the zero-shot NLLB-200 translation model on Amharic infobox property names, achieving a BLEU score of 45.31. For ontology alignment, we link mapped properties to DBpedia ontology properties across 58 DBpedia classes and benchmark multilingual encoders with Amharic support, including Afro-XLM-R Base, XLM-R Base, and Amharic fine-tuned mBERT. The fine-tuned Afro-XLM-R model achieves 92.1% Top-10 accuracy and strong ranking performance, as measured by Mean Reciprocal Rank (MRR). 
We release all resources developed for the Amharic DBpedia chapter, including the Ethiopian date parser, Ethiopian–Gregorian calendar converter, Arabic–Geʽez numeral converter, Amharic template mappings, automated extraction workflows, and the resulting Amharic DBpedia knowledge graph with public access via the DBpedia Databus collection, Tentris query endpoint, and the live website at am.dbpedia.org.",{"paper_id":15119,"title":15120,"year":7,"month":188,"day":63,"doi":15121,"resource_url":15122,"first_page":15123,"last_page":15124,"pdf_url":15125,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15126,"paper_type":860,"authors":15127,"abstract":15133},"lrec2026-main-628","Cygnet: Refactoring the Open Multilingual Wordnet","10.63317\u002F2v96snyewr2h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-628","7905","7917","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.628.pdf","maudslay-etal-2026-cygnet",[15128,15131],{"paper_id":15119,"author_seq":247,"given_name":15129,"surname":15130,"affiliation":63,"orcid":63},"Rowan Hall","Maudslay",{"paper_id":15119,"author_seq":232,"given_name":7075,"surname":15132,"affiliation":63,"orcid":63},"Bond","The wordnet file specification empowers the creators of different wordnets by allowing them to encode the same information in multiple different ways. A drawback of this approach is that redundancy is introduced. As a consequence, different wordnets often contain conflicting records, creating issues when one attempts to conduct multilingual research using multiple wordnets simultaneously. To address this, we present the OMW Cygnet, an experimental reformulation of wordnet that is designed to eliminate conflicting records and improve modularity. 
We convert data in 47 languages from the Open Multilingual Wordnet into this format, and release a web browser which makes it easy to navigate multilingual wordnets.",{"paper_id":15135,"title":15136,"year":7,"month":188,"day":63,"doi":15137,"resource_url":15138,"first_page":15139,"last_page":15140,"pdf_url":15141,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15142,"paper_type":860,"authors":15143,"abstract":15151},"lrec2026-main-629","Masrad: Arabic Terminology Management Corpora with Semi-Automatic Construction","10.63317\u002F4azqkq8r4eez","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-629","7918","7926","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.629.pdf","nasser-etal-2026-masrad",[15144,15146,15148],{"paper_id":15135,"author_seq":247,"given_name":15145,"surname":2723,"affiliation":63,"orcid":63},"Mahdi",{"paper_id":15135,"author_seq":232,"given_name":13890,"surname":15147,"affiliation":63,"orcid":63},"Sayah",{"paper_id":15135,"author_seq":218,"given_name":15149,"surname":15150,"affiliation":63,"orcid":63},"Fadi","Zaraket","This paper presents Masrad (i.e. glossary in Arabic), a terminology dataset for Arabic terminology management, and a method with supporting tools for its semi-automatic construction. The entries in Masrad are (f,a) pairs of foreign (non-Arabic) terms f, appearing in specialized, academic and field-specific books next to their Arabic a counterparts. Masrad-Ex systematically extracts these pairs as a first step to construct Masrad. Masrad helps improving term consistency in academic translations and specialized Arabic documents, and automating cross-lingual text processing. Masrad-Ex leverages translated terms organically occurring in Arabic books, and considers several candidate pairs for each term phrase. The candidate Arabic terms occur next to the foreign terms, and vary in length. 
Masrad-Ex computes lexicographic, phonetic, morphological, and semantic similarity metrics for each candidate pair, and uses heuristic, machine learning, and machine learning with post-processing approaches to decide on the best candidate. This paper presents Masrad after thorough expert review and makes it available to the interested research community. The best performing Masrad-Ex approach achieved 90.5% precision and 92.4% recall.",{"paper_id":15153,"title":15154,"year":7,"month":188,"day":63,"doi":15155,"resource_url":15156,"first_page":15157,"last_page":15158,"pdf_url":15159,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15160,"paper_type":860,"authors":15161,"abstract":15174},"lrec2026-main-630","SentiMalti: A Maltese Sentiment Analysis Dataset and Models","10.63317\u002F4kw8df57bza3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-630","7927","7936","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.630.pdf","caruana-etal-2026-sentimalti",[15162,15165,15167,15169,15172],{"paper_id":15153,"author_seq":247,"given_name":15163,"surname":15164,"affiliation":63,"orcid":63},"Ian","Caruana",{"paper_id":15153,"author_seq":232,"given_name":6059,"surname":15166,"affiliation":63,"orcid":63},"Vella",{"paper_id":15153,"author_seq":218,"given_name":5741,"surname":15168,"affiliation":63,"orcid":63},"Zammit",{"paper_id":15153,"author_seq":203,"given_name":15170,"surname":15171,"affiliation":63,"orcid":63},"Kurt","Micallef",{"paper_id":15153,"author_seq":188,"given_name":4283,"surname":15173,"affiliation":63,"orcid":63},"Borg","We present SentiMalti, a new Maltese social media sentiment resource and accompanying baselines. 
We scrape user‑generated content from YouTube, Reddit, and Facebook, then apply a Maltese‑aware preprocessing pipeline (cleaning, personally identifiable information anonymisation, sentence splitting, and sentence‑level language filtering) to retain Maltese sentences while tolerating realistic code‑switching. The resulting crowdsourced dataset contains 2,327 sentences annotated for positive (39%), negative (31%), and neutral (30%) sentiment. We integrate prior Maltese datasets to create a combined benchmark of 3,772 instances. We evaluate fine‑tuned encoder models (BERTu, Glot500) and few‑shot prompting with instruction‑tuned multilingual LLMs (Aya‑101, Gemma 2 Instruct 9B). On the full test set, five‑shot Aya‑101 attains 68.65 macro‑F1, closely followed by a fine‑tuned BERTu at 68.36 macro‑F1. Error analysis reveals complementary strengths: BERTu better separates polarised classes, while Aya‑101 tends to over‑predict the neutral class. We release the dataset splits, code, and a fine‑tuned BERTu model to facilitate further work in Maltese NLP and sentiment analysis.",{"paper_id":15176,"title":15177,"year":7,"month":188,"day":63,"doi":15178,"resource_url":15179,"first_page":15180,"last_page":15181,"pdf_url":15182,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15183,"paper_type":860,"authors":15184,"abstract":15192},"lrec2026-main-631","Multilingual Structured Sentiment Analysis for Environmental Sustainability","10.63317\u002F4jdnsugtypgu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-631","7937","7954","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.631.pdf","ibrohim-etal-2026-multilingual",[15185,15188,15189,15191],{"paper_id":15176,"author_seq":247,"given_name":15186,"surname":15187,"affiliation":63,"orcid":63},"Muhammad 
Okky","Ibrohim",{"paper_id":15176,"author_seq":232,"given_name":2072,"surname":2073,"affiliation":63,"orcid":63},{"paper_id":15176,"author_seq":218,"given_name":5732,"surname":15190,"affiliation":63,"orcid":63},"Bosco",{"paper_id":15176,"author_seq":203,"given_name":6936,"surname":6937,"affiliation":63,"orcid":63},"To effectively address global environmental challenges, we must have tools that allow us to carefully monitor how citizens, policy makers and other stakeholders debate sustainability. However, there are currently very few NLP resources and tools specialized for this topic. This paper presents EnviS, a multilingual corpus (Italian, English, and Indonesian) for investigating the debate on environmental sustainability in social media using Structured Sentiment Analysis. We introduce a framework for the automatic aggregation of span-level annotations that preserves the annotators’ perspective and avoids manual intervention by safeguarding the quality of the annotations. We performed a series of experiments with four open-source instruction-based Large Language Models in zero-shot and few-shot settings, where we have measures the impact of the order and number of shots. The results further confirm the ineffectiveness of LLMs in extracting fine-grained sentiment information, being outperformed by a supervised state-of-the-art neural method trained on very few data. This questions the suitability of LLMs for rich knowledge\u002Finformation extraction tasks requiring manipulation of text spans. 
In particular, our error analysis indicates that LLMs mostly struggle in identifying the sentiment term or its associated polarity, failing to extract full sentiment triples.",{"paper_id":15194,"title":15195,"year":7,"month":188,"day":63,"doi":15196,"resource_url":15197,"first_page":15198,"last_page":15199,"pdf_url":15200,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15201,"paper_type":860,"authors":15202,"abstract":15211},"lrec2026-main-632","LLM-as-an-Annotator: Training Lightweight Models with LLM-Annotated Examples for Aspect Sentiment Tuple Prediction","10.63317\u002F43srcdyc52cd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-632","7955","7972","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.632.pdf","hellwig-etal-2026-llm",[15203,15206,15208,15210],{"paper_id":15194,"author_seq":247,"given_name":15204,"surname":15205,"affiliation":63,"orcid":63},"Nils Constantin","Hellwig",{"paper_id":15194,"author_seq":232,"given_name":3824,"surname":15207,"affiliation":63,"orcid":63},"Fehle",{"paper_id":15194,"author_seq":218,"given_name":3838,"surname":15209,"affiliation":63,"orcid":63},"Kruschwitz",{"paper_id":15194,"author_seq":203,"given_name":3643,"surname":3375,"affiliation":63,"orcid":63},"Training models for Aspect-Based Sentiment Analysis (ABSA) tasks requires manually annotated data, which is expensive and time-consuming to obtain. This paper introduces LA-ABSA, a novel approach that leverages Large Language Model (LLM)-generated annotations to fine-tune lightweight models for complex ABSA tasks. We evaluate our approach on five datasets for Target Aspect Sentiment Detection (TASD) and Aspect Sentiment Quad Prediction (ASQP). Our approach outperformed previously reported augmentation strategies and achieved competitive performance with LLM-prompting in low-resource scenarios, while providing substantial energy efficiency benefits. 
For example, using 50 annotated examples for in-context learning (ICL) to guide the annotation of unlabeled data, LA-ABSA achieved an F1 score of 49.85 for ASQP on the SemEval Rest16 dataset, closely matching the performance of ICL prompting with Gemma-3-27B (51.10), while requiring significantly lower computational resources.",{"paper_id":15213,"title":15214,"year":7,"month":188,"day":63,"doi":15215,"resource_url":15216,"first_page":15217,"last_page":15218,"pdf_url":15219,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15220,"paper_type":860,"authors":15221,"abstract":15226},"lrec2026-main-633","Extending Czech Aspect-Based Sentiment Analysis with Opinion Terms: Dataset and LLM Benchmarks","10.63317\u002F4hkzdnwfztkz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-633","7973","7984","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.633.pdf","md-etal-2026-extending",[15222,15223,15225],{"paper_id":15213,"author_seq":247,"given_name":5422,"surname":5423,"affiliation":63,"orcid":63},{"paper_id":15213,"author_seq":232,"given_name":3469,"surname":15224,"affiliation":63,"orcid":63},"Priban",{"paper_id":15213,"author_seq":218,"given_name":3469,"surname":5417,"affiliation":63,"orcid":63},"This paper introduces a novel Czech dataset in the restaurant domain for aspect-based sentiment analysis (ABSA), enriched with annotations of opinion terms. The dataset supports three distinct ABSA tasks involving opinion terms, accommodating varying levels of complexity. Leveraging this dataset, we conduct extensive experiments using modern Transformer-based models, including large language models (LLMs), in monolingual, cross-lingual, and multilingual settings. To address cross-lingual challenges, we propose a translation and label alignment methodology leveraging LLMs, which yields consistent improvements. 
Our results highlight the strengths and limitations of state-of-the-art models, especially when handling the linguistic intricacies of low-resource languages like Czech. A detailed error analysis reveals key challenges, including the detection of subtle opinion terms and nuanced sentiment expressions. The dataset establishes a new benchmark for Czech ABSA, and our proposed translation–alignment approach offers a scalable solution for adapting ABSA resources to other low-resource languages.",{"paper_id":15228,"title":15229,"year":7,"month":188,"day":63,"doi":15230,"resource_url":15231,"first_page":15232,"last_page":15233,"pdf_url":15234,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15235,"paper_type":860,"authors":15236,"abstract":15241},"lrec2026-main-634","AnnoABSA: A Web-Based Annotation Tool for Aspect-Based Sentiment Analysis with Retrieval-Augmented Suggestions","10.63317\u002F56mac6pxbke6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-634","7985","7998","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.634.pdf","hellwig-etal-2026-annoabsa",[15237,15238,15239,15240],{"paper_id":15228,"author_seq":247,"given_name":15204,"surname":15205,"affiliation":63,"orcid":63},{"paper_id":15228,"author_seq":232,"given_name":3824,"surname":15207,"affiliation":63,"orcid":63},{"paper_id":15228,"author_seq":218,"given_name":3838,"surname":15209,"affiliation":63,"orcid":63},{"paper_id":15228,"author_seq":203,"given_name":3643,"surname":3375,"affiliation":63,"orcid":63},"We introduce AnnoABSA, the first web-based annotation tool to support the full spectrum of Aspect-Based Sentiment Analysis (ABSA) tasks. The tool is highly customizable, enabling flexible configuration of sentiment elements and task-specific requirements. 
Alongside manual annotation, AnnoABSA provides optional Large Language Model (LLM)-based retrieval-augmented generation (RAG) suggestions that offer context-aware assistance in a human-in-the-loop approach, keeping the human annotator in control. To improve prediction quality over time, the system retrieves the ten most similar examples that are already annotated and adds them as few-shot examples in the prompt, ensuring that suggestions become increasingly accurate as the annotation process progresses. Released as open-source software under the MIT License, AnnoABSA is freely accessible and easily extendable for research and practical applications.",{"paper_id":15243,"title":15244,"year":7,"month":188,"day":63,"doi":15245,"resource_url":15246,"first_page":15247,"last_page":15248,"pdf_url":15249,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15250,"paper_type":860,"authors":15251,"abstract":15256},"lrec2026-main-635","Zero-Shot to Full-Resource: Cross-lingual Transfer Strategies for Aspect-Based Sentiment Analysis","10.63317\u002F3fpqpgsdobd6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-635","7999","8013","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.635.pdf","fehle-etal-2026-zero",[15252,15253,15254,15255],{"paper_id":15243,"author_seq":247,"given_name":3824,"surname":15207,"affiliation":63,"orcid":63},{"paper_id":15243,"author_seq":232,"given_name":15204,"surname":15205,"affiliation":63,"orcid":63},{"paper_id":15243,"author_seq":218,"given_name":3838,"surname":15209,"affiliation":63,"orcid":63},{"paper_id":15243,"author_seq":203,"given_name":3643,"surname":3375,"affiliation":63,"orcid":63},"Aspect-based Sentiment Analysis (ABSA) extracts fine-grained opinions toward specific aspects within text but remains largely English-focused despite major advances in transformer-based and instruction-tuned models. 
This work presents a multilingual evaluation of state-of-the-art ABSA approaches across seven languages and four subtasks (ACD, ACSA, TASD, ASQP). We systematically compare different transformer architectures under zero-resource, data-only, and full-resource settings, using cross-lingual transfer, code-switching and machine translation. Fine-tuned Large Language Models (LLMs) achieve the highest overall scores, particularly in complex generative tasks, while few-shot counterparts approach this performance in simpler setups, where smaller encoder models also remain competitive. Cross-lingual training on multiple non-target languages yields the strongest transfer for fine-tuned LLMs, while smaller encoder or seq-to-seq models benefit most from code-switching, highlighting architecture-specific strategies for multilingual ABSA. We further contribute two new German datasets, an adapted GERestaurant and the first German ASQP dataset (GERest), to encourage multilingual ABSA research beyond English.",{"paper_id":15258,"title":15259,"year":7,"month":188,"day":63,"doi":15260,"resource_url":15261,"first_page":15262,"last_page":15263,"pdf_url":15264,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15265,"paper_type":860,"authors":15266,"abstract":15271},"lrec2026-main-636","LoveHate: Stance Detection and Generation for Multiple Topics in User-generated Comments in Russian and English","10.63317\u002F33jtqgqtp59s","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-636","8014","8025","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.636.pdf","evgrafova-etal-2026-lovehate",[15267,15269,15270],{"paper_id":15258,"author_seq":247,"given_name":8892,"surname":15268,"affiliation":63,"orcid":63},"Evgrafova",{"paper_id":15258,"author_seq":232,"given_name":2182,"surname":2183,"affiliation":63,"orcid":63},{"paper_id":15258,"author_seq":218,"given_name":2294,"surname":2295,"affiliation":63,"orcid":63},"This paper 
introduces LoveHate, a new multi-topic corpus of user-generated arguments in Russian, collected from the historical data of the debate platform lovehate.ru. The dataset contains nearly 19,000 posts spanning 16 socially and politically relevant topics, each mapped to binary pro and con stances. We test multiple approaches to stance detection and stance generation across Russian and English data, including translated variants, using both classifier-based (Roberta, RuRoberta) and instruction-tuned generative (Llama, Qwen) models. Results demonstrate that language-specific pretraining yields the strongest performance for stance classification (F1 = 0.892 with RuRoberta), while multilingual generative models – when fine-tuned on sufficient data – can effectively generate stance in Russian without explicit Russian pretraining. Cross-domain experiments show that English datasets generalize better across corpora, whereas Russian data capture language- and culture-specific argumentation but are less effective for generalizable models. Generating topics remains a more challenging task for both Russian and English data. 
The dataset and accompanying results contribute to multilingual stance research and provide a valuable new resource for argument mining in Russian.",{"paper_id":15273,"title":15274,"year":7,"month":188,"day":63,"doi":15275,"resource_url":15276,"first_page":15277,"last_page":15278,"pdf_url":15279,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15280,"paper_type":860,"authors":15281,"abstract":15284},"lrec2026-main-637","From Trial by Fire to Sleep like a Baby: A Lexicon of Anxiety Associations for 20K English Multi-Word Expressions","10.63317\u002F4kx7maur4jao","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-637","8026","8039","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.637.pdf","mohammad-2026-trial",[15282],{"paper_id":15273,"author_seq":247,"given_name":15283,"surname":1932,"affiliation":63,"orcid":63},"Saif M.","Anxiety is the unease about a possible future negative outcome. In recent years, there has been growing interest in understanding how anxiety relates to our health, well-being, body, mind, and behaviour. This includes work on lexical resources for word–anxiety association. However, there is very little anxiety-related work on larger units of text such as multiword expressions (MWE). Here, we introduce the first large-scale lexicon capturing descriptive norms of anxiety associations for more than 20k English MWEs. We show that the anxiety associations are highly reliable. We use the lexicon to study prevalence of different types of anxiety- and calmness-associated MWEs; and how that varies across two-, three-, and four-word sequences. We also study the extent to which the anxiety association of MWEs is compositional (due to its constituent words). The lexicon enables a wide variety of anxiety-related research in psychology, NLP, public health, and social sciences. 
The lexicon is freely available: https:\u002F\u002Fsaifmohammad.com\u002Fworrylex.html",{"paper_id":15286,"title":15287,"year":7,"month":188,"day":63,"doi":15288,"resource_url":15289,"first_page":15290,"last_page":15291,"pdf_url":15292,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15293,"paper_type":860,"authors":15294,"abstract":15301},"lrec2026-main-638","Entity-Level Sentiment Analysis with Sentence Relevance Detection","10.63317\u002F35ideqx4jk89","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-638","8040","8055","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.638.pdf","rnningstad-etal-2026-entity",[15295,15298,15299,15300],{"paper_id":15286,"author_seq":247,"given_name":15296,"surname":15297,"affiliation":63,"orcid":63},"Egil","Rønningstad",{"paper_id":15286,"author_seq":232,"given_name":1064,"surname":11820,"affiliation":63,"orcid":63},{"paper_id":15286,"author_seq":218,"given_name":14921,"surname":14922,"affiliation":63,"orcid":63},{"paper_id":15286,"author_seq":203,"given_name":2022,"surname":14919,"affiliation":63,"orcid":63},"The task of entity-level sentiment analysis (Elsa) is to extract sentiment scores for a given entity (such as person names or organization names) from a text. Elsa is a challenging task and involves processing of longer documents, where several entities may be mentioned with varying importance for the final score aggregation. Fine-tuning encoder-based Transformers (such as BERT) constitutes the state of the art for sentiment predictions, however, these models are still limited by their restricted input lengths. Decoder-only models so far still underperform on the task. We approach the context limitation by learning to extract segments that are relevant for the sentiment prediction for a given entity, without preprocessing by chunking and aggregation. 
For decoder models, we explore fine-tuning these through supervised fine-tuning and pairwise comparison, a method borrowed from reward modeling for preference optimization. Both methods perform well and set a new standard for the Elsa task. We further show that pairwise classification is faster, simpler, and shows less variance than the more common direct supervision for this task.",{"paper_id":15303,"title":15304,"year":7,"month":188,"day":63,"doi":15305,"resource_url":15306,"first_page":15307,"last_page":15308,"pdf_url":15309,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15310,"paper_type":860,"authors":15311,"abstract":15330},"lrec2026-main-639","Enhancing Multi-Label Emotion Analysis and Corresponding Intensities for Ethiopian Languages","10.63317\u002F4a4fd8aj2q2a","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-639","8056","8075","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.639.pdf","belay-etal-2026-enhancing",[15312,15315,15317,15320,15322,15324,15327],{"paper_id":15303,"author_seq":247,"given_name":15313,"surname":15314,"affiliation":63,"orcid":63},"Tadesse Destaw","Belay",{"paper_id":15303,"author_seq":232,"given_name":15316,"surname":1268,"affiliation":63,"orcid":63},"Dawit Ketema",{"paper_id":15303,"author_seq":218,"given_name":15318,"surname":15319,"affiliation":63,"orcid":63},"Abinew Ali","Ayele",{"paper_id":15303,"author_seq":203,"given_name":6250,"surname":15321,"affiliation":63,"orcid":63},"Kolesnikova",{"paper_id":15303,"author_seq":188,"given_name":11716,"surname":15323,"affiliation":63,"orcid":63},"Ameer",{"paper_id":15303,"author_seq":172,"given_name":15325,"surname":15326,"affiliation":63,"orcid":63},"Grigori","Sidorov",{"paper_id":15303,"author_seq":155,"given_name":15328,"surname":15329,"affiliation":63,"orcid":63},"Seid Muhie","Yimam","Developing and integrating emotion-understanding models are essential for a wide range of human- computer interaction 
tasks, including customer feedback analysis, marketing research, and social media monitoring. Given that users often express multiple emotions simultaneously within a single instance, annotating emotion datasets in a multi-label format is critical for capturing this complexity. The EthioEmo dataset, a multilingual and multi-label emotion dataset for Ethiopian languages, lacks emotion intensity annotations, which are crucial for distinguishing varying degrees of emotion, as not all emotions are expressed with the same intensity. We extend the EthioEmo dataset to address this gap by adding emotion intensity annotations. Furthermore, we benchmark state-of-the-art encoder-only Pretrained Language Models (PLMs) and Large Language Models (LLMs) on this enriched dataset. Our results demonstrate that African-centric encoder-only models consistently outperform open-source LLMs, highlighting the importance of culturally and linguistically tailored small models in emotion understanding. Incorporating an emotion-intensity feature for multi-label emotion classification yields better performance. 
The data is available at https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FTadesse\u002FEthioEmo-intensities.",{"paper_id":15332,"title":15333,"year":7,"month":188,"day":63,"doi":15334,"resource_url":15335,"first_page":15336,"last_page":15337,"pdf_url":15338,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15339,"paper_type":860,"authors":15340,"abstract":15354},"lrec2026-main-640","A Japanese Dataset for Aspect-based Sentiment Polarity Classification and Emotion Intensity Estimation","10.63317\u002F59khdy9uv6uk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-640","8076","8084","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.640.pdf","hanafusa-etal-2026-japanese",[15341,15343,15344,15346,15347,15348,15350,15352],{"paper_id":15332,"author_seq":247,"given_name":10865,"surname":15342,"affiliation":63,"orcid":63},"Hanafusa",{"paper_id":15332,"author_seq":232,"given_name":6350,"surname":6351,"affiliation":63,"orcid":63},{"paper_id":15332,"author_seq":218,"given_name":1463,"surname":15345,"affiliation":63,"orcid":63},"Maeda",{"paper_id":15332,"author_seq":203,"given_name":2790,"surname":2791,"affiliation":63,"orcid":63},{"paper_id":15332,"author_seq":188,"given_name":2793,"surname":2794,"affiliation":63,"orcid":63},{"paper_id":15332,"author_seq":172,"given_name":15349,"surname":12609,"affiliation":63,"orcid":63},"Hideaki",{"paper_id":15332,"author_seq":155,"given_name":10499,"surname":15351,"affiliation":63,"orcid":63},"Nakashima",{"paper_id":15332,"author_seq":138,"given_name":2768,"surname":15353,"affiliation":63,"orcid":63},"Nagahara","We manually construct and publicly release a Japanese dataset for Aspect-based Sentiment Analysis (ABSA), annotated with both sentiment polarity and the emotional intensities for Plutchik’s eight emotions. Existing datasets for Japanese ABSA only handle sentiment polarity classification. 
Therefore, we manually annotated Plutchik’s eight emotions with a four-point scale and sentiment polarity with a five-point scale to words in the Japanese sentiment analysis corpus WRIME. Analysis of this corpus revealed that word-level emotions more strongly reflect the reader’s objective impression than the writer’s subjective perspective. Furthermore, the results of evaluation experiments on word-level emotion estimation quantitatively demonstrated that while Large Language Models achieve high performance, they struggle with the estimation of the \"trust\" emotion. Additionally, we demonstrated that multi-task learning, utilizing both word and sentence levels, can improve performance on difficult-to-estimate subjective emotions.",{"paper_id":15356,"title":15357,"year":7,"month":188,"day":63,"doi":15358,"resource_url":15359,"first_page":15360,"last_page":15361,"pdf_url":15362,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15363,"paper_type":860,"authors":15364,"abstract":15374},"lrec2026-main-641","Assessing the Persuasive Effect of AI-Generated Image Support of Arguments","10.63317\u002F2b6xxx7vmyra","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-641","8085","8095","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.641.pdf","quadras-etal-2026-assessing",[15365,15368,15371],{"paper_id":15356,"author_seq":247,"given_name":15366,"surname":15367,"affiliation":63,"orcid":63},"Mackwyn","Quadras",{"paper_id":15356,"author_seq":232,"given_name":15369,"surname":15370,"affiliation":63,"orcid":63},"Manfred","Stede",{"paper_id":15356,"author_seq":218,"given_name":15372,"surname":15373,"affiliation":63,"orcid":63},"Henning","Wachsmuth","Argumentation is, at its core, an inherently verbal activity. Yet, other modalities may support arguments, one of which are images. In the argument mining community, this combination has not received much attention yet. 
While a few previous works studied whether images can make argumentative texts more effective in persuading people, the images that were considered matched the texts loosely only, or they were heavily text-based themselves. In this paper, we take the step to study to what extent the persuasive effect of textual arguments can be supported by images specifically created for this purpose. For a consistent experiment design, we combine NLP with image generation to synthesize both arguments and images with generative AI, for five controversial topics and for two rhetorical strategies. In two consecutive user studies, we first determine the best-matching image for each argument and then compare the perceived effect of bare textual arguments to those that are supported by an image. Our results suggest that the images may increase the persuasive effect of argumentative texts, but with variance across topics.",{"paper_id":15376,"title":15377,"year":7,"month":188,"day":63,"doi":15378,"resource_url":15379,"first_page":15380,"last_page":15381,"pdf_url":15382,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15383,"paper_type":860,"authors":15384,"abstract":15392},"lrec2026-main-642","CIARAM: Class Imbalance Aware Generative Framework for Relational Argument Mining","10.63317\u002F22mrso3w6vcq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-642","8096","8105","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.642.pdf","das-etal-2026-ciaram",[15385,15388,15390,15391],{"paper_id":15376,"author_seq":247,"given_name":15386,"surname":15387,"affiliation":63,"orcid":63},"Nilmadhab","Das",{"paper_id":15376,"author_seq":232,"given_name":15389,"surname":4533,"affiliation":63,"orcid":63},"Sayan",{"paper_id":15376,"author_seq":218,"given_name":14744,"surname":14745,"affiliation":63,"orcid":63},{"paper_id":15376,"author_seq":203,"given_name":14588,"surname":14589,"affiliation":63,"orcid":63},"Relational Argument 
Mining (RAM) is a key task of computational argumentation, which aims to classify the relationships such as Support or Attack between argument component (AC) pairs. Traditional approaches primarily rely on graph-based modelling with external knowledge sources, which are complex in nature. Also, these approaches struggle with RAM datasets when relation classes are imbalanced, as they are not designed for class-imbalanced scenarios. In this work, we propose CIARAM framework to reformulate RAM as a text-to-text generation problem to generate relational labels in a flattened text format. To address the class imbalance, we employ a data augmentation strategy using a decoder-only Large Language Model (LLM) to balance the underrepresented relation classes. Across five standard RAM benchmarks, CIARAM produces strong results, specifically with the billion-parameter model, with a substantial gain in performance compared to the latest baseline, demonstrating the strong potential of our approach.",{"paper_id":15394,"title":15395,"year":7,"month":188,"day":63,"doi":15396,"resource_url":15397,"first_page":15398,"last_page":15399,"pdf_url":15400,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15401,"paper_type":860,"authors":15402,"abstract":15411},"lrec2026-main-643","Surfacing Subtle Stereotypes: A Multilingual, Debate-Oriented Evaluation of Modern LLMs","10.63317\u002F4u9x3z4g8jfk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-643","8106","8121","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.643.pdf","saeed-etal-2026-surfacing",[15403,15406,15408],{"paper_id":15394,"author_seq":247,"given_name":15404,"surname":15405,"affiliation":63,"orcid":63},"Muhammed Yahia 
Gaffar","Saeed",{"paper_id":15394,"author_seq":232,"given_name":2728,"surname":15407,"affiliation":63,"orcid":63},"Abdul-Mageed",{"paper_id":15394,"author_seq":218,"given_name":15409,"surname":15410,"affiliation":63,"orcid":63},"Shady","Shehata","Large language models (LLMs) are widely deployed for open-ended communication, yet most bias evaluations still rely on English, classification-style tasks. We introduce a new multilingual, debate-style benchmark designed to reveal how narrative bias appears in realistic generative settings. Our dataset includes 8,400 structured debate prompts spanning four sensitive domains – Women’s Rights, Backwardness, Terrorism, and Religion – across seven languages ranging from high-resource (English, Chinese) to low-resource (Swahili, Nigerian Pidgin). Using four flagship models (GPT-4o, Claude 3.5 Haiku, DeepSeek-Chat, and LLaMA-3-70B), we generate over 100,000 debate responses and automatically classify which demographic groups are assigned stereotyped versus modern roles. Results show that all models reproduce entrenched stereotypes despite safety alignment: Arabs are overwhelmingly linked to Terrorism and Religion (≥89%), Africans to socioeconomic “backwardness” (up to 77%), and Western groups are consistently framed as modern or progressive. Biases grow sharply in lower-resource languages, revealing that alignment trained primarily in English does not generalize globally. Our findings highlight a persistent divide in multilingual fairness: current alignment methods reduce explicit toxicity but fail to prevent biased outputs in open-ended contexts. 
We release our benchmark and analysis framework to support the next generation of multilingual bias evaluation and safer, culturally inclusive model alignment.",{"paper_id":15413,"title":15414,"year":7,"month":188,"day":63,"doi":15415,"resource_url":15416,"first_page":15417,"last_page":15418,"pdf_url":15419,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15420,"paper_type":860,"authors":15421,"abstract":15429},"lrec2026-main-644","Prompt-Based Stance Control in German: An Evaluation of LLMs for Experimental Research on Attitude Change","10.63317\u002F2tvnax68shcy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-644","8122","8140","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.644.pdf","omiecienski-etal-2026-prompt",[15422,15424,15426],{"paper_id":15413,"author_seq":247,"given_name":2175,"surname":15423,"affiliation":63,"orcid":63},"Omiecienski",{"paper_id":15413,"author_seq":232,"given_name":11696,"surname":15425,"affiliation":63,"orcid":63},"Sindermann",{"paper_id":15413,"author_seq":218,"given_name":15427,"surname":15428,"affiliation":63,"orcid":63},"Agnieszka","Falenska","How much can Large Language Models (LLMs) influence the attitudes and opinions of their users? Answering this question requires controlled pre\u002Fpost-treatment experiments, where participants interact with LLMs that consistently adopt a predefined political stance. Such experiments, however, are only possible if LLMs can be reliably steered to hold these stances throughout the interactions. In this work, we evaluate whether state-of-the-art LLMs can be effectively stance-controlled in German, thereby enabling experiments on human–LLM interactions. First, using a corpus of realistic user prompts, we find that LLMs are predominantly neutral, making them infeasible for said experiments. We then show that a prompt-based stance control method can reliably guide models to argue for or against a particular topic. 
Finally, we analyze confounding factors like topic and stance of the initial user prompts. We find that control is easiest when the target stance aligns with topical priors of the model or a user’s prompt. Further, the models maintain a comparable style across target stances — a key prerequisite for pre\u002Fpost-treatment experiments. Taken together, our results demonstrate that stance-controlled LLMs are feasible and practically useful for experiments on user attitude change.",{"paper_id":15431,"title":15432,"year":7,"month":188,"day":63,"doi":15433,"resource_url":15434,"first_page":15435,"last_page":15436,"pdf_url":15437,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15438,"paper_type":860,"authors":15439,"abstract":15449},"lrec2026-main-645","CoSt-BR: A Language Resource for Conversational Stance Detection","10.63317\u002F3g3kx7kbdrkp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-645","8141","8146","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.645.pdf","fonseca-etal-2026-cost",[15440,15443,15446],{"paper_id":15431,"author_seq":247,"given_name":15441,"surname":15442,"affiliation":63,"orcid":63},"Felipe Penhorate Carvalho da","Fonseca",{"paper_id":15431,"author_seq":232,"given_name":15444,"surname":15445,"affiliation":63,"orcid":63},"Ivandre","Paraboni",{"paper_id":15431,"author_seq":218,"given_name":15447,"surname":15448,"affiliation":63,"orcid":63},"Luciano Antônio","Digiampietri","Stance detection is the computational task of determining the attitude (e.g., for, against, neutral) expressed in text toward a specific target topic. In its more conventional form, the task focuses on isolated, context-free input utterances. 
Conversational stance detection, by contrast, analyzes messages embedded within dialogue threads, enabling the interpretation of responses in relation to preceding discourse, and takes into account a greater variety of stance relations (e.g., support, deny, query, comment, etc.). Despite growing research attention, however, conversational stance detection remains relatively under-resourced and largely limited to the English language. To address these gaps, this study introduces CoSt-BR, a new corpus for conversational stance detection composed of a large set of annotated Reddit discussions in Brazilian Portuguese. In addition, the paper also reports benchmark results obtained using various computational methods, including supervised and prompt-based strategies, applied to the corpus data, providing baseline references for future research in this area.",{"paper_id":15451,"title":15452,"year":7,"month":188,"day":63,"doi":15453,"resource_url":15454,"first_page":15455,"last_page":15456,"pdf_url":15457,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15458,"paper_type":860,"authors":15459,"abstract":15466},"lrec2026-main-646","Less Is More? The Role of Demographic Author Information in Emotion Classification of Ambiguous Text","10.63317\u002F2cw8tpo82h55","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-646","8147","8161","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.646.pdf","weber-etal-2026-less",[15460,15463,15465],{"paper_id":15451,"author_seq":247,"given_name":15461,"surname":15462,"affiliation":63,"orcid":63},"Sabine","Weber",{"paper_id":15451,"author_seq":232,"given_name":6757,"surname":15464,"affiliation":63,"orcid":63},"Greschner",{"paper_id":15451,"author_seq":218,"given_name":1064,"surname":11820,"affiliation":63,"orcid":63},"Emotion annotation in text is a challenging task that often yields low inter-annotator agreement. 
Missing context, differences in world knowledge and extra-linguistic factors such as the author’s identity influence how emotions are perceived. When the text does not provide sufficient information, details about the author may help resolve ambiguity. We test the hypothesis that providing annotators with demographic information reduces disagreement in emotion annotation. We compare one group of annotators who sees each text alongside demographic information about its author, with a group who sees only the text. We find in our study with 500 annotators and 250 texts that displaying demographic information about the author of the text does not improve agreement between annotators, nor does it improve agreement with the gold label. The only exception are cases where the emotion polarity (positive or negative) is unclear. We also find that annotators perform overall better at identifying the correct emotion label when it aligns with gender stereotypes. Zero-shot prompting experiments with large language models do resemble the human annotation experimental results. 
Our findings suggest that providing demographic information is not a straightforward remedy for ambiguity in emotion annotation and careful consideration is needed when incorporating such data.",{"paper_id":15468,"title":15469,"year":7,"month":188,"day":63,"doi":15470,"resource_url":15471,"first_page":15472,"last_page":15473,"pdf_url":15474,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15475,"paper_type":860,"authors":15476,"abstract":15483},"lrec2026-main-647","Big Five Personality Prediction through Emotion-Conditioned Representations and Learnable Psycholinguistic Mapping","10.63317\u002F2ybdpcem2ohv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-647","8162","8173","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.647.pdf","zangari-etal-2026-big",[15477,15479,15481],{"paper_id":15468,"author_seq":247,"given_name":7631,"surname":15478,"affiliation":63,"orcid":63},"Zangari",{"paper_id":15468,"author_seq":232,"given_name":1018,"surname":15480,"affiliation":63,"orcid":63},"Schnyder",{"paper_id":15468,"author_seq":218,"given_name":1709,"surname":15482,"affiliation":63,"orcid":63},"Picca","Personality traits influence human behavior and social interactions, making their accurate prediction essential across multiple domains. The Big Five Model, a widely recognized framework in psychological science for assessing personality traits, has become the foundation for different computational approaches to personality prediction. In recent years, a growing body of research has highlighted the dynamic interplay between emotions and personality, as individuals navigate diverse emotional experiences that evoke distinct responses and ultimately shape their behavioral patterns. In this work, we present a novel framework that systematically integrates affective information into Pre-trained Language Models for Big Five Personality trait prediction. 
Our framework leverages text-based embeddings, emotion-conditioned features, and learnable psycholinguistic information that bridges affective dimensions with personality traits. This design preserves established psycholinguistic knowledge while enabling adaptive refinement through data-driven learning. Our experiments showed that our framework outperformed sentence embedding-based methods and Large Language Models across various datasets from different domains, achieving an average F1-score improvement of at least 15% in out-of-domain scenarios.",{"paper_id":15485,"title":15486,"year":7,"month":188,"day":63,"doi":15487,"resource_url":15488,"first_page":15489,"last_page":15490,"pdf_url":15491,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15492,"paper_type":860,"authors":15493,"abstract":15500},"lrec2026-main-648","SENSEI-ASG: A Challenging Dataset for Argument Summary Graph Parsing","10.63317\u002F3abueoaae2s2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-648","8174","8189","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.648.pdf","clayton-etal-2026-sensei",[15494,15496,15498],{"paper_id":15485,"author_seq":247,"given_name":2837,"surname":15495,"affiliation":63,"orcid":63},"Clayton",{"paper_id":15485,"author_seq":232,"given_name":2146,"surname":15497,"affiliation":63,"orcid":63},"Damonte",{"paper_id":15485,"author_seq":218,"given_name":3172,"surname":15499,"affiliation":63,"orcid":63},"Gaizauskas","We create, and make publicly available, a novel dataset for the task of Argument Summary Graph Parsing (ASGP), which we call SENSEI-ASG, based on annotating a subset of the SENSEI corpus. Given an argumentative dialogue, such as might be found in a social media exchange, ASGP is the task of creating an Argument Summary Graph, a data structure which consists of nodes containing summaries of arguments in a dialogue, and edges showing argumentative relations between them. 
We find that the only existing ASG dataset, Debatabase-ASG, is not representative of online debates in language use, length of the dialogues, or graph complexity. In contrast to Debatabase-ASG, which was created based on a curated debate collection, SENSEI-ASG contains examples of spontaneous debates arising in the comments sections of an online newspaper (namely, The Guardian). We achieve moderate inter-annotator agreement on the dataset, with a Cohen’s kappa of k=0.57, reflecting the inherent challenges in distinguishing argumentative from non-argumentative text. We propose baselines for the new dataset by fine-tuning Llama-3 for the ASGP task, using the two ASGP datasets and an additional out-of-domain argument mining dataset, the AAEC.",{"paper_id":15502,"title":15503,"year":7,"month":188,"day":63,"doi":15504,"resource_url":15505,"first_page":15506,"last_page":15507,"pdf_url":15508,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15509,"paper_type":860,"authors":15510,"abstract":15517},"lrec2026-main-649","Categorical Emotions or Appraisals - Which Emotion Model Explains Argument Convincingness Better?","10.63317\u002F3vrvrgvtnvhn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-649","8190","8203","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.649.pdf","greschner-etal-2026-categorical",[15511,15512,15515,15516],{"paper_id":15502,"author_seq":247,"given_name":6757,"surname":15464,"affiliation":63,"orcid":63},{"paper_id":15502,"author_seq":232,"given_name":15513,"surname":15514,"affiliation":63,"orcid":63},"Meike","Bauer",{"paper_id":15502,"author_seq":218,"given_name":15461,"surname":15462,"affiliation":63,"orcid":63},{"paper_id":15502,"author_seq":203,"given_name":1064,"surname":11820,"affiliation":63,"orcid":63},"The convincingness of an argument does not only depend on its structure (logos), the person who makes the argument (ethos), but also on the emotion that it causes in 
the recipient (pathos). While the overall intensity and categorical values of emotions in arguments have received considerable attention in the research community, we argue that the emotion an argument evokes in a recipient is subjective. It depends on the recipient’s goals, standards, prior knowledge, and stance. Appraisal theories lend themselves as a link between the subjective cognitive assessment of events and emotions. They have been used in event-centric emotion analysis, but their suitability for assessing argument convincingness remains unexplored. In this paper, we evaluate whether appraisal theories are suitable for emotion analysis in arguments by considering subjective cognitive evaluations of the importance and impact of an argument on its receiver. Based on the annotations in the recently published ContArgA corpus, we perform zero-shot prompting experiments to evaluate the importance of gold-annotated and predicted emotions and appraisals for the assessment of the subjective convincingness labels. We find that, while categorical emotion information does improve convincingness prediction, the improvement is more pronounced with appraisals. 
This work presents the first systematic comparison between emotion models for convincingness prediction, demonstrating the advantage of appraisals, providing insights for theoretical and practical applications in computational argumentation.",{"paper_id":15519,"title":15520,"year":7,"month":188,"day":63,"doi":15521,"resource_url":15522,"first_page":15523,"last_page":15524,"pdf_url":15525,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15526,"paper_type":860,"authors":15527,"abstract":15535},"lrec2026-main-650","Creation of the Estonian Subjectivity Dataset: Assessing the Degree of Subjectivity on a Scale","10.63317\u002F35rspcvi32vp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-650","8204","8216","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.650.pdf","gailit-etal-2026-creation",[15528,15531,15534],{"paper_id":15519,"author_seq":247,"given_name":15529,"surname":15530,"affiliation":63,"orcid":63},"Karl Gustav","Gailit",{"paper_id":15519,"author_seq":232,"given_name":15532,"surname":15533,"affiliation":63,"orcid":63},"Kadri","Muischnek",{"paper_id":15519,"author_seq":218,"given_name":13547,"surname":13548,"affiliation":63,"orcid":63},"This article presents the creation of an Estonian-language dataset for document-level subjectivity, analyzes the resulting annotations, and reports an initial experiment of automatic subjectivity analysis using a large language model (LLM). The dataset comprises 1,000 documents—300 journalistic articles and 700 randomly selected web texts—each rated for subjectivity on a continuous scale from 0 (fully objective) to 100 (fully subjective) by four annotators. As the inter-annotator correlations were moderate, with some texts receiving scores at the opposite ends of the scale, a subset of texts with the most divergent scores was re-annotated, with the inter-annotator correlation improving. 
In addition to human annotations, the dataset includes scores generated by GPT-5 as an experiment on annotation automation. These scores were similar to human annotators, however several differences emerged, suggesting that while LLM based automatic subjectivity scoring is feasible, it is not an interchangeable alternative to human annotation, and its suitability depends on the intended application.",{"paper_id":15537,"title":15538,"year":7,"month":188,"day":63,"doi":15539,"resource_url":15540,"first_page":15541,"last_page":15542,"pdf_url":15543,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15544,"paper_type":860,"authors":15545,"abstract":15553},"lrec2026-main-651","Mitigating Misinterpretation in Policy Documents through Automated Language Understanding","10.63317\u002F32bvjtupxb64","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-651","8217","8234","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.651.pdf","biswas-etal-2026-mitigating",[15546,15548,15551],{"paper_id":15537,"author_seq":247,"given_name":15547,"surname":12633,"affiliation":63,"orcid":63},"Momojit",{"paper_id":15537,"author_seq":232,"given_name":15549,"surname":15550,"affiliation":63,"orcid":63},"Anka Chandrahas","Tummepalli",{"paper_id":15537,"author_seq":218,"given_name":15552,"surname":10207,"affiliation":63,"orcid":63},"Preethu Rose","Policy documents often employ intricate and technical language, posing comprehension challenges for policyholders and increasing the risk of misinterpretation, financial losses, and legal disputes. To address these issues, we propose an automated framework leveraging Retrieval-Augmented Generation to identify and clarify potentially mis-interpretable paragraphs within policy documents. The framework consists of two key modules: the Annotation module and the Rectification module. 
The Annotation module employs both paragraph-level and document-level contextual reasoning to classify paragraphs into categories indicative of potential misinterpretation. The Rectification module resolves these ambiguities by generating targeted interpretation queries, retrieving relevant document-level context, and incorporating external knowledge sources. Applied to a corpus of 240 real-world policy documents, the Annotation module produced a benchmark dataset comprising 11,000 annotated paragraphs, enabling systematic evaluation of interpretability issues. We assessed the dataset’s quality through expert-driven manual reviews and large-scale automated evaluations using fine-tuned Pretrained Language Model. For the Rectification module, we evaluated five open-source Large Language Models: Mistral-2-7B, Mistral-3-7B, LLaMA-2-7B, LLaMA-3-8B, and Saul-7B. Among these, Mistral-2-7B achieved the highest human evaluation scores: 0.912 for Clarity, 0.914 for Fidelity, and 0.934 for Usefulness. 
This work demonstrates the practical feasibility of utilizing automated frameworks to enhance the clarity and comprehensibility of complex policy documents, thereby mitigating risks associated with misinterpretation and its adverse consequences.",{"paper_id":15555,"title":15556,"year":7,"month":188,"day":63,"doi":15557,"resource_url":15558,"first_page":15559,"last_page":15560,"pdf_url":15561,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15562,"paper_type":860,"authors":15563,"abstract":15578},"lrec2026-main-652","Sovereign AI-based Public Services Are Viable and Affordable","10.63317\u002F2jrbcyao9v63","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-652","8235","8245","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.652.pdf","branco-etal-2026-sovereign",[15564,15566,15568,15569,15571,15573,15576],{"paper_id":15555,"author_seq":247,"given_name":4409,"surname":15565,"affiliation":63,"orcid":63},"Branco",{"paper_id":15555,"author_seq":232,"given_name":15567,"surname":4418,"affiliation":63,"orcid":63},"Luis M. S.",{"paper_id":15555,"author_seq":218,"given_name":7059,"surname":1578,"affiliation":63,"orcid":63},{"paper_id":15555,"author_seq":203,"given_name":15570,"surname":1578,"affiliation":63,"orcid":63},"Eduardo",{"paper_id":15555,"author_seq":188,"given_name":15572,"surname":4040,"affiliation":63,"orcid":63},"João Ricardo",{"paper_id":15555,"author_seq":172,"given_name":15574,"surname":15575,"affiliation":63,"orcid":63},"Nuno","Marques",{"paper_id":15555,"author_seq":155,"given_name":15577,"surname":4069,"affiliation":63,"orcid":63},"Madalena","The rapid expansion of AI-based remote services has intensified debates about the long-term implications of growing structural concentration in infrastructure and expertise. 
As AI capabilities become increasingly intertwined with geopolitical interests, the availability and reliability of foundational AI services can no longer be taken for granted. This issue is particularly pressing for AI-enabled public services for citizens, as governments and public agencies are progressively adopting 24\u002F7 AI-driven support systems typically operated through commercial offerings from a small oligopoly of global technology providers. This paper challenges the prevailing assumption that general-purpose architectures, offered by these providers, are the optimal choice for all application contexts. Through practical experimentation, we demonstrate that viable and cost-effective alternatives exist—alternatives that align with principles of digital and cultural sovereignty. Our findings provide an empirical illustration that sovereign AI-based public services are both technically feasible and economically sustainable, capable of operating effectively on premises with modest computational and financial resources while maintaining cultural and digital autonomy. 
The technical insights and deployment lessons reported here are intended to inform the adoption of similar sovereign AI public services by national agencies and governments worldwide.",{"paper_id":15580,"title":15581,"year":7,"month":188,"day":63,"doi":15582,"resource_url":15583,"first_page":15584,"last_page":15585,"pdf_url":15586,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15587,"paper_type":860,"authors":15588,"abstract":15595},"lrec2026-main-653","A Typology of Synthetic Datasets for Dialogue Processing in Clinical Contexts","10.63317\u002F3mrn3tpidamx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-653","8246","8263","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.653.pdf","bedrick-etal-2026-typology",[15589,15591,15594],{"paper_id":15580,"author_seq":247,"given_name":3337,"surname":15590,"affiliation":63,"orcid":63},"Bedrick",{"paper_id":15580,"author_seq":232,"given_name":15592,"surname":15593,"affiliation":63,"orcid":63},"A. Seza","Dogruoz",{"paper_id":15580,"author_seq":218,"given_name":12880,"surname":12881,"affiliation":63,"orcid":63},"Synthetic datasets are used across linguistic domains and NLP tasks, particularly in scenarios where authentic data is limited (or even non-existent). One such domain is that of clinical (healthcare) contexts, where there exist significant and long-standing challenges (e.g., privacy, anonymization, and data governance) which have led to the development of an increasing number of synthetic datasets. One increasingly important category of clinical dataset is that of clinical dialogues which are especially sensitive and difficult to collect. Therefore, they are commonly synthesized. While such synthetic datasets have been shown to be sufficient in some situations, little theory exists to inform how they may be best used and generalized to new applications. 
In this paper, we provide an overview of how synthetic datasets are created, evaluated and used for dialogue related tasks in the medical domain. Additionally, we propose a novel typology for use in classifying types and degrees of data synthesis, to facilitate comparison and evaluation.",{"paper_id":15597,"title":15598,"year":7,"month":188,"day":63,"doi":15599,"resource_url":15600,"first_page":15601,"last_page":15602,"pdf_url":15603,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15604,"paper_type":860,"authors":15605,"abstract":15627},"lrec2026-main-654","Text+: A National Hub Including Legacy Language Data","10.63317\u002F4vx5d59r6m29","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-654","8264","8275","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.654.pdf","barth-etal-2026-text",[15606,15607,15608,15610,15611,15612,15614,15617,15620,15621,15624],{"paper_id":15597,"author_seq":247,"given_name":2175,"surname":12809,"affiliation":63,"orcid":63},{"paper_id":15597,"author_seq":232,"given_name":12218,"surname":12219,"affiliation":63,"orcid":63},{"paper_id":15597,"author_seq":218,"given_name":1095,"surname":15609,"affiliation":63,"orcid":63},"Ecker",{"paper_id":15597,"author_seq":203,"given_name":2813,"surname":5324,"affiliation":63,"orcid":63},{"paper_id":15597,"author_seq":188,"given_name":1041,"surname":12499,"affiliation":63,"orcid":63},{"paper_id":15597,"author_seq":172,"given_name":5684,"surname":15613,"affiliation":63,"orcid":63},"Hemmer",{"paper_id":15597,"author_seq":155,"given_name":15615,"surname":15616,"affiliation":63,"orcid":63},"Timm","Lehmberg",{"paper_id":15597,"author_seq":138,"given_name":15618,"surname":15619,"affiliation":63,"orcid":63},"Thorsten","Trippel",{"paper_id":15597,"author_seq":121,"given_name":4651,"surname":12501,"affiliation":63,"orcid":63},{"paper_id":15597,"author_seq":104,"given_name":15622,"surname":15623,"affiliation":63,"orcid":63},"Arden","Zimmer
mann",{"paper_id":15597,"author_seq":87,"given_name":15625,"surname":15626,"affiliation":63,"orcid":63},"Claus","Zinn","Text+ is the German distributed research data infrastructure for literary studies, linguistics, and spoken and written language. Its resources consist of contemporary and historical literary and media texts, deeply annotated material, transcripts of spoken and sign language, and original recordings. Text+ provides access to its resources according to the FAIR guidelines: Findable due to standard-conformant metadata, Accessible with single sign-on authentication, Interoperable via open data formats, and Reproducible through web services and extensive documentation. The 30+ partners of Text+ are archives, libraries, universities, and other research institutions. The partners are autonomous, and they differ in the amount of data and processing capabilities they provide. In this paper, we describe the hub architecture of Text+, which gives users a central and FAIR point of access to research data that continues to be distributed across the Text+ partner institutions. The architecture serves as a blueprint to evolving research infrastructures that aim at maintaining (and empowering) their research data contributors.",{"paper_id":15629,"title":15630,"year":7,"month":188,"day":63,"doi":15631,"resource_url":15632,"first_page":15633,"last_page":15634,"pdf_url":15635,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15636,"paper_type":860,"authors":15637,"abstract":15656},"lrec2026-main-655","Can NLP Tackle Hate Speech in the Real World? 
Stakeholder-Informed Feedback and Survey on Counterspeech","10.63317\u002F3mhk48f7x47b","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-655","8276","8290","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.655.pdf","dinkar-etal-2026-can",[15638,15641,15643,15644,15647,15650,15653],{"paper_id":15629,"author_seq":247,"given_name":15639,"surname":15640,"affiliation":63,"orcid":63},"Tanvi","Dinkar",{"paper_id":15629,"author_seq":232,"given_name":15642,"surname":3284,"affiliation":63,"orcid":63},"Aiqi",{"paper_id":15629,"author_seq":218,"given_name":12445,"surname":12446,"affiliation":63,"orcid":63},{"paper_id":15629,"author_seq":203,"given_name":15645,"surname":15646,"affiliation":63,"orcid":63},"Poppy","Gerrard-Abbott",{"paper_id":15629,"author_seq":188,"given_name":15648,"surname":15649,"affiliation":63,"orcid":63},"Nancie A.","Gunson",{"paper_id":15629,"author_seq":172,"given_name":15651,"surname":15652,"affiliation":63,"orcid":63},"Gavin","Abercrombie",{"paper_id":15629,"author_seq":155,"given_name":15654,"surname":15655,"affiliation":63,"orcid":63},"Ioannis","Konstas","Counterspeech, i.e. the practice of responding to online hate speech, has gained traction in NLP as a promising intervention. While early work emphasised collaboration with non-governmental organisation stakeholders, recent research trends have shifted toward automated pipelines that reuse a small set of legacy datasets, often without input from affected communities. This paper presents a systematic review of 74 NLP studies on counterspeech, analysing the extent to which stakeholder participation influences dataset creation, model development, and evaluation. To complement this analysis, we conducted a participatory case study that spanned close to two years with five NGOs specialising in online Gender-Based Violence (oGBV), identifying stakeholder-informed practices for counterspeech generation. 
Our findings reveal a growing disconnect between current NLP research and the needs of communities most impacted by toxic online content. We conclude with concrete recommendations for re-centring stakeholder expertise in counterspeech research.",{"paper_id":15658,"title":15659,"year":7,"month":188,"day":63,"doi":15660,"resource_url":15661,"first_page":15662,"last_page":15663,"pdf_url":15664,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15665,"paper_type":860,"authors":15666,"abstract":15676},"lrec2026-main-656","Towards Complex Debate Understanding: Predicting Claim Impact Scores through the Modelling of Claim Interactions","10.63317\u002F56u9na3ackbw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-656","8291","8302","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.656.pdf","brouat-etal-2026-complex",[15667,15669,15671,15674],{"paper_id":15658,"author_seq":247,"given_name":5346,"surname":15668,"affiliation":63,"orcid":63},"Brouat",{"paper_id":15658,"author_seq":232,"given_name":12147,"surname":15670,"affiliation":63,"orcid":63},"Surdeanu",{"paper_id":15658,"author_seq":218,"given_name":15672,"surname":15673,"affiliation":63,"orcid":63},"Srdjan","Vesic",{"paper_id":15658,"author_seq":203,"given_name":15570,"surname":15675,"affiliation":63,"orcid":63},"Blanco","Structured debates can be naturally modeled as argument graphs, with claims connected by support and attack relations, a representation formalised in Computational Argumentation Theory. In this paper, we propose a novel neural architecture that jointly models both the textual content of claims and their relational structure. Claims are encoded using contextualised embeddings and compressed through a feedforward compression layer. Then, a graph attention network explicitly captures attack\u002Fsupport interactions. 
Trained on real-world debates from the Kialo platform, our model predicts the distribution of user-assigned impact votes for each claim. It achieves a mean absolute error (MAE) of 0.068, significantly outperforming both text-only and structure-only baselines. Further experiments show strong out-of-domain generalisation across thematic clusters, as well as suggestive correlations between the model’s attention patterns and human voting behaviour. An analysis of linguistic and graph-based features suggests that the model relies on latent argumentative patterns as well as the text. Our findings also shed light on language differences between strong and weak claims, as determined by humans as well as by our best model.",{"paper_id":15678,"title":15679,"year":7,"month":188,"day":63,"doi":15680,"resource_url":15681,"first_page":15682,"last_page":15683,"pdf_url":15684,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15685,"paper_type":860,"authors":15686,"abstract":15693},"lrec2026-main-657","Is There Anything More Deceptive than an Obvious Fact? Investigating Implicitness in User-Generated Argumentative Text","10.63317\u002F48pa29hp9qoy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-657","8303","8316","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.657.pdf","sviridova-etal-2026-is",[15687,15689,15691],{"paper_id":15678,"author_seq":247,"given_name":6029,"surname":15688,"affiliation":63,"orcid":63},"Sviridova",{"paper_id":15678,"author_seq":232,"given_name":2968,"surname":15690,"affiliation":63,"orcid":63},"Cabrio",{"paper_id":15678,"author_seq":218,"given_name":15692,"surname":6094,"affiliation":63,"orcid":63},"Serena","While various attempts towards unveiling implicitness in argumentation have been made, particularly towards improving automatic detection and reconstruction of implicit components and background knowledge, the task remains overly challenging. 
In this paper, we present, to the best of our knowledge, the first fine-grained typology of implicitness in argumentation, distinguishing among implicature, ambiguity, and presupposition. Applying this typology, we annotate 78 full-length discussions from the Change My View forum, building the largest publicly available dataset of real-world enthymemes with implicitness types labeled. For comparison, we additionally annotate 112 short argumentative texts from the Microtext corpus to examine how text length and complexity influence the automatic analysis of natural arguments. Leveraging these datasets, we establish strong baselines for two tasks: (i) enthymeme detection and (ii) fine-grained implicitness classification, with both encoder-only and large language models, highlighting the challenge of modeling implicit reasoning in long, unstructured discourse.",{"paper_id":15695,"title":15696,"year":7,"month":188,"day":63,"doi":15697,"resource_url":15698,"first_page":15699,"last_page":15700,"pdf_url":15701,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":15702,"bibkey":15703,"paper_type":860,"authors":15704,"abstract":15720},"lrec2026-main-658","Best-Worst Scaling of Hype in Biomedical Research: Building an Intensity Lexicon of Promotional 
Adjectives","10.63317\u002F3cdc4v7fff2y","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-658","8317","8326","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.658.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.658_OptionalSupplementaryMaterial.zip","millar-etal-2026-best",[15705,15708,15711,15714,15717],{"paper_id":15695,"author_seq":247,"given_name":15706,"surname":15707,"affiliation":63,"orcid":63},"Neil","Millar",{"paper_id":15695,"author_seq":232,"given_name":15709,"surname":15710,"affiliation":63,"orcid":63},"Dipesh","Satav",{"paper_id":15695,"author_seq":218,"given_name":15712,"surname":15713,"affiliation":63,"orcid":63},"Bojan","Batalo",{"paper_id":15695,"author_seq":203,"given_name":15715,"surname":15716,"affiliation":63,"orcid":63},"Erica K.","Shimomoto",{"paper_id":15695,"author_seq":188,"given_name":15718,"surname":15719,"affiliation":63,"orcid":63},"Ryosuke L.","Ohniwa","Promotional language, or \"hype\", is increasingly common in biomedical research reporting. Adjectives such as groundbreaking, robust, and impactful can engage readers but also risk imposing value judgements and undermining objectivity. Detecting and assessing such language requires distinguishing degrees of promotional intensity (e.g., new \u003C novel \u003C groundbreaking \u003C revolutionary), yet no such graded resource exists. We present an intensity-scaled lexicon of 303 promotional adjectives attested in biomedical writing across eight evaluative domains (e.g. IMPORTANCE, NOVELTY, RIGOUR). Ratings were obtained through Best–Worst Scaling (BWS) with human participants evaluating adjectives for promotional strength in the context of scientific research reporting. We refer to this as the Hyplex resource (Hype Lexicon). 
The ratings show high internal consistency (r = 0.87; 95% CI [0.85, 0.89]) and correlate most strongly with arousal and dominance in the NRC VAD Lexicon, suggesting that promotional intensity aligns more with reader activation and perceptions of assertiveness than simple positivity. We also release an online BWS platform integrated with the R package bwsTools to support intensity-scaling research in other domains.",{"paper_id":15722,"title":15723,"year":7,"month":188,"day":63,"doi":15724,"resource_url":15725,"first_page":15726,"last_page":15727,"pdf_url":15728,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15729,"paper_type":860,"authors":15730,"abstract":15734},"lrec2026-main-659","Trust Me, I Can Convince You: The Contextualized Argument Appraisal Framework and the ContArgA Corpus","10.63317\u002F484rpnvebop5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-659","8327","8346","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.659.pdf","greschner-etal-2026-trust",[15731,15732,15733],{"paper_id":15722,"author_seq":247,"given_name":6757,"surname":15464,"affiliation":63,"orcid":63},{"paper_id":15722,"author_seq":232,"given_name":15461,"surname":15462,"affiliation":63,"orcid":63},{"paper_id":15722,"author_seq":218,"given_name":1064,"surname":11820,"affiliation":63,"orcid":63},"Emotions that somebody develops based on an argument do not only depend on the argument itself - they are also influenced by a subjective evaluation of the argument’s potential impact on the self. For instance, an argument to ban plastic bottles might cause fear of losing a job for a bottle industry worker, which lowers the convincingness – presumably independent of its content. While binary emotionality of arguments has been studied, such cognitive appraisal models have only been proposed in other subtasks of emotion analysis, but not in the context of arguments and their convincingness. 
To fill this research gap, we propose the Contextualized Argument Appraisal Framework to model the interplay between the sender, receiver, and argument. We adapt established appraisal models from psychology to argument mining, including argument pleasantness, familiarity, response urgency, and expected effort, as well as convincingness variables. To evaluate the framework and pave the way for computational modeling, we develop a novel role-playing-based annotation setup, mimicking real-world exposure to arguments. Participants disclose their emotion, explain the main cause, the argument appraisal, and the perceived convincingness. To consider the subjective nature of such annotations, we also collect demographic data and personality traits of both the participants and ask them to disclose the same variables for their perception of the argument sender. The analysis of the resulting corpus of 4000 annotations reveals that convincingness is positively correlated with positive emotions (e.g., trust) and negatively correlated with negative emotions (e.g., anger). 
The appraisal variables particularly point to the importance of the annotator’s familiarity with the argument.",{"paper_id":15736,"title":15737,"year":7,"month":188,"day":63,"doi":15738,"resource_url":15739,"first_page":15740,"last_page":15741,"pdf_url":15742,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15743,"paper_type":860,"authors":15744,"abstract":15753},"lrec2026-main-660","Towards Clinical Applications of NLP: Detecting Emotion Regulation via Emotional Categories and Expression Modes in French Transcriptions","10.63317\u002F2bvg2jdxxcuc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-660","8347","8364","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.660.pdf","klein-etal-2026-clinical",[15745,15747,15750],{"paper_id":15736,"author_seq":247,"given_name":15746,"surname":6295,"affiliation":63,"orcid":63},"Salome",{"paper_id":15736,"author_seq":232,"given_name":15748,"surname":15749,"affiliation":63,"orcid":63},"Amalia","Todirascu",{"paper_id":15736,"author_seq":218,"given_name":15751,"surname":15752,"affiliation":63,"orcid":63},"Hélène","Vassiliadou","We present an annotated corpus of patient interview transcriptions, labeled for emotionality, polarity, intensity, and emotional category (at the sentence level), and for expression mode (at the token level). Three modes of expression are distinguished: Designated (explicit), Suggested (implicit causes), and Manifested (implicit consequences). The corpus has been collected during the GREMO-LING project and is used to measure the linguistic expressions of emotions in patients’ narratives. The corpus, consisting of 7,471 sentences, was used to fine-tune and evaluate several transformer-based language models, including the French BERT family. Sentence classification was performed for emotionality, emotion categories and expression modes. 
The best-performing models achieved F1 scores of 0.87 (emotionality, fine-tuned DistilCamemBERT), 0.58 (emotion categories, CamemBERTaV2), and 0.70 (expression modes, CamemBERT). We obtain solid results despite the high complexity of non-standard, spoken-derived data. These findings confirm the feasibility and relevance of automatic emotion detection in clinical discourse. We provide publicly available guidelines, annotated corpora and models, thereby establishing a methodological foundation for future research on the linguistic assessment of emotional regulation and its clinical implications, such as the evaluation of the Dialectical Behavioral Therapy (DBT) in enhancing patients’ emotion regulation skills.",{"paper_id":15755,"title":15756,"year":7,"month":188,"day":63,"doi":15757,"resource_url":15758,"first_page":15759,"last_page":15760,"pdf_url":15761,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15762,"paper_type":860,"authors":15763,"abstract":15771},"lrec2026-main-661","R.U.Psycho? 
A Framework for Robust Unified Psychometric Testing of Language Models","10.63317\u002F4d7ofew6usug","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-661","8365","8386","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.661.pdf","schelb-etal-2026-psycho",[15764,15766,15768,15769],{"paper_id":15755,"author_seq":247,"given_name":1296,"surname":15765,"affiliation":63,"orcid":63},"Schelb",{"paper_id":15755,"author_seq":232,"given_name":2637,"surname":15767,"affiliation":63,"orcid":63},"Borin",{"paper_id":15755,"author_seq":218,"given_name":1061,"surname":9018,"affiliation":63,"orcid":63},{"paper_id":15755,"author_seq":203,"given_name":4651,"surname":15770,"affiliation":63,"orcid":63},"Spitz","Generative language models are increasingly being subjected to psychometric questionnaires intended for human testing, in efforts to establish their traits, as benchmarks for alignment, or to simulate participants in social science experiments. While this growing body of work sheds light on the likeness of model responses to those of humans, concerns are warranted regarding the rigour and reproducibility with which these experiments may be conducted. Instabilities in model outputs, sensitivity to prompt design, parameter settings, and a large number of available model versions increase documentation requirements. Consequently, generalization of findings is often complex and reproducibility is far from guaranteed. In this paper, we present R.U.Psycho, a framework for designing and running robust and reproducible psychometric experiments on generative language models that reduces the required coding expertise. We demonstrate the capability of our framework on a variety of psychometric questionnaires, which lend support to prior findings in the literature. 
R.U.Psycho is available as a Python package at https:\u002F\u002Fgithub.com\u002Fjulianschelb\u002Frupsycho.",{"paper_id":15773,"title":15774,"year":7,"month":188,"day":63,"doi":15775,"resource_url":15776,"first_page":15777,"last_page":15778,"pdf_url":15779,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15780,"paper_type":860,"authors":15781,"abstract":15800},"lrec2026-main-662","Code-switching as a Bias Indicator in LLMs: \"the Consequences Are Not the Same Para Nosotros\"","10.63317\u002F2mq6kqjk9bng","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-662","8387","8399","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.662.pdf","ducel-etal-2026-code",[15782,15783,15786,15789,15792,15794,15797,15799],{"paper_id":15773,"author_seq":247,"given_name":5343,"surname":5344,"affiliation":63,"orcid":63},{"paper_id":15773,"author_seq":232,"given_name":15784,"surname":15785,"affiliation":63,"orcid":63},"Aurélie","Névéol",{"paper_id":15773,"author_seq":218,"given_name":15787,"surname":15788,"affiliation":63,"orcid":63},"Vidit","Khazanchi",{"paper_id":15773,"author_seq":203,"given_name":15790,"surname":15791,"affiliation":63,"orcid":63},"Loïc","Leclere",{"paper_id":15773,"author_seq":188,"given_name":1995,"surname":15793,"affiliation":63,"orcid":63},"Pedrini",{"paper_id":15773,"author_seq":172,"given_name":15795,"surname":15796,"affiliation":63,"orcid":63},"Léa","Bouchet",{"paper_id":15773,"author_seq":155,"given_name":4797,"surname":15798,"affiliation":63,"orcid":63},"Caissial",{"paper_id":15773,"author_seq":138,"given_name":5349,"surname":5350,"affiliation":63,"orcid":63},"Code-switching is a widespread linguistic practice among bilingual speakers. While recent studies have addressed the impact of code-switching on downstream task performance, the potential biases and harms that language models may cause when prompted with code-switching have yet to be investigated. 
The objective of this study is to investigate whether code-switching constitutes an implicit indicator of ethnicity that can be leveraged to unveil covert racist or xenophobic bias in language models. The present paper introduces a methodology to compare generated texts that were prompted with code-switching vs. with monolingual inputs. It is applied on both Hinglish and Spanglish, two popular forms of code-switching that are omnipresent in Indian and Hispanic communities. With a decision tree approach, we tackle various types of semantic differences through the use of semantic resources, stereotypes lists, POS-tagging and sentiment classifiers. Over 84k text pairs are generated with 3 popular large language models. Overall, around 50% of generated text pairs are not semantically equivalent, and 25% of the time, there is a potential for harm against the Indian or Hispanic community. The different possible harms are further discussed, relying on sociological studies to argue that bias and harms against socially discriminated communities have greater consequences.",{"paper_id":15802,"title":15803,"year":7,"month":188,"day":63,"doi":15804,"resource_url":15805,"first_page":15806,"last_page":15807,"pdf_url":15808,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15809,"paper_type":860,"authors":15810,"abstract":15813},"lrec2026-main-663","Exploration of How Hate Is Framed on Social Media","10.63317\u002F38db94nbbg2e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-663","8400","8414","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.663.pdf","ailneni-etal-2026-exploration",[15811,15812],{"paper_id":15802,"author_seq":247,"given_name":8189,"surname":8190,"affiliation":63,"orcid":63},{"paper_id":15802,"author_seq":232,"given_name":8192,"surname":8193,"affiliation":63,"orcid":63},"Understanding how hate is framed in multimodal social media content is crucial for developing interpretable and 
robust hate detection systems. We present the MM-HateFrames Dataset, a large-scale resource encoding 2,298 Hate Frames (HFs) and their corresponding rationales discovered from two benchmark datasets—Hateful Memes and MMHS150K—comprising over 11K+ social media multimodal posts. This allowed us to explore several generative and non-generative methods to automatically discover the way hate is framed when relying on MM-HateFrames, including clustering-based methods and large multimodal models (LMMs) under zero-shot and few-shot settings. Experimental evaluations show that few-shot LMMs prompting generates the most coherent and sound frame articulations. The MM-HateFrames Dataset provides a valuable foundation for future research in hate speech understanding, frame articulation, and explainable multimodal NLP, enabling models to interpret not only whether content is hateful but also how hate is conceptually framed.",{"paper_id":15815,"title":15816,"year":7,"month":188,"day":63,"doi":15817,"resource_url":15818,"first_page":15819,"last_page":15820,"pdf_url":15821,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15822,"paper_type":860,"authors":15823,"abstract":15836},"lrec2026-main-664","Are Social Biases in LLMs Consistent across Generative Tasks? 
A Case Study for Basque","10.63317\u002F52zk8uyjrw5k","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-664","8415","8430","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.664.pdf","zulaika-etal-2026-are",[15824,15827,15829,15831,15834],{"paper_id":15815,"author_seq":247,"given_name":15825,"surname":15826,"affiliation":63,"orcid":63},"Muitze","Zulaika",{"paper_id":15815,"author_seq":232,"given_name":2092,"surname":15828,"affiliation":63,"orcid":63},"Saralegi",{"paper_id":15815,"author_seq":218,"given_name":5233,"surname":15830,"affiliation":63,"orcid":63},"Shershneva",{"paper_id":15815,"author_seq":203,"given_name":15832,"surname":15833,"affiliation":63,"orcid":63},"Lia","Gonzalez",{"paper_id":15815,"author_seq":188,"given_name":6057,"surname":15835,"affiliation":63,"orcid":63},"Fullaondo","Most bias benchmarks for Large Language Models (LLMs) rely on multiple-choice formats, overlooking subtler biases that emerge in open-ended text generation. This gap is particularly relevant for low-resource languages like Basque, where culturally grounded evaluation resources are limited. We introduce BasqBBG (Basque Bias Benchmark for Generation), the first systematic benchmark for social bias in Basque Natural Language Generation (NLG), covering eight bias categories—including a newly added feminism dimension—adapted from the BasqBBQ dataset. We validate an LLM-as-a-Judge framework against expert human evaluations on two NLG tasks (story continuation and generative QA), achieving strong agreement (agreement of 0.78 in bias presence and 0.92 in bias directionality). We scale this approach to ten additional tasks and five models. Results show that bias levels vary markedly across tasks and depend more on model family than size: Llama-based models exhibit higher and less consistent bias (45–50%), whereas GPT-4o and the Gemma-based Kimu-9B remain substantially fairer (≤20%). 
Our findings highlight the need for task-aware, language-specific frameworks to assess social bias in generative LLMs. Keywords: Large Language Models, Social Bias, Basque, Natural Language Generation, Benchmarking, Manual Evaluation, LLM-as-a-judge.",{"paper_id":15838,"title":15839,"year":7,"month":188,"day":63,"doi":15840,"resource_url":15841,"first_page":15842,"last_page":15843,"pdf_url":15844,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15845,"paper_type":860,"authors":15846,"abstract":15862},"lrec2026-main-665","Fine-grained Narrative Classification in Biased News Articles","10.63317\u002F2ddvvr4zijyh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-665","8431","8445","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.665.pdf","afroz-etal-2026-fine",[15847,15850,15853,15856,15859,15861],{"paper_id":15838,"author_seq":247,"given_name":15848,"surname":15849,"affiliation":63,"orcid":63},"Zeba","Afroz",{"paper_id":15838,"author_seq":232,"given_name":15851,"surname":15852,"affiliation":63,"orcid":63},"Harsh","Vardhan",{"paper_id":15838,"author_seq":218,"given_name":15854,"surname":15855,"affiliation":63,"orcid":63},"Pawan","Bhakuni",{"paper_id":15838,"author_seq":203,"given_name":15857,"surname":15858,"affiliation":63,"orcid":63},"Aanchal","Punia",{"paper_id":15838,"author_seq":188,"given_name":15860,"surname":2247,"affiliation":63,"orcid":63},"Rajdeep",{"paper_id":15838,"author_seq":172,"given_name":6204,"surname":6205,"affiliation":63,"orcid":63},"Narratives are the cognitive and emotional scaffolds of propaganda. They organize isolated persuasive techniques into coherent stories that justify actions, attribute blame, and evoke identification with ideological camps. In this paper, we propose a novel fine-grained narrative classification in biased news articles. 
We also explore article-bias classification as the precursor task to narrative classification and fine-grained persuasive technique identification. We develop INDI-PROP, the first ideologically grounded fine-grained narrative dataset with multi-level annotation for analyzing propaganda in Indian news media. Our dataset INDI-PROP comprises 1,266 articles focusing on two polarizing socio-political events in recent times: CAA\u002FNRC and the Farmers’ protest. Each article is annotated at three hierarchical levels: (i) ideological article-bias (pro-government, pro-opposition, neutral), (ii) event-specific fine-grained narrative frames anchored in ideological polarity and communicative intent, and (iii) persuasive techniques. We propose FANTA and TPTC, two GPT-4o guided multi-hop prompt-based reasoning frameworks for the bias, narrative, and persuasive technique classification. FANTA leverages multi-layered communicative phenomenon by integrating information extraction and contextual framing for hierarchical reasoning. On the other hand, TPTC adopts systematic decomposition of persuasive cues via a two-stage approach. 
Our evaluation suggests substantial improvement over underlying baselines in each case.",{"paper_id":15864,"title":15865,"year":7,"month":188,"day":63,"doi":15866,"resource_url":15867,"first_page":15868,"last_page":15869,"pdf_url":15870,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15871,"paper_type":860,"authors":15872,"abstract":15880},"lrec2026-main-666","A Shoal of Voices: Parallel Read Speech from Professional Swedish Narrators","10.63317\u002F3vwydgxw2bvt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-666","8446","8454","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.666.pdf","tnnander-etal-2026-shoal",[15873,15875,15878],{"paper_id":15864,"author_seq":247,"given_name":3799,"surname":15874,"affiliation":63,"orcid":63},"Tånnander",{"paper_id":15864,"author_seq":232,"given_name":15876,"surname":15877,"affiliation":63,"orcid":63},"Jim","O'Regan",{"paper_id":15864,"author_seq":218,"given_name":4278,"surname":15879,"affiliation":63,"orcid":63},"Edlund","We present a shoal of voices in Storspigg–TBI, a legally cleared, professionally recorded Swedish speech corpus derived from talking-book production at the Swedish Agency for Accessible Media (MTM). The corpus contains 1 000 information messages read by 99 narrators under controlled studio conditions. The material has undergone full legal assessment and a three-sweep adoption process ensuring provenance, FAIR\u002FFACT compliance, and reproducibility in collaboration with the national research infrastructure Språkbanken Tal. The paper describes the legal framework, data-selection and curation pipeline, as well as initial automatic transcription using Swedish Whisper and wav2vec 2.0 models. 
The resulting corpus provides a high-quality reference resource for speech science and technology, supporting research on inter-speaker variation, prosody, and evaluation under consistent acoustic and linguistic conditions.",{"paper_id":15882,"title":15883,"year":7,"month":188,"day":63,"doi":15884,"resource_url":15885,"first_page":15886,"last_page":15887,"pdf_url":15888,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15889,"paper_type":860,"authors":15890,"abstract":15900},"lrec2026-main-667","Deep Learning-Based Multi-Aspect Pronunciation Assessment for Individuals with Down Syndrome","10.63317\u002F4g3dwy2kmira","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-667","8455","8464","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.667.pdf","fernndezgarca-etal-2026-deep",[15891,15893,15895,15898],{"paper_id":15882,"author_seq":247,"given_name":1061,"surname":15892,"affiliation":63,"orcid":63},"Fernández-García",{"paper_id":15882,"author_seq":232,"given_name":14309,"surname":15894,"affiliation":63,"orcid":63},"González-Ferreras",{"paper_id":15882,"author_seq":218,"given_name":15896,"surname":15897,"affiliation":63,"orcid":63},"Valentín","Cardeñoso-Payo",{"paper_id":15882,"author_seq":203,"given_name":7965,"surname":15899,"affiliation":63,"orcid":63},"Corrales-Astorgano","This paper explores the use of an annotated speech corpus to assess multiple dimensions of speech quality—particularly phonetic, fluency and prosody—in individuals with Down syndrome, with the aim of informing the development of automated assessment tools. We conducted a series of experiments using the GOPT model, together with representations extracted from fine-tuning Wav2Vec models focused on phoneme classification. Model predictions were compared against expert annotations from a speech-language pathologist using Pearson correlation. 
Results demonstrate significant improvements over prior work, with correlations up to 0.49 in certain aspects, particularly for phonetic and fluency dimensions, while prosody remained more challenging to model. The study highlights the potential of Transformer-based architectures for atypical speech assessment and underscores the challenges inherent in assessing atypical speech, particularly due to variability linked to specific disfluency types.",{"paper_id":15902,"title":15903,"year":7,"month":188,"day":63,"doi":15904,"resource_url":15905,"first_page":15906,"last_page":15907,"pdf_url":15908,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15909,"paper_type":860,"authors":15910,"abstract":15918},"lrec2026-main-668","WikIPA: Integrating WikiPron and Lingua Libre for Multilingual IPA Transcription","10.63317\u002F2am4iw3bfhjb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-668","8465","8475","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.668.pdf","cassotti-etal-2026-wikipa",[15911,15914,15915],{"paper_id":15902,"author_seq":247,"given_name":15912,"surname":15913,"affiliation":63,"orcid":63},"Pierluigi","Cassotti",{"paper_id":15902,"author_seq":232,"given_name":2877,"surname":2878,"affiliation":63,"orcid":63},{"paper_id":15902,"author_seq":218,"given_name":15916,"surname":15917,"affiliation":63,"orcid":63},"Domenico De","Cristofaro","We present WikIPA, a new multilingual benchmark designed for automatic speech-to-IPA (STIPA) transcription. By integrating human-curated IPA transcriptions from WikiPron with spoken recordings and metadata from Lingua Libre, WikIPA connects textual phonetic representations with real speech across 78 languages. This open resource supports both broad (phonemic) and narrow (phonetic) transcription tasks, enabling fine-grained evaluation of multilingual phonetic transcription systems. 
WikIPA provides over 289,000 paired entries and serves as a large-scale foundation for STIPA. We benchmark several state-of-the-art STIPA systems, including MultIPA, (Lo)WhIPA, and ZIPA. Results show that ZIPA achieves the lowest mean error rates across most languages, outperforming Whisper- and Wav2Vec-based baselines. Error analyses reveal that remaining discrepancies largely stem from minor phonetic confusions rather than complete transcription failures, emphasizing the challenge of modeling fine-grained articulatory variation. WikIPA thus establishes the first systematic, multilingual evaluation framework for speech-to-IPA transcription and highlights the potential of combining open, community-driven resources to advance STIPA evaluation.",{"paper_id":15920,"title":15921,"year":7,"month":188,"day":63,"doi":15922,"resource_url":15923,"first_page":15924,"last_page":15925,"pdf_url":15926,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15927,"paper_type":860,"authors":15928,"abstract":15950},"lrec2026-main-669","How Pragmatics Shape Articulation: A Computational Case Study in STEM ASL 
Discourse","10.63317\u002F2wjnaaabgz4d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-669","8476","8490","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.669.pdf","imai-etal-2026-how",[15929,15932,15934,15937,15940,15942,15944,15947],{"paper_id":15920,"author_seq":247,"given_name":15930,"surname":15931,"affiliation":63,"orcid":63},"Saki","Imai",{"paper_id":15920,"author_seq":232,"given_name":1359,"surname":15933,"affiliation":63,"orcid":63},"Kezar",{"paper_id":15920,"author_seq":218,"given_name":15935,"surname":15936,"affiliation":63,"orcid":63},"Laurel","Aichler",{"paper_id":15920,"author_seq":203,"given_name":15938,"surname":15939,"affiliation":63,"orcid":63},"Mert","Inan",{"paper_id":15920,"author_seq":188,"given_name":15941,"surname":12261,"affiliation":63,"orcid":63},"Erin",{"paper_id":15920,"author_seq":172,"given_name":9378,"surname":15943,"affiliation":63,"orcid":63},"Wooten",{"paper_id":15920,"author_seq":155,"given_name":15945,"surname":15946,"affiliation":63,"orcid":63},"Lorna Cobban","Quandt",{"paper_id":15920,"author_seq":138,"given_name":15948,"surname":15949,"affiliation":63,"orcid":63},"Malihe","Alikhani","Most state-of-the-art sign language models are trained on interpreter or isolated vocabulary data, which overlooks the variability that characterizes natural dialogue. However, human communication dynamically adapts to contexts and interlocutors through spatiotemporal changes and articulation style. This specifically manifests itself in educational settings, where novel vocabularies are used by teachers, and students. To address this gap, we collect a motion capture dataset of American Sign Language (ASL) STEM (Science, Technology, Engineering, and Mathematics) dialogue that enables quantitative comparison between dyadic interactive signing, solo signed lecture, and interpreted articles. 
Using continuous kinematic features, we disentangle dialogue-specific entrainment from individual effort reduction and show spatiotemporal changes across repeated mentions of STEM terms. On average, dialogue signs are 24.6%-44.6% shorter in duration than the isolated signs, and show significant reductions absent in monologue contexts. Finally, we evaluate sign embedding models on their ability to recognize STEM signs and approximate how entrained the participants become over time. Our study bridges linguistic analysis and computational modeling to understand how pragmatics shape sign articulation and its representation in sign language technologies.",{"paper_id":15952,"title":15953,"year":7,"month":188,"day":63,"doi":15954,"resource_url":15955,"first_page":15956,"last_page":15957,"pdf_url":15958,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15959,"paper_type":860,"authors":15960,"abstract":15965},"lrec2026-main-670","Setting the Stage for Disfluency: Implications of Contextual Task Framing Effects for the Design of Listening Tasks","10.63317\u002F2sy7k27kmz37","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-670","8491","8497","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.670.pdf","kirkland-etal-2026-setting",[15961,15964],{"paper_id":15952,"author_seq":247,"given_name":15962,"surname":15963,"affiliation":63,"orcid":63},"Ambika","Kirkland",{"paper_id":15952,"author_seq":232,"given_name":4278,"surname":15879,"affiliation":63,"orcid":63},"Speech disfluencies have been shown to impact both judgments about a speaker’s competence and decisions about which source of information to rely on. However, fluency effects more broadly are highly sensitive to context: they are strongest when there is little other information available to inform judgments and decisions, and can be attenuated or even reversed by metacognitive processes. 
Speech is generally experienced in the context of interactions, where listeners have access to a plethora of information about the speaker and other parameters relevant to decision-making. It is hence crucial to consider how the outcomes of studies on speech disfluencies might be impacted by the framing of experimental tasks and the information available to participants. We carried out a decision-making task where participants had to choose which of two speakers, one fluent and one disfluent, had answered a trivia question correctly. The task was presented in the context of three scenarios which provided different information about the speakers. We replicated previous findings that listeners preferred fluent answers in only one of these three contexts, demonstrating the importance of task framing.",{"paper_id":15967,"title":15968,"year":7,"month":188,"day":63,"doi":15969,"resource_url":15970,"first_page":15971,"last_page":15972,"pdf_url":15973,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":15974,"paper_type":860,"authors":15975,"abstract":15991},"lrec2026-main-671","ACAData: Parallel Dataset of Academic Data for Machine Translation","10.63317\u002F4fkj9gvuqsdd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-671","8498","8519","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.671.pdf","lacunza-etal-2026-acadata",[15976,15979,15982,15985,15987,15988,15989],{"paper_id":15967,"author_seq":247,"given_name":15977,"surname":15978,"affiliation":63,"orcid":63},"Iñaki","Lacunza",{"paper_id":15967,"author_seq":232,"given_name":15980,"surname":15981,"affiliation":63,"orcid":63},"Javier Garcia","Gilabert",{"paper_id":15967,"author_seq":218,"given_name":15983,"surname":15984,"affiliation":63,"orcid":63},"Francesca De 
Luca","Fornaciari",{"paper_id":15967,"author_seq":203,"given_name":8612,"surname":15986,"affiliation":63,"orcid":63},"Aula-Blasco",{"paper_id":15967,"author_seq":188,"given_name":1276,"surname":7976,"affiliation":63,"orcid":63},{"paper_id":15967,"author_seq":172,"given_name":12730,"surname":12731,"affiliation":63,"orcid":63},{"paper_id":15967,"author_seq":155,"given_name":3411,"surname":15990,"affiliation":63,"orcid":63},"Villegas","We present ACAData, a high-quality parallel dataset for academic translation, that consists of two subsets: ACAD-Train, which contains approximately 1.5 million human-generated paragraph pairs across 12 languages, and ACAD-Bench, a curated evaluation set of almost 6,000 translations covering 12 directions. To validate its usefulness, we fine-tune two Large Language Models (LLMs) on ACAD-Train and benchmark them on ACAD-Bench against specialized machine-translation systems, general-purpose, open-weight LLMs, and several large-scale proprietary models. Experimental results demonstrate that fine tuning on ACAD-Train leads to improvements in academic translation quality by +6.1 and +12.4 d-BLEU points on average for 7B and 2B models respectively, while also improving long-context translation in a general domain by up to 24.9% when translating out of English. The fine-tuned top-performing model surpasses the best proprietary and open-weight models on the academic translation domain. 
By releasing ACAD-Train, ACAD-Bench and the fine-tuned models, we provide the community with a valuable resource to advance research in the academic domain and long-context translation.",{"paper_id":15993,"title":15994,"year":7,"month":188,"day":63,"doi":15995,"resource_url":15996,"first_page":15997,"last_page":15998,"pdf_url":15999,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16000,"paper_type":860,"authors":16001,"abstract":16009},"lrec2026-main-672","A Single Model Ensemble Framework for Neural Machine Translation Using Pivot Translation","10.63317\u002F5jpaoar9p6cf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-672","8520","8534","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.672.pdf","oh-etal-2026-single",[16002,16004,16007],{"paper_id":15993,"author_seq":247,"given_name":16003,"surname":12592,"affiliation":63,"orcid":63},"Seokjin",{"paper_id":15993,"author_seq":232,"given_name":16005,"surname":16006,"affiliation":63,"orcid":63},"Keonwoong","Noh",{"paper_id":15993,"author_seq":218,"given_name":16008,"surname":8696,"affiliation":63,"orcid":63},"Woohwan","Despite the recent remarkable advances in neural machine translation, translation quality for low-resource language pairs remains subpar. Ensembling multiple systems is a widely adopted technique to enhance performance, often accomplished by combining probability distributions. However, previous approaches face the challenge of high computational costs for training multiple models. Furthermore, for black-box models, averaging token-level probabilities at each decoding step is not feasible. To address the problems of multi-model ensemble methods, we present a pivot-based single model ensemble. The proposed strategy consists of two steps: pivot-based candidate generation and post-hoc aggregation. In the first step, we generate candidates through pivot translation. 
This can be achieved with only a single model and facilitates knowledge transfer from high-resource pivot languages, resulting in candidates that are not only diverse but also more accurate. Next, in the aggregation step, we select k high-quality candidates from the generated candidates and merge them to generate a final translation that outperforms the existing candidates. Our experimental results show that our method produces translations of superior quality by leveraging candidates from pivot translation to capture the subtle nuances of the source sentence.",{"paper_id":16011,"title":16012,"year":7,"month":188,"day":63,"doi":16013,"resource_url":16014,"first_page":16015,"last_page":16016,"pdf_url":16017,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16018,"paper_type":860,"authors":16019,"abstract":16031},"lrec2026-main-673","Gender Disambiguation in Machine Translation: Diagnostic Evaluation in Decoder-Only Architectures","10.63317\u002F4wphxianzxf6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-673","8535","8550","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.673.pdf","manna-etal-2026-gender",[16020,16022,16025,16028,16030],{"paper_id":16011,"author_seq":247,"given_name":12436,"surname":16021,"affiliation":63,"orcid":63},"Manna",{"paper_id":16011,"author_seq":232,"given_name":16023,"surname":16024,"affiliation":63,"orcid":63},"Hosein","Mohebbi",{"paper_id":16011,"author_seq":218,"given_name":16026,"surname":16027,"affiliation":63,"orcid":63},"Afra","Alishahi",{"paper_id":16011,"author_seq":203,"given_name":10009,"surname":16029,"affiliation":63,"orcid":63},"Blain",{"paper_id":16011,"author_seq":188,"given_name":2161,"surname":8209,"affiliation":63,"orcid":63},"While Large Language Models achieve state-of-the-art results across a wide range of NLP tasks, they remain prone to systematic biases. 
Among these, gender bias is particularly salient in MT, due to systematic differences across languages in whether and how gender is marked. As a result, translation often requires disambiguating implicit source signals into explicit gender-marked forms. In this context, standard benchmarks may capture broad disparities but fail to reflect the full complexity of gender bias in modern MT. In this paper, we extend recent frameworks on bias evaluation by: (i) introducing a novel measure coined ’Prior Bias’, capturing a model’s default gender assumptions, and (ii) applying the framework to decoder-only MT models. Our results show that, despite their scale and state-of-the-art status, decoder-only models do not generally outperform encoder-decoder architectures on gender-specific metrics; however, post-training (e.g., instruction tuning) not only improves contextual awareness but also reduces the masculine Prior Bias.",{"paper_id":16033,"title":16034,"year":7,"month":188,"day":63,"doi":16035,"resource_url":16036,"first_page":16037,"last_page":16038,"pdf_url":16039,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16040,"paper_type":860,"authors":16041,"abstract":16050},"lrec2026-main-674","Building a One-Million-Pair Bokmål–Nynorsk Translation Corpus: A Quality-First Harvesting and Cleaning Pipeline","10.63317\u002F4jidtc8558q6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-674","8551","8555","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.674.pdf","kummervold-etal-2026-building",[16042,16045,16048],{"paper_id":16033,"author_seq":247,"given_name":16043,"surname":16044,"affiliation":63,"orcid":63},"Per E.","Kummervold",{"paper_id":16033,"author_seq":232,"given_name":16046,"surname":16047,"affiliation":63,"orcid":63},"Thea","Tollersrud",{"paper_id":16033,"author_seq":218,"given_name":12985,"surname":16049,"affiliation":63,"orcid":63},"Zanardi","We present a high-quality parallel corpus for 
translation between Norwegian Bokmål (nb) and Nynorsk (nn), two closely related written standards of Norwegian. The corpus was assembled from two complementary sources: Nasjonal digital læringsarena (NDLA), an educational platform, and Nynorsk pressekontor (NPK), a newswire service. Our methodology prioritizes precision over volume, employing a multi-stage filtering pipeline designed to address the specific challenges of aligning near-neighbor languages. This pipeline combines paragraph-level alignment, deduplication, multilingual semantic similarity scoring, language identification confidence checks, structural consistency tests, and strict bidirectional adjudication by a Large Language Model (LLM). To address the common problem of untranslated or placeholder \"pending\" copies, we apply a rule that flags pairs with zero semantic distance when the Nynorsk side shows weak evidence of being distinctively Nynorsk. After filtering, we retained 191,695 pairs from NDLA and 809,164 pairs from NPK, resulting in a merged corpus of 1,000,859 parallel paragraphs. 
This resource demonstrates that a precision-oriented pipeline can produce data better suited for training robust machine translation systems and instruction-tuned models than larger but noisier alternatives.",{"paper_id":16052,"title":16053,"year":7,"month":188,"day":63,"doi":16054,"resource_url":16055,"first_page":16056,"last_page":16057,"pdf_url":16058,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16059,"paper_type":860,"authors":16060,"abstract":16078},"lrec2026-main-675","New Trends for Modern Machine Translation with Large Reasoning Models","10.63317\u002F24ab4svxeipr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-675","8556","8566","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.675.pdf","liu-etal-2026-new",[16061,16063,16066,16068,16071,16073,16076],{"paper_id":16052,"author_seq":247,"given_name":16062,"surname":3916,"affiliation":63,"orcid":63},"Sinuo",{"paper_id":16052,"author_seq":232,"given_name":16064,"surname":16065,"affiliation":63,"orcid":63},"Chenyang","Lyu",{"paper_id":16052,"author_seq":218,"given_name":16067,"surname":7319,"affiliation":63,"orcid":63},"Minghao",{"paper_id":16052,"author_seq":203,"given_name":16069,"surname":16070,"affiliation":63,"orcid":63},"Zifu","Shang",{"paper_id":16052,"author_seq":188,"given_name":16072,"surname":3676,"affiliation":63,"orcid":63},"Longyue",{"paper_id":16052,"author_seq":172,"given_name":16074,"surname":16075,"affiliation":63,"orcid":63},"Weihua","Luo",{"paper_id":16052,"author_seq":155,"given_name":16077,"surname":1519,"affiliation":63,"orcid":63},"Kaifu","Recent advances in Large Reasoning Models (LRMs), particularly those leveraging Chain-of-Thought reasoning (CoT), have opened brand new possibilities for Machine Translation (MT). 
This position paper argues that LRMs substantially transform traditional neural MT as well as LLMs-based MT paradigms by reframing translation as a dynamic reasoning task that requires contextual, cultural, and linguistic understanding and reasoning. We identify three foundational shifts: 1) contextual coherence, where LRMs resolve ambiguities and preserve discourse structure through explicit reasoning over cross-sentence and complex context or even lack of context; 2) cultural intentionality, enabling models to adapt outputs by inferring speaker intent, audience expectations, and socio-linguistic norms; 3) self-reflection, LRMs can perform self-reflection during inference to correct the potential translation errors, particularly in extremely noisy cases, showing better robustness compared to simply mapping X->Y translation. We explore various scenarios in translation including stylized translation, document-level translation and multimodal translation by showcasing empirical examples that demonstrate the superiority of LRMs in translation. We also identify several interesting phenomena for LRMs for MT including auto-pivot translation as well as the critical challenges such as over-localisation in translation and inference efficiency. In conclusion, we argue that LRMs redefine translation systems not merely as text converters but as multilingual cognitive agents capable of reasoning about meaning beyond the text. 
This paradigm shift reminds us to think of problems in translation beyond traditional translation scenarios in a much broader context with LRMs - what we can achieve on top of it.",{"paper_id":16080,"title":16081,"year":7,"month":188,"day":63,"doi":16082,"resource_url":16083,"first_page":16084,"last_page":16085,"pdf_url":16086,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16087,"paper_type":860,"authors":16088,"abstract":16096},"lrec2026-main-676","MaitH 1.0: A Parallel Corpus and Baseline for Low-Resource Maithili-Hindi Translation","10.63317\u002F4otutrpimz7y","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-676","8567","8576","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.676.pdf","dubey-etal-2026-maith",[16089,16092,16094],{"paper_id":16080,"author_seq":247,"given_name":16090,"surname":16091,"affiliation":63,"orcid":63},"Kamanksha Prasad","Dubey",{"paper_id":16080,"author_seq":232,"given_name":16093,"surname":14740,"affiliation":63,"orcid":63},"Chandresh",{"paper_id":16080,"author_seq":218,"given_name":2247,"surname":16095,"affiliation":63,"orcid":63},"Padmanabh","Maithili is one of the 22 official languages recognized in the Indian Constitution. The literature of Maithili is rich; however, due to current socio-political changes, the language is on the verge of extinction. Therefore, it is crucial to develop a corpus for low-resource Indic languages like Maithili to ensure that the dream of “No Language Left Behind\" (NLLB) is realized. With this in mind, we contribute a corpus (1,05,600 sentences) containing both manually curated and synthetically generated. Additionally, we propose a strong baseline on the Maithali-Hindi pair using multilingual pretrained models such as IndicTrans2, mBART50, mT5, and NLLB-200 distilled. We evaluate the translation systems using standard performance metrics, including BLEU, CHRF2, TER, COMET, METEOR, and BERTScore. 
Comparative experiments conducted against the existing NLLB dataset (5,50,300 sentence pairs) demonstrate that our proposed dataset consistently yields superior translation quality. Finally, these results demonstrate that, even with a smaller corpus size, high-quality, task-specific data significantly enhance translation accuracy for low-resource Indian languages, such as Maithili.",{"paper_id":16098,"title":16099,"year":7,"month":188,"day":63,"doi":16100,"resource_url":16101,"first_page":16102,"last_page":16103,"pdf_url":16104,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16105,"paper_type":860,"authors":16106,"abstract":16111},"lrec2026-main-677","NRD: A Hybrid Disentanglement Framework for Mitigating Interference in Multilingual Machine Translation","10.63317\u002F55wnhwvmezwx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-677","8577","8586","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.677.pdf","zhang-etal-2026-nrd",[16107,16109],{"paper_id":16098,"author_seq":247,"given_name":16108,"surname":1519,"affiliation":63,"orcid":63},"Jiarui",{"paper_id":16098,"author_seq":232,"given_name":9725,"surname":16110,"affiliation":63,"orcid":63},"Deng","Negative interference from cross-lingual conflicting syntactic patterns is a primary obstacle in Multilingual Neural Machine Translation (MNMT). We trace this problem to the entanglement of transferable, universal semantics with non-transferable, language-specific syntactic structures. Existing methods, relying on disjoint training-only specialization or inference-only filtering, fail to fully resolve this fundamental entanglement. To address this, we propose NRD (Neuron Representation Disentanglement), a two-stage hybrid framework that couples training-time specialization with inference-time filtering. 
First, a Specialization Fine-tuning stage identifies functional neurons via a semantic-invariant activation-variance metric and reinforces intrinsic modularity through sparse updates. Second, a Dynamic Representation Filtering stage purifies semantic representations at inference by adaptively suppressing syntax-sensitive neurons, guided by each language’s pre-computed gradient consistency. On the OPUS-100 benchmark, NRD outperforms strong baselines, achieving an average gain of +1.9 BLEU on supervised directions. On the WMT-10 zero-shot benchmark, it obtains a substantial +7.1 BLEU, demonstrating robust cross-lingual generalization. These results provide strong evidence that our hybrid approach effectively purifies semantic representations by mitigating syntactic interference, paving the way for more robust cross-lingual generalization.",{"paper_id":16113,"title":16114,"year":7,"month":188,"day":63,"doi":16115,"resource_url":16116,"first_page":16117,"last_page":16118,"pdf_url":16119,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16120,"paper_type":860,"authors":16121,"abstract":16129},"lrec2026-main-678","Linguistic and Demographic Factors in an Online Free Translation Task","10.63317\u002F58knfppiwdz3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-678","8587","8595","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.678.pdf","lee-etal-2026-linguistic",[16122,16124,16126],{"paper_id":16113,"author_seq":247,"given_name":16123,"surname":1359,"affiliation":63,"orcid":63},"Tyler",{"paper_id":16113,"author_seq":232,"given_name":6855,"surname":16125,"affiliation":63,"orcid":63},"Stenger",{"paper_id":16113,"author_seq":218,"given_name":16127,"surname":16128,"affiliation":63,"orcid":63},"Tania","Avgustinova","Humans are remarkably adept of understanding unfamiliar languages, in part by utilizing resources from languages they do know. 
In this study, we investigated how various linguistic factors (word order, lexical distance) and demographic factors affected the speed and correctness of translations in a multilingual scenario. In free translation task conducted online, participants read Polish noun phrases and translated them into English text. The noun phrases were varied between noun-adjective and adjective-noun word order, and the number of international words varied among the stimuli. Both the accuracy and total response time were recorded, and additional demographic data was recorded for all participants. Participants were more successful at translating noun phrases composed of two international terms than those with one or no such words. Additionally, speakers of other Slavic languages were more accurate despite not knowing Polish than participants who knew no Slavic languages. Although word order had little or no effect on accuracy for participants overall, speakers of Slavic languages translated the noun-adjective stimuli more accurately overall.",{"paper_id":16131,"title":16132,"year":7,"month":188,"day":63,"doi":16133,"resource_url":16134,"first_page":16135,"last_page":16136,"pdf_url":16137,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16138,"paper_type":860,"authors":16139,"abstract":16144},"lrec2026-main-679","Biases in Translation: Assessing Opinion Distortion in Machine Translated Texts","10.63317\u002F2pjio9ho8rxg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-679","8596","8614","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.679.pdf","shafiabadi-etal-2026-biases",[16140,16143],{"paper_id":16131,"author_seq":247,"given_name":16141,"surname":16142,"affiliation":63,"orcid":63},"Nazanin","Shafiabadi",{"paper_id":16131,"author_seq":232,"given_name":1219,"surname":1220,"affiliation":63,"orcid":63},"Current machine translation (MT) evaluation practices largely assume that high lexical and semantic 
fidelity implies preservation of meaning. We question this assumption by introducing a framework for detecting and quantifying translation-induced distortion—the systematic alteration of a text’s subjective properties during translation. Focusing on stance as a socially consequential property, we formalize stance preservation as an invariance problem and adapt two classical statistical tests, McNemar’s test and the two-proportion Z-test, to diagnose systematic opinion shifts between source texts and their translations. Unlike standard MT metrics such as BLEU or COMET, which prioritize surface similarity and adequacy, our approach explicitly targets preservation of subjective meaning. In controlled experiments with synthetically distorted translations, we demonstrate that the proposed tests are sensitive to graded levels of stance manipulation. We apply our framework to evaluate twelve multilingual models and find that none reliably preserve stance across all tested language directions. Our findings reveal a critical gap in current MT evaluation practices and highlight the need for explicit evaluation of subjective meaning preservation in socially and politically sensitive contexts.",{"paper_id":16146,"title":16147,"year":7,"month":188,"day":63,"doi":16148,"resource_url":16149,"first_page":16150,"last_page":16151,"pdf_url":16152,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16153,"paper_type":860,"authors":16154,"abstract":16163},"lrec2026-main-680","When Translations Surprise: Human Awareness of Predictability in 
Translations","10.63317\u002F44g3kbidmew4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-680","8615","8627","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.680.pdf","garcaromero-etal-2026-when",[16155,16157,16160],{"paper_id":16146,"author_seq":247,"given_name":13880,"surname":16156,"affiliation":63,"orcid":63},"García-Romero",{"paper_id":16146,"author_seq":232,"given_name":16158,"surname":16159,"affiliation":63,"orcid":63},"Miquel","Esplà-Gomis",{"paper_id":16146,"author_seq":218,"given_name":16161,"surname":16162,"affiliation":63,"orcid":63},"Felipe","Sanchez-Marti­nez","Machine translation (MT) has achieved near-human quality for some language pairs, yet its output remains distinct from human translation, primarily in its predictability. While MT systems generate low-perplexity text, humans produce less predictable outputs. This raises the question of whether humans can intuitively use this difference in predictability to distinguish between human- and machine-translated text. We report on a study with 30 native Spanish speakers tasked with identifying the origin of English-to-Spanish translations. We compared their performance against two perplexity-based baselines: a large language model capturing fluency, and a neural MT model, conditioned on the source text, capturing both fluency and adequacy. Our findings reveal that human judgments correlate with fluency-based perplexity, but show no correlation with the perplexity that also accounts for adequacy. This suggests that annotators’ decisions are driven by the target text’s fluency. Consequently, a simple computational baseline using source-aware perplexity significantly outperforms human annotators. This work contributes to a deeper understanding of human perception of MT, highlighting a potential bias in current evaluation protocols toward fluency over adequacy. 
This bias may lead to an overestimation of the capabilities of highly fluent systems and underscores the need for evaluation methods ensuring translation adequacy is not overlooked.",{"paper_id":16165,"title":16166,"year":7,"month":188,"day":63,"doi":16167,"resource_url":16168,"first_page":16169,"last_page":16170,"pdf_url":16171,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16172,"paper_type":860,"authors":16173,"abstract":16184},"lrec2026-main-681","Bidirectional Chinese and English Passive Sentences Dataset for Machine Translation","10.63317\u002F3cj49u95jf24","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-681","8628","8638","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.681.pdf","ma-etal-2026-bidirectional",[16174,16176,16179,16181],{"paper_id":16165,"author_seq":247,"given_name":16175,"surname":913,"affiliation":63,"orcid":63},"Xinyue",{"paper_id":16165,"author_seq":232,"given_name":16177,"surname":16178,"affiliation":63,"orcid":63},"Pol","Pastells",{"paper_id":16165,"author_seq":218,"given_name":1674,"surname":16180,"affiliation":63,"orcid":63},"Farrus",{"paper_id":16165,"author_seq":203,"given_name":16182,"surname":16183,"affiliation":63,"orcid":63},"Mariona","Taule","Machine Translation (MT) evaluation has gone beyond metrics, towards more specific linguistic phenomena. Regarding English-Chinese language pairs, passive sentences are constructed and distributed differently due to language variation, thus need special attention in MT. This paper proposes a bidirectional multi-domain dataset of passive sentences, extracted from five Chinese-English parallel corpora and annotated automatically with structure labels according to human translation, and a test set with manually verified annotation. The dataset consists of 73,965 parallel sentence pairs (2,358,731 English words, 3,498,229 Chinese characters). 
We evaluate two state-of-the-art open-source MT systems with our dataset, and four commercial models with the test set. The results show that, unlike humans, models are more influenced by the voice of the source text rather than the general voice usage of the source language, and therefore tend to maintain the passive voice when translating a passive in either direction. However, models demonstrate some knowledge of the low frequency and predominantly negative context of Chinese passives, leading to higher voice consistency with human translators in English-to-Chinese translation than in Chinese-to-English translation. Commercial NMT models scored higher in metric evaluations, but LLMs showed a better ability to use diverse alternative translations. Datasets and annotation script will be shared upon request.",{"paper_id":16186,"title":16187,"year":7,"month":188,"day":63,"doi":16188,"resource_url":16189,"first_page":16190,"last_page":16191,"pdf_url":16192,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16193,"paper_type":860,"authors":16194,"abstract":16197},"lrec2026-main-682","CoTERM: A Consistency-Oriented Term Metric for MT System Evaluation","10.63317\u002F3pc9e4hsupuk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-682","8639","8661","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.682.pdf","hazem-etal-2026-coterm",[16195,16196],{"paper_id":16186,"author_seq":247,"given_name":13403,"surname":13679,"affiliation":63,"orcid":63},{"paper_id":16186,"author_seq":232,"given_name":9606,"surname":9607,"affiliation":63,"orcid":63},"Proper treatment of terms is an important and critical aspect in machine translation. It is therefore necessary to use appropriate metrics to evaluate MT system outputs from terminology perspective. 
However, despite the great improvements witnessed in the recent NMT and LLM models, MT system evaluation metrics that shed light on specific aspects of term translations are yet to be fully explored. In this paper, we propose CoTERM, a new metric for automatic evaluation of term translations based on the Herfindahl-Hirshman Index (HHI). CoTERM measures target term closeness to one or more reference translations, taking into account the fundamental criteria for translating terms, i.e. (i) accuracy; (ii) consistency at document or corpus levels; and (iii) appropriateness to the domain conventions with regard to term variations. The proposed metric correlates strongly with human raters, and empirical evaluations of a wide range of NMTs and LLMs show that the best MT systems in standard metrics are not necessarily the best at treating terms. CoTERM is thus shown to be highly useful for diagnosing MT systems’ term translation performance and conveniently seen as complementary to generic measures for MT system evaluations.",{"paper_id":16199,"title":16200,"year":7,"month":188,"day":63,"doi":16201,"resource_url":16202,"first_page":16203,"last_page":16204,"pdf_url":16205,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16206,"paper_type":860,"authors":16207,"abstract":16244},"lrec2026-main-683","SiniticMTError: A Machine Translation Dataset with Error Annotations for Sinitic 
Languages","10.63317\u002F2bbqypchz5b6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-683","8662","8684","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.683.pdf","liu-etal-2026-siniticmterror",[16208,16209,16211,16212,16215,16218,16221,16224,16227,16229,16232,16234,16237,16239,16241],{"paper_id":16199,"author_seq":247,"given_name":11711,"surname":3916,"affiliation":63,"orcid":63},{"paper_id":16199,"author_seq":232,"given_name":16210,"surname":10109,"affiliation":63,"orcid":63},"Junghyun",{"paper_id":16199,"author_seq":218,"given_name":11727,"surname":1359,"affiliation":63,"orcid":63},{"paper_id":16199,"author_seq":203,"given_name":16213,"surname":16214,"affiliation":63,"orcid":63},"Ethan Yue Heng","Cheung",{"paper_id":16199,"author_seq":188,"given_name":16216,"surname":16217,"affiliation":63,"orcid":63},"Shou-Yi","Hung",{"paper_id":16199,"author_seq":172,"given_name":16219,"surname":16220,"affiliation":63,"orcid":63},"Elsie","Chan",{"paper_id":16199,"author_seq":155,"given_name":16222,"surname":16223,"affiliation":63,"orcid":63},"Shiyao","Qian",{"paper_id":16199,"author_seq":138,"given_name":16225,"surname":16226,"affiliation":63,"orcid":63},"RUNTONG","LIANG",{"paper_id":16199,"author_seq":121,"given_name":16228,"surname":10897,"affiliation":63,"orcid":63},"Kimlan",{"paper_id":16199,"author_seq":104,"given_name":16230,"surname":16231,"affiliation":63,"orcid":63},"Wing Yu","Yip",{"paper_id":16199,"author_seq":87,"given_name":16233,"surname":2233,"affiliation":63,"orcid":63},"York Hay",{"paper_id":16199,"author_seq":73,"given_name":16235,"surname":16236,"affiliation":63,"orcid":63},"Tsz Fung","Yau",{"paper_id":16199,"author_seq":55,"given_name":16238,"surname":9590,"affiliation":63,"orcid":63},"Ka Ieng 
Charlotte",{"paper_id":16199,"author_seq":38,"given_name":16240,"surname":7319,"affiliation":63,"orcid":63},"You-Wei",{"paper_id":16199,"author_seq":17,"given_name":16242,"surname":16243,"affiliation":63,"orcid":63},"Richard Tzong-Han","Tsai","Despite major advances in machine translation (MT) in recent years, progress remains limited for many low-resource languages that lack large-scale training data and linguistic resources. In this paper, we introduce SINITICMTERROR, a novel fine-grained dataset that builds on existing parallel corpora to provide error span, error type, and error severity annotations in machine-translated examples from English to Mandarin, Cantonese, and Wu Chinese, along with a Mandarin-Hokkien component derived from a non-parallel source. Our dataset serves as a resource for the MT community to fine-tune models with error detection capabilities, supporting research on translation quality estimation, error-aware generation, and low-resource language evaluation. We also establish baseline results using language models to benchmark translation error detection performance. Specifically, we evaluate multiple open source and closed source LLMs using span-level and correlation-based MQM metrics, revealing their limited precision, underscoring the need for our dataset. 
Finally, we report our rigorous annotation process by native speakers, with analyses on pilot studies, iterative feedback, insights, and patterns in error type and severity.",{"paper_id":16246,"title":16247,"year":7,"month":188,"day":63,"doi":16248,"resource_url":16249,"first_page":16250,"last_page":16251,"pdf_url":16252,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16253,"paper_type":860,"authors":16254,"abstract":16263},"lrec2026-main-684","Ancient Greek to Modern Greek Machine Translation: A Novel Benchmark and Fine-Tuning Experiments on LLMs and NMT Models","10.63317\u002F4cdk64dgm2w9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-684","8685","8698","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.684.pdf","mavromatis-etal-2026-ancient",[16255,16258,16261,16262],{"paper_id":16246,"author_seq":247,"given_name":16256,"surname":16257,"affiliation":63,"orcid":63},"Spyridon","Mavromatis",{"paper_id":16246,"author_seq":232,"given_name":16259,"surname":16260,"affiliation":63,"orcid":63},"Sokratis","Sofianopoulos",{"paper_id":16246,"author_seq":218,"given_name":5871,"surname":5872,"affiliation":63,"orcid":63},{"paper_id":16246,"author_seq":203,"given_name":2960,"surname":2961,"affiliation":63,"orcid":63},"Machine Translation (MT) for Ancient Greek (AG) to Modern Greek (MG) is a low-resource task, constrained by the lack of large-scale, high-quality parallel data. We address this gap by introducing the AG-MG Parallel Corpus, a new resource containing 132,481 sentence-aligned pairs derived from literary, historical, and biblical texts. We present a novel corpus creation pipeline that combines web-scraped, excerpt-level data with a multi-stage sentence-level alignment, and refinement process. 
Our method uses VecAlign with LaBSE embeddings, which we first fine-tune on a manually-aligned AG-MG subset, followed by an LLM-based error\u002Fmisalignment correction phase using Gemini 2.5 Flash to ensure high alignment quality. Furthermore, we provide the first comprehensive benchmark of modern MT models on this task, evaluating three fine-tuning strategies across NMT models (NLLB, M2M100) and a Greek LLM (Llama-Krikri-8B). Our experiments show that fine-tuning yields significant improvements over base models, increasing performance by up to +10.3 BLEU points. Specifically, full-parameter fine-tuning of Llama-Krikri-8B achieves the highest overall performance with a BLEU score of 13.16, while the QLoRA-adapted M2M100-1.2B model demonstrates the largest relative gains and highly competitive results. Our dataset and models represent a significant contribution to Greek NLP.",{"paper_id":16265,"title":16266,"year":7,"month":188,"day":63,"doi":16267,"resource_url":16268,"first_page":16269,"last_page":16270,"pdf_url":16271,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16272,"paper_type":860,"authors":16273,"abstract":16283},"lrec2026-main-685","Linguistic Knowledge-Infused Fine-Tuning for Mitigating Gender Bias in Machine Translation","10.63317\u002F3suzdcws7pba","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-685","8699","8709","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.685.pdf","estrada-etal-2026-linguistic",[16274,16277,16278,16280,16281],{"paper_id":16265,"author_seq":247,"given_name":16275,"surname":16276,"affiliation":63,"orcid":63},"Luis Ernesto 
Garcia","Estrada",{"paper_id":16265,"author_seq":232,"given_name":12724,"surname":12725,"affiliation":63,"orcid":63},{"paper_id":16265,"author_seq":218,"given_name":9420,"surname":16279,"affiliation":63,"orcid":63},"Escolano",{"paper_id":16265,"author_seq":203,"given_name":12730,"surname":12731,"affiliation":63,"orcid":63},{"paper_id":16265,"author_seq":188,"given_name":12763,"surname":16282,"affiliation":63,"orcid":63},"Basta","Large Language Models (LLMs) achieve strong performance in machine translation (MT) but often encode gender bias, particularly when translating from non-gendered into gendered languages. This paper introduces a fine-tuning strategy to mitigate such bias in English-Spanish and English-Catalan translation. Using parameter-efficient LoRA fine-tuning, we apply linguistic knowledge infusion—a reasoning-based method that trains models to identify gendered referents and syntactic cues before generating translations. Experiments with Mistral–7B and Salamandrata–7B on MT-GenEval show that linguistically infused models improve gender accuracy by 15 percentage points and reduce gender gaps by 27 points in English-Spanish translation, with comparable trends for Catalan. Gains are strongest for Mistral, suggesting that explicit linguistic reasoning particularly benefits general-purpose LLMs. Overall, these results demonstrate that structured linguistic priors can enhance fairness and referential consistency in multilingual machine translation.",{"paper_id":16285,"title":16286,"year":7,"month":188,"day":63,"doi":16287,"resource_url":16288,"first_page":16289,"last_page":16290,"pdf_url":16291,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16292,"paper_type":860,"authors":16293,"abstract":16297},"lrec2026-main-686","What Triggers My Model? 
Contrastive Explanations Inform Gender Choices by Translation Models","10.63317\u002F2m6k3ghxvgh8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-686","8710","8724","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.686.pdf","hackenbuchner-2026-what",[16294],{"paper_id":16285,"author_seq":247,"given_name":16295,"surname":16296,"affiliation":63,"orcid":63},"Janiça","Hackenbuchner","Interpretability can be implemented to understand decisions taken by (black box) models, such as neural machine translation (NMT) or large language models (LLMs). Yet, research in this area has been limited in relation to a manifested problem in these models: gender bias. In this work, we aim to move away from simply measuring bias to exploring its origins. Working with gender-ambiguous natural source data, this exploratory study examines which context, in the form of input tokens in the source sentence (EN), influences (or triggers) the NMT model’s choice of a certain gender inflection in the target languages (DE\u002FES). To analyse this, we compute saliency attribution based on contrastive translations. We first address the challenge of the lack of a scoring threshold and specifically examine different attribution levels of source words on the model’s gender decisions in the translation. We compare salient source words with human perceptions of gender and demonstrate a noticeable overlap between human perceptions and model attribution. Additionally, we provide a linguistic analysis of salient words. 
Our work showcases the relevance of understanding model translation decisions in terms of gender, how this compares to human decisions and that this information should be leveraged to mitigate gender bias.",{"paper_id":16299,"title":16300,"year":7,"month":188,"day":63,"doi":16301,"resource_url":16302,"first_page":16303,"last_page":16304,"pdf_url":16305,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16306,"paper_type":860,"authors":16307,"abstract":16317},"lrec2026-main-687","ViKhoMT: A Vietnamese–K'Ho Neural Machine Translation Dataset and Evaluation for Community Health Communication","10.63317\u002F4tvv9uk7fqgn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-687","8725","8739","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.687.pdf","truong-etal-2026-vikhomt",[16308,16310,16312,16315],{"paper_id":16299,"author_seq":247,"given_name":16309,"surname":6462,"affiliation":63,"orcid":63},"Tram",{"paper_id":16299,"author_seq":232,"given_name":16311,"surname":2395,"affiliation":63,"orcid":63},"Vinh",{"paper_id":16299,"author_seq":218,"given_name":16313,"surname":16314,"affiliation":63,"orcid":63},"Dang Van","Thin",{"paper_id":16299,"author_seq":203,"given_name":16316,"surname":2395,"affiliation":63,"orcid":63},"Ngan","The Vietnamese government is prioritizing the socio-economic development and societal integration of ethnic minorities, including the K’Ho people. However, the lack of digital resources creates significant communication barriers, particularly in the critical domain of community health. To address this gap, we introduce ViKhoMT, a new, professionally curated Vietnamese-K’Ho parallel dataset containing approximately 10,000 sentence pairs focused on community health communication. To demonstrate the dataset’s quality and establish performance benchmarks, we conducted comprehensive evaluations by fine-tuning several pre-trained Neural Machine Translation (NMT) models. 
Our experiments show that a system based on the M2M100 architecture achieves BLEU scores of 60.5 for K’Ho-to-Vietnamese and 56.4 for Vietnamese-to-K’Ho, respectively. We release our dataset to the research community for free research purposes to support future studies and the development of practical translation tools for the K’Ho community. The dataset is publicly available at https:\u002F\u002Fgithub.com\u002FNgocTram2711\u002FViKhoMT.",{"paper_id":16319,"title":16320,"year":7,"month":188,"day":63,"doi":16321,"resource_url":16322,"first_page":16323,"last_page":16324,"pdf_url":16325,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16326,"paper_type":860,"authors":16327,"abstract":16332},"lrec2026-main-688","Hindsight Quality Prediction Experiments in Multi-Candidate Human-Post-Edited Machine Translation","10.63317\u002F24puen8nstzh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-688","8740","8755","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.688.pdf","marmonier-etal-2026-hindsight",[16328,16330,16331],{"paper_id":16319,"author_seq":247,"given_name":3449,"surname":16329,"affiliation":63,"orcid":63},"Marmonier",{"paper_id":16319,"author_seq":232,"given_name":4363,"surname":6800,"affiliation":63,"orcid":63},{"paper_id":16319,"author_seq":218,"given_name":4114,"surname":6796,"affiliation":63,"orcid":63},"This paper investigates two complementary paradigms for predicting machine translation quality: source-side difficulty prediction and candidate-side quality estimation (QE). The rapid adoption of Large Language Models (LLMs) into machine translation (MT) workflows is reshaping the research landscape, yet its impact on established quality prediction paradigms remains underexplored. We study this issue through a series of \"hindsight\" experiments on a unique, multi-candidate dataset resulting from a genuine machine translation post-editing (MTPE) project. 
The dataset consists of over 6,000 English source segments with nine translation hypotheses from a diverse set of traditional neural MT systems and advanced LLMs, all evaluated against a single, final human post-edited reference. Using Kendall’s rank correlation, we assess the predictive power of source-side difficulty metrics, candidate-side QE models and position heuristics against two gold-standard scores: TER (as a proxy for post-editing effort) and COMET (as a proxy for human judgment). Our analysis yields three primary findings: (1) On the source side, the predictive power of difficulty metrics is highly contingent on the reference metric used; features that strongly correlate with COMET (e.g., segment length, neural predictors) show much weaker correlation to TER. (2) On the candidate side, we find a significant mismatch between QE model rankings and final human-adjudicated quality, and further show that modern QE metrics are significantly more aligned with the quality of traditional neural MT outputs than with those from general-purpose LLMs. (3) While we confirm a statistically significant positional bias in document-level LLMs (i.e., the tendency for translation quality to degrade for segments occurring later in a document) its practical impact on translation quality appears to be negligible. 
These findings highlight that the architectural shift towards LLMs alters the reliability of established quality prediction methods while simultaneously mitigating previous challenges in document-level translation.",{"paper_id":16334,"title":16335,"year":7,"month":188,"day":63,"doi":16336,"resource_url":16337,"first_page":16338,"last_page":16339,"pdf_url":16340,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16341,"paper_type":860,"authors":16342,"abstract":16353},"lrec2026-main-689","PETra: A Multilingual Corpus of Pragmatic Explicitation in Translation","10.63317\u002F56tberz7nmwy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-689","8756","8766","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.689.pdf","osmelak-etal-2026-petra",[16343,16345,16348,16351,16352],{"paper_id":16334,"author_seq":247,"given_name":915,"surname":16344,"affiliation":63,"orcid":63},"Osmelak",{"paper_id":16334,"author_seq":232,"given_name":16346,"surname":16347,"affiliation":63,"orcid":63},"Koel Dutta","Chowdhury",{"paper_id":16334,"author_seq":218,"given_name":16349,"surname":16350,"affiliation":63,"orcid":63},"Uliana","Sentsova",{"paper_id":16334,"author_seq":203,"given_name":5732,"surname":9572,"affiliation":63,"orcid":63},{"paper_id":16334,"author_seq":188,"given_name":3659,"surname":3660,"affiliation":63,"orcid":63},"Translators often enrich texts with background details that make implicit cultural meanings explicit for new audiences. This phenomenon, known as pragmatic explicitation, has been widely discussed in translation theory but rarely modeled computationally. We introduce PeTra, the first multilingual corpus and detection framework for pragmatic explicitation. The corpus consists of 2,900 sentence pairs from TED-Multi and Europarl, covers twelve language pairs, and includes additions such as entity descriptions, measurement conversions, and translator remarks. 
We identify candidates through null alignments and refine them using active learning with human annotation. Our results show that entity and system-level (e.g., metric conversions) explicitations are most frequent, and that active learning improves classifier accuracy by 7-8 percentage points, achieving up to 0.88 accuracy and 0.82 F1 for the best transfer languages. PeTra establishes pragmatic explicitation as a measurable, cross-linguistic phenomenon and takes a step towards building culturally aware machine translation.",{"paper_id":16355,"title":16356,"year":7,"month":188,"day":63,"doi":16357,"resource_url":16358,"first_page":16359,"last_page":16360,"pdf_url":16361,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16362,"paper_type":860,"authors":16363,"abstract":16369},"lrec2026-main-690","A Dataset for Probing Translationese Preferences in English-to-Swedish Translation","10.63317\u002F2dj5zcpvjwf2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-690","8767","8779","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.690.pdf","kunz-etal-2026-dataset",[16364,16365,16367],{"paper_id":16355,"author_seq":247,"given_name":10259,"surname":10260,"affiliation":63,"orcid":63},{"paper_id":16355,"author_seq":232,"given_name":13143,"surname":16366,"affiliation":63,"orcid":63},"Jarochenko",{"paper_id":16355,"author_seq":218,"given_name":4654,"surname":16368,"affiliation":63,"orcid":63},"Bollmann","Translations often carry traces of the source language, a phenomenon known as translationese. We introduce the first freely available English-to-Swedish dataset contrasting translationese sentences with idiomatic alternatives, designed to probe intrinsic preferences of language models. It includes error tags and descriptions of the problems in the original translations. 
In experiments evaluating smaller Swedish and multilingual LLMs with our dataset, we find that they often favor the translationese phrasing. Human alternatives are chosen more often when the English source sentence is omitted, indicating that exposure to the source biases models toward literal translations, although even without context models often prefer the translationese variant. Our dataset and findings provide a resource and benchmark for developing models that produce more natural, idiomatic output in non-English languages.",{"paper_id":16371,"title":16372,"year":7,"month":188,"day":63,"doi":16373,"resource_url":16374,"first_page":16375,"last_page":16376,"pdf_url":16377,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16378,"paper_type":860,"authors":16379,"abstract":16386},"lrec2026-main-691","STAR-IL: A Dataset for Style-Aware Machine Translation of Product Reviews in Indian Languages","10.63317\u002F4oq85vioi2tu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-691","8780","8793","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.691.pdf","shetye-etal-2026-star",[16380,16383,16385],{"paper_id":16371,"author_seq":247,"given_name":16381,"surname":16382,"affiliation":63,"orcid":63},"Ketaki","Shetye",{"paper_id":16371,"author_seq":232,"given_name":16384,"surname":11395,"affiliation":63,"orcid":63},"Dipti Misra",{"paper_id":16371,"author_seq":218,"given_name":10567,"surname":10568,"affiliation":63,"orcid":63},"Product reviews on e-commerce platforms are a critical form of user-generated content that influence consumer decisions. However, these reviews are predominantly in English, creating a significant accessibility barrier for users who are not fluent in English. When translating into major Indian languages using the current models, the outputs often fail to capture domain-specific features and colloquial style, resulting in stylistically unnatural texts. 
To address this gap, we introduce **STAR-IL**, a human-annotated, multilingual, parallel corpus for style-aware translation of product reviews. We evaluate the performance of several state-of-the-art models on our dataset for the task of product review translation. Our experiments show that models fine-tuned on STAR-IL achieve significant average performance gain of **5.77** points in BLEU and **3.78** points in COMET, when compared to their baselines, across all languages. Our dataset provides a valuable benchmark for future research in style-aware product review translation. The STAR-IL dataset is publicly available at https:\u002F\u002Fgithub.com\u002Fltrc\u002FSTAR-IL-Corpus.",{"paper_id":16388,"title":16389,"year":7,"month":188,"day":63,"doi":16390,"resource_url":16391,"first_page":16392,"last_page":16393,"pdf_url":16394,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16395,"paper_type":860,"authors":16396,"abstract":16402},"lrec2026-main-692","Cultural and Knowledge Biases in LLMs through the Lens of Entity-Aware Machine Translation","10.63317\u002F3jxgnspt4srr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-692","8794","8812","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.692.pdf","xu-etal-2026-cultural",[16397,16399,16401],{"paper_id":16388,"author_seq":247,"given_name":16398,"surname":3290,"affiliation":63,"orcid":63},"Lu",{"paper_id":16388,"author_seq":232,"given_name":1107,"surname":16400,"affiliation":63,"orcid":63},"Moroni",{"paper_id":16388,"author_seq":218,"given_name":9117,"surname":9118,"affiliation":63,"orcid":63},"Large Language Models (LLMs) demonstrate strong multilingual capabilities yet exhibit systematic cultural biases that affect entity-aware machine translation. While external knowledge integration improves translation accuracy, the extent of these benefits across varying degrees of cultural specificity remains unexplored. 
We propose a three-level cultural specificity framework: Culturally Agnostic, Culturally Sensitive, and Culturally Local, to systematically analyze how cultural context affects entity translation difficulty and the utility of external knowledge. Through experiments spanning 11 LLMs and 10 languages, we demonstrate that external knowledge provides substantially greater improvements for culturally local entities (up to 70% in m-ETA) compared to culturally agnostic ones. Our analysis reveals distinct behavioral patterns across model tiers: closed and open-weight models show synergistic improvements in both entity accuracy and overall translation quality, while open-data models struggle with instruction-following despite improved entity accuracy.",{"paper_id":16404,"title":16405,"year":7,"month":188,"day":63,"doi":16406,"resource_url":16407,"first_page":16408,"last_page":16409,"pdf_url":16410,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16411,"paper_type":860,"authors":16412,"abstract":16421},"lrec2026-main-693","Referenceless Evaluation of Machine Translation Models by Ranking Performance in Romanian to English Translate-train Settings","10.63317\u002F25w292xcpi55","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-693","8813","8823","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.693.pdf","feraru-etal-2026-referenceless",[16413,16416,16418],{"paper_id":16404,"author_seq":247,"given_name":16414,"surname":16415,"affiliation":63,"orcid":63},"Mihail","Feraru",{"paper_id":16404,"author_seq":232,"given_name":7178,"surname":16417,"affiliation":63,"orcid":63},"Diaconu",{"paper_id":16404,"author_seq":218,"given_name":16419,"surname":16420,"affiliation":63,"orcid":63},"Bogdan Dumitru","Alexe","We propose a referenceless evaluation method for machine translation (MT) models by assessing their performance in translate-train scenarios across a variety of natural language processing (NLP) tasks. 
The approach ranks MT systems based on the downstream impact of their translations on independent NLP models trained on translated data, thus eliminating the need for professional ground-truth references. We evaluate four prominent MT tools — ChatGPT 3.5 Turbo, DeepL, Google Translate, and Mistral 7B Instruct v0.2 — on the Romanian→English language pair and analyze their influence on text summarization, sentiment analysis, and authorship identification. To further test the generalization and robustness of our method, we extend the evaluation to a cross-modality setup using out-of-domain speech data. In this setting, speech segments are transcribed with Whisper-Large, translated into English, and used in a four-class domain classification task (children’s stories, audiobooks, film dialogues, podcasts). Our findings show that translation improves downstream performance for sentiment analysis and summarization, while stylistically rich texts such as poetry or noisy ASR transcriptions suffer degradation. 
The proposed ranking metric correlates strongly with human judgments and remains sensitive to translation quality even in multimodal pipelines, providing a scalable and practical alternative to reference-based MT evaluation.",{"paper_id":16423,"title":16424,"year":7,"month":188,"day":63,"doi":16425,"resource_url":16426,"first_page":16427,"last_page":16428,"pdf_url":16429,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16430,"paper_type":860,"authors":16431,"abstract":16436},"lrec2026-main-694","Every Word Presented in Context: Syntactic Coverage as Objective for Low-Resource Machine Translation with Large Language Models","10.63317\u002F5jpokiam9tjt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-694","8824","8837","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.694.pdf","frontull-etal-2026-every",[16432,16434],{"paper_id":16423,"author_seq":247,"given_name":9569,"surname":16433,"affiliation":63,"orcid":63},"Frontull",{"paper_id":16423,"author_seq":232,"given_name":1316,"surname":16435,"affiliation":63,"orcid":63},"Ströhle","Large Language Models (LLMs) have demonstrated strong capabilities in multilingual machine translation. However, they underperform for low-resource languages, indicating the need for more explicit instructional guidance. In this work, we introduce Fragment-Shot Prompting, a novel few-shot prompting method that aims to retrieve examples for every word occurring in the sentence to be translated, illustrating their use and meaning in context. We evaluate our method on translation between Italian, Ladin (Val Badia) and Ladin (Gherdëina) and compare its performance with zero-shot prompting, random few-shot prompting, as well as established lexical and semantic retrieval strategies. We conduct these experiments using state-of-the-art LLMs, including GPT-3.5, GPT-4o, o1-mini, LlaMA-3.3, and DeepSeek-R1. 
Our results demonstrate that LLMs can extract substantial value from limited data when translating from a low- to the high-resource language. However, this does not apply to translations into the low-resource languages, where the prompting method plays a much more important role. In particular, our method consistently delivers the best results and enables significant gains. Even though translation performance into Ladin remains limited with the available resources, our results highlight the importance of syntactic coverage for improving translation accuracy and variant-specific adaptation in low-resource scenarios.",{"paper_id":16438,"title":16439,"year":7,"month":188,"day":63,"doi":16440,"resource_url":16441,"first_page":16442,"last_page":16443,"pdf_url":16444,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16445,"paper_type":860,"authors":16446,"abstract":16452},"lrec2026-main-695","Multilingual KokoroChat: A Multi-LLM Ensemble Translation Method for Creating a Multilingual Counseling Dialogue Dataset","10.63317\u002F2d2qyq4zbps7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-695","8838","8849","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.695.pdf","suzuki-etal-2026-multilingual",[16447,16449,16451],{"paper_id":16438,"author_seq":247,"given_name":16448,"surname":8011,"affiliation":63,"orcid":63},"Ryoma",{"paper_id":16438,"author_seq":232,"given_name":16450,"surname":4148,"affiliation":63,"orcid":63},"Zhiyang",{"paper_id":16438,"author_seq":218,"given_name":4571,"surname":4572,"affiliation":63,"orcid":63},"To address the critical scarcity of high-quality, publicly available counseling dialogue datasets, we created Multilingual KokoroChat by translating KokoroChat, a large-scale manually authored Japanese counseling corpus, into both English and Chinese. 
A key challenge in this process is that the optimal model for translation varies by input, making it impossible for any single model to consistently guarantee the highest quality. In a sensitive domain like counseling, where the highest possible translation fidelity is essential, relying on a single LLM is therefore insufficient. To overcome this challenge, we developed and employed a novel multi-LLM ensemble method. Our approach first generates diverse hypotheses from multiple distinct LLMs. A single LLM then produces a high-quality translation based on an analysis of the respective strengths and weaknesses of all presented hypotheses. The quality of “Multilingual KokoroChat” was rigorously validated through human preference studies. These evaluations confirmed that the translations produced by our ensemble method were preferred over any individual state-of-the-art LLM. This strong preference confirms the superior quality of our method’s outputs. The Multilingual KokoroChat is available at https:\u002F\u002Fgithub.com\u002FUEC-InabaLab\u002FMultilingualKokoroChat.",{"paper_id":16454,"title":16455,"year":7,"month":188,"day":63,"doi":16456,"resource_url":16457,"first_page":16458,"last_page":16459,"pdf_url":16460,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16461,"paper_type":860,"authors":16462,"abstract":16487},"lrec2026-main-696","NepTam: A Nepali-Tamang Parallel Corpus and Baseline Machine Translation Experiments","10.63317\u002F37edei5qcjb3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-696","8850","8861","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.696.pdf","ghimire-etal-2026-neptam",[16463,16466,16469,16472,16475,16477,16480,16482,16484,16486],{"paper_id":16454,"author_seq":247,"given_name":16464,"surname":16465,"affiliation":63,"orcid":63},"Rupak 
Raj","Ghimire",{"paper_id":16454,"author_seq":232,"given_name":16467,"surname":16468,"affiliation":63,"orcid":63},"Bipesh","Subedi",{"paper_id":16454,"author_seq":218,"given_name":16470,"surname":16471,"affiliation":63,"orcid":63},"Balaram","Prasain",{"paper_id":16454,"author_seq":203,"given_name":16473,"surname":16474,"affiliation":63,"orcid":63},"Prakash","Poudyal",{"paper_id":16454,"author_seq":188,"given_name":8566,"surname":16476,"affiliation":63,"orcid":63},"Acharya",{"paper_id":16454,"author_seq":172,"given_name":16478,"surname":16479,"affiliation":63,"orcid":63},"Nischal","Karki",{"paper_id":16454,"author_seq":155,"given_name":16481,"surname":6202,"affiliation":63,"orcid":63},"Rupak",{"paper_id":16454,"author_seq":138,"given_name":16483,"surname":11395,"affiliation":63,"orcid":63},"Rishikesh Kumar",{"paper_id":16454,"author_seq":121,"given_name":10259,"surname":16485,"affiliation":63,"orcid":63},"Poudel",{"paper_id":16454,"author_seq":104,"given_name":6705,"surname":6706,"affiliation":63,"orcid":63},"Modern Translation Systems heavily rely on high-quality, large parallel datasets for state-of-the-art performance. However, such resources are largely unavailable for most of the South Asian languages. Among them, Nepali and Tamang fall into such category, with Tamang being among the least digitally resourced languages in the region. This work addresses the gap by developing NepTam20K, a 20K gold standard parallel corpus, and NepTam80K, an 80K synthetic Nepali–Tamang parallel corpus, both sentence-aligned and designed to support machine translation. The datasets were created through a pipeline involving data scraping from Nepali news and online sources, pre-processing, semantic filtering, balancing for tense and polarity (in NepTam20K dataset), expert translation into Tamang by native speakers of the language, and verification by an expert Tamang linguist. 
The dataset covers five domains: Agriculture, Health, Education and Technology, Culture, and General Communication. To evaluate the dataset, baseline machine translation experiments were carried out using various multilingual pre-trained models: mBART, M2M-100, NLLB-200, and a vanilla Transformer model. The fine-tuning on the NLLB-200 achieved the highest sacreBLEU scores of 40.92 (Nepali → Tamang) and 45.26 (Tamang → Nepali).",{"paper_id":16489,"title":16490,"year":7,"month":188,"day":63,"doi":16491,"resource_url":16492,"first_page":16493,"last_page":16494,"pdf_url":16495,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16496,"paper_type":860,"authors":16497,"abstract":16502},"lrec2026-main-697","Scoring the Translation: On Target Automatic Keyword-Based Evaluation of Machine Translation in the Sports Domain","10.63317\u002F3ctpkpj58fwf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-697","8862","8871","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.697.pdf","steingrmsson-etal-2026-scoring",[16498,16499],{"paper_id":16489,"author_seq":247,"given_name":9234,"surname":9235,"affiliation":63,"orcid":63},{"paper_id":16489,"author_seq":232,"given_name":16500,"surname":16501,"affiliation":63,"orcid":63},"Einar","Sigurdsson","We take a closer look at the results of a recent translation shared task at WMT 2025 (the Conference on Machine Translation) and analyse the errors in the output of the four highest-scoring systems. We revise the automatic evaluation method used in Sigurðsson et al. (2025) and compare it to manual evaluation of six machine translation systems. We find that our results are in line with the manual evaluation, indicating that the test suite can be well suited for evaluating machine translation in this domain. 
Finally, we publish a list of domain-specific sports terms, namely, in the domains of basketball, chess, football, golf and gymnastics.",{"paper_id":16504,"title":16505,"year":7,"month":188,"day":63,"doi":16506,"resource_url":16507,"first_page":16508,"last_page":16509,"pdf_url":16510,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16511,"paper_type":860,"authors":16512,"abstract":16521},"lrec2026-main-698","Towards Improving Multimodal Machine Translation with LLMs: A Focus on Indic Languages","10.63317\u002F4od6be42j78m","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-698","8872","8882","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.698.pdf","dash-etal-2026-improving",[16513,16516,16519],{"paper_id":16504,"author_seq":247,"given_name":16514,"surname":16515,"affiliation":63,"orcid":63},"Amulya Ratna","Dash",{"paper_id":16504,"author_seq":232,"given_name":16517,"surname":16518,"affiliation":63,"orcid":63},"Chirag","Wadhwa",{"paper_id":16504,"author_seq":218,"given_name":16520,"surname":11395,"affiliation":63,"orcid":63},"Yashvardhan","Recent advances in Multimodal Machine Translation (MMT) have attempted to address ambiguity and polysemy in text alone by enabling models to draw additional contextual cues from paired images, thereby improving disambiguation and translation accuracy. Datasets such as Multi30K and Visual Genome have significantly advanced this line of research. However, these datasets do not always compel models to rely on visual information. The CoMMuTE dataset takes a stronger step in this direction by serving as an evaluation benchmark specifically designed around ambiguous English sentences that can only be correctly interpreted with their accompanying images. In this work, we extend CoMMuTE to two Indic languages, introducing IndicCoMMuTE — an evaluation dataset for assessing MMT systems on low-resource Indic languages. 
We benchmark a range of open-source multimodal Large Language Models (\u003C 15B parameters) and a strong text-only baseline across eight languages. We fine-tune one of these LLMs on two Indic languages. Our findings provide insights into the strengths and limitations of LLMs and establish IndicCoMMuTE as a valuable benchmark for future research on Multimodal Machine Translation in Indic languages.",{"paper_id":16523,"title":16524,"year":7,"month":188,"day":63,"doi":16525,"resource_url":16526,"first_page":16527,"last_page":16528,"pdf_url":16529,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16530,"paper_type":860,"authors":16531,"abstract":16538},"lrec2026-main-699","Parallel Sentence Filtering for Low-Resource Language Pairs: A Case Study for Upper Sorbian, German, and Czech","10.63317\u002F2ym7m4jx58jj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-699","8883","8896","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.699.pdf","jiang-etal-2026-parallel",[16532,16534,16537],{"paper_id":16523,"author_seq":247,"given_name":16533,"surname":3284,"affiliation":63,"orcid":63},"Ruiyang",{"paper_id":16523,"author_seq":232,"given_name":16535,"surname":16536,"affiliation":63,"orcid":63},"Shu","Okabe",{"paper_id":16523,"author_seq":218,"given_name":869,"surname":870,"affiliation":63,"orcid":63},"As parallel corpora for low-resource languages are scarce, and automatic approaches to mine sentence pairs can lead to noisy datasets, parallel sentence filtering aims to detect only actual translations. We study here two language pairs: Upper Sorbian–German and Czech–German to represent both high and low availability of data resources. To evaluate filtering performance, we generate synthetic datasets by combining existing parallel corpora with synthetic non-parallel pairs, notably with five types of local semantic changes on the German side, such as negation or modality transformations. 
We represent sentences using three multilingual language models, XLM-R, Glot500m, and LaBSE, and train classifiers for the task. All three model representations led to worse filtering quality when pairs were altered more subtly, such as an antonym replacement. We still observed that a language model pre-trained on the considered language achieves more robust classification performance when sentence pairs are more ambiguous. We also evaluated a cross-lingual approach where the classifier is trained on the Czech–German pair and then applied to the Upper Sorbian–German pair. Such a language transfer paves the way for filtering other low-resource language pairs in the future.",{"paper_id":16540,"title":16541,"year":7,"month":188,"day":63,"doi":16542,"resource_url":16543,"first_page":16544,"last_page":16545,"pdf_url":16546,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16547,"paper_type":860,"authors":16548,"abstract":16552},"lrec2026-main-700","OpenSubtitles2024: A Massively Parallel Dataset of Movie Subtitles for MT Development and Evaluation","10.63317\u002F4ivg578ub2ob","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-700","8897","8907","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.700.pdf","tiedemann-etal-2026-opensubtitles2024",[16549,16550],{"paper_id":16540,"author_seq":247,"given_name":3472,"surname":3473,"affiliation":63,"orcid":63},{"paper_id":16540,"author_seq":232,"given_name":16551,"surname":16075,"affiliation":63,"orcid":63},"Hengyu","This paper introduces OpenSubtitles2024, a massively parallel dataset compiled from translated subtitles. The collection includes an extensive collection of aligned training data based on user-contributed subtitles derived from OpenSubtitles.org and a dedicated held-out dataset for development and evaluation of machine translation and multilingual language models. 
The collection provides an increased language coverage and doubles the size of the previous edition. Furthermore, a careful procedure was applied to reserve a subset of the most recent subtitles for system development and evaluation. The collection covers 92 languages and language variants, aligned in over 3,000 bitexts containing 40 billion tokens in 7.7 million subtitle files. The test set comprises 2,022 language pairs. In addition, we also provide a multi-parallel test set that refers to a subset of the held-out data with synchronized alignments across 40 languages and 15 subtitles.",{"paper_id":16554,"title":16555,"year":7,"month":188,"day":63,"doi":16556,"resource_url":16557,"first_page":16558,"last_page":16559,"pdf_url":16560,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16561,"paper_type":860,"authors":16562,"abstract":16568},"lrec2026-main-701","CREST: Universal Safety Guardrails through Cluster-Guided Cross-Lingual Transfer","10.63317\u002F253ofxgdtz24","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-701","8908","8922","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.701.pdf","bansal-etal-2026-crest",[16563,16566],{"paper_id":16554,"author_seq":247,"given_name":16564,"surname":16565,"affiliation":63,"orcid":63},"Lavish","Bansal",{"paper_id":16554,"author_seq":232,"given_name":16567,"surname":3084,"affiliation":63,"orcid":63},"Naman","Ensuring content safety in large language models (LLMs) is essential for their deployment in real-world applications. However, existing safety guardrails are predominantly tailored for high-resource languages, leaving a significant portion of the world’s population underrepresented who communicate in low-resource languages. To address this, we introduce CREST (CRoss-lingual Efficient Safety Transfer), a parameter-efficient multilingual safety classification model that supports 100 languages with only 0.5B parameters. 
By training on a strategically chosen subset of only 13 high-resource languages, our model utilizes cluster-based cross-lingual transfer from a few to 100 languages, enabling effective generalization to both unseen high-resource and low-resource languages. This approach addresses the challenge of limited training data in low-resource settings. We conduct comprehensive evaluations across six safety benchmarks to demonstrate that CREST outperforms existing state-of-the-art guardrails of comparable scale and achieves competitive results against models with significantly larger parameter counts (≥ 2.5B parameters). Our findings highlight the limitations of language-specific guardrails and underscore the importance of developing universal, language-agnostic safety systems that can scale effectively to serve global populations.",{"paper_id":16570,"title":16571,"year":7,"month":188,"day":63,"doi":16572,"resource_url":16573,"first_page":16574,"last_page":16575,"pdf_url":16576,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16577,"paper_type":860,"authors":16578,"abstract":16580},"lrec2026-main-702","Semantic Alignment across Ancient Egyptian Language Stages via Normalization-Aware Multitask Learning","10.63317\u002F3v9kjvq9f3qp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-702","8923","8936","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.702.pdf","huang-2026-semantic",[16579],{"paper_id":16570,"author_seq":247,"given_name":4962,"surname":1837,"affiliation":63,"orcid":63},"We study word-level semantic alignment across four historical stages of Ancient Egyptian. These stages differ in script and orthography, and parallel data are scarce. 
We jointly train a compact encoder-decoder model with a shared byte-level tokenizer on all four stages, combining masked language modeling (MLM), translation language modeling (TLM), sequence-to-sequence translation, and part-of-speech tagging under a task-aware loss with fixed weights and uncertainty-based scaling. To reduce surface divergence we add Latin transliteration and IPA reconstruction as auxiliary views. We integrate these views through KL-based consistency and through embedding-level fusion. We evaluate alignment quality using pairwise metrics, specifically ROC-AUC and triplet accuracy, on curated Egyptian–English and intra-Egyptian cognate datasets. Translation yields the strongest gains. IPA with KL consistency improves cross-branch alignment, while early fusion demonstrates limited efficacy. Although the overall alignment remains limited, the findings provide a reproducible baseline and practical guidance for modeling historical languages under real constraints. They also show how normalization and task design shape what counts as alignment in typologically distant settings.",{"paper_id":16582,"title":16583,"year":7,"month":188,"day":63,"doi":16584,"resource_url":16585,"first_page":16586,"last_page":16587,"pdf_url":16588,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16589,"paper_type":860,"authors":16590,"abstract":16598},"lrec2026-main-703","Conditioning LLMs to Generate Code-Switched 
Text","10.63317\u002F443bxexszimg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-703","8937","8953","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.703.pdf","heredia-etal-2026-conditioning",[16591,16593,16596,16597],{"paper_id":16582,"author_seq":247,"given_name":12730,"surname":16592,"affiliation":63,"orcid":63},"Heredia",{"paper_id":16582,"author_seq":232,"given_name":16594,"surname":16595,"affiliation":63,"orcid":63},"Gorka","Labaka",{"paper_id":16582,"author_seq":218,"given_name":10169,"surname":10170,"affiliation":63,"orcid":63},{"paper_id":16582,"author_seq":203,"given_name":1276,"surname":1277,"affiliation":63,"orcid":63},"Code-switching (CS) is still a critical challenge in Natural Language Processing (NLP), due to the limited availability of large-scale, diverse CS datasets for robust training and evaluation. Despite recent advances, the capabilities and limitations of LLMs in handling CS are still not fully understood. In this work, we investigate the extent to which LLMs can be used in a framework for CS text generation, focusing on the English-Spanish language pair. Our proposed methodology consists of back-translating natural CS sentences into monolingual English, and using the resulting parallel corpus to fine-tune LLMs to turn monolingual sentences into CS. We thoroughly analyse the models’ performance through a study on human preferences, a qualitative error analysis, an evaluation with popular reference-based metrics and LLM-based judgment. Results show that fine-tuning can be a key step to ensure that current LLMs consistently generate fluent code-switched text and that our methodology generates high-quality outputs, expanding research opportunities in CS communication. We find that traditional metrics do not correlate with human judgement when assessing the quality of the generated CS data, but LLM-based judgment aligns more closely with human preferences. 
We release our code and generated dataset under a CC-BY-NC-SA license.",{"paper_id":16600,"title":16601,"year":7,"month":188,"day":63,"doi":16602,"resource_url":16603,"first_page":16604,"last_page":16605,"pdf_url":16606,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16607,"paper_type":860,"authors":16608,"abstract":16618},"lrec2026-main-704","Are the LLMs Capable of Maintaining at Least the Language Genus?","10.63317\u002F38cn6xjcqa4p","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-704","8954","8970","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.704.pdf","mitrovi-etal-2026-are",[16609,16611,16613,16616],{"paper_id":16600,"author_seq":247,"given_name":16610,"surname":1730,"affiliation":63,"orcid":63},"Sandra",{"paper_id":16600,"author_seq":232,"given_name":1061,"surname":16612,"affiliation":63,"orcid":63},"Kletz",{"paper_id":16600,"author_seq":218,"given_name":16614,"surname":16615,"affiliation":63,"orcid":63},"Ljiljana","Dolamic",{"paper_id":16600,"author_seq":203,"given_name":5741,"surname":16617,"affiliation":63,"orcid":63},"Rinaldi","Large Language Models (LLMs) display notable variation in multilingual behavior, yet the role of genealogical language structure in shaping this variation remains underexplored. In this paper, we investigate whether LLMs exhibit sensitivity to linguistic genera by extending prior analyses on the MultiQ dataset. We first check if models prefer to switch to genealogically related languages when prompt language fidelity is not maintained. Next, we investigate whether knowledge consistency is better preserved within than across genera. We show that genus-level effects are present but strongly conditioned by training resource availability. We further observe distinct multilingual strategies across LLMs families. 
Our findings suggest that LLMs encode aspects of genus-level structure, but training data imbalances remain the primary factor shaping their multilingual performance.",{"paper_id":16620,"title":16621,"year":7,"month":188,"day":63,"doi":16622,"resource_url":16623,"first_page":16624,"last_page":16625,"pdf_url":16626,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16627,"paper_type":860,"authors":16628,"abstract":16636},"lrec2026-main-705","Gender Bias in MT for a Genderless Language: New Benchmarks for Basque","10.63317\u002F352cdsej8fcp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-705","8971","8984","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.705.pdf","murillo-etal-2026-gender",[16629,16632,16633,16634,16635],{"paper_id":16620,"author_seq":247,"given_name":16630,"surname":16631,"affiliation":63,"orcid":63},"Amaia","Murillo",{"paper_id":16620,"author_seq":232,"given_name":7978,"surname":7979,"affiliation":63,"orcid":63},{"paper_id":16620,"author_seq":218,"given_name":9041,"surname":1180,"affiliation":63,"orcid":63},{"paper_id":16620,"author_seq":203,"given_name":9041,"surname":1180,"affiliation":63,"orcid":63},{"paper_id":16620,"author_seq":188,"given_name":9046,"surname":9047,"affiliation":63,"orcid":63},"Large language models (LLMs) and machine translation (MT) systems are increasingly used in our daily lives, but their outputs can reproduce gender bias present in the training data. Most resources for evaluating such biases are designed for English and reflect its sociocultural context, which limits their applicability to other languages. This work addresses this gap by introducing two new datasets to evaluate gender bias in translations involving Basque, a low-resource and genderless language. WinoMTeus adapts the WinoMT benchmark to examine how gender-neutral Basque occupations are translated into gendered languages such as Spanish and French. 
FLORES+Gender, in turn, extends the FLORES+ benchmark to assess whether translation quality varies when translating from gendered languages (Spanish and English) into Basque depending on the gender of the referent. We evaluate several general-purpose LLMs and open and proprietary MT systems. The results reveal a systematic preference for masculine forms and, in some models, a slightly higher quality for masculine referents. Overall, these findings show that gender bias is still deeply rooted in these models, and highlight the need to develop evaluation methods that consider both linguistic features and cultural context.",{"paper_id":16638,"title":16639,"year":7,"month":188,"day":63,"doi":16640,"resource_url":16641,"first_page":16642,"last_page":16643,"pdf_url":16644,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16645,"paper_type":860,"authors":16646,"abstract":16653},"lrec2026-main-706","Optimizing Multilingual LLMs via Federated Learning: A Study of Client Language Composition","10.63317\u002F4eyoaxvbuw3y","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-706","8985","8996","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.706.pdf","sant-etal-2026-optimizing",[16647,16650,16652],{"paper_id":16638,"author_seq":247,"given_name":16648,"surname":16649,"affiliation":63,"orcid":63},"Aleix","Sant",{"paper_id":16638,"author_seq":232,"given_name":13178,"surname":16651,"affiliation":63,"orcid":63},"Luque",{"paper_id":16638,"author_seq":218,"given_name":9420,"surname":16279,"affiliation":63,"orcid":63},"Federated Learning (FL) of Large Language Models (LLMs) in multilingual environments presents significant challenges stemming from heterogeneous language distributions across clients and disparities in language resource availability. To address these challenges, we extended the FederatedScope-LLM framework to support multilingual instruction-tuning experiments with LLMs. 
We also introduced a novel client-specific early stopping mechanism, Local Dynamic Early Stopping (LDES-FL), which allows clients to pause and resume local training based on client-side validation performance, enhancing training efficiency and sustainability. Through a series of experiments, we studied how client language composition — from fully monolingual to increasingly multilingual clients — affects multilingual quality, fairness and training cost. Monolingual local fine-tuning remains the most effective for single-language specialization, whereas federated training is better suited to learning a single balanced multilingual model. In FL, increasing within-client multilinguality leads to stronger and fairer global models, narrows the gap to centralized multilingual fine-tuning, and yields the largest gains for lower-resource languages, albeit at the cost of more optimization steps. Overall, our results identify client language composition as a key design variable in multilingual FL, shaping performance, fairness and efficiency.",{"paper_id":16655,"title":16656,"year":7,"month":188,"day":63,"doi":16657,"resource_url":16658,"first_page":16659,"last_page":16660,"pdf_url":16661,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":16662,"bibkey":16663,"paper_type":860,"authors":16664,"abstract":16671},"lrec2026-main-707","Multilingual Target-Stance Extraction","10.63317\u002F3q975uotc5m6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-707","8997","9007","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.707.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.707_OptionalSupplementaryMaterial.zip","mines-etal-2026-multilingual",[16665,16668],{"paper_id":16655,"author_seq":247,"given_name":16666,"surname":16667,"affiliation":63,"orcid":63},"Ethan 
Leigh","Mines",{"paper_id":16655,"author_seq":232,"given_name":16669,"surname":16670,"affiliation":63,"orcid":63},"Bonnie J.","Dorr","Social media enables data-driven analysis of public opinion on contested issues. Target-Stance Extraction (TSE) is the task of identifying the target discussed in a document and the document’s stance towards that target. Many works classify stance towards a given target in a multilingual setting, but all prior work in TSE is English-only. This work introduces the first multilingual TSE benchmark, spanning Catalan, Estonian, French, Italian, Mandarin, and Spanish corpora. It manages to extend the original TSE pipeline to a multilingual setting without requiring separate models for each language. Our model pipeline achieves a modest F1 score of 12.78, underscoring the increased difficulty of the multilingual task relative to English-only setups and highlighting target prediction as the primary bottleneck. We are also the first to demonstrate the sensitivity of TSE’s F1 score to different target verbalizations. 
Together these serve as a much-needed baseline for resources, algorithms, and evaluation criteria in multilingual TSE.",{"paper_id":16673,"title":16674,"year":7,"month":188,"day":63,"doi":16675,"resource_url":16676,"first_page":16677,"last_page":16678,"pdf_url":16679,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16680,"paper_type":860,"authors":16681,"abstract":16695},"lrec2026-main-708","MUNIChus: MUltilingual News Image Captioning Benchmark","10.63317\u002F3cu2uxnphh3g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-708","9008","9017","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.708.pdf","chen-etal-2026-munichus",[16682,16683,16684,16687,16690,16693,16694],{"paper_id":16673,"author_seq":247,"given_name":9865,"surname":1840,"affiliation":63,"orcid":63},{"paper_id":16673,"author_seq":232,"given_name":14807,"surname":14808,"affiliation":63,"orcid":63},{"paper_id":16673,"author_seq":218,"given_name":16685,"surname":16686,"affiliation":63,"orcid":63},"Hansi","Hettiarachchi",{"paper_id":16673,"author_seq":203,"given_name":16688,"surname":16689,"affiliation":63,"orcid":63},"Diptesh","Kanojia",{"paper_id":16673,"author_seq":188,"given_name":16691,"surname":16692,"affiliation":63,"orcid":63},"Saroj","Basnet",{"paper_id":16673,"author_seq":172,"given_name":6445,"surname":6446,"affiliation":63,"orcid":63},{"paper_id":16673,"author_seq":155,"given_name":14813,"surname":14814,"affiliation":63,"orcid":63},"The goal of news image captioning is to generate captions by integrating news article content with corresponding images, highlighting the relationship between textual context and visual elements. The majority of research on news image captioning focuses on English, primarily because datasets in other languages are scarce. 
To address this limitation, we release the first multilingual news image captioning benchmark, MUNIChus, comprising 9 languages, including several low-resource languages such as Sinhala and Urdu. We evaluate various state-of-the-art neural news image captioning models on MUNIChus and find that news image captioning remains challenging. We also make MUNIChus publicly available as a public leaderboard with over 20 models already benchmarked. We hope that MUNIChus will enable further advancements in developing and evaluating multilingual news image captioning models.",{"paper_id":16697,"title":16698,"year":7,"month":188,"day":63,"doi":16699,"resource_url":16700,"first_page":16701,"last_page":16702,"pdf_url":16703,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16704,"paper_type":860,"authors":16705,"abstract":16717},"lrec2026-main-709","GlossMATE: Multi-Agent Translator Explanations for Glosses","10.63317\u002F5bhwdv5gcjqz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-709","9018","9029","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.709.pdf","yang-etal-2026-glossmate",[16706,16708,16710,16712,16714],{"paper_id":16697,"author_seq":247,"given_name":16707,"surname":6675,"affiliation":63,"orcid":63},"Changbing",{"paper_id":16697,"author_seq":232,"given_name":4377,"surname":16709,"affiliation":63,"orcid":63},"Littell",{"paper_id":16697,"author_seq":218,"given_name":10347,"surname":16711,"affiliation":63,"orcid":63},"Bernier-Colborne",{"paper_id":16697,"author_seq":203,"given_name":16713,"surname":16398,"affiliation":63,"orcid":63},"Yanfei",{"paper_id":16697,"author_seq":188,"given_name":16715,"surname":16716,"affiliation":63,"orcid":63},"Mengzhe","Geng","This paper introduces GlossMATE, a multi-agent critique-and-judge system that translates the gloss line in Interlinear Glossed Text (IGT) into fluent English using Large Language Models (LLMs). 
GlossMATE integrates linguist-provided resources (e.g., gloss-tag explanations, lexicon entries, curated IGT) with in-context learning and a multi-agent critique-and-judge procedure that iteratively evaluates and refines candidate translations. Our experiments show that leveraging analogous examples, explicit linguistic explanations, and collaborative agent interactions can enhance translation quality across several low-resource and polysynthetic languages. We also incorporate human linguists into the critique loop for selected languages. Case studies on three Indigenous languages further demonstrate the complementary strengths of human-in-the-loop feedback and multi-agent reasoning for language documentation tasks.",{"paper_id":16719,"title":16720,"year":7,"month":188,"day":63,"doi":16721,"resource_url":16722,"first_page":16723,"last_page":16724,"pdf_url":16725,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16726,"paper_type":860,"authors":16727,"abstract":16734},"lrec2026-main-710","Diagnosing Translated Benchmarks: An Automated Quality Assurance Study of the EU20 Benchmark Suite","10.63317\u002F46mkktmq3ytw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-710","9030","9043","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.710.pdf","thellmann-etal-2026-diagnosing",[16728,16730,16733],{"paper_id":16719,"author_seq":247,"given_name":13139,"surname":16729,"affiliation":63,"orcid":63},"Thellmann",{"paper_id":16719,"author_seq":232,"given_name":16731,"surname":16732,"affiliation":63,"orcid":63},"Bernhard","Stadler",{"paper_id":16719,"author_seq":218,"given_name":1732,"surname":9252,"affiliation":63,"orcid":63},"Machine-translated benchmark datasets reduce costs and offer scale, but noise, loss of structure, and uneven quality weaken confidence. What matters is not merely whether we can translate, but also whether we can measure and verify translation reliability at scale. 
We study translation quality in the EU20 benchmark suite, which comprises five established benchmarks translated into 20 languages, via a three-step automated quality assurance approach: (i) a structural corpus audit with targeted fixes; (ii) quality profiling using a neural metric (COMET, reference-free and reference-based) with translation service comparisons (DeepL \u002F ChatGPT \u002F Google); and (iii) an LLM-based span-level translation error landscape. Trends are consistent: datasets with lower COMET scores exhibit a higher share of accuracy\u002Fmistranslation errors at span level (notably HellaSwag; ARC is comparatively clean). Reference-based COMET on MMLU against human-edited samples points in the same direction. We release cleaned\u002Fcorrected versions of the EU20 datasets, and code for reproducibility. In sum, automated quality assurance offers practical, scalable indicators that help prioritize review – complementing, not replacing, human gold standards.",{"paper_id":16736,"title":16737,"year":7,"month":188,"day":63,"doi":16738,"resource_url":16739,"first_page":16740,"last_page":16741,"pdf_url":16742,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16743,"paper_type":860,"authors":16744,"abstract":16749},"lrec2026-main-711","Resource-Lean Lexicon Induction for German Dialects","10.63317\u002F2feouaji2rxe","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-711","9044","9050","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.711.pdf","litschko-etal-2026-resource",[16745,16746,16747],{"paper_id":16736,"author_seq":247,"given_name":3172,"surname":3173,"affiliation":63,"orcid":63},{"paper_id":16736,"author_seq":232,"given_name":3175,"surname":3176,"affiliation":63,"orcid":63},{"paper_id":16736,"author_seq":218,"given_name":5321,"surname":16748,"affiliation":63,"orcid":63},"Frassinelli","Automatic induction of high-quality dictionaries is essential for building lexical 
resources, yet low-resource languages and dialects pose several challenges: limited access to annotators, high degree of spelling variations, and poor performance of large language models (LLMs). We empirically show that statistical models (random forests) trained on string similarity features are surprisingly effective for inducing German dialect lexicons. They outperform LLMs, enable cross-dialect transfer, and offer a lightweight data-driven alternative. We evaluate our models intrinsically on bilingual lexicon induction (BLI) and extrinsically on dialect information retrieval (IR). On BLI, random forests outperform Mistral-123b while being more resource-lean. On dialect IR with BM25, using our dialect dictionaries for query expansion yields relative improvements of up to 28.9% in nDCG@10 and 50.7% in Recall@100. Motivated by the resource scarcity in dialects, we further investigate the extent to which models transfer across different German dialects, and their performance under varying amounts of training data.",{"paper_id":16751,"title":16752,"year":7,"month":188,"day":63,"doi":16753,"resource_url":16754,"first_page":16755,"last_page":16756,"pdf_url":16757,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16758,"paper_type":860,"authors":16759,"abstract":16767},"lrec2026-main-712","FENCE: A Financial and Multimodal Jailbreak Detection Dataset","10.63317\u002F4a35sc6sgwwv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-712","9051","9064","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.712.pdf","kim-etal-2026-fence",[16760,16762,16764],{"paper_id":16751,"author_seq":247,"given_name":16761,"surname":5173,"affiliation":63,"orcid":63},"Mirae",{"paper_id":16751,"author_seq":232,"given_name":16763,"surname":6008,"affiliation":63,"orcid":63},"Seonghun",{"paper_id":16751,"author_seq":218,"given_name":16765,"surname":16766,"affiliation":63,"orcid":63},"Youngjun","Kwak","Large Language 
Models (LLMs) and Vision Language Models (VLMs) are increasingly targeted by jailbreak attacks. VLMs are particularly vulnerable because they process both text and images, creating broader attack surfaces.
Content Warning: This paper includes example data that may be offensive.",{"paper_id":16769,"title":16770,"year":7,"month":188,"day":63,"doi":16771,"resource_url":16772,"first_page":16773,"last_page":16774,"pdf_url":16775,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16776,"paper_type":860,"authors":16777,"abstract":16785},"lrec2026-main-713","Evaluating Multimodal Large Language Models on Vertically Written Japanese Text","10.63317\u002F4kgkk6mqmtwg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-713","9065","9081","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.713.pdf","sasagawa-etal-2026-evaluating",[16778,16781,16784],{"paper_id":16769,"author_seq":247,"given_name":16779,"surname":16780,"affiliation":63,"orcid":63},"Keito","Sasagawa",{"paper_id":16769,"author_seq":232,"given_name":16782,"surname":16783,"affiliation":63,"orcid":63},"Shuhei","Kurita",{"paper_id":16769,"author_seq":218,"given_name":2790,"surname":3527,"affiliation":63,"orcid":63},"Multimodal Large Language Models (MLLMs) have seen rapid advances in recent years and are now being applied to visual document understanding tasks. They are expected to process a wide range of document images across languages, including Japanese. Understanding documents from images requires models to read what are written in them. Since some Japanese documents are written vertically, support for vertical writing is essential. However, research specifically focused on vertically written Japanese text remains limited. In this study, we evaluate the reading capability of existing MLLMs on vertically written Japanese text. First, we generate a synthetic Japanese OCR dataset by rendering Japanese texts into images, and use it for both model fine-tuning and evaluation. This dataset includes Japanese text in both horizontal and vertical writing. 
We also create an evaluation dataset sourced from the real-world document images containing vertically written Japanese text. Using these datasets, we demonstrate that the existing MLLMs perform worse on vertically written Japanese text than on horizontally written Japanese text. Furthermore, we show that training MLLMs on our synthesized Japanese OCR dataset results in improving the performance of models that previously could not handle vertical writing. The datasets and code are publicly available (https:\u002F\u002Fgithub.com\u002Fllm-jp\u002Feval_vertical_ja).",{"paper_id":16787,"title":16788,"year":7,"month":188,"day":63,"doi":16789,"resource_url":16790,"first_page":16791,"last_page":16792,"pdf_url":16793,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16794,"paper_type":860,"authors":16795,"abstract":16808},"lrec2026-main-714","ProMQA-Assembly: Multimodal Procedural QA Dataset on Assembly","10.63317\u002F2cbr6347bzzu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-714","9082","9104","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.714.pdf","hasegawa-etal-2026-promqa",[16796,16797,16798,16799,16802,16804,16806,16807],{"paper_id":16787,"author_seq":247,"given_name":5821,"surname":5822,"affiliation":63,"orcid":63},{"paper_id":16787,"author_seq":232,"given_name":5816,"surname":5817,"affiliation":63,"orcid":63},{"paper_id":16787,"author_seq":218,"given_name":2044,"surname":5819,"affiliation":63,"orcid":63},{"paper_id":16787,"author_seq":203,"given_name":16800,"surname":16801,"affiliation":63,"orcid":63},"Susan 
E.","Holm",{"paper_id":16787,"author_seq":188,"given_name":16803,"surname":3676,"affiliation":63,"orcid":63},"Yuran",{"paper_id":16787,"author_seq":172,"given_name":16805,"surname":1039,"affiliation":63,"orcid":63},"Xuanang",{"paper_id":16787,"author_seq":155,"given_name":5824,"surname":5825,"affiliation":63,"orcid":63},{"paper_id":16787,"author_seq":138,"given_name":5827,"surname":5828,"affiliation":63,"orcid":63},"Assistants on assembly tasks show great potential to benefit humans ranging from helping with everyday tasks to interacting in industrial settings. However, evaluation resources in assembly activities are underexplored. To foster system development, we propose a new multimodal QA evaluation dataset on assembly activities. Our dataset, ProMQA-Assembly, consists of 646 QA pairs that require multimodal understanding of human activity videos and their instruction manuals in an online-style manner. For cost effectiveness in the data creation, we adopt a semi-automated QA annotation approach, where LLMs generate candidate QA pairs and humans verify them. We further improve QA generation by integrating fine-grained action labels to diversify question types. Additionally, we create 81 instruction task graphs for our target assembly tasks. These newly created task graphs are used in our benchmarking experiment, as well as in facilitating the human verification process. With our dataset, we benchmark models, including competitive proprietary multimodal models. We find that ProMQA-Assembly contains challenging multimodal questions, where reasoning models showcase promising results. 
We believe our new evaluation dataset contributes to the further development of procedural-activity assistants.",{"paper_id":16810,"title":16811,"year":7,"month":188,"day":63,"doi":16812,"resource_url":16813,"first_page":16814,"last_page":16815,"pdf_url":16816,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16817,"paper_type":860,"authors":16818,"abstract":16829},"lrec2026-main-715","K-MIND: Korean Multimodal INteraction Data for Dyadic Conversation Analysis","10.63317\u002F3mz4q73vpu6q","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-715","9105","9117","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.715.pdf","yang-etal-2026-mind",[16819,16821,16823,16825,16827],{"paper_id":16810,"author_seq":247,"given_name":16820,"surname":6675,"affiliation":63,"orcid":63},"Jae Hee",{"paper_id":16810,"author_seq":232,"given_name":16822,"surname":1356,"affiliation":63,"orcid":63},"Yuha",{"paper_id":16810,"author_seq":218,"given_name":16824,"surname":1356,"affiliation":63,"orcid":63},"Saim",{"paper_id":16810,"author_seq":203,"given_name":16826,"surname":5173,"affiliation":63,"orcid":63},"Je Woo",{"paper_id":16810,"author_seq":188,"given_name":16828,"surname":12683,"affiliation":63,"orcid":63},"Jin Yea","We present the Korean Multimodal INteraction Data (K-MIND), a large-scale corpus of dyadic Korean dialogue that is designed to capture the multimodal richness of social interaction. The dataset includes 292 participants and 200 sets (935 clips) spanning 115 hours and 30 minutes, all aligned across verbal, paraverbal, and nonverbal modalities such as transcripts, acoustic features, and visual signals. For these modalities, we propose a comprehensive annotation scheme that enables nuanced yet consistent labeling of complex communicative behaviors, balancing theoretical soundness with practical feasibility. 
We further report analysis results of the corpus, including label distributions, within- and cross-layer analyses. These analyses illuminate the key properties of dyadic K-MIND and demonstrate its utility for advancing research in human–computer interaction as well as in interdisciplinary domains. To ensure continuous refinement, the corpus and framework are being validated in complementary studies and have been extended to triadic interactions (K-MIND Triadic) that model group dynamics, which will be included in upcoming releases.",{"paper_id":16831,"title":16832,"year":7,"month":188,"day":63,"doi":16833,"resource_url":16834,"first_page":16835,"last_page":16836,"pdf_url":16837,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16838,"paper_type":860,"authors":16839,"abstract":16846},"lrec2026-main-716","Do Multimodal LLMs Understand Order? Measuring the Fragility of Multimodal Reasoning under Input Order Perturbations","10.63317\u002F4jtpgzks8pbr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-716","9118","9128","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.716.pdf","wei-etal-2026-do",[16840,16842,16844,16845],{"paper_id":16831,"author_seq":247,"given_name":16841,"surname":3270,"affiliation":63,"orcid":63},"Sheng-Lun",{"paper_id":16831,"author_seq":232,"given_name":16843,"surname":4705,"affiliation":63,"orcid":63},"Yu-Ling",{"paper_id":16831,"author_seq":218,"given_name":1836,"surname":1837,"affiliation":63,"orcid":63},{"paper_id":16831,"author_seq":203,"given_name":1839,"surname":1840,"affiliation":63,"orcid":63},"Multimodal reasoning has progressed rapidly with large vision-language models (LVLMs), yet their robustness under input variations remains underexplored. This study investigates positional bias in LVLMs for multimodal multiple-choice questions. Our analysis shows that model predictions are sensitive to both choice and modality ordering. 
We conduct a large-scale evaluation on MMMU, CVQA, and MMBench using fourteen representative models. Further analysis examines how question properties, including difficulty, domain, and image type, affect robustness. We also assess whether text-based mitigation strategies transfer to the VQA setting and perform ablation studies on self-consistency and reasoning complexity. Overall, our findings provide the first comprehensive understanding of positional bias from a vision-language perspective, highlighting key challenges in achieving stable multimodal reasoning.",{"paper_id":16848,"title":16849,"year":7,"month":188,"day":63,"doi":16850,"resource_url":16851,"first_page":16852,"last_page":16853,"pdf_url":16854,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16855,"paper_type":860,"authors":16856,"abstract":16862},"lrec2026-main-717","Early Fusion with Contrastive Learning: A Lightweight Alternative for Multi-modal Classification","10.63317\u002F3ioe7epfs78m","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-717","9129","9138","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.717.pdf","wernlein-etal-2026-early",[16857,16859,16861],{"paper_id":16848,"author_seq":247,"given_name":12079,"surname":16858,"affiliation":63,"orcid":63},"Wernlein",{"paper_id":16848,"author_seq":232,"given_name":16860,"surname":14968,"affiliation":63,"orcid":63},"Abhik",{"paper_id":16848,"author_seq":218,"given_name":998,"surname":999,"affiliation":63,"orcid":63},"With the emergence of numerous modalities, such as text, image, audio, etc., the use of effective multimodal systems has increased significantly. However, one of the significant challenges faced by such multimodal systems is effectively aligning and integrating diverse modalities. 
Several models have been proposed to address these issues; however, state-of-the-art performance is achieved by complex, heavyweight models (complexity measured in terms of trainable parameters) alone. Hence, we propose a simple yet effective lightweight framework explicitly designed for multimodal classification tasks, utilising the early fusion method combined with a contrastive learning approach. The early fusion method focuses on fusing different modalities at the input level, whereas contrastive learning allows a single modality to capture intra-modality relationships. Experiments on three different genres of multimodal classification datasets demonstrate that the proposed lightweight framework achieves performance comparable to the most competitive heavyweight state-of-the-art models and, in some cases, even outperforms them.",{"paper_id":16864,"title":16865,"year":7,"month":188,"day":63,"doi":16866,"resource_url":16867,"first_page":16868,"last_page":16869,"pdf_url":16870,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16871,"paper_type":860,"authors":16872,"abstract":16884},"lrec2026-main-718","Multimodal Entrainment and Feedback in Online Group 
Meetings","10.63317\u002F3e7gn6grbki4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-718","9139","9149","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.718.pdf","paggio-etal-2026-multimodal",[16873,16874,16877,16878,16881],{"paper_id":16864,"author_seq":247,"given_name":13121,"surname":13122,"affiliation":63,"orcid":63},{"paper_id":16864,"author_seq":232,"given_name":16875,"surname":16876,"affiliation":63,"orcid":63},"Manex","Agirrezabal",{"paper_id":16864,"author_seq":218,"given_name":12397,"surname":5732,"affiliation":63,"orcid":63},{"paper_id":16864,"author_seq":203,"given_name":16879,"surname":16880,"affiliation":63,"orcid":63},"Bart","Jongejan",{"paper_id":16864,"author_seq":188,"given_name":16882,"surname":16883,"affiliation":63,"orcid":63},"Costanza","Navarretta","This paper presents the results of a study on multimodal speaker behaviour in a corpus of online Zoom meetings. We investigate two questions: i) whether speakers display a higher degree of head movement when they exchange verbal feedback than when they don’t, as would be expected if verbal and gestural feedback reinforce one other, and ii) whether they move more or less similarly under the same conditions. Several linear mixed models were fitted to test the difference in head movement values in target and control intervals of two different durations. The results indicate that speakers indeed entrain by moving their heads more in target intervals where verbal feedback is present. This result confirms our expectations. However, speakers also appear to move in less similar ways in the same target intervals. 
This dissimilarity can be explained by the fact that not all speakers give the same type of gestural feedback, but also by noise created by non-communicative movements in which speakers adjust their positions or reach out for objects during the meeting.",{"paper_id":16886,"title":16887,"year":7,"month":188,"day":63,"doi":16888,"resource_url":16889,"first_page":16890,"last_page":16891,"pdf_url":16892,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16893,"paper_type":860,"authors":16894,"abstract":16907},"lrec2026-main-719","MMCIG: Multimodal Cover Image Generation for Text-only Documents and Its Dataset Construction via Pseudo-labeling","10.63317\u002F5qb2harcr757","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-719","9150","9161","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.719.pdf","kim-etal-2026-mmcig",[16895,16897,16899,16901,16904],{"paper_id":16886,"author_seq":247,"given_name":16896,"surname":5173,"affiliation":63,"orcid":63},"Hyeyeon",{"paper_id":16886,"author_seq":232,"given_name":16898,"surname":4143,"affiliation":63,"orcid":63},"Sungwoo",{"paper_id":16886,"author_seq":218,"given_name":16900,"surname":8456,"affiliation":63,"orcid":63},"Jingun",{"paper_id":16886,"author_seq":203,"given_name":16902,"surname":16903,"affiliation":63,"orcid":63},"Hidetaka","Kamigaito",{"paper_id":16886,"author_seq":188,"given_name":16905,"surname":16906,"affiliation":63,"orcid":63},"Manabu","Okumura","In this study, we introduce a novel cover image generation task that produces both a concise summary and a visually corresponding image from a text-only document. Because no existing datasets are available for this task, we propose a multimodal pseudo-labeling method to construct high-quality datasets at low cost. We first collect documents with summaries, multiple images, and captions, and then exclude factually inconsistent instances. 
Our approach selects one image from multiple images accompanying each document. Using the gold summary, we independently rank both the images and their captions. Then, we annotate a pseudo-label for an image when both the image and its corresponding caption are ranked first in their respective rankings. Finally, we remove documents that contain direct image references within texts. Experimental results demonstrate that the proposed multimodal pseudo-labeling method constructs more precise datasets and generates higher quality images than text- and image-only pseudo-labeling methods, which consider captions and images separately.",{"paper_id":16909,"title":16910,"year":7,"month":188,"day":63,"doi":16911,"resource_url":16912,"first_page":16913,"last_page":16914,"pdf_url":16915,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16916,"paper_type":860,"authors":16917,"abstract":16919},"lrec2026-main-720","Multimodal Reference by Means of the Pronoun We and Hand Gestures in a Novel Corpus of Parliamentary Opening Debates","10.63317\u002F27nvrmnnxgqh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-720","9162","9171","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.720.pdf","navarretta-2026-multimodal",[16918],{"paper_id":16909,"author_seq":247,"given_name":16882,"surname":16883,"affiliation":63,"orcid":63},"Political discourse has persuasion as its main goal and the identification of the referents of pronouns in it is of great importance. This paper presents a novel multimodal corpus of Danish parliamentary opening debates. It also describes a study of multimodal reference by means of the first-person plural pronoun \"vi\" (we) and the co-occurring hand gestures in a subset of the corpus. The data in the study consists of 219 speeches of two prime ministers from the opening debates in 2013, 2014, and 2021-2024. 
In the speeches, the prime ministers answer questions of parliament members from the opposition. The uses of the first-person plural pronoun in political speeches are particularly interesting since the pronouns can refer to different groups, such as the government, the parliament, the country, or a specific party and can be used by politicians to achieve consensus or distinguish their politics from that of others. The main hypothesis we want to investigate in the study is whether the pointing gestures vary in their trajectory depending on the intended referents. The results of our study confirm this hypothesis for the most frequent referent types and show how pointing hand gestures are used by the two prime ministers to help their audience individuating the correct referents of \"vi\", and emphasise them. Our data also indicates that co-speech hand gestures are in some cases used to show the attitude of the speakers toward what they are saying.",{"paper_id":16921,"title":16922,"year":7,"month":188,"day":63,"doi":16923,"resource_url":16924,"first_page":16925,"last_page":16926,"pdf_url":16927,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16928,"paper_type":860,"authors":16929,"abstract":16940},"lrec2026-main-721","Multimodal Large Language Models for Low-Resource Languages: A Case Study for 
Basque","10.63317\u002F2ry23e89ew5v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-721","9172","9187","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.721.pdf","arana-etal-2026-multimodal",[16930,16932,16935,16938],{"paper_id":16921,"author_seq":247,"given_name":4380,"surname":16931,"affiliation":63,"orcid":63},"Arana",{"paper_id":16921,"author_seq":232,"given_name":16933,"surname":16934,"affiliation":63,"orcid":63},"Julen","Etxaniz",{"paper_id":16921,"author_seq":218,"given_name":16936,"surname":16937,"affiliation":63,"orcid":63},"Ander","Salaberria",{"paper_id":16921,"author_seq":203,"given_name":16594,"surname":16939,"affiliation":63,"orcid":63},"Azkune","Current Multimodal Large Language Models exhibit very strong performance for several demanding tasks. While commercial MLLMs deliver acceptable performance in low-resource languages, comparable results remain unattained within the open science community. In this paper, we aim to develop a strong MLLM for a low-resource language, namely Basque. For that purpose, we develop our own training and evaluation image-text datasets, leveraging state-of-the-art translation systems. Using two different Large Language Models as backbones, the Llama-3.1-Instruct model and a Basque-adapted variant called Latxa, we explore several data mixtures for training, encompassing Basque and English languages for both multimodal and text-only data. Evaluating our MLLMs for close-ended and open-ended generation tasks, we show that: i) low ratios of Basque multimodal data (around 20%) are already enough to obtain solid results on Basque benchmarks, and ii) contrary to expected, a Basque instructed backbone LLM is not required to obtain a strong MLLM in Basque. Additionally, we specify the optimal data mixture strategy, the effects of multimodal data in text-only tasks, and analyze evaluation approaches for open-ended generation tasks. 
Our results pave the way to develop MLLMs for other low-resource languages by openly releasing our resources.",{"paper_id":16942,"title":16943,"year":7,"month":188,"day":63,"doi":16944,"resource_url":16945,"first_page":16946,"last_page":16947,"pdf_url":16948,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16949,"paper_type":860,"authors":16950,"abstract":16965},"lrec2026-main-722","Real-Time Generation of Game Video Commentary with Multimodal LLMs: Pause-Aware Decoding Approaches","10.63317\u002F5m3djogm95q9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-722","9188","9201","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.722.pdf","afzal-etal-2026-real",[16951,16954,16955,16956,16959,16960,16962,16964],{"paper_id":16942,"author_seq":247,"given_name":16952,"surname":16953,"affiliation":63,"orcid":63},"Anum","Afzal",{"paper_id":16942,"author_seq":232,"given_name":1463,"surname":10869,"affiliation":63,"orcid":63},{"paper_id":16942,"author_seq":218,"given_name":4609,"surname":4610,"affiliation":63,"orcid":63},{"paper_id":16942,"author_seq":203,"given_name":16957,"surname":16958,"affiliation":63,"orcid":63},"Katsuhito","Sudoh",{"paper_id":16942,"author_seq":188,"given_name":10871,"surname":10872,"affiliation":63,"orcid":63},{"paper_id":16942,"author_seq":172,"given_name":7028,"surname":16961,"affiliation":63,"orcid":63},"Neubig",{"paper_id":16942,"author_seq":155,"given_name":2175,"surname":16963,"affiliation":63,"orcid":63},"Matthes",{"paper_id":16942,"author_seq":138,"given_name":3526,"surname":4604,"affiliation":63,"orcid":63},"Real-time video commentary generation provides textual descriptions of ongoing events in videos. It supports accessibility and engagement in domains such as sports, esports, and livestreaming. Commentary generation involves two essential decisions: what to say and when to say it. 
While recent prompting-based approaches using multimodal large language models (MLLMs) have shown strong performance in content generation, they largely ignore the timing aspect. We investigate whether in-context prompting alone can support real-time commentary generation that is both semantically relevant and well-timed. We propose two prompting-based decoding strategies: 1) a fixed-interval approach, and 2) a novel dynamic interval-based decoding approach that adjusts the next prediction timing based on the estimated duration of the previous utterance. Both methods enable pause-aware generation without any fine-tuning. Experiments on Japanese and English datasets of racing and fighting games show that the dynamic interval-based decoding can generate commentary more closely aligned with human utterance timing and content using prompting alone. We release a multilingual benchmark dataset, trained models, and implementations to support future research on real-time video commentary generation.",{"paper_id":16967,"title":16968,"year":7,"month":188,"day":63,"doi":16969,"resource_url":16970,"first_page":16971,"last_page":16972,"pdf_url":16973,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":16974,"paper_type":860,"authors":16975,"abstract":16999},"lrec2026-main-723","ARB: A Comprehensive Arabic Multimodal Reasoning 
Benchmark","10.63317\u002F2gyndds6s2us","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-723","9202","9216","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.723.pdf","ghaboura-etal-2026-arb",[16976,16978,16981,16984,16987,16990,16993,16996,16998],{"paper_id":16967,"author_seq":247,"given_name":2548,"surname":16977,"affiliation":63,"orcid":63},"Ghaboura",{"paper_id":16967,"author_seq":232,"given_name":16979,"surname":16980,"affiliation":63,"orcid":63},"Shubham","Patle",{"paper_id":16967,"author_seq":218,"given_name":16982,"surname":16983,"affiliation":63,"orcid":63},"Ketan","More",{"paper_id":16967,"author_seq":203,"given_name":16985,"surname":16986,"affiliation":63,"orcid":63},"Wafa Hamad Mohamed","Alghallabi",{"paper_id":16967,"author_seq":188,"given_name":16988,"surname":16989,"affiliation":63,"orcid":63},"Omkar","Thawakar",{"paper_id":16967,"author_seq":172,"given_name":16991,"surname":16992,"affiliation":63,"orcid":63},"Jorma","Laaksonen",{"paper_id":16967,"author_seq":155,"given_name":16994,"surname":16995,"affiliation":63,"orcid":63},"Hisham","Cholakkal",{"paper_id":16967,"author_seq":138,"given_name":16997,"surname":2909,"affiliation":63,"orcid":63},"Salman",{"paper_id":16967,"author_seq":121,"given_name":5093,"surname":5094,"affiliation":63,"orcid":63},"As Large Multimodal Models (LMMs) become more capable, there is growing interest in evaluating their reasoning processes alongside their final outputs. However, most existing benchmarks remain focused on English, overlooking languages with rich linguistic and cultural depth such as Arabic. To address this gap, we introduce the Comprehensive Arabic Multimodal Reasoning Benchmark (ARB), the first benchmark designed to evaluate step-by-step reasoning in Arabic across both textual and visual modalities. 
ARB covers 11 diverse domains and over 40 subfields, including visual reasoning, optical character recognition, scientific analysis, and cultural interpretation. It comprises 2,219 multimodal samples paired with over 8K human-curated reasoning steps and corresponding actions, verified through a human-in-the-loop process. We evaluated 15 state-of-the-art open- and closed-source LMMs and found persistent challenges in coherence, faithfulness, and cultural grounding. ARB provides a structured framework for diagnosing multimodal reasoning in underrepresented languages, marking a critical step toward inclusive, transparent, and culturally aware AI systems. The benchmark, rubric, and evaluation suite are publicly available",{"paper_id":17001,"title":17002,"year":7,"month":188,"day":63,"doi":17003,"resource_url":17004,"first_page":17005,"last_page":17006,"pdf_url":17007,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17008,"paper_type":860,"authors":17009,"abstract":17017},"lrec2026-main-724","Event Chronography in Multi-modal Data: The BME Method for Quantitative Analyses","10.63317\u002F3yhonsvzhzq9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-724","9217","9225","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.724.pdf","murat-etal-2026-event",[17010,17013,17015],{"paper_id":17001,"author_seq":247,"given_name":17011,"surname":17012,"affiliation":63,"orcid":63},"Anaïs Claire","Murat",{"paper_id":17001,"author_seq":232,"given_name":2960,"surname":17014,"affiliation":63,"orcid":63},"Koutsombogera",{"paper_id":17001,"author_seq":218,"given_name":5983,"surname":17016,"affiliation":63,"orcid":63},"Vogel","Methods for investigating multi-modality in human interactions remain open to refinement. Although the annotation process has been facilitated by tools like Elan, synchronising exported cross-tier data for further quantitative analyses remains challenging. 
We present the BME method: a new approach to data alignment. The idea is straightforward: instead of comparing exact times of onsets, durations, etc., the BME method focuses on their organisation. First, the method describes every annotation by at least two events: its beginning (B) and end (E). Then, it aligns them in chronological order. Middles (M) are precipitated to track events from other tiers which might occur between Bs and Es. We explore three cases in which such an arrangement of multi-modal data can benefit the scientific community: first, in getting insights about the dynamics and dependencies between tiers, second, in contemplating event-based duration rather than time-based ones, and, third, in contributing cross-annotator agreement assessment methods.",{"paper_id":17019,"title":17020,"year":7,"month":188,"day":63,"doi":17021,"resource_url":17022,"first_page":17023,"last_page":17024,"pdf_url":17025,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17026,"paper_type":860,"authors":17027,"abstract":17033},"lrec2026-main-725","CANVAS: A Multimodal Dataset of Chinese Textbook Images for Bias and Representation Analysis","10.63317\u002F2xmqax7numnm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-725","9226","9239","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.725.pdf","zhu-etal-2026-canvas",[17028,17030,17032],{"paper_id":17019,"author_seq":247,"given_name":17029,"surname":9726,"affiliation":63,"orcid":63},"Haotian",{"paper_id":17019,"author_seq":232,"given_name":17031,"surname":2998,"affiliation":63,"orcid":63},"Kefan",{"paper_id":17019,"author_seq":218,"given_name":10109,"surname":3446,"affiliation":63,"orcid":63},"Social biases in educational materials can subtly shape students’ perceptions of social roles and participation. 
However, most existing bias benchmarks for Chinese language models focus on text or isolated images, overlooking the multimodal scenes commonly found in educational textbooks. To address this gap, we introduce CANVAS (Chinese ANnotated Visual And Social scenes), a multimodal dataset constructed from Chinese elementary science textbooks and annotated across multiple social dimensions. CANVAS provides fine-grained labels for each depicted character’s demographics, social roles, interactions, and power-related attributes within visual scenes. The dataset is created using a semi-automated pipeline in which a vision–language model generates preliminary structured annotations that are subsequently verified and refined by human annotators. The current release focuses on the Grade 6 science subset and serves as an initial annotated version of the dataset. Using this subset, we present an illustrative case study demonstrating how scene-level and interactional annotations in CANVAS can be used to analyze gender representation in textbook images. 
By extending bias analysis to full educational scenes, CANVAS provides a new resource for studying representation and fairness in multimodal educational materials and supports future research in NLP, computer vision, and education.",{"paper_id":17035,"title":17036,"year":7,"month":188,"day":63,"doi":17037,"resource_url":17038,"first_page":17039,"last_page":17040,"pdf_url":17041,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17042,"paper_type":860,"authors":17043,"abstract":17060},"lrec2026-main-726","MM-Conv: A Multimodal Dataset and Benchmark for Context-Aware Grounding in 3D Dialogue","10.63317\u002F37fzwjphsb9y","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-726","9240","9253","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.726.pdf","deichler-etal-2026-mm",[17044,17046,17047,17050,17052,17055,17058],{"paper_id":17035,"author_seq":247,"given_name":2742,"surname":17045,"affiliation":63,"orcid":63},"Deichler",{"paper_id":17035,"author_seq":232,"given_name":15876,"surname":15877,"affiliation":63,"orcid":63},{"paper_id":17035,"author_seq":218,"given_name":17048,"surname":17049,"affiliation":63,"orcid":63},"Fethiye Irmak","Dogan",{"paper_id":17035,"author_seq":203,"given_name":2742,"surname":17051,"affiliation":63,"orcid":63},"Klezovich",{"paper_id":17035,"author_seq":188,"given_name":17053,"surname":17054,"affiliation":63,"orcid":63},"Lubos","Marcinek",{"paper_id":17035,"author_seq":172,"given_name":17056,"surname":17057,"affiliation":63,"orcid":63},"Iolanda","Leite",{"paper_id":17035,"author_seq":155,"given_name":3601,"surname":17059,"affiliation":63,"orcid":63},"Beskow","Grounding language in the physical world requires AI systems to interpret references that emerge dynamically during conversation. While current vision-language models (VLMs) excel at static image tasks, they struggle to resolve ambiguous expressions in spontaneous, multi-turn dialogue. 
We address this gap by introducing MM-Conv—speak, point, look—a benchmark for referential communication in dynamic 3D environments, built from 6.7 hours of egocentric VR interaction with synchronized speech, motion, gaze, and 3D scene geometry. The benchmark includes over 4,200 manually verified referring expressions spanning full, partitive, and pronominal types, enabling systematic evaluation of multimodal reference resolution.",{"paper_id":17062,"title":17063,"year":7,"month":188,"day":63,"doi":17064,"resource_url":17065,"first_page":17066,"last_page":17067,"pdf_url":17068,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17069,"paper_type":860,"authors":17070,"abstract":17074},"lrec2026-main-727","Erase Persona, Forget Lore: Benchmarking Multimodal Copyright Unlearning in Large Vision Language Models","10.63317\u002F3zvek95uex2j","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-727","9254","9265","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.727.pdf","kwon-etal-2026-erase",[17071,17072,17073],{"paper_id":17062,"author_seq":247,"given_name":10125,"surname":8456,"affiliation":63,"orcid":63},{"paper_id":17062,"author_seq":232,"given_name":10122,"surname":10123,"affiliation":63,"orcid":63},{"paper_id":17062,"author_seq":218,"given_name":6880,"surname":5173,"affiliation":63,"orcid":63},"Large Vision-Language Models (LVLMs), trained on web-scale data, risk memorizing and regenerating copyrighted visual content like characters and logos, creating significant challenges. Machine unlearning offers a path to mitigate these risks by removing specific content post-training, but evaluating its effectiveness, especially in the complex multimodal setting of LVLMs, remains an open problem. Current evaluation methods often lack robustness or fail to capture the nuances of cross-modal concept erasure. 
To address this critical gap, we introduce the CoVUBench benchmark, the first framework specifically designed for evaluating copyright content unlearning in LVLMs. CoVUBench utilizes procedurally generated, legally safe synthetic data coupled with systematic visual variations—spanning compositional changes and diverse domain manifestations—to ensure realistic and robust evaluation of unlearning generalization. Our comprehensive, multimodal evaluation protocol assesses both forgetting efficacy from the copyright holder’s perspective and the preservation of general model utility from the deployer’s viewpoint. By rigorously measuring this crucial trade-off, CoVUBench provides a standardized tool to advance the development of responsible and effective unlearning methods for LVLMs.",{"paper_id":17076,"title":17077,"year":7,"month":188,"day":63,"doi":17078,"resource_url":17079,"first_page":17080,"last_page":17081,"pdf_url":17082,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17083,"paper_type":860,"authors":17084,"abstract":17095},"lrec2026-main-728","DREAM: A Multicultural Multimodal Dataset Linking Dialogues and Realistic Image Sequences","10.63317\u002F2v7b4xhs2d5g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-728","9266","9281","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.728.pdf","mallo-etal-2026-dream",[17085,17088,17090,17092],{"paper_id":17076,"author_seq":247,"given_name":17086,"surname":17087,"affiliation":63,"orcid":63},"Juan","Mallo",{"paper_id":17076,"author_seq":232,"given_name":6445,"surname":17089,"affiliation":63,"orcid":63},"Estecha-Garitagoitia",{"paper_id":17076,"author_seq":218,"given_name":2531,"surname":17091,"affiliation":63,"orcid":63},"Cordoba",{"paper_id":17076,"author_seq":203,"given_name":17093,"surname":17094,"affiliation":63,"orcid":63},"Luis Fernando","D'Haro","An ongoing challenge in multimodal language research is creating and interpreting 
dialogues that preserve visual and cultural consistency across turns. We introduce DREAM (Dialogue to REAlistic Multicultural Image Sequences), a multicultural multimodal resource that ties dialogues grounded in explicit persona profiles to photorealistic, storyboard-like image sequences. Each of the 1,000 dialogues includes two rich persona profiles (structured traits plus descriptive language), two matching photorealistic portraits, and a collection of scene-level images depicting key dialogue moments. The pipeline integrates profile augmentation, culturally-sensitive prompt engineering, and turn selection to craft cohesive visual narratives, promoting character consistency across images. This is accomplished through a controlled generation process employing large language and image models. Beyond dialogue grounding, DREAM supports appearance-based demographic perception and culture-aware rendering: models can be evaluated on their ability to (i) perceive age, gender presentation, and broad ethnicity appearance clusters from profile portraits, and (ii) maintain these characteristics in dialogue scenes. We provide a unified JSON format integrating profiles, dialogue text, and visual turns, facilitating research on visually anchored dialogue understanding, consistency, and generation. A dual evaluation protocol combines human judgments (realism, coherence, consistency, and demographic perception) with automated portrait analysis via GPT-5. 
Ethical considerations, limitations, and recommended applications are discussed.",{"paper_id":17097,"title":17098,"year":7,"month":188,"day":63,"doi":17099,"resource_url":17100,"first_page":17101,"last_page":17102,"pdf_url":17103,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17104,"paper_type":860,"authors":17105,"abstract":17109},"lrec2026-main-729","Multimodal Task Interference: A Benchmark and Analysis of History-Target Mismatch in Multimodal LLMs","10.63317\u002F36ae8bm4re6t","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-729","9282","9290","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.729.pdf","kawarada-etal-2026-multimodal",[17106,17107,17108],{"paper_id":17097,"author_seq":247,"given_name":8585,"surname":8586,"affiliation":63,"orcid":63},{"paper_id":17097,"author_seq":232,"given_name":3526,"surname":4604,"affiliation":63,"orcid":63},{"paper_id":17097,"author_seq":218,"given_name":4609,"surname":4610,"affiliation":63,"orcid":63},"Task interference, the performance degradation caused by task switches within a single conversation, has been studied exclusively in text-only settings despite the growing prevalence of multimodal dialogue systems. We introduce a benchmark for evaluating this phenomenon in multimodal LLMs, covering six tasks across text and vision with systematic variation of history-target along three axes: modality mismatch, reasoning mismatch, and answer format mismatch. Experiments on both open-weights and proprietary models reveal that task interference is highly directional: switching from text-only to image-based targets causes severe performance drops, while the reverse transition yields minimal degradation. 
Interference is further amplified when mismatches co-occur across multiple dimensions, and is driven most strongly by modality differences, followed by answer format, while reasoning requirement shifts cause minimal degradation.",{"paper_id":17111,"title":17112,"year":7,"month":188,"day":63,"doi":17113,"resource_url":17114,"first_page":17115,"last_page":17116,"pdf_url":17117,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17118,"paper_type":860,"authors":17119,"abstract":17127},"lrec2026-main-730","Can Video LLMs See Through Illusions? Video-Illusion QA Benchmark Dataset","10.63317\u002F2s4rwea9k5ji","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-730","9291","9300","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.730.pdf","ohira-etal-2026-can",[17120,17123,17126],{"paper_id":17111,"author_seq":247,"given_name":17121,"surname":17122,"affiliation":63,"orcid":63},"Souto","Ohira",{"paper_id":17111,"author_seq":232,"given_name":17124,"surname":17125,"affiliation":63,"orcid":63},"Tosho","Hirasawa",{"paper_id":17111,"author_seq":218,"given_name":2776,"surname":2777,"affiliation":63,"orcid":63},"Recent advances in multimodal learning have sparked growing interest in understanding how large vision-language models interpret optical illusions. While the behavior of image LLMs—which handle one image and text but not video input—on visual illusion images has been actively explored, research on their video counterparts remains limited. Video LLMs, which process sequential frames, are gaining prominence in areas such as robotics and autonomous driving. Understanding how they handle visual illusions over time is crucial for safety and may also reveal their potential as computational models of human cognition. 
To address this gap, we present the Video-Illusion QA Benchmark (VILQA), a novel video question answering (QA) benchmark mainly composed of carefully curated illusion videos that exhibit temporally driven perceptual phenomena. To the best of our knowledge, VILQA is the largest and most comprehensive benchmark for temporally-driven visual illusions. We evaluate several video LLMs on this benchmark from multiple perspectives. Some models were able to perceive visual illusions in a way similar to the general human experience and demonstrated an ability to resist illusions even more effectively than humans. The constructed dataset is available at https:\u002F\u002Fgithub.com\u002FSDS-NLP\u002FVILQA.",{"paper_id":17129,"title":17130,"year":7,"month":188,"day":63,"doi":17131,"resource_url":17132,"first_page":17133,"last_page":17134,"pdf_url":17135,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":17136,"bibkey":17137,"paper_type":860,"authors":17138,"abstract":17148},"lrec2026-main-731","To Skip, to Swap or to Not Swap? 
Identifying Step Transition Types in Instructional Manuals","10.63317\u002F5jvvjyarv88s","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-731","9301","9320","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.731.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.731_OptionalSupplementaryMaterial.zip","yang-etal-2026-skip",[17139,17141,17143,17145],{"paper_id":17129,"author_seq":247,"given_name":17140,"surname":6675,"affiliation":63,"orcid":63},"Hsiu-Yu",{"paper_id":17129,"author_seq":232,"given_name":1732,"surname":17142,"affiliation":63,"orcid":63},"Roth",{"paper_id":17129,"author_seq":218,"given_name":4651,"surname":17144,"affiliation":63,"orcid":63},"Bulling",{"paper_id":17129,"author_seq":203,"given_name":17146,"surname":17147,"affiliation":63,"orcid":63},"Carina","Silberer","Large language models (LLMs) are increasingly used as procedural planners that provide guidance across applications. However, in human-assistive scenarios where the environment and users’ knowledge constantly change, their ability to detect various step types for generating alternative plans is underexplored. To address this gap, we introduce a novel evaluation task and dataset to assess if models can identify steps that are sequential, interchangeable, and optional in textual instructions across five domains in a step-by-step manner. We compare seven LLM families from both open-source and proprietary spaces across varying sizes to a visually-informed baseline based on procedural knowledge graphs (PKG). Our results suggest that LLMs encode procedural knowledge, enabling them to identify step types with increasing effectiveness as training parameters and data size grow. However, all LLMs exhibit inconsistencies in reasoning on the mutual exclusivity of interchangeable and sequential step pairs. In contrast, the symbolic PKG baseline demonstrates stronger consistency in this aspect. 
Comprehensive analyses furthermore uncover limitations in LLMs’ procedural reasoning abilities.",{"paper_id":17150,"title":17151,"year":7,"month":188,"day":63,"doi":17152,"resource_url":17153,"first_page":17154,"last_page":17155,"pdf_url":17156,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17157,"paper_type":860,"authors":17158,"abstract":17174},"lrec2026-main-732","Fruitcakes and Cupcakes Emerging from Noise: The ComposiGen Dataset of Compounds and Their Compositionality","10.63317\u002F5irr54jotvxf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-732","9321","9338","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.732.pdf","godbersen-etal-2026-fruitcakes",[17159,17162,17165,17167,17169,17170,17173],{"paper_id":17150,"author_seq":247,"given_name":17160,"surname":17161,"affiliation":63,"orcid":63},"Jule","Godbersen",{"paper_id":17150,"author_seq":232,"given_name":17163,"surname":17164,"affiliation":63,"orcid":63},"Sinan Cem","Kurtyigit",{"paper_id":17150,"author_seq":218,"given_name":17166,"surname":3841,"affiliation":63,"orcid":63},"Emma Raimundo",{"paper_id":17150,"author_seq":203,"given_name":7850,"surname":17168,"affiliation":63,"orcid":63},"Rakshit",{"paper_id":17150,"author_seq":188,"given_name":5321,"surname":16748,"affiliation":63,"orcid":63},{"paper_id":17150,"author_seq":172,"given_name":17171,"surname":17172,"affiliation":63,"orcid":63},"Sabine Schulte im","Walde",{"paper_id":17150,"author_seq":155,"given_name":17146,"surname":17147,"affiliation":63,"orcid":63},"Compounds are a complex linguistic phenomenon, as variation in their degree of compositionality often makes their interpretation non-straightforward. We consider the task of visual-linguistic compositionality prediction for English noun-noun compounds, i.e., predicting the degrees to which a compound’s meaning is predictable from its constituents. 
We introduce a new dataset, *ComposiGen*, which provides constituent-specific human-elicited compositionality ratings for compounds of different concreteness categories, and includes generated visual representations for both compounds and their constituents. To enable controlled comparisons, we structure *ComposiGen* such that head constituents are shared across multiple compounds (e.g., *wedding cake*, *cup cake*). We suggest a novel parameter-based approach leveraging constituent-to-compound image transformations to predict different degrees of visual constituent contributions to compound meaning. While our novel approach requires further exploration for validation, our overall results show that the generated images, in particular in combination with text, provide valuable information, and that simple late fusion outperforms multimodal transformers. Taken together, our findings highlight a promising avenue for future research on more efficient multimodal models for compositionality prediction. 
Our novel dataset offers a rich resource for future in-depth research, including the exploration of visual, constituent-based compound formation.",{"paper_id":17176,"title":17177,"year":7,"month":188,"day":63,"doi":17178,"resource_url":17179,"first_page":17180,"last_page":17181,"pdf_url":17182,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17183,"paper_type":860,"authors":17184,"abstract":17189},"lrec2026-main-733","Large Language Models' Internal Perception of Symbolic Music","10.63317\u002F495qbkvs5cdk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-733","9339","9348","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.733.pdf","shin-etal-2026-large",[17185,17186],{"paper_id":17176,"author_seq":247,"given_name":14130,"surname":1356,"affiliation":63,"orcid":63},{"paper_id":17176,"author_seq":232,"given_name":17187,"surname":17188,"affiliation":63,"orcid":63},"Kunitake","Kaneko","Large language models (LLMs) excel at modeling relationships between strings in natural language and have shown promise in extending to other symbolic domains like coding or mathematics. However, the extent to which they implicitly model symbolic music remains underexplored. This paper investigates how LLMs represent musical concepts by generating symbolic music data from textual prompts describing combinations of genres and styles, and evaluating their utility through recognition and generation tasks. We produce a dataset of LLM-generated MIDI files without relying on explicit musical training. We then train neural networks entirely on this LLM-generated MIDI dataset and perform genre and style classification as well as melody completion, benchmarking their performance against established models. 
Our results demonstrate that LLMs can infer rudimentary musical structures and temporal relationships from text, highlighting both their potential to implicitly encode musical patterns and their limitations due to a lack of explicit musical context, shedding light on their generative capabilities for symbolic music.",{"paper_id":17191,"title":17192,"year":7,"month":188,"day":63,"doi":17193,"resource_url":17194,"first_page":17195,"last_page":17196,"pdf_url":17197,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17198,"paper_type":860,"authors":17199,"abstract":17223},"lrec2026-main-734","Entity Image and Mixed-Modal Image Retrieval Datasets","10.63317\u002F2fnaa4f79qa5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-734","9349","9357","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.734.pdf","blaga-etal-2026-entity",[17200,17203,17205,17208,17210,17213,17215,17218,17221],{"paper_id":17191,"author_seq":247,"given_name":17201,"surname":17202,"affiliation":63,"orcid":63},"Cristian-Ioan","Blaga",{"paper_id":17191,"author_seq":232,"given_name":17204,"surname":10565,"affiliation":63,"orcid":63},"Paul Suganthan 
G",{"paper_id":17191,"author_seq":218,"given_name":17206,"surname":17207,"affiliation":63,"orcid":63},"Sahil","Dua",{"paper_id":17191,"author_seq":203,"given_name":4523,"surname":17209,"affiliation":63,"orcid":63},"Srinivasan",{"paper_id":17191,"author_seq":188,"given_name":17211,"surname":17212,"affiliation":63,"orcid":63},"Enrique","Alfonseca",{"paper_id":17191,"author_seq":172,"given_name":1625,"surname":17214,"affiliation":63,"orcid":63},"Dornbach",{"paper_id":17191,"author_seq":155,"given_name":17216,"surname":17217,"affiliation":63,"orcid":63},"Tom","Duerig",{"paper_id":17191,"author_seq":138,"given_name":17219,"surname":17220,"affiliation":63,"orcid":63},"Imed","Zitouni",{"paper_id":17191,"author_seq":121,"given_name":17222,"surname":5518,"affiliation":63,"orcid":63},"Zhe","Despite advances in multimodal learning, challenging benchmarks for mixed-modal image retrieval that combines visual and textual information are lacking. This paper introduces a novel benchmark to rigorously evaluate image retrieval that demands deep cross-modal contextual understanding. We present two new datasets: the Entity Image Dataset (EI), providing canonical images for Wikipedia entities, and the Mixed-Modal Image Retrieval Dataset (MMIR), derived from the WIT dataset. The MMIR benchmark features two challenging query types requiring models to ground textual descriptions in the context of provided visual entities: single entity-image queries (one entity image with descriptive text) and multi-entity-image queries (multiple entity images with relational text). We empirically validate the benchmark’s utility as both a training corpus and an evaluation set for mixed-modal retrieval. 
The quality of both datasets is further affirmed through crowd-sourced human annotations.",{"paper_id":17225,"title":17226,"year":7,"month":188,"day":63,"doi":17227,"resource_url":17228,"first_page":17229,"last_page":17230,"pdf_url":17231,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17232,"paper_type":860,"authors":17233,"abstract":17239},"lrec2026-main-735","Generating Sign Language Poses from HamNoSys and Natural Language Descriptions","10.63317\u002F466di7tv7dpd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-735","9358","9367","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.735.pdf","mximo-etal-2026-generating",[17234,17236],{"paper_id":17225,"author_seq":247,"given_name":9924,"surname":17235,"affiliation":63,"orcid":63},"Máximo",{"paper_id":17225,"author_seq":232,"given_name":17237,"surname":17238,"affiliation":63,"orcid":63},"Luis","Chiruzzo","One of the steps involved in the process of sign language generation is generating a sequence of poses that represent the signs. This paper presents a method for using textual information to improve the translation of signs in HamNoSys format into sequences of poses. The method comprises a description generator that translates HamNoSys into a textual description, an LLM fine-tuned to the task of predicting a pose sequence from a HamNoSys description, and a VQ-VAE network that encodes and decodes pose sequences as a list of discrete symbols. 
Our experiments found that even using simple dictionary descriptions of HamNoSys, it is possible to improve the predictions of pose sequences by leveraging the information from a pretrained LLM.",{"paper_id":17241,"title":17242,"year":7,"month":188,"day":63,"doi":17243,"resource_url":17244,"first_page":17245,"last_page":17246,"pdf_url":17247,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17248,"paper_type":860,"authors":17249,"abstract":17256},"lrec2026-main-736","Evaluating Discriminability of Vision-Language Models","10.63317\u002F2iiwqjvxmcca","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-736","9368","9385","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.736.pdf","muraoka-etal-2026-evaluating",[17250,17253],{"paper_id":17241,"author_seq":247,"given_name":17251,"surname":17252,"affiliation":63,"orcid":63},"Masayasu","Muraoka",{"paper_id":17241,"author_seq":232,"given_name":17254,"surname":17255,"affiliation":63,"orcid":63},"Naoaki","Okazaki","We study the discriminative ability of vision-language models (VLMs). This ability refers to processing information by distinguishing key details from unnecessary or redundant parts to achieve specific goals. It is vital for the practical use of VLMs in applications like visual chatbots. Whereas recent VLMs have shown decent performance on various multimodal capabilities, their discriminative ability has not been thoroughly explored to date. To this end, we construct DiscriBench to evaluate the discriminability of VLMs in various daily life activities. We carefully design the dataset to require distinguishing information in both vision and language modalities, and semi-manually craft questions in English and Japanese, making them solvable without relying on external knowledge or expertise. 
Experimental results demonstrate a large performance gap (14.0 to 69.3 points) between humans and existing VLMs in discriminability, where humans can solve the task with an accuracy of 90% or higher. By reducing the difficulty of discriminability, our ablation studies elucidate that vision encoders cannot distinguish visual details well, given generally similar but partially different images. Besides, we observe that VLMs show inconsistent inference between modalities. We will publish DiscriBench (1,200 samples) to foster research in this direction.",{"paper_id":17258,"title":17259,"year":7,"month":188,"day":63,"doi":17260,"resource_url":17261,"first_page":17262,"last_page":17263,"pdf_url":17264,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17265,"paper_type":860,"authors":17266,"abstract":17270},"lrec2026-main-737","Seeing the Other Side: Diagnostic Tasks for Viewpoint Reasoning in Vision–Language Models","10.63317\u002F2b34cz9k64ug","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-737","9386","9395","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.737.pdf","takenaka-etal-2026-seeing",[17267,17269],{"paper_id":17258,"author_seq":247,"given_name":892,"surname":17268,"affiliation":63,"orcid":63},"Takenaka",{"paper_id":17258,"author_seq":232,"given_name":4264,"surname":4265,"affiliation":63,"orcid":63},"Humans can integrate multiple visual perspectives and infer how an object appears from unseen sides. This study investigates whether Large Vision Language Models (LVLMs) exhibit a comparable ability for reference-grounded spatial reasoning. We propose two diagnostic tasks: Opposite-Side Reasoning, which determines whether two images show the same object from opposite viewpoints, and Viewpoint Identification, which predicts the viewpoint of a target image using a reference image and its label. 
An additional condition, Viewpoint Identification (no-ref), removes reference information to reveal cases solvable without it, distinguishing genuine reasoning from bias-driven shortcuts. Our evaluation shows that both open and proprietary LVLMs fall far short of human performance. Even state-of-the-art proprietary LVLMs with relatively high accuracy retain many correct answers when reference information is removed, suggesting that their success often relies on linguistic or dataset-driven priors rather than genuine reference-based reasoning. These findings indicate that current LVLMs have not yet achieved consistent, reference-grounded spatial reasoning. Our datasets in this work will be released on the Hugging Face Hub to support future research on multimodal viewpoint reasoning and spatial understanding.",{"paper_id":17272,"title":17273,"year":7,"month":188,"day":63,"doi":17274,"resource_url":17275,"first_page":17276,"last_page":17277,"pdf_url":17278,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17279,"paper_type":860,"authors":17280,"abstract":17289},"lrec2026-main-738","Multi-modal, Multi-task, Multi-criteria Automatic Evaluation with Vision Language Models","10.63317\u002F575aknbvd9hs","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-738","9396","9408","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.738.pdf","oi-etal-2026-multi",[17281,17284,17285,17286],{"paper_id":17272,"author_seq":247,"given_name":17282,"surname":17283,"affiliation":63,"orcid":63},"Masanari","Oi",{"paper_id":17272,"author_seq":232,"given_name":5846,"surname":17188,"affiliation":63,"orcid":63},{"paper_id":17272,"author_seq":218,"given_name":17254,"surname":17255,"affiliation":63,"orcid":63},{"paper_id":17272,"author_seq":203,"given_name":17287,"surname":17288,"affiliation":63,"orcid":63},"Nakamasa","Inoue","Vision-language models (VLMs) have shown impressive abilities across a range of multi-modal tasks. 
However, existing metrics for evaluating the quality of text generated by VLMs typically focus on an overall evaluation for a specific task, such as image captioning. While the overall evaluation is essential for any task, the criteria prioritized can differ depending on the task, making it challenging for current metrics to adapt to multi-task scenarios. To address this limitation, we propose HarmonicEval, a reference-free comprehensive evaluation metric that aggregates criterion-wise scores to produce the overall score in a bottom-up manner. Furthermore, to assess the generalizability of automatic evaluation metrics in multi-task scenarios, we construct the Multi-task Multi-criteria Human Evaluation (MMHE) benchmark, which comprises 18,000 expert human judgments across four multi-modal tasks. Our experiments demonstrate that HarmonicEval achieves higher correlations with human judgments than conventional metrics while providing numerical scores for each criterion. Our code and data will be available publicly.",{"paper_id":17291,"title":17292,"year":7,"month":188,"day":63,"doi":17293,"resource_url":17294,"first_page":17295,"last_page":17296,"pdf_url":17297,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17298,"paper_type":860,"authors":17299,"abstract":17308},"lrec2026-main-739","Challenges in Image-Caption Association in Portuguese: Evaluating the CLIP Model on the FM30K Dataset","10.63317\u002F5mwksx2sayjr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-739","9409","9419","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.739.pdf","benedet-etal-2026-challenges",[17300,17303,17306,17307],{"paper_id":17291,"author_seq":247,"given_name":17301,"surname":17302,"affiliation":63,"orcid":63},"Vitória Colonetti","Benedet",{"paper_id":17291,"author_seq":232,"given_name":17304,"surname":17305,"affiliation":63,"orcid":63},"Gustavo 
Lopes","Tamiosso",{"paper_id":17291,"author_seq":218,"given_name":4030,"surname":4031,"affiliation":63,"orcid":63},{"paper_id":17291,"author_seq":203,"given_name":4020,"surname":4021,"affiliation":63,"orcid":63},"In recent decades, multimodal models such as CLIP have achieved significant advances in associating images and texts. However, most of these advances stem from models trained almost exclusively in English, which limits their effectiveness in other languages. This challenge is particularly relevant for Brazilian Portuguese, a language that still lacks dedicated multimodal resources and relies predominantly on automatic translations. This work investigates the performance of CLIP-based multimodal models in the task of associating images and descriptions written in Brazilian Portuguese. The analysis begins with a zero-shot scenario, in which different CLIP variants are directly evaluated on the FM30k dataset, composed of images and captions originally written in Portuguese. An additional experiment with automatic translations is also conducted to examine the impact of language on cross-modal retrieval tasks. Subsequently, fine-tuning is performed on the textual encoder of the ViT-B\u002F32 model, keeping the visual encoder frozen, with the goal of adapting the model to the target language. The results show that models originally trained in English perform worse in Portuguese, while linguistically adapted variants, either multilingual or Portuguese-specific, achieve superior performance. The proposed fine-tuning approach was able to reduce this performance gap, leading to notable improvements. In the image-to-text scenario, the model achieved an absolute increase of 27.65 percentage points in the Accuracy@1 metric, representing a 209% relative gain over the original CLIP ViT-B\u002F32. 
In the text-to-image scenario, the gain was 15.47 percentage points, amounting to an even higher 385% relative improvement, contributing to a more balanced association between images and captions.",{"paper_id":17310,"title":17311,"year":7,"month":188,"day":63,"doi":17312,"resource_url":17313,"first_page":17314,"last_page":17315,"pdf_url":17316,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17317,"paper_type":860,"authors":17318,"abstract":17325},"lrec2026-main-740","A Large-Scale Instruction-Tuning Dataset and Models for Slovenian Vision-Language Tasks","10.63317\u002F2e3jf6e7tcoh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-740","9420","9433","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.740.pdf","martinc-etal-2026-large",[17319,17322],{"paper_id":17310,"author_seq":247,"given_name":17320,"surname":17321,"affiliation":63,"orcid":63},"Matej","Martinc",{"paper_id":17310,"author_seq":232,"given_name":17323,"surname":17324,"affiliation":63,"orcid":63},"Domen","Vreš","Vision-language models (VLMs) represent a significant leap forward in artificial intelligence, yet their development has been predominantly focused on English, creating a digital divide for speakers of less-resourced languages. This paper addresses this gap by introducing the first large-scale, general instruction-tuning dataset for the less-resourced Slovenian language. Comprising over one million text-image pairs, the dataset was constructed through a multi-pronged approach: automatic curation from Slovenian news media and Wikipedia, and machine translation of the English LLaVA-665k dataset. To demonstrate the dataset’s efficacy, we fine-tuned two pre-trained, multilingual Gemma-3 models (4B and 12B parameters) on this new resource. 
Our evaluation, conducted on a new manually curated test set, reveals that the fine-tuned models named SVILA (Slovenian Vision Language Assistant) exhibit substantial performance gains on a variety of vision question answering, visual grounding, and optical character recognition tasks when compared to their baseline counterparts. This establishes our methodology as an effective blueprint for enhancing VLM capabilities in other less-resourced languages. The dataset is publicly available in the Slovenian language resource repository CLARIN.SI (http:\u002F\u002Fhdl.handle.net\u002F11356\u002F2050) and both fine-tuned models are published on the Hugging Face platform (https:\u002F\u002Fhuggingface.co\u002FGaMS-Beta\u002FSVILA-1-12B and https:\u002F\u002Fhuggingface.co\u002FGaMS-Beta\u002FSVILA-1-4B).",{"paper_id":17327,"title":17328,"year":7,"month":188,"day":63,"doi":17329,"resource_url":17330,"first_page":17331,"last_page":17332,"pdf_url":17333,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17334,"paper_type":860,"authors":17335,"abstract":17503},"lrec2026-main-741","A Parallel Cross-Lingual Benchmark for Multimodal Idiomaticity 
Understanding","10.63317\u002F5cvnbcoktfo2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-741","9434","9448","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.741.pdf","torunoluselamet-etal-2026-parallel",[17336,17339,17342,17343,17344,17347,17348,17349,17350,17352,17355,17358,17361,17364,17366,17368,17371,17374,17376,17377,17380,17381,17384,17385,17388,17389,17390,17393,17396,17398,17399,17400,17403,17405,17406,17409,17410,17413,17415,17416,17419,17422,17424,17426,17429,17432,17433,17436,17438,17439,17442,17443,17444,17447,17448,17450,17452,17455,17458,17461,17464,17467,17470,17471,17474,17476,17479,17482,17484,17487,17489,17490,17492,17493,17495,17496,17499,17501],{"paper_id":17327,"author_seq":247,"given_name":17337,"surname":17338,"affiliation":63,"orcid":63},"Dilara","Torunoğlu-Selamet",{"paper_id":17327,"author_seq":232,"given_name":17340,"surname":17341,"affiliation":63,"orcid":63},"Doğukan","Arslan",{"paper_id":17327,"author_seq":218,"given_name":7059,"surname":10321,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":203,"given_name":3270,"surname":4962,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":188,"given_name":17345,"surname":17346,"affiliation":63,"orcid":63},"Doruk","Eryiğit",{"paper_id":17327,"author_seq":172,"given_name":1316,"surname":10327,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":155,"given_name":9471,"surname":9472,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":138,"given_name":4071,"surname":10334,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":121,"given_name":17351,"surname":17346,"affiliation":63,"orcid":63},"Gülşen",{"paper_id":17327,"author_seq":104,"given_name":17353,"surname":17354,"affiliation":63,"orcid":63},"Ágnes","Abuczki",{"paper_id":17327,"author_seq":87,"given_name":17356,"surname":17357,"affiliation":63,"orcid":63},"Aida","Cardoso",{"paper_id":17327,"author_seq":73,"given_name":17359,"surname":17360,"affiliatio
n":63,"orcid":63},"Alesia","Lazarenka",{"paper_id":17327,"author_seq":55,"given_name":17362,"surname":17363,"affiliation":63,"orcid":63},"Dina","Almassova",{"paper_id":17327,"author_seq":38,"given_name":17365,"surname":9009,"affiliation":63,"orcid":63},"Amália",{"paper_id":17327,"author_seq":17,"given_name":2742,"surname":17367,"affiliation":63,"orcid":63},"Kanellopoulou",{"paper_id":17327,"author_seq":2971,"given_name":17369,"surname":17370,"affiliation":63,"orcid":63},"Antoni","Brosa-Rodriguez",{"paper_id":17327,"author_seq":2974,"given_name":17372,"surname":17373,"affiliation":63,"orcid":63},"Baiba","Valkovska",{"paper_id":17327,"author_seq":857,"given_name":6821,"surname":17375,"affiliation":63,"orcid":63},"Wojtowicz",{"paper_id":17327,"author_seq":877,"given_name":8879,"surname":8280,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":2984,"given_name":17378,"surname":17379,"affiliation":63,"orcid":63},"Carlos Manuel","Hidalgo-Ternero",{"paper_id":17327,"author_seq":2988,"given_name":9461,"surname":9462,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":2992,"given_name":17382,"surname":17383,"affiliation":63,"orcid":63},"Danka","Jokić",{"paper_id":17327,"author_seq":2996,"given_name":5321,"surname":5322,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":3000,"given_name":17386,"surname":17387,"affiliation":63,"orcid":63},"Eleni","Triantafyllidi",{"paper_id":17327,"author_seq":3004,"given_name":2022,"surname":14919,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":3008,"given_name":6725,"surname":6726,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":3478,"given_name":17391,"surname":17392,"affiliation":63,"orcid":63},"Giedre 
Valunaite","Oleskeviciene",{"paper_id":17327,"author_seq":3482,"given_name":17394,"surname":17395,"affiliation":63,"orcid":63},"Ieva","Rizgeliene",{"paper_id":17327,"author_seq":3486,"given_name":17397,"surname":13176,"affiliation":63,"orcid":63},"Inguna",{"paper_id":17327,"author_seq":12954,"given_name":6855,"surname":6856,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":12958,"given_name":8282,"surname":8283,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":12961,"given_name":17401,"surname":17402,"affiliation":63,"orcid":63},"Jauza Akbar","Krito",{"paper_id":17327,"author_seq":12965,"given_name":17404,"surname":9466,"affiliation":63,"orcid":63},"Jelena M.",{"paper_id":17327,"author_seq":878,"given_name":8211,"surname":8212,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":900,"given_name":17407,"surname":17408,"affiliation":63,"orcid":63},"Josue Alejandro","Sauca",{"paper_id":17327,"author_seq":12973,"given_name":10974,"surname":10975,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":12976,"given_name":17411,"surname":17412,"affiliation":63,"orcid":63},"Kingsley O.","Ugwuanyi",{"paper_id":17327,"author_seq":12980,"given_name":13890,"surname":17414,"affiliation":63,"orcid":63},"Rituma",{"paper_id":17327,"author_seq":12984,"given_name":14921,"surname":14922,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":12988,"given_name":17417,"surname":17418,"affiliation":63,"orcid":63},"Maha Tufail","Agro",{"paper_id":17327,"author_seq":12991,"given_name":17420,"surname":17421,"affiliation":63,"orcid":63},"Manzura","Abjalova",{"paper_id":17327,"author_seq":12994,"given_name":2960,"surname":17423,"affiliation":63,"orcid":63},"Chatzigrigoriou",{"paper_id":17327,"author_seq":12998,"given_name":17425,"surname":1566,"affiliation":63,"orcid":63},"María del Mar 
Sánchez",{"paper_id":17327,"author_seq":13002,"given_name":17427,"surname":17428,"affiliation":63,"orcid":63},"Marija","Pendevska",{"paper_id":17327,"author_seq":13005,"given_name":17430,"surname":17431,"affiliation":63,"orcid":63},"Masoumeh","Seyyedrezaei",{"paper_id":17327,"author_seq":13009,"given_name":9474,"surname":9475,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":13013,"given_name":17434,"surname":17435,"affiliation":63,"orcid":63},"Momina","Ahsan",{"paper_id":17327,"author_seq":13016,"given_name":17437,"surname":2909,"affiliation":63,"orcid":63},"Muhammad Ahsan Riaz",{"paper_id":17327,"author_seq":901,"given_name":8547,"surname":8548,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":929,"given_name":17440,"surname":17441,"affiliation":63,"orcid":63},"Nilay Erdem","Ayyıldız",{"paper_id":17327,"author_seq":13026,"given_name":6722,"surname":6723,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":13030,"given_name":5624,"surname":5625,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":13034,"given_name":17445,"surname":17446,"affiliation":63,"orcid":63},"Numaan","Naeem",{"paper_id":17327,"author_seq":13038,"given_name":9455,"surname":9456,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":13041,"given_name":9455,"surname":17449,"affiliation":63,"orcid":63},"Yatsyshyna",{"paper_id":17327,"author_seq":13045,"given_name":3652,"surname":17451,"affiliation":63,"orcid":63},"Orel",{"paper_id":17327,"author_seq":13049,"given_name":17453,"surname":17454,"affiliation":63,"orcid":63},"Petra","Giommarelli",{"paper_id":17327,"author_seq":13053,"given_name":17456,"surname":17457,"affiliation":63,"orcid":63},"Petya","Osenova",{"paper_id":17327,"author_seq":13057,"given_name":17459,"surname":17460,"affiliation":63,"orcid":63},"Radovan","Garabik",{"paper_id":17327,"author_seq":13061,"given_name":17462,"surname":17463,"affiliation":63,"orcid":63},"Regina 
E.","Semou",{"paper_id":17327,"author_seq":13065,"given_name":17465,"surname":17466,"affiliation":63,"orcid":63},"Rozane","Rebechi",{"paper_id":17327,"author_seq":13068,"given_name":17468,"surname":17469,"affiliation":63,"orcid":63},"Salsabila Zahirah","Pranida",{"paper_id":17327,"author_seq":930,"given_name":8276,"surname":8277,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":949,"given_name":17472,"surname":17473,"affiliation":63,"orcid":63},"Sanni","Nimb",{"paper_id":17327,"author_seq":13077,"given_name":17475,"surname":3647,"affiliation":63,"orcid":63},"Sarfraz",{"paper_id":17327,"author_seq":13081,"given_name":17477,"surname":17478,"affiliation":63,"orcid":63},"Sarvinoz","Sharipova",{"paper_id":17327,"author_seq":13083,"given_name":17480,"surname":17481,"affiliation":63,"orcid":63},"Shahar","Golan",{"paper_id":17327,"author_seq":13086,"given_name":17483,"surname":8823,"affiliation":63,"orcid":63},"Shaoxiong",{"paper_id":17327,"author_seq":13090,"given_name":17485,"surname":17486,"affiliation":63,"orcid":63},"Sopuruchi Christian","Aboh",{"paper_id":17327,"author_seq":13094,"given_name":15672,"surname":17488,"affiliation":63,"orcid":63},"Sucur",{"paper_id":17327,"author_seq":13098,"given_name":9431,"surname":9432,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":13102,"given_name":17491,"surname":14917,"affiliation":63,"orcid":63},"Sussi",{"paper_id":17327,"author_seq":13106,"given_name":9478,"surname":9479,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":13110,"given_name":3442,"surname":17494,"affiliation":63,"orcid":63},"Lipp",{"paper_id":17327,"author_seq":13114,"given_name":9428,"surname":9429,"affiliation":63,"orcid":63},{"paper_id":17327,"author_seq":950,"given_name":17497,"surname":17498,"affiliation":63,"orcid":63},"Yelda 
Yeşildal","Eraydın",{"paper_id":17327,"author_seq":972,"given_name":6571,"surname":17500,"affiliation":63,"orcid":63},"Saaberi",{"paper_id":17327,"author_seq":13124,"given_name":17502,"surname":1899,"affiliation":63,"orcid":63},"Zhuohan","Potentially idiomatic expressions (PIEs) carry meanings inherently tied to the everyday experience of a given language community. As such, they constitute an interesting challenge for assessing the linguistic (and to some extent cultural) capabilities of NLP systems. In this paper, we present XMPIE, a parallel multilingual and multimodal dataset of potentially idiomatic expressions. The dataset, containing 34 languages and over ten thousand items, allows comparative analyses of idiomatic patterns among language-specific realisations and preferences in order to gather insights about shared cultural aspects. This parallel dataset allows evaluation of language model performance for a given PIE in different languages and whether idiomatic understanding in one language can be transferred to another. Moreover, the dataset supports the study of PIEs across textual and visual modalities, to measure to what extent PIE understanding in one modality transfers or implies understanding in another modality (text vs. image). The data was created by language experts, with both textual and visual components crafted under multilingual guidelines, and each PIE is accompanied by five images representing a spectrum from idiomatic to literal meanings, including semantically related and random distractors. The result is a high-quality benchmark for evaluating multilingual and multimodal idiomatic language understanding.",{"paper_id":17505,"title":17506,"year":7,"month":188,"day":63,"doi":17507,"resource_url":17508,"first_page":17509,"last_page":17510,"pdf_url":17511,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17512,"paper_type":860,"authors":17513,"abstract":17525},"lrec2026-main-742","Which Way Does Time Flow? 
A Psychophysics-Grounded Evaluation for Vision–Language Models","10.63317\u002F58hxjifrhvw7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-742","9449","9459","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.742.pdf","matta-etal-2026-which",[17514,17517,17519,17521,17524],{"paper_id":17505,"author_seq":247,"given_name":17515,"surname":17516,"affiliation":63,"orcid":63},"Shiho","Matta",{"paper_id":17505,"author_seq":232,"given_name":17518,"surname":1581,"affiliation":63,"orcid":63},"Lis Kanashiro",{"paper_id":17505,"author_seq":218,"given_name":17520,"surname":4143,"affiliation":63,"orcid":63},"Peitao",{"paper_id":17505,"author_seq":203,"given_name":17522,"surname":17523,"affiliation":63,"orcid":63},"Shigeru","Kitazawa",{"paper_id":17505,"author_seq":188,"given_name":9846,"surname":4721,"affiliation":63,"orcid":63},"Modern vision–language models (VLMs) excel at many multimodal tasks, yet their grasp of temporal information in video remains weak and has not been adequately evaluated. We probe this gap with a deceptively simple but revealing challenge: judging the arrow of time (AoT)—whether a short clip is played forward or backward. We introduce AoT-PsyPhyBENCH, a psychophysically validated benchmark that tests whether VLMs can infer temporal direction in natural videos using the same stimuli and behavioral baselines established for humans. Our comprehensive evaluation of open-weight and proprietary, reasoning and non-reasoning VLMs reveals that most models perform near chance, and even the best model lags far behind human accuracy on physically irreversible processes (e.g., free fall, diffusion\u002Fexplosion) and causal manual actions (division\u002Faddition) that humans recognize almost instantly. These results highlight a fundamental gap in current multimodal systems: while they capture rich visual–semantic correlations, they lack the inductive biases required for temporal continuity and causal understanding. 
We release the code and data for AoT-PsyPhyBENCH to encourage further progress in the physical and temporal reasoning capabilities of VLMs.",{"paper_id":17527,"title":17528,"year":7,"month":188,"day":63,"doi":17529,"resource_url":17530,"first_page":17531,"last_page":17532,"pdf_url":17533,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17534,"paper_type":860,"authors":17535,"abstract":17541},"lrec2026-main-743","I Came, I Saw, I Explained: Benchmarking Multimodal LLMs on Figurative Meaning in Memes","10.63317\u002F55fa4fifm4pf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-743","9460","9477","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.743.pdf","zhou-etal-2026-came",[17536,17538,17539,17540],{"paper_id":17527,"author_seq":247,"given_name":17537,"surname":1039,"affiliation":63,"orcid":63},"Shijia",{"paper_id":17527,"author_seq":232,"given_name":15283,"surname":1932,"affiliation":63,"orcid":63},{"paper_id":17527,"author_seq":218,"given_name":3175,"surname":3176,"affiliation":63,"orcid":63},{"paper_id":17527,"author_seq":203,"given_name":5321,"surname":16748,"affiliation":63,"orcid":63},"Internet memes represent a popular form of multimodal online communication and often use figurative elements to convey layered meaning through the combination of text and images. However, it remains largely unclear how multimodal large language models (MLLMs) combine and interpret visual and textual information to identify figurative meaning in memes. To address this gap, we evaluate eight state-of-the-art generative MLLMs across three datasets on their ability to detect and explain six types of figurative meaning. In addition, we conduct a human evaluation of the explanations generated by these MLLMs, assessing whether the provided reasoning supports the predicted label and whether it remains faithful to the original meme content. 
Our findings indicate that all models exhibit a strong bias to associate a meme with figurative meaning, even when no such meaning is present. Qualitative analysis further shows that correct predictions are not always accompanied by faithful explanations.",{"paper_id":17543,"title":17544,"year":7,"month":188,"day":63,"doi":17545,"resource_url":17546,"first_page":17547,"last_page":17548,"pdf_url":17549,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17550,"paper_type":860,"authors":17551,"abstract":17566},"lrec2026-main-744","DEJIMA: A Novel Large-scale Japanese Dataset for Image Captioning and Visual Question Answering","10.63317\u002F45nioi7qjz28","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-744","9478","9489","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.744.pdf","katsube-etal-2026-dejima",[17552,17555,17557,17560,17562,17564],{"paper_id":17543,"author_seq":247,"given_name":17553,"surname":17554,"affiliation":63,"orcid":63},"Toshiki","Katsube",{"paper_id":17543,"author_seq":232,"given_name":17556,"surname":4261,"affiliation":63,"orcid":63},"Fukuhara",{"paper_id":17543,"author_seq":218,"given_name":17558,"surname":17559,"affiliation":63,"orcid":63},"Kenichiro","Ando",{"paper_id":17543,"author_seq":203,"given_name":4606,"surname":17561,"affiliation":63,"orcid":63},"Mukuta",{"paper_id":17543,"author_seq":188,"given_name":3520,"surname":17563,"affiliation":63,"orcid":63},"Uehara",{"paper_id":17543,"author_seq":172,"given_name":3526,"surname":17565,"affiliation":63,"orcid":63},"Harada","Vision-and-Language (V&L) models depend on large-scale, high-quality datasets, yet most resources are English-centric, and existing Japanese V&L datasets face a fundamental trade-off: manually annotated corpora offer quality but limited scale, translated datasets introduce unnatural phrasing and cultural bias, and web-crawled collections achieve scale but suffer from noise and poor grounding. 
To resolve this trade-off, we propose DEJIMA, a novel pipeline whose key idea is detection-guided LLM refinement: object detection first extracts visually verifiable evidence (labels and bounding boxes), then an LLM generates or refines Japanese text conditioned on this evidence, ensuring both factual grounding and linguistic naturalness without costly human annotation. Using this pipeline, we build two resources: an image–caption dataset (DEJIMA-Cap) and a VQA dataset (DEJIMA-VQA), each containing approximately 3.88M image–text pairs—over 20 times larger than existing Japanese V&L datasets. Human evaluations demonstrate that DEJIMA achieves substantially higher Japaneseness and linguistic naturalness than translation- or annotation-based baselines, while maintaining factual correctness comparable to human-annotated corpora. Models trained on DEJIMA show consistent improvements across multiple Japanese multimodal benchmarks, confirming that culturally grounded, large-scale resources play a key role in enhancing model performance. All pipeline components are commercially licensed, and we publicly release the dataset and metadata to support further research and applications. 
Our project page is available at https:\u002F\u002Fmil-tokyo.github.io\u002FDEJIMA-dataset\u002F.",{"paper_id":17568,"title":17569,"year":7,"month":188,"day":63,"doi":17570,"resource_url":17571,"first_page":17572,"last_page":17573,"pdf_url":17574,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17575,"paper_type":860,"authors":17576,"abstract":17585},"lrec2026-main-745","CLEVR-3D-DeRef","10.63317\u002F4hw2eqvxuhuf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-745","9490","9503","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.745.pdf","martin-etal-2026-clevr",[17577,17579,17582],{"paper_id":17568,"author_seq":247,"given_name":17578,"surname":3843,"affiliation":63,"orcid":63},"Mary Lynn",{"paper_id":17568,"author_seq":232,"given_name":17580,"surname":17581,"affiliation":63,"orcid":63},"Martha","Palmer",{"paper_id":17568,"author_seq":218,"given_name":17583,"surname":17584,"affiliation":63,"orcid":63},"Maria Leonor","Pacheco","Vision-language models (VLMs) often struggle to interpret spatial referring expressions that require relational reasoning rather than reliance on surface-level cues. These models frequently identify referents through explicit visual attributes such as color or shape, rather than understanding spatial relationships (e.g., \"to the left of the red cube\"). To systematically analyze these limitations, we introduce CLEVR-3D-DeRef, a synthetic and extensible benchmark dataset modeled after CLEVR-Ref+, designed to evaluate spatial reasoning in multi-modal systems. CLEVR-3D-DeRef extends the original framework by incorporating depth information for 3D spatial reasoning, introducing de-identified context-dependent referring expressions that require relational inference to disambiguate referent objects, and expanding the range of spatial relations beyond the original four. 
We further extend our dataset by producing expressions with and without ordinal language and diversifying the language and structure of expressions while preserving meaning.",{"paper_id":17587,"title":17588,"year":7,"month":188,"day":63,"doi":17589,"resource_url":17590,"first_page":17591,"last_page":17592,"pdf_url":17593,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17594,"paper_type":860,"authors":17595,"abstract":17602},"lrec2026-main-746","Bridging Text-to-Sign Translation via Codebook-Oriented Pretraining","10.63317\u002F2s9976y7ibcu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-746","9504","9513","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.746.pdf","phuangchoke-etal-2026-bridging",[17596,17599],{"paper_id":17587,"author_seq":247,"given_name":17597,"surname":17598,"affiliation":63,"orcid":63},"Ninlawat","Phuangchoke",{"paper_id":17587,"author_seq":232,"given_name":17600,"surname":17601,"affiliation":63,"orcid":63},"Chantri","Polprasert","Sign Language Production (SLP), the automatic translation from spoken to sign languages, faces several challenges due to the intricate mapping between linguistic semantics and the spatial–temporal motion domain. Existing SLP methods employing a transformer model with a Vector Quantization (VQ) method exhibit poor translation performance due to weak semantic alignment between the codebook and the text representation. In this work, we propose a novel text-to-sign translation based on model pretraining, which enhances semantic alignment by inheriting codebook-oriented prior knowledge from masked self-supervised models. 
Our approach involves two stages: (i) transforming sign language into discrete values by employing VQ with masked self-attention learning to create pre-tasks that bridge the semantic gap between text and codebook representations, (ii) constructing an end-to-end architecture with an encoder-decoder-like structure that inherits the parameters of the model from the first stage. The integration of these designs forms a robust sign language representation and significantly improves the translation model, which surpasses prior baselines.",{"paper_id":17604,"title":17605,"year":7,"month":188,"day":63,"doi":17606,"resource_url":17607,"first_page":17608,"last_page":17609,"pdf_url":17610,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17611,"paper_type":860,"authors":17612,"abstract":17618},"lrec2026-main-747","A Resource and Evaluation Method for Phonological Continuity in Japanese Sign Language","10.63317\u002F4p22nojyxbxa","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-747","9514","9524","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.747.pdf","inoue-etal-2026-resource",[17613,17615,17617],{"paper_id":17604,"author_seq":247,"given_name":17614,"surname":17288,"affiliation":63,"orcid":63},"Jundai",{"paper_id":17604,"author_seq":232,"given_name":2790,"surname":17616,"affiliation":63,"orcid":63},"Hara",{"paper_id":17604,"author_seq":218,"given_name":892,"surname":5289,"affiliation":63,"orcid":63},"Computational models for sign language processing often represent phonological components as categories. This approach, however, does not adequately capture the continuous nature of sign articulation, obscuring nuanced phonetic variation. Furthermore, the field has lacked resources and standardized methods to evaluate a model’s ability to represent this continuity. In this work, we address these limitations. 
First, we introduce the JSL Ordered Triplet Dataset, a new manually-annotated resource designed to benchmark the modeling of gradual phonological progressions in Japanese Sign Language. Second, we propose a learning framework that reframes the task from classification to ranking, using Positive-Unlabeled (PU) learning to optimize the Area Under the ROC Curve (AUC). Our intrinsic evaluation on the new dataset shows that the learned continuous embeddings significantly outperform a cross-entropy baseline in ordering intermediate forms, improving the average accuracy on the continuity ranking task across phonological components from 81.52% to 91.71%. These embeddings also maintain strong discriminative power for standard component classification. This work provides the community with a valuable resource and a method for learning and evaluating more linguistically-grounded representations of sign language.",{"paper_id":17620,"title":17621,"year":7,"month":188,"day":63,"doi":17622,"resource_url":17623,"first_page":17624,"last_page":17625,"pdf_url":17626,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17627,"paper_type":860,"authors":17628,"abstract":17636},"lrec2026-main-748","Sentiment Analysis of German Sign Language Fairy Tales","10.63317\u002F3cyfzw6vs9oe","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-748","9525","9534","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.748.pdf","nunnari-etal-2026-sentiment",[17629,17632,17634],{"paper_id":17620,"author_seq":247,"given_name":17630,"surname":17631,"affiliation":63,"orcid":63},"Fabrizio","Nunnari",{"paper_id":17620,"author_seq":232,"given_name":17633,"surname":10214,"affiliation":63,"orcid":63},"Siddhant",{"paper_id":17620,"author_seq":218,"given_name":4377,"surname":17635,"affiliation":63,"orcid":63},"Gebhard","We present a dataset and a model for sentiment analysis of German sign language (DGS) fairy tales. 
First, we perform sentiment analysis for three levels of valence (negative, neutral, positive) on German fairy tales text segments using four large language models (LLMs) and majority voting, reaching an inter-annotator agreement of 0.781 Krippendorff’s alpha. Second, we extract face and body motion features from each corresponding DGS video segment using MediaPipe. Finally, we train an explainable model (based on XGBoost) to predict negative, neutral or positive sentiment from video features. Results show an average balanced accuracy of 0.631. A thorough analysis of the most important features reveals that, in addition to eyebrows and mouth motion on the face, also the motion of hips, elbows, and shoulders considerably contributes to the discrimination of the conveyed sentiment, indicating an equal importance of face and body for sentiment communication in sign language.",{"paper_id":17638,"title":17639,"year":7,"month":188,"day":63,"doi":17640,"resource_url":17641,"first_page":17642,"last_page":17643,"pdf_url":17644,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17645,"paper_type":860,"authors":17646,"abstract":17658},"lrec2026-main-749","A Critical Study of Automatic Evaluation in Sign Language 
Translation","10.63317\u002F4n2sooe4fb2i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-749","9535","9548","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.749.pdf","yazdani-etal-2026-critical",[17647,17650,17653,17654,17657],{"paper_id":17638,"author_seq":247,"given_name":17648,"surname":17649,"affiliation":63,"orcid":63},"Shakib","Yazdani",{"paper_id":17638,"author_seq":232,"given_name":17651,"surname":17652,"affiliation":63,"orcid":63},"Yasser","HAMIDULLAH",{"paper_id":17638,"author_seq":218,"given_name":5732,"surname":9572,"affiliation":63,"orcid":63},{"paper_id":17638,"author_seq":203,"given_name":17655,"surname":17656,"affiliation":63,"orcid":63},"Eleftherios","Avramidis",{"paper_id":17638,"author_seq":188,"given_name":3659,"surname":3660,"affiliation":63,"orcid":63},"Automatic evaluation metrics are crucial for advancing sign language translation (SLT). Current SLT evaluation metrics, such as BLEU and ROUGE, are only text-based, and it remains unclear to what extent text-based metrics can reliably capture the quality of SLT outputs. To address this gap, we investigate the limitations of text-based SLT evaluation metrics by analyzing six metrics, including BLEU, chrF, and ROUGE, as well as BLEURT on the one hand, and large language model (LLM)-based evaluators such as G-Eval and GEMBA zero-shot direct assessment on the other hand. Specifically, we assess the consistency and robustness of these metrics under three controlled conditions: paraphrasing, hallucinations in model outputs, and variations in sentence length. Our analysis highlights the limitations of lexical overlap metrics and demonstrates that while LLM-based evaluators better capture semantic equivalence often missed by conventional metrics, they can also exhibit bias toward LLM-paraphrased translations. 
Moreover, although all metrics are able to detect hallucinations, BLEU tends to be overly sensitive, whereas BLEURT and LLM-based evaluators are comparatively lenient toward subtle cases. This motivates the need for multimodal evaluation frameworks that extend beyond text-based metrics to enable a more holistic assessment of SLT outputs.",{"paper_id":17660,"title":17661,"year":7,"month":188,"day":63,"doi":17662,"resource_url":17663,"first_page":17664,"last_page":17665,"pdf_url":17666,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17667,"paper_type":860,"authors":17668,"abstract":17676},"lrec2026-main-750","How Much Data Is Enough Data? A New Motion Capture Corpus for Probabilistic Sign Language Generation","10.63317\u002F5pmyrs7f9o33","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-750","9549","9558","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.750.pdf","klezovich-etal-2026-how",[17669,17670,17672,17675],{"paper_id":17660,"author_seq":247,"given_name":2742,"surname":17051,"affiliation":63,"orcid":63},{"paper_id":17660,"author_seq":232,"given_name":8211,"surname":17671,"affiliation":63,"orcid":63},"Mesch",{"paper_id":17660,"author_seq":218,"given_name":17673,"surname":17674,"affiliation":63,"orcid":63},"Gustav Eje","Henter",{"paper_id":17660,"author_seq":203,"given_name":3601,"surname":17059,"affiliation":63,"orcid":63},"We present a new 4.1 hours long high-quality motion capture sign language dataset for Swedish Sign Language — STS Mocap v1. The dataset consists of high quality multimodal data: body tracked with markers, fingers tracked with Manus Quantum Metagloves, face tracked with iPhone LiveLink app in MetaHuman Animator mode, and corresponding textual sentence translation to spoken Swedish. With the help of this dataset, we show that four hours of motion capture data is enough for generative modeling of sign language conditioned on 2D pose. 
In comparison, training the same flow-matching model on only 30 minutes of this data, which is a common size for sign language motion capture datasets, shows a significant degradation in the quality of the synthesized data.",{"paper_id":17678,"title":17679,"year":7,"month":188,"day":63,"doi":17680,"resource_url":17681,"first_page":17682,"last_page":17683,"pdf_url":17684,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17685,"paper_type":860,"authors":17686,"abstract":17693},"lrec2026-main-751","Decomposing Sign Language Movements: A Multi-Band Visualization Method for Articulatory Analysis","10.63317\u002F32sdurbs4fio","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-751","9559","9568","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.751.pdf","sevilla-etal-2026-decomposing",[17687,17690],{"paper_id":17678,"author_seq":247,"given_name":17688,"surname":17689,"affiliation":63,"orcid":63},"Antonio F. G.","Sevilla",{"paper_id":17678,"author_seq":232,"given_name":17691,"surname":17692,"affiliation":63,"orcid":63},"José María","Lahoz-Bengoechea","Understanding the structure of sign language movements requires methods that can isolate and analyze the hierarchical and simultaneous nature of sign articulation. We present a method for tracking and visualizing sign language movements that progressively isolates dependent movements within the articulatory chain: hand rotation from arm displacement and finger movement from hand movement. Using MediaPipe hand tracking on ordinary 2D video, we decompose motion into separate gestural components and compute velocity and direction for each articulator. We present these movement channels in a time-aligned multi-band visualization that reveals temporal structure, bimanual synchronization patterns, and the coordination of different articulatory components. 
An interactive web-based viewer synchronizes the visualization with video, enabling researchers to efficiently explore movement patterns and their relationship to signing. We demonstrate the method with examples from isolated signs and continuous signing, showing how it reveals patterns that are difficult to observe in raw video, including bimanual coordination, internal movements, and the distinction between linguistic and non-linguistic segments. This approach provides accessible tools for empirical investigation of rhythmic and prosodic patterns in sign languages.",{"paper_id":17695,"title":17696,"year":7,"month":188,"day":63,"doi":17697,"resource_url":17698,"first_page":17699,"last_page":17700,"pdf_url":17701,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17702,"paper_type":860,"authors":17703,"abstract":17710},"lrec2026-main-752","Implicit Bias in Peer Review: Through the Lens of Language Abstraction","10.63317\u002F4uxwwzgswmqc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-752","9569","9580","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.752.pdf","zhang-etal-2026-implicit",[17704,17706,17708],{"paper_id":17695,"author_seq":247,"given_name":17705,"surname":1519,"affiliation":63,"orcid":63},"Xulang",{"paper_id":17695,"author_seq":232,"given_name":17707,"surname":12080,"affiliation":63,"orcid":63},"Rui",{"paper_id":17695,"author_seq":218,"given_name":2022,"surname":17709,"affiliation":63,"orcid":63},"Cambria","Peer review is essential for the scholarly publishing process. However, its credibility is increasingly brought to questions. Bias is one of the aspects worthy of investigation. Existing research mostly focuses on predefined, explicit bias types, which are insufficient for analyzing the myriad of implicit biases in peer review. 
Thus, we proposed to study the bias in peer review through the lens of language abstraction, informed by the cognitive theories which suggest that frequency of abstraction in descriptions plays a latent yet important role in bias transmission. Hence, we trained a model to assess the abstraction level of text, and applied it to a review dataset to examine the connection between abstraction and the implicit biases in peer reviews. Results show that there are indeed observable quantitative differences in the abstraction use of reviews recommending to reject versus recommending to accept. Furthermore, reviews for the rejected papers tend to be more abstract than ones for the accepted papers, indicating possible transmission of implicit bias. To the best of our knowledge, our study is the first to study generalized Linguistic Intergroup Bias in the academic text domain.",{"paper_id":17712,"title":17713,"year":7,"month":188,"day":63,"doi":17714,"resource_url":17715,"first_page":17716,"last_page":17717,"pdf_url":17718,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17719,"paper_type":860,"authors":17720,"abstract":17736},"lrec2026-main-753","The PARLO Dementia Corpus: A German Multi-Center Resource for Alzheimer's 
Disease","10.63317\u002F5eo6ayamaqnq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-753","9581","9591","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.753.pdf","braun-etal-2026-parlo",[17721,17724,17726,17728,17731,17733],{"paper_id":17712,"author_seq":247,"given_name":17722,"surname":17723,"affiliation":63,"orcid":63},"Franziska","Braun",{"paper_id":17712,"author_seq":232,"given_name":8040,"surname":17725,"affiliation":63,"orcid":63},"Witzl",{"paper_id":17712,"author_seq":218,"given_name":2175,"surname":17727,"affiliation":63,"orcid":63},"Hönig",{"paper_id":17712,"author_seq":203,"given_name":17729,"surname":17730,"affiliation":63,"orcid":63},"Elmar","Nöth",{"paper_id":17712,"author_seq":188,"given_name":995,"surname":17732,"affiliation":63,"orcid":63},"Bocklet",{"paper_id":17712,"author_seq":172,"given_name":17734,"surname":17735,"affiliation":63,"orcid":63},"Korbinian","Riedhammer","Early and accessible detection of Alzheimer’s disease (AD) remains a major challenge, as current diagnostic methods often rely on costly and invasive biomarkers. Speech and language analysis has emerged as a promising non-invasive and scalable approach to detecting cognitive impairment, but research in this area is hindered by the lack of publicly available datasets, especially for languages other than English. This paper introduces the PARLO Dementia Corpus (PDC), a new multi-center, clinically validated German resource for AD collected across nine academic memory clinics in Germany. The dataset comprises speech recordings from individuals with AD-related mild cognitive impairment and mild to moderate dementia, as well as cognitively healthy controls. Speech was elicited using a standardized test battery of eight neuropsychological tasks, including confrontation naming, verbal fluency, word repetition, picture description, story reading, and recall tasks. 
In addition to audio recordings, the dataset includes manually verified transcriptions and detailed demographic, clinical, and biomarker metadata. Baseline experiments on ASR benchmarking, automated test evaluation, and LLM-based classification illustrate the feasibility of automatic, speech-based cognitive assessment and highlight the diagnostic value of recall-driven speech production. The PDC thus establishes the first publicly available German benchmark for multi-modal and cross-lingual research on neurodegenerative diseases.",{"paper_id":17738,"title":17739,"year":7,"month":188,"day":63,"doi":17740,"resource_url":17741,"first_page":17742,"last_page":17743,"pdf_url":17744,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":17745,"bibkey":17746,"paper_type":860,"authors":17747,"abstract":17757},"lrec2026-main-754","Lexical and Discourse Semantics in a Reading-time Corpus of English","10.63317\u002F5hcb799tf3uq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-754","9592","9601","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.754.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.754_OptionalSupplementaryMaterial.zip","dotlacil-etal-2026-lexical",[17748,17750,17753,17755],{"paper_id":17738,"author_seq":247,"given_name":5422,"surname":17749,"affiliation":63,"orcid":63},"Dotlacil",{"paper_id":17738,"author_seq":232,"given_name":17751,"surname":17752,"affiliation":63,"orcid":63},"Laia Colina","Fortuny",{"paper_id":17738,"author_seq":218,"given_name":3446,"surname":17754,"affiliation":63,"orcid":63},"Kloostra",{"paper_id":17738,"author_seq":203,"given_name":1915,"surname":17756,"affiliation":63,"orcid":63},"Bos","We present a novel language resource that combines a reading-time corpus, constructed in psycholinguistics, with rich lexical, compositional, and discourse meaning representation annotations. 
While existing psycholinguistic corpora typically provide morphological and syntactic annotations, no comparable corpora with comprehensive semantic information have been made available until now. We enriched the UCL corpus (361 sentences of self-paced reading, eye-tracking, and EEG data) with annotations in the style of the Parallel Meaning Bank (PMB) project, including WordNet synsets, VerbNet thematic roles, Combinatory Categorial Grammar (CCG) parses, and Discourse Representation Theory (DRT) structures. We demonstrate the utility of this resource through two case studies examining (1) encoding interference effects due to gender similarity and (2) integration costs in semantic role assignment. Both studies reveal processing patterns consistent with established psycholinguistic theories and\u002For previous findings. This resource fills a significant gap in psycholinguistic research, enabling the evaluation of semantic processing theories on naturalistic corpus data and extending the existing pool of annotated reading-time corpora. 
It should be useful to psycholinguists, as well as to cognitive scientists interested in language processing.",{"paper_id":17759,"title":17760,"year":7,"month":188,"day":63,"doi":17761,"resource_url":17762,"first_page":17763,"last_page":17764,"pdf_url":17765,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17766,"paper_type":860,"authors":17767,"abstract":17774},"lrec2026-main-755","Semantic Capacity in Language Learners and LLMs: A Case Study of Quantifier Scope","10.63317\u002F43c9u8ugd6m7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-755","9602","9617","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.755.pdf","fang-etal-2026-semantic",[17768,17771,17772],{"paper_id":17759,"author_seq":247,"given_name":17769,"surname":17770,"affiliation":63,"orcid":63},"Shaohua","Fang",{"paper_id":17759,"author_seq":232,"given_name":8131,"surname":3446,"affiliation":63,"orcid":63},{"paper_id":17759,"author_seq":218,"given_name":10609,"surname":17773,"affiliation":63,"orcid":63},"Cong","This study investigates the semantic capacity of large language models (LLMs) through the lens of quantifier scope interpretation. Sentences containing multiple quantifiers often give rise to interpretive ambiguities, and the range of available readings can vary across languages. Adopting a cross-linguistic perspective, we examine how LLMs interpret quantifier scope in English and Chinese, using model-generated probabilities to assess the relative likelihood of competing interpretations. Human similarity (HS) scores were used to quantify the extent to which LLMs emulate human performance across language groups. Results reveal that most LLMs prefer the surface scope interpretations, aligning with human tendencies, while only some differentiate between English and Chinese in the inverse scope preferences, reflecting human-similar patterns. 
HS scores highlight variability in LLMs’ approximation of human behavior, but their overall potential to align with humans is notable. Linguistic identity, instantiated through monolingual and bilingual personas of English or Chinese, was found to influence LLM behavior. Differences in model architecture, scale, and particularly models’ pre-training data language background, significantly influence how closely LLMs approximate human quantifier scope interpretations.",{"paper_id":17776,"title":17777,"year":7,"month":188,"day":63,"doi":17778,"resource_url":17779,"first_page":17780,"last_page":17781,"pdf_url":17782,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":17783,"bibkey":17784,"paper_type":860,"authors":17785,"abstract":17792},"lrec2026-main-756","How Long Does a Quick Kiss Take? Studying Event Duration of Light Verb Constructions Using Explicit Word Embeddings","10.63317\u002F5gsno5o8o3ve","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-756","9618","9634","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.756.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.756_OptionalSupplementaryMaterial.zip","huybrecht-etal-2026-how",[17786,17789],{"paper_id":17776,"author_seq":247,"given_name":17787,"surname":17788,"affiliation":63,"orcid":63},"Lin de","Huybrecht",{"paper_id":17776,"author_seq":232,"given_name":17790,"surname":17791,"affiliation":63,"orcid":63},"Geraint A.","Wiggins","Psycholinguistic research indicates that choosing one syntactic construction over another to describe an event can influence its perceived duration: Light Verb Constructions (LVCs) such as punctive events in count syntax (to give a kiss) and durative events in mass syntax (to do research) are perceived as taking less time than their Full Verb Constructions (FVCs; to kiss and to research). 
Similar computational results were achieved using BERT embeddings to semantically project events onto a one-dimensional Duration scale. We reproduce and further develop this experiment with explicit word embeddings from our own co-occurrence count-based vector space. By semantically projecting 158 LVC-FVC pairs onto our Duration scale, we find that LVCs are modelled as significantly shorter than FVCs. However, we do not find an overall statistically significant difference in duration between sentences containing the target LVCs and FVCs. We demonstrate that semantic properties observed in human experiments and in BERT embeddings can also be modelled using explicit word embeddings, which have the advantage of being fully transparent. However, using transcripts from spoken conversations can be challenging when studying a specific construction: optimising the extraction of sentences containing the target expressions and composition of their meanings are to be addressed in future work.",{"paper_id":17794,"title":17795,"year":7,"month":188,"day":63,"doi":17796,"resource_url":17797,"first_page":17798,"last_page":17799,"pdf_url":17800,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17801,"paper_type":860,"authors":17802,"abstract":17806},"lrec2026-main-757","Disambiguation of Emotion Annotations by Contextualizing Events in Plausible Narratives","10.63317\u002F2agma6tpnh8h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-757","9635","9656","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.757.pdf","schaefer-etal-2026-disambiguation",[17803,17805],{"paper_id":17794,"author_seq":247,"given_name":1521,"surname":17804,"affiliation":63,"orcid":63},"Schaefer",{"paper_id":17794,"author_seq":232,"given_name":1064,"surname":11820,"affiliation":63,"orcid":63},"Ambiguity in emotion analysis stems both from potentially missing information and the subjectivity of interpreting a text. 
The latter did receive substantial attention, but can we fill missing information to resolve ambiguity? We address this question by developing a method to automatically generate reasonable contexts for an otherwise ambiguous classification instance. These generated contexts may act as illustrations of potential interpretations by different readers, as they can fill missing information with their individual world knowledge. This task to generate plausible narratives is a challenging one: We combine techniques from short story generation to achieve coherent narratives. The resulting dataset of Emotional BackStories, EBS, allows for the first comprehensive and systematic examination of contextualized emotion analysis. We conduct automatic and human annotation and find that the generated contextual narratives do indeed clarify the interpretation of specific emotions. Particularly relief and sadness benefit from our approach, while joy does not require the additional context we provide.",{"paper_id":17808,"title":17809,"year":7,"month":188,"day":63,"doi":17810,"resource_url":17811,"first_page":17812,"last_page":17813,"pdf_url":17814,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17815,"paper_type":860,"authors":17816,"abstract":17821},"lrec2026-main-758","Identifying Contexts of Distress in College Students' Reddit Posts: A Comparative Study of Classical NLP and Large Language Models","10.63317\u002F2k99z869ni4v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-758","9657","9668","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.758.pdf","graff-etal-2026-identifying",[17817,17820],{"paper_id":17808,"author_seq":247,"given_name":17818,"surname":17819,"affiliation":63,"orcid":63},"Carine","Graff",{"paper_id":17808,"author_seq":232,"given_name":9739,"surname":9740,"affiliation":63,"orcid":63},"Mental health is a salient and growing societal concern among college students. 
Social media platforms such as Reddit offer a rich source of data regarding how students talk about their mental health, and NLP tools may potentially assist in identifying when a student is struggling. In this paper, we investigate how different NLP tools can be used to extract context surrounding college students expressions of distress. We construct a novel dataset from Reddit posts (College Distress on Reddit, or CDR), and examine the \"classical NLP pipeline\", and modern generative LLMs on this data. Our dataset exploration is conducted in parallel with, and contrasted against the Dreaddit dataset to examine cross-domain variation. Results show that standard or \"classical\" NLP tools extract a limited number of concrete entities, whereas generative models can infer more nuanced causes. However, LLMs struggle with knowledge extraction in specific content areas. Our work shows how important it is to be wary of LLMs, especially in mental health contexts.",{"paper_id":17823,"title":17824,"year":7,"month":188,"day":63,"doi":17825,"resource_url":17826,"first_page":17827,"last_page":17828,"pdf_url":17829,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17830,"paper_type":860,"authors":17831,"abstract":17839},"lrec2026-main-759","TiC-MuFormer: Time-Aware Caption-Integrated Multimodal Transformers for User-Level Mental Health Modeling","10.63317\u002F4pyjbjj5hsvt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-759","9669","9677","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.759.pdf","tsoumplekas-etal-2026-tic",[17832,17834,17836],{"paper_id":17823,"author_seq":247,"given_name":2565,"surname":17833,"affiliation":63,"orcid":63},"Tsoumplekas",{"paper_id":17823,"author_seq":232,"given_name":3351,"surname":17835,"affiliation":63,"orcid":63},"Spyridis",{"paper_id":17823,"author_seq":218,"given_name":17837,"surname":17838,"affiliation":63,"orcid":63},"Vasileios","Argyriou","User-level 
affective modeling from social media requires integrating heterogeneous signals that unfold over time. While prior work has focused predominantly on textual analysis, visually expressed affect and temporal posting patterns also carry important psychological cues. However, these modalities are difficult to combine in practice due to sparse emotional evidence, asynchronous posting behavior, and frequent semantic misalignment between images and accompanying text. This paper introduces TiC-MuFormer, a time-enriched caption-integrated multimodal transformer that addresses these challenges by verbalizing visual content through image captioning before fusion and injecting temporal structure prior to cross-modal attention, enabling user trajectories to be modeled in a time-aware semantic space. We instantiate the method on a mental health detection task and demonstrate that it achieves state-of-the-art results across all user-level metrics, outperforming both unimodal and multimodal baselines. Ablation studies further show that temporal coverage, batch size and encoder choice jointly influence downstream accuracy, underscoring the importance of aligned temporal and semantic representations. 
Overall, this work highlights caption-guided temporal multimodality as a principled modeling strategy for general affective or psychiatric risk inference in social platforms.",{"paper_id":17841,"title":17842,"year":7,"month":188,"day":63,"doi":17843,"resource_url":17844,"first_page":17845,"last_page":17846,"pdf_url":17847,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17848,"paper_type":860,"authors":17849,"abstract":17861},"lrec2026-main-760","Improving Neural Argumentative Stance Classification in Controversial Topics with Emotion-Lexicon Features","10.63317\u002F2e6be7q3bnzh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-760","9678","9691","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.760.pdf","abkenar-etal-2026-improving",[17850,17853,17855,17856,17857,17858],{"paper_id":17841,"author_seq":247,"given_name":17851,"surname":17852,"affiliation":63,"orcid":63},"Mohammad Yeghaneh","Abkenar",{"paper_id":17841,"author_seq":232,"given_name":17854,"surname":3676,"affiliation":63,"orcid":63},"Weixing",{"paper_id":17841,"author_seq":218,"given_name":15369,"surname":15370,"affiliation":63,"orcid":63},{"paper_id":17841,"author_seq":203,"given_name":11601,"surname":11602,"affiliation":63,"orcid":63},{"paper_id":17841,"author_seq":188,"given_name":1709,"surname":15482,"affiliation":63,"orcid":63},{"paper_id":17841,"author_seq":172,"given_name":17859,"surname":17860,"affiliation":63,"orcid":63},"Panagiotis","Ioannidis","Argumentation mining comprises several subtasks, among which stance classification focuses on identifying the standpoint expressed in an argumentative text toward a specific target topic. While arguments—especially about controversial topics—often appeal to emotions, most prior work has not systematically incorporated explicit, fine-grained emotion analysis to improve performance on this task. 
In particular, prior research on stance classification has predominantly utilized non-argumentative texts and has been restricted to specific domains or topics, limiting generalizability. We work on five datasets from diverse domains encompassing a range of controversial topics and present an approach for expanding the Bias-Corrected NRC Emotion Lexicon using DistilBERT embeddings, which we feed into a Neural Argumentative Stance Classification model. Our method systematically expands the emotion lexicon through contextualized embeddings to identify emotionally charged terms not previously captured in the lexicon. Our expanded NRC lexicon (eNRC) improves over the baseline across all five datasets (up to +6.2 percentage points in F1 score), outperforms the original NRC on four datasets (up to +3.0), and surpasses the LLM-based approach on nearly all corpora. We provide all resources—including eNRC, the adapted corpora, and model architecture—to enable other researchers to build upon our work",{"paper_id":17863,"title":17864,"year":7,"month":188,"day":63,"doi":17865,"resource_url":17866,"first_page":17867,"last_page":17868,"pdf_url":17869,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17870,"paper_type":860,"authors":17871,"abstract":17879},"lrec2026-main-761","Emotion Transcription in Conversation: A Benchmark for Capturing Subtle and Complex Emotional States through Natural 
Language","10.63317\u002F29vy7k4asj77","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-761","9692","9709","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.761.pdf","tanaka-etal-2026-emotion",[17872,17874,17876,17878],{"paper_id":17863,"author_seq":247,"given_name":17873,"surname":2766,"affiliation":63,"orcid":63},"Yoshiki",{"paper_id":17863,"author_seq":232,"given_name":17875,"surname":17563,"affiliation":63,"orcid":63},"Ryuichi",{"paper_id":17863,"author_seq":218,"given_name":17877,"surname":17288,"affiliation":63,"orcid":63},"Koji",{"paper_id":17863,"author_seq":203,"given_name":4571,"surname":4572,"affiliation":63,"orcid":63},"Emotion Recognition in Conversation (ERC) is critical for enabling natural human-machine interactions. However, existing methods predominantly employ categorical or dimensional emotion annotations, which often fail to adequately represent complex, subtle, or culturally specific emotional nuances. To overcome this limitation, we propose a novel task named Emotion Transcription in Conversation (ETC). This task focuses on generating natural language descriptions that accurately reflect speakers’ emotional states within conversational contexts. To address the ETC, we constructed a Japanese dataset comprising text-based dialogues annotated with participants’ self-reported emotional states, described in natural language. The dataset also includes emotion category labels for each transcription, enabling quantitative analysis and its application to ERC. We benchmarked baseline models, finding that while fine-tuning on our dataset enhances model performance, current models still struggle to infer implicit emotional states. The ETC task will encourage further research into more expressive emotion understanding in dialogue. 
The dataset is publicly available at https:\u002F\u002Fgithub.com\u002FUEC-InabaLab\u002FETCDataset.",{"paper_id":17881,"title":17882,"year":7,"month":188,"day":63,"doi":17883,"resource_url":17884,"first_page":17885,"last_page":17886,"pdf_url":17887,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17888,"paper_type":860,"authors":17889,"abstract":17899},"lrec2026-main-762","SETUP: Sentence-level English-To-Uniform Meaning Representation Parser","10.63317\u002F3mhzy8cuhkaw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-762","9710","9721","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.762.pdf","markle-etal-2026-setup",[17890,17893,17896],{"paper_id":17881,"author_seq":247,"given_name":17891,"surname":17892,"affiliation":63,"orcid":63},"Emma","Markle",{"paper_id":17881,"author_seq":232,"given_name":17894,"surname":17895,"affiliation":63,"orcid":63},"Javier Gutierrez","Bach",{"paper_id":17881,"author_seq":218,"given_name":17897,"surname":17898,"affiliation":63,"orcid":63},"Shira","Wein","Uniform Meaning Representation (UMR) is a novel graph-based semantic representation which captures the core meaning of a text, with flexibility incorporated into the annotation schema such that the breadth of the world’s languages can be annotated (including low-resource languages). While UMR shows promise in enabling language documentation, improving low-resource language technologies, and adding interpretability, the downstream applications of UMR can only be fully explored when text-to-UMR parsers enable the automatic large-scale production of accurate UMR graphs at test time. Prior work on text-to-UMR parsing is limited to date. In this paper, we introduce two methods for English text-to-UMR parsing, one of which fine-tunes existing parsers for Abstract Meaning Representation and the other, which leverages a converter from Universal Dependencies, using prior work as a baseline. 
Our best-performing model, which we call SETUP, achieves an AnCast score of 84 and a SMATCH++ score of 91, indicating substantial gains towards automatic UMR parsing.",{"paper_id":17901,"title":17902,"year":7,"month":188,"day":63,"doi":17903,"resource_url":17904,"first_page":17905,"last_page":17906,"pdf_url":17907,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17908,"paper_type":860,"authors":17909,"abstract":17916},"lrec2026-main-763","This One or That One? A Study on Accessibility via Demonstratives with Multimodal Large Language Models","10.63317\u002F29f29zththay","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-763","9722","9732","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.763.pdf","wang-etal-2026-this",[17910,17911,17914],{"paper_id":17901,"author_seq":247,"given_name":2998,"surname":3676,"affiliation":63,"orcid":63},{"paper_id":17901,"author_seq":232,"given_name":17912,"surname":17913,"affiliation":63,"orcid":63},"Emmanuele","Chersoni",{"paper_id":17901,"author_seq":218,"given_name":17915,"surname":1837,"affiliation":63,"orcid":63},"Chu-Ren","Accessibility refers to the ease with which a speaker can acquire an object, and it is often conveyed through demonstrative pronouns like \"this\" and \"that\", indicating proximal or distal objects. Most importantly, accessibility also involves perspective shifts, which are essential for understanding differing viewpoints. In this case study, we adopt an evaluation dataset with a pair-to-pair question structure for referent identification based on demonstratives. Our experiments show that current Multimodal Large Language Models (MLLMs) exhibit markedly low performance in accessibility tasks requiring perspective shifts, with accuracies around 2.33% (Chinese) and 1.83% (English). 
Moreover, models struggle with qualitative characteristics and frame-based reasoning, often failing to apply implicit contextual rules unless explicitly encoded in training data. These limitations suggest that MLLMs rely heavily on surface co-occurrence instead of truly grounded, embodied experience. Our evaluation framework provides a robust lens revealing that MLLMs lack both self-other distinction—an essential aspect of self-awareness—and the embodied cognition necessary for reliable performance in practical embodied AI applications.",{"paper_id":17918,"title":17919,"year":7,"month":188,"day":63,"doi":17920,"resource_url":17921,"first_page":17922,"last_page":17923,"pdf_url":17924,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17925,"paper_type":860,"authors":17926,"abstract":17941},"lrec2026-main-764","AMR Parsing beyond English: An Experiment on Bulgarian, French, Hungarian and Ukrainian","10.63317\u002F2qb7sj4u3jn4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-764","9733","9744","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.764.pdf","mitov-etal-2026-amr",[17927,17930,17933,17936,17939,17940],{"paper_id":17918,"author_seq":247,"given_name":17928,"surname":17929,"affiliation":63,"orcid":63},"Ivaylo","Mitov",{"paper_id":17918,"author_seq":232,"given_name":17931,"surname":17932,"affiliation":63,"orcid":63},"Tadzhat","Marharian",{"paper_id":17918,"author_seq":218,"given_name":17934,"surname":17935,"affiliation":63,"orcid":63},"Zsofia F.","Hauk",{"paper_id":17918,"author_seq":203,"given_name":17937,"surname":17938,"affiliation":63,"orcid":63},"Samba","FALL",{"paper_id":17918,"author_seq":188,"given_name":5346,"surname":5347,"affiliation":63,"orcid":63},{"paper_id":17918,"author_seq":172,"given_name":3696,"surname":1150,"affiliation":63,"orcid":63},"Under the assumption that the meaning of a sentence should be unchanged when it is translated into another language, recent work 
has developed on cross-lingual semantic parsing in an effort to extend the access to semantic resources beyond English. In this paper, we develop the automatic production of Abstract Meaning Representations (AMR), a graph-based semantic formalism, for four languages – Bulgarian, French, Hungarian and Ukrainian. We achieve high-performance on French and Hungarian, and execute, to our knowledge, the first semantic parsing of Bulgarian and Ukrainian on translations of the AMR3.0 corpus (Knight et al., 2020). Furthermore, we perform a complementary experiment on a novel parallel corpus of gold AMR annotations of the first chapter of \"The Adventures of Pinocchio\" in Bulgarian and Ukrainian. The experiment reveals that, despite their above-average performance, the models’ performance decreases when probed on texts outside of the domain of the training data.",{"paper_id":17943,"title":17944,"year":7,"month":188,"day":63,"doi":17945,"resource_url":17946,"first_page":17947,"last_page":17948,"pdf_url":17949,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17950,"paper_type":860,"authors":17951,"abstract":17956},"lrec2026-main-765","Semantic Parsing for Evaluating Large Language Models: Separating Linguistic Abilities with YARN","10.63317\u002F4hqtuv5e47pw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-765","9745","9755","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.765.pdf","vergnette-etal-2026-semantic",[17952,17955],{"paper_id":17943,"author_seq":247,"given_name":17953,"surname":17954,"affiliation":63,"orcid":63},"Rémi de","Vergnette",{"paper_id":17943,"author_seq":232,"given_name":5346,"surname":5347,"affiliation":63,"orcid":63},"We evaluate large language models (LLMs) through semantic parsing into Yarn, a structured meaning representation that distinguishes predicate–argument structure from higher-level linguistic features such as tense, aspect, and modality. 
For evaluation, we employ SmatchY, a fine-grained metric designed to assess different layers of meaning independently. Our experiments test multiple LLMs under varied conditions, including inference modes, linearization formats (JSON and logic-inspired CFG), and the presence or absence of auxiliary supervision via partial semantic parses. Results show that model performance is highly sensitive to both representational design and supervision, with no single configuration consistently outperforming the others. While some models gain from additional semantic information in prompts, others are negatively affected. A layer-wise analysis indicates that surface-level features such as temporality and negation are captured more reliably than deeper semantic phenomena like quantification. Consistent with prior work, our findings highlight the limited capacity of current LLMs to generate fully formal meaning representations.",{"paper_id":17958,"title":17959,"year":7,"month":188,"day":63,"doi":17960,"resource_url":17961,"first_page":17962,"last_page":17963,"pdf_url":17964,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17965,"paper_type":860,"authors":17966,"abstract":17971},"lrec2026-main-766","Two Ojibwe Constraint Grammars: Morphological Disambiguation and Dependency Parsing","10.63317\u002F3icwfyipet43","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-766","9756","9766","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.766.pdf","diederichsen-etal-2026-two",[17967,17969],{"paper_id":17958,"author_seq":247,"given_name":961,"surname":17968,"affiliation":63,"orcid":63},"Diederichsen",{"paper_id":17958,"author_seq":232,"given_name":8040,"surname":17970,"affiliation":63,"orcid":63},"Hammerly","This paper presents the first iteration of two connected Ojibwe constraint grammars, one for morphological disambiguation and one for syntactic parsing. 
Due to the polysynthetic nature of Ojibwe, along with its status as a low-resource language, the disambiguation grammar proves to be an effective and resource-efficient tool for morphological disambiguation, successfully eliminating 32% of redundant readings and fully resolving 41% of ambiguous tokens. The dependency grammar focuses on assigning dependency relations to model argument structure, where the constraint grammar once again proves to be an effective paradigm, with F1 scores of 0.97 for subject and 0.94 for object relations. The rule-based design of both grammars is linguistically informed, allowing for precise modeling of language-specific phenomena such as animacy, obviation, and verb-argument agreement. Applications of the two constraint grammars include building a disambiguated morphologically-tagged corpus of the Ojibwe language and creating a treebank for the Ojibwe language following the widely adopted CoNLL-U format.",{"paper_id":17973,"title":17974,"year":7,"month":188,"day":63,"doi":17975,"resource_url":17976,"first_page":17977,"last_page":17978,"pdf_url":17979,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17980,"paper_type":860,"authors":17981,"abstract":17987},"lrec2026-main-767","Multimodal LLMs Do Not Compose Skills Optimally across Modalities","10.63317\u002F32osokhu3sac","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-767","9767","9789","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.767.pdf","ontalvilla-etal-2026-multimodal",[17982,17984,17986],{"paper_id":17973,"author_seq":247,"given_name":13074,"surname":17983,"affiliation":63,"orcid":63},"Ontalvilla",{"paper_id":17973,"author_seq":232,"given_name":1276,"surname":17985,"affiliation":63,"orcid":63},"Ormazabal",{"paper_id":17973,"author_seq":218,"given_name":16594,"surname":16939,"affiliation":63,"orcid":63},"Skill composition is the ability to combine previously learned skills to solve new tasks. 
As neural networks acquire increasingly complex skills during their pretraining, it is not clear how successfully they can compose them. In this paper, we focus on Multimodal Large Language Models (MLLM), and study their ability to compose skills across modalities. To this end, we design three evaluation tasks which can be solved sequentially composing two modality-dependent skills, and evaluate several open MLLMs under two main settings: i) prompting the model to directly solve the task, and ii) using a two-step cascaded inference approach, which manually enforces the composition of the two skills for a given task. Even with these straightforward compositions, we find that all evaluated MLLMs exhibit a significant cross-modality skill composition gap. To mitigate the aforementioned gap, we explore two alternatives: i) use chain-of-thought prompting to explicitly instruct MLLMs for skill composition and ii) a specific fine-tuning recipe to promote skill composition. Although those strategies improve model performance, they still exhibit significant skill composition gaps, suggesting that more research is needed to improve cross-modal skill composition in MLLMs.",{"paper_id":17989,"title":17990,"year":7,"month":188,"day":63,"doi":17991,"resource_url":17992,"first_page":17993,"last_page":17994,"pdf_url":17995,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":17996,"paper_type":860,"authors":17997,"abstract":18009},"lrec2026-main-768","Code-Switching in End-to-End Automatic Speech Recognition: A Systematic Literature 
Review","10.63317\u002F477upr9ikf9n","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-768","9790","9812","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.768.pdf","agro-etal-2026-code",[17998,17999,18002,18005,18008],{"paper_id":17989,"author_seq":247,"given_name":17417,"surname":17418,"affiliation":63,"orcid":63},{"paper_id":17989,"author_seq":232,"given_name":18000,"surname":18001,"affiliation":63,"orcid":63},"Atharva A.","Kulkarni",{"paper_id":17989,"author_seq":218,"given_name":18003,"surname":18004,"affiliation":63,"orcid":63},"Karima","Kadaoui",{"paper_id":17989,"author_seq":203,"given_name":18006,"surname":18007,"affiliation":63,"orcid":63},"Zeerak","Talat",{"paper_id":17989,"author_seq":188,"given_name":1775,"surname":1776,"affiliation":63,"orcid":63},"Motivated by a growing research interest into automatic speech recognition (ASR), and the growing body of work for languages in which code-switching (CS) often occurs, we present a systematic literature review of code-switching in end-to-end ASR models. We collect and manually annotate papers published in peer reviewed venues. We document the languages considered, datasets, metrics, model choices, and performance, and present a discussion of challenges in end-to-end ASR for code-switching. 
Our analysis thus provides insights on current research efforts and available resources as well as opportunities and gaps to guide future research.",{"paper_id":18011,"title":18012,"year":7,"month":188,"day":63,"doi":18013,"resource_url":18014,"first_page":18015,"last_page":18016,"pdf_url":18017,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18018,"paper_type":860,"authors":18019,"abstract":18028},"lrec2026-main-769","MUStReason: A Benchmark for Diagnosing Pragmatic Reasoning in VideoLMs for Multimodal Sarcasm Detection.","10.63317\u002F5cucfvxymbbv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-769","9813","9829","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.769.pdf","saha-etal-2026-mustreason",[18020,18022,18025,18027],{"paper_id":18011,"author_seq":247,"given_name":18021,"surname":1880,"affiliation":63,"orcid":63},"Anisha",{"paper_id":18011,"author_seq":232,"given_name":18023,"surname":18024,"affiliation":63,"orcid":63},"Varsha","Suresh",{"paper_id":18011,"author_seq":218,"given_name":9732,"surname":18026,"affiliation":63,"orcid":63},"Hospedales",{"paper_id":18011,"author_seq":203,"given_name":2545,"surname":9679,"affiliation":63,"orcid":63},"Sarcasm is a specific type of irony which involves discerning what is said from what is meant. Detecting sarcasm depends not only on the literal content of an utterance but also on non-verbal cues such as speaker’s tonality, facial expressions and conversational context. However, current multimodal models struggle with complex tasks like sarcasm detection, which require identifying relevant cues across modalities and pragmatically reasoning over them to infer the speaker’s intention. To explore these limitations in VideoLMs, we introduce MUStReason, a diagnostic benchmark enriched with annotations of modality-specific relevant cues and underlying reasoning steps to identify sarcastic intent. 
In addition to benchmarking sarcasm classification performance in VideoLMs, using MUStReason we quantitatively and qualitatively evaluate the generated reasoning by disentangling the problem into perception and reasoning and aim to pinpoint the current gaps in these VideoLMs. Furthermore, to facilitate structured pragmatic reasoning, we propose PragCoT, a framework that steers VideoLMs to focus on implied intentions over literal meaning, a property core to detecting sarcasm. Code and dataset are available at https:\u002F\u002Fgithub.com\u002Fanisha0325\u002FMUStReason",{"paper_id":18030,"title":18031,"year":7,"month":188,"day":63,"doi":18032,"resource_url":18033,"first_page":18034,"last_page":18035,"pdf_url":18036,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18037,"paper_type":860,"authors":18038,"abstract":18047},"lrec2026-main-770","Human-Centered Multimodal Fusion for Sexism Detection in Memes with Eye-Tracking, Heart Rate, and EEG Signals","10.63317\u002F2w2vaanu3pe7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-770","9830","9840","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.770.pdf","gabaldn-etal-2026-human",[18039,18042,18044],{"paper_id":18030,"author_seq":247,"given_name":18040,"surname":18041,"affiliation":63,"orcid":63},"Iván Arcos","Gabaldón",{"paper_id":18030,"author_seq":232,"given_name":5735,"surname":18043,"affiliation":63,"orcid":63},"Rosso",{"paper_id":18030,"author_seq":218,"given_name":18045,"surname":18046,"affiliation":63,"orcid":63},"Elena Gomis","Vicent","The automated detection of sexism in memes is a notoriously challenging task due to multimodal ambiguity, cultural nuance, and the use of humor to provide plausible deniability. As a result, content-only models often fail to capture the complexity of human perception. 
To address this fundamental limitation, we introduce and validate a human-centered paradigm that augments standard content features with rich physiological data. We created a novel resource by recording Eye-Tracking (ET), Heart Rate (HR), and Electroencephalography (EEG) from 16 subjects (8 per experiment) while they viewed 3,984 memes from the EXIST 2025 dataset. Our statistical analysis reveals significant physiological differences in how subjects process sexist versus non-sexist content. Sexist memes were associated with higher cognitive load (evidenced by increased fixation counts and longer reaction times), and with differences in EEG spectral power across the Alpha, Beta, and Gamma frequency bands. This pattern, commonly linked in previous research to increased attentional engagement and cognitive effort during visual processing, suggests that sexist memes may elicit more demanding neural activity compared to non-sexist ones. Building on these findings, we propose a novel multimodal fusion model that integrates these physiological signals with enriched textual-visual features derived from a Vision-Language Model (VLM). Our final model achieves an AUC of 0.794 in binary sexism detection, a statistically significant 3.4% improvement over a powerful VLM-based baseline. The fusion of physiological data proves particularly effective for nuanced and ambiguous cases, boosting the F1-score for the most challenging fine-grained category, *Misogyny and Non-Sexual Violence*, by an unprecedented 26.3%. 
Our work demonstrates that human physiological responses provide a robust, objective signal of perception that can significantly enhance the accuracy and human-awareness of automated systems for countering online sexism.",{"paper_id":18049,"title":18050,"year":7,"month":188,"day":63,"doi":18051,"resource_url":18052,"first_page":18053,"last_page":18054,"pdf_url":18055,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18056,"paper_type":860,"authors":18057,"abstract":18068},"lrec2026-main-771","Nos_Brais-GL: A FAIR Galician TTS Corpus for Neural Speech Synthesis","10.63317\u002F2x6ou8cyanmo","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-771","9841","9849","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.771.pdf","vladu-etal-2026-nos_brais",[18058,18059,18062,18065,18067],{"paper_id":18049,"author_seq":247,"given_name":4175,"surname":4176,"affiliation":63,"orcid":63},{"paper_id":18049,"author_seq":232,"given_name":18060,"surname":18061,"affiliation":63,"orcid":63},"Antonio Moscoso","Sánchez",{"paper_id":18049,"author_seq":218,"given_name":18063,"surname":18064,"affiliation":63,"orcid":63},"Carmen","Magariños",{"paper_id":18049,"author_seq":203,"given_name":18066,"surname":4182,"affiliation":63,"orcid":63},"María Perez",{"paper_id":18049,"author_seq":188,"given_name":4178,"surname":4179,"affiliation":63,"orcid":63},"This paper introduces Nos_Brais-GL, a new open-access high-quality Galician speech corpus designed for the development of neural Text-to-Speech (TTS) systems. Nos_Brais-GL contains approximately 18 hours of professionally recorded male speech and a carefully curated set of utterances selected to ensure linguistic variation and phonetic and prosodic richness. 
Beyond its immediate application in synthetic speech generation, Nos_Brais-GL exemplifies good practices in TTS corpus design for lesser-resourced languages, emphasizing methodological transparency, open licensing, and interoperability.",{"paper_id":18070,"title":18071,"year":7,"month":188,"day":63,"doi":18072,"resource_url":18073,"first_page":18074,"last_page":18075,"pdf_url":18076,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18077,"paper_type":860,"authors":18078,"abstract":18087},"lrec2026-main-772","DR-CUP: A Dataset on Real-time Commentary in U.S. Presidential Debates","10.63317\u002F3xv2rb4s2iyn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-772","9850","9860","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.772.pdf","chang-etal-2026-dr",[18079,18081,18083,18085],{"paper_id":18070,"author_seq":247,"given_name":18080,"surname":864,"affiliation":63,"orcid":63},"Yu-Yu",{"paper_id":18070,"author_seq":232,"given_name":18082,"surname":919,"affiliation":63,"orcid":63},"Huan-Wen",{"paper_id":18070,"author_seq":218,"given_name":18084,"surname":1840,"affiliation":63,"orcid":63},"Chung-Chi",{"paper_id":18070,"author_seq":203,"given_name":18086,"surname":3676,"affiliation":63,"orcid":63},"Ming-Hung","Presidential debates are critical platforms for political discourse, yet existing research lacks datasets tailored for analyzing real-time professional commentary. To address this, we introduce the Dataset on Real-time Commentary in U.S. Presidential debates (DR-CUP), which aligns U.S. presidential debate transcripts (2016–2024) with professional commentary and annotations. DR-CUP supports research on commentary understanding, planning, and generation, offering insights into expert analysis and its role in contextualizing complex political discourse. 
In pilot studies, we evaluated state-of-the-art large language models (LLMs), revealing notable performance differences in understanding expert commentary and planning for generating professional commentary. DR-CUP is the first dataset to incorporate real-time cross-document alignment for debate data, providing a comprehensive resource for advancing research in political communication and computational social science.",{"paper_id":18089,"title":18090,"year":7,"month":188,"day":63,"doi":18091,"resource_url":18092,"first_page":18093,"last_page":18094,"pdf_url":18095,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18096,"paper_type":860,"authors":18097,"abstract":18119},"lrec2026-main-773","Russian Generative Spelling, Punctuation and Capitalization Correction","10.63317\u002F2gv3b9npuo2s","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-773","9861","9872","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.773.pdf","martynov-etal-2026-russian",[18098,18101,18104,18107,18110,18113,18116],{"paper_id":18089,"author_seq":247,"given_name":18099,"surname":18100,"affiliation":63,"orcid":63},"Nikita","Martynov",{"paper_id":18089,"author_seq":232,"given_name":18102,"surname":18103,"affiliation":63,"orcid":63},"Danil","Astafurov",{"paper_id":18089,"author_seq":218,"given_name":18105,"surname":18106,"affiliation":63,"orcid":63},"Ulyana","Isaeva",{"paper_id":18089,"author_seq":203,"given_name":18108,"surname":18109,"affiliation":63,"orcid":63},"Ivan Vasil'yevich","Maksimov",{"paper_id":18089,"author_seq":188,"given_name":18111,"surname":18112,"affiliation":63,"orcid":63},"Joqsan","Azocar",{"paper_id":18089,"author_seq":172,"given_name":18114,"surname":18115,"affiliation":63,"orcid":63},"Dmitrii","Kosenko",{"paper_id":18089,"author_seq":155,"given_name":18117,"surname":18118,"affiliation":63,"orcid":63},"Alena","Fenogenova","This paper presents SAGE, an open-access framework that encloses a set of 
models specifically designed for the generative correction of spelling, punctuation, and capitalization errors in Russian. The release includes four models, featuring a Russian-English version and a distilled version for easy use and cost-effectiveness. The models are pre-trained using a sequence-to-sequence approach on artificial errors that mimic human mistakes and fine-tuned on annotated multi-domain texts. A set of carefully engineered auxiliary learning objectives is employed during pre-training to enrich the models with additional semantic and syntactic information. Evaluations indicate that SAGE models, despite having a small number of parameters, outperform top-tier multilingual and Russian-specific large language models, including both closed- and open-source options, and are considered state-of-the-art. We release the online demo powered by a single Nvidia A100 80GB GPU as a Web service, which allows to simultaneously test the most advanced SAGE model of 1.7B parameters, its distilled version and the Russian-English SAGE model.",{"paper_id":18121,"title":18122,"year":7,"month":188,"day":63,"doi":18123,"resource_url":18124,"first_page":18125,"last_page":18126,"pdf_url":18127,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18128,"paper_type":860,"authors":18129,"abstract":18138},"lrec2026-main-774","Using Multimodal and Language-Agnostic Sentence Embeddings for Abstractive Summarization","10.63317\u002F59f6s77tynig","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-774","9873","9883","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.774.pdf","hammoud-etal-2026-multimodal",[18130,18133,18134,18135],{"paper_id":18121,"author_seq":247,"given_name":18131,"surname":18132,"affiliation":63,"orcid":63},"Chaimae Chellaf 
El","Hammoud",{"paper_id":18121,"author_seq":232,"given_name":3730,"surname":3731,"affiliation":63,"orcid":63},{"paper_id":18121,"author_seq":218,"given_name":3733,"surname":3734,"affiliation":63,"orcid":63},{"paper_id":18121,"author_seq":203,"given_name":18136,"surname":18137,"affiliation":63,"orcid":63},"Stéphane","Huet","Abstractive summarization aims to generate concise summaries by creating new sentences, allowing for flexible rephrasing. However, this approach can be vulnerable to inaccuracies, particularly ‘hallucinations’ where the model introduces non-existent information. In this paper, we leverage the use of multimodal and multilingual sentence embeddings derived from pre-trained models such as LaBSE, SONAR, and BGE-M3, and feed them into a modified BART-based French model. A Named Entity Injection mechanism that appends tokenized named entities to the decoder input is introduced, in order to improve the factual consistency of the generated summary. Our novel framework, SBARThez, is applicable to both text and speech inputs and supports cross-lingual summarization; it shows competitive performance relative to token-level baselines, especially for low-resource languages, while generating more concise and abstract summaries.",{"paper_id":18140,"title":18141,"year":7,"month":188,"day":63,"doi":18142,"resource_url":18143,"first_page":18144,"last_page":18145,"pdf_url":18146,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18147,"paper_type":860,"authors":18148,"abstract":18158},"lrec2026-main-775","Gradient-Controlled Decoding: A Safety Guardrail for LLMs with Dual-Anchor 
Steering","10.63317\u002F5axtpujejwcx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-775","9884","9892","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.775.pdf","chiniya-etal-2026-gradient",[18149,18152,18155],{"paper_id":18140,"author_seq":247,"given_name":18150,"surname":18151,"affiliation":63,"orcid":63},"Purva","Chiniya",{"paper_id":18140,"author_seq":232,"given_name":18153,"surname":18154,"affiliation":63,"orcid":63},"Kevin Joseph","Scaria",{"paper_id":18140,"author_seq":218,"given_name":18156,"surname":18157,"affiliation":63,"orcid":63},"Sagar","Chaturvedi","Large language models (LLMs) remain susceptible to jailbreak and direct prompt-injection attacks, yet the strongest defensive filters frequently over- refuse benign queries and degrade user experience. Previous work on prompt injection detection such as, GradSafe, detects unsafe prompts with a single \"accept all\" anchor token, but its threshold is brittle and it offers no deterministic guarantee that harmful content will not be emitted once decoding begins. We introduce Gradient-Controlled Decoding (GCD), a training-free guardrail that combines with both an acceptance anchor (\"Sure\") and refusal anchor (\"Sorry\") tightening the decision boundary and lowering false positives. In the mitigation stage, if a prompt is flagged, GCD preset-injects one or two refusal tokens (\"Sorry, I can’t . . . \") before autoregressive decoding resumes, guaranteeing first- token safety regardless of sampling strategy. On ToxicChat, XSTest-v2, and AdvBench, GCD reduces false positives by 52% vs. GradSafe at comparable recall, lowers attack success rate by up to 20% vs. the strongest decoding-only baseline, adds under 15-20 ms latency on an average on V100 instances, transfers to LLaMA-2-7B, Mixtral-8×7B, and Qwen-2-7B, and requires only 20 template prompts. 
GCD is a lightweight, scalable safety layer for real-time LLM deployment.",{"paper_id":18160,"title":18161,"year":7,"month":188,"day":63,"doi":18162,"resource_url":18163,"first_page":18164,"last_page":18165,"pdf_url":18166,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18167,"paper_type":860,"authors":18168,"abstract":18184},"lrec2026-main-776","The Chronicles of RiDiC: Generating Datasets with Controlled Popularity Distribution for Long-form Factuality Evaluation","10.63317\u002F4iz2mc2bikvt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-776","9893","9904","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.776.pdf","braslavski-etal-2026-chronicles",[18169,18171,18173,18176,18178,18181,18183],{"paper_id":18160,"author_seq":247,"given_name":3469,"surname":18170,"affiliation":63,"orcid":63},"Braslavski",{"paper_id":18160,"author_seq":232,"given_name":18114,"surname":18172,"affiliation":63,"orcid":63},"Iarosh",{"paper_id":18160,"author_seq":218,"given_name":18174,"surname":18175,"affiliation":63,"orcid":63},"Nikita Sergeevich","Sushko",{"paper_id":18160,"author_seq":203,"given_name":3439,"surname":18177,"affiliation":63,"orcid":63},"Sakhovskiy",{"paper_id":18160,"author_seq":188,"given_name":18179,"surname":18180,"affiliation":63,"orcid":63},"Vasily","Konovalov",{"paper_id":18160,"author_seq":172,"given_name":2968,"surname":18182,"affiliation":63,"orcid":63},"Tutubalina",{"paper_id":18160,"author_seq":155,"given_name":869,"surname":8956,"affiliation":63,"orcid":63},"We present a configurable pipeline and the associated code that can be used to generate multilingual sets of entities with specified characteristics, such as domain, geographical location and popularity, using data from Wikipedia and Wikidata. These datasets are intended for evaluating the factuality of LLMs’ long-form generation, thereby complementing evaluation based on short-form QA datasets. 
We present the RiDiC dataset as an example of this approach. RiDiC contains 3,000 entities from three domains – rivers, natural disasters, and car models – spanning different popularity tiers. Each entity is accompanied by its geographical location, English and Chinese names (if available) and relevant English and Chinese Wikipedia content, which is used to evaluate LLMs’ responses. Generations about RiDiC entities were obtained from three LLMs in English and Chinese. These were then evaluated using a third-party factuality checker, which showed that entities from our dataset caused even frontier models to hallucinate. The code, data and generation\u002Fevaluation scripts have been released to enable the approach to be extended to new LLMs, languages and domains.",{"paper_id":18186,"title":18187,"year":7,"month":188,"day":63,"doi":18188,"resource_url":18189,"first_page":18190,"last_page":18191,"pdf_url":18192,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18193,"paper_type":860,"authors":18194,"abstract":18207},"lrec2026-main-777","MeteoGalEus: An Iberian Multilingual Weather Dataset in Galician, Euskera, and Spanish","10.63317\u002F29v6u9pgo67x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-777","9905","9919","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.777.pdf","vivelcouso-etal-2026-meteogaleus",[18195,18198,18201,18203,18204],{"paper_id":18186,"author_seq":247,"given_name":18196,"surname":18197,"affiliation":63,"orcid":63},"Ainhoa","Vivel-Couso",{"paper_id":18186,"author_seq":232,"given_name":18199,"surname":18200,"affiliation":63,"orcid":63},"Nella Zabrina","Pramata",{"paper_id":18186,"author_seq":218,"given_name":1061,"surname":18202,"affiliation":63,"orcid":63},"Robredo",{"paper_id":18186,"author_seq":203,"given_name":1276,"surname":1277,"affiliation":63,"orcid":63},{"paper_id":18186,"author_seq":188,"given_name":18205,"surname":18206,"affiliation":63,"orcid":63},"Jose 
Maria","Alonso-Moral","This paper introduces MeteoGalEus, a multilingual weather dataset that combines meteorological observations from two Spanish regional agencies, Euskalmet and MeteoGalicia. The dataset contains daily records spanning 4 years and 6 months, with aligned observations for both sources. MeteoGalEus captures key meteorological variables including temperature, wind and state of the sky. The dataset is provided in a structured format, facilitating data analysis and integration, with textual forecasts available in the official languages for each region (i.e., Galician and Spanish for MeteoGalicia; Euskera and Spanish for Euskalmet). By merging and harmonizing data from two regional agencies, MeteoGalEus is a unique resource for cross-regional weather analysis and multilingual climate studies. This dataset is suited for tasks requiring high-quality, aligned, and standardized weather data across multiple languages and regions. We conducted baseline experiments using LLaMA-based models in both zero-shot and fine-tuned settings to illustrate the use of MeteoGalEus for natural language generation (NLG). Fine-tuning led to consistent improvements across all metrics, with BERTScore increasing from 0.68 to 0.79, ROUGE from 0.20 to 0.35, and BLEU from 0.02 to 0.17 in the best-performing model. 
The experiments show how MeteoGalEus can be taken as a benchmark for multilingual and cross-regional NLG tasks.",{"paper_id":18209,"title":18210,"year":7,"month":188,"day":63,"doi":18211,"resource_url":18212,"first_page":18213,"last_page":18214,"pdf_url":18215,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18216,"paper_type":860,"authors":18217,"abstract":18224},"lrec2026-main-778","RadTimeline: Timeline Summarization for Longitudinal Radiological Lung Findings","10.63317\u002F32uhj8rx3u8f","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-778","9920","9939","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.778.pdf","zhou-etal-2026-radtimeline",[18218,18220,18221],{"paper_id":18209,"author_seq":247,"given_name":18219,"surname":1039,"affiliation":63,"orcid":63},"Sitong",{"paper_id":18209,"author_seq":232,"given_name":9849,"surname":9850,"affiliation":63,"orcid":63},{"paper_id":18209,"author_seq":218,"given_name":18222,"surname":18223,"affiliation":63,"orcid":63},"Mari","Ostendorf","Tracking findings in longitudinal radiology reports is crucial for accurately identifying disease progression, and the time-consuming process would benefit from automatic summarization. This work introduces a structured summarization task, where we frame longitudinal report summarization as a timeline generation task, with dated findings organized in columns and temporally related findings grouped in rows. This structured summarization format enables straightforward comparison of findings across time and facilitates fact-checking against the associated reports. The timeline is generated using a 3-step LLM process of extracting findings, generating group names, and using the names to group the findings. To evaluate such systems, we create RadTimeline, a timeline dataset focused on tracking lung-related radiologic findings in chest-related imaging reports. 
Experiments on RadTimeline show tradeoffs of different-sized LLMs and prompting strategies. Our results highlight that group name generation as an intermediate step is critical for effective finding grouping. The best configuration has some irrelevant findings but very good recall, and grouping performance is comparable to human annotators.",{"paper_id":18226,"title":18227,"year":7,"month":188,"day":63,"doi":18228,"resource_url":18229,"first_page":18230,"last_page":18231,"pdf_url":18232,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18233,"paper_type":860,"authors":18234,"abstract":18241},"lrec2026-main-779","InstructSum: A Benchmark to Evaluate Instruction-Following Capability of Large Language Models in Summarization","10.63317\u002F4uvygn9qdyem","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-779","9940","9952","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.779.pdf","nishida-etal-2026-instructsum",[18235,18237,18239],{"paper_id":18226,"author_seq":247,"given_name":4445,"surname":18236,"affiliation":63,"orcid":63},"Nishida",{"paper_id":18226,"author_seq":232,"given_name":18238,"surname":18236,"affiliation":63,"orcid":63},"Kyosuke",{"paper_id":18226,"author_seq":218,"given_name":18240,"surname":10869,"affiliation":63,"orcid":63},"Itsumi","Pre-trained large language models (LLMs) align their outputs with user intent through natural language instructions. In the summarization task, conciseness of the output is inherently required, which makes the instruction-following capability of LLMs particularly important. That is, providing supplementary information beyond the instruction can be undesirable. In this study, we introduce a novel benchmark, InstructSum, consisting of 3,309 types of instructions to evaluate the instruction-following capability in the summarization task. 
InstructSum has multiple instructions per source text, and thus it enables the evaluation of how LLMs adjust the content of the summary according to the instructions. Our experiments with six LLM families revealed the challenges that LLMs face in this task. For example, LLMs provide polite and helpful responses with irrelevant information; they go beyond instructions and fail to respond with a concise summary.",{"paper_id":18243,"title":18244,"year":7,"month":188,"day":63,"doi":18245,"resource_url":18246,"first_page":18247,"last_page":18248,"pdf_url":18249,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18250,"paper_type":860,"authors":18251,"abstract":18262},"lrec2026-main-780","NOVELSUM: Evaluating Long-Form Summary Generation for Historical Scandinavian Novels","10.63317\u002F22upgvjw86b9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-780","9953","9963","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.780.pdf","allaith-etal-2026-novelsum",[18252,18253,18255,18258,18260],{"paper_id":18243,"author_seq":247,"given_name":2207,"surname":2726,"affiliation":63,"orcid":63},{"paper_id":18243,"author_seq":232,"given_name":869,"surname":18254,"affiliation":63,"orcid":63},"Conroy",{"paper_id":18243,"author_seq":218,"given_name":18256,"surname":18257,"affiliation":63,"orcid":63},"Kirstine Nielsen","Degn",{"paper_id":18243,"author_seq":203,"given_name":4278,"surname":18259,"affiliation":63,"orcid":63},"Bjerring-Hansen",{"paper_id":18243,"author_seq":188,"given_name":1668,"surname":18261,"affiliation":63,"orcid":63},"Hershcovich","We study long-form summarization of late-19th-century Danish and Norwegian novels and propose NOVELSUM, an evaluation resource and protocol tailored to literary narrative. We use a curated set of historical novels paired with professional reference summaries to establish baselines with long-document encoder–decoder models and prompt-based large-context LLMs. 
We evaluate with automatic metrics, expert human judgments, and LLM-as-judge scoring. Our human study identifies evaluation dimensions and literary facets that achieve substantial inter-annotator agreement and align with scholarly expectations. We further analyze reference-free evaluation, showing when it tracks expert trends and where it fails (notably for factual and setting-related criteria), thereby clarifying its utility when gold references or expert readers are unavailable. Our results benchmark long-context and prompted LLM approaches on historical literary prose and offer a practical path for human-grounded and reference-free assessment.",{"paper_id":18264,"title":18265,"year":7,"month":188,"day":63,"doi":18266,"resource_url":18267,"first_page":18268,"last_page":18269,"pdf_url":18270,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18271,"paper_type":860,"authors":18272,"abstract":18279},"lrec2026-main-781","Evaluating Large Language Models for Text-to-Gloss Translation in Kazakh-Russian Sign Language: A Pilot Study","10.63317\u002F2ikts3xaqget","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-781","9964","9972","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.781.pdf","kozhirbayev-etal-2026-evaluating",[18273,18276],{"paper_id":18264,"author_seq":247,"given_name":18274,"surname":18275,"affiliation":63,"orcid":63},"Zhanibek","Kozhirbayev",{"paper_id":18264,"author_seq":232,"given_name":18277,"surname":18278,"affiliation":63,"orcid":63},"Alfarabi","Imashev","Conceptual glossing involves a systematic linguistic transformation in which the models must preserve meaning, grammatical integrity, and punctuation while turning the real language into a more structured structure. 
The purpose of this study is to assess the accuracy and dependability of glosses produced by these models by juxtaposing them with human-annotated standards, investigating whether the models maintain essential linguistic characteristics. By identifying the strengths and weaknesses of each model, we want to determine which architectures are most suitable for organized language tasks, such as glossing. This may reduce the manual labor required for linguistic annotation by experts while maintaining superior quality outcomes. And help deaf signers with weak reading skills interpret written paragraphs into glosses, making them more comprehensible and naturally looking to them. Text-to-gloss translation converts written or spoken language into sign language glosses, enhancing accessibility for the Deaf and Hard of Hearing (DHH) community. This pilot study evaluates four large language models (LLMs): GPT-4-turbo, Grok 3, Deepseek-V3, and Gemini 20 Flash to generate conceptual glosses in Kazakh-Russian Sign Language (K-RSL), still an under-resourced sign language. Using a dataset of 250 Russian sentences with expert-annotated K-RSL glosses, we assess performance across METEOR, BLEU, BERTScore, and WER. Results show Deepseek-V3 excels on complex texts (METEOR: 0.426 for K-RSL word order, 0.377 for fairytale paragraphs), while Gemini 20 Flash performs strongly on short sentences (METEOR: 0.602). These findings demonstrate LLMs’ potential to automate gloss production, reducing manual annotation and aiding DHH individuals with reading comprehension. Challenges include K-RSL’s unique grammar and limited datasets. 
This is the first study to apply LLMs to K-RSL glossing and examine the potential efficacy of autonomous gloss production.",{"paper_id":18281,"title":18282,"year":7,"month":188,"day":63,"doi":18283,"resource_url":18284,"first_page":18285,"last_page":18286,"pdf_url":18287,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18288,"paper_type":860,"authors":18289,"abstract":18297},"lrec2026-main-782","HotelCheckSpan: A Benchmark Dataset for LLM Faithfulness","10.63317\u002F3x4p5r52zmvw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-782","9973","9987","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.782.pdf","schmidtova-etal-2026-hotelcheckspan",[18290,18292,18294],{"paper_id":18281,"author_seq":247,"given_name":8609,"surname":18291,"affiliation":63,"orcid":63},"Schmidtova",{"paper_id":18281,"author_seq":232,"given_name":5665,"surname":18293,"affiliation":63,"orcid":63},"Dusek",{"paper_id":18281,"author_seq":218,"given_name":18295,"surname":18296,"affiliation":63,"orcid":63},"Saad","Mahamood","Hallucinations are among the most persistent and challenging issues in large language model (LLM) outputs. This particularly holds in domains that combine both objective and subjective content, such as hotel descriptions, that are intended to be enticing advertisements for the hotel. Distinguishing between factual errors and interpretative exaggeration is often subtle, complicating both human and automated evaluation. To address this, we present HotelCheckSpan, the first span-level faithfulness dataset for the hotel domain. Each example aggregates one or more hotel descriptions, and human-annotated summaries are labeled with three error types: Incorrect, Misleading, and Not Checkable. By marking the precise spans where errors occur, the dataset captures fine-grained information about the nature of hallucinations and factual inconsistencies. 
In addition to human annotations, we collect span-level judgments from multiple LLMs, enabling direct human–model comparisons. Our analysis shows that inter-annotator agreement varies substantially across aggregation levels: example-level agreement can mask subtle span-level disagreements, while soft and hard F1 variants highlight discrepancies in both span placement and error categorization. HotelCheckSpan provides a benchmark for studying ambiguity and disagreement, validating automatic faithfulness metrics, and evaluating LLMs as judges, offering a rich resource for research on faithfulness, subjectivity, and annotation practices in mixed-content domains",{"paper_id":18299,"title":18300,"year":7,"month":188,"day":63,"doi":18301,"resource_url":18302,"first_page":18303,"last_page":18304,"pdf_url":18305,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18306,"paper_type":860,"authors":18307,"abstract":18310},"lrec2026-main-783","Procrustes Analysis for Improving Language Model Merging","10.63317\u002F3ywqnrqzviyt","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-783","9988","9998","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.783.pdf","ferret-2026-procrustes",[18308],{"paper_id":18299,"author_seq":247,"given_name":10720,"surname":18309,"affiliation":63,"orcid":63},"Ferret","The availability of many fine-tuned neural language models for different tasks naturally leads to the question of whether it is worthwhile to combine them, particularly through parameter merging, which is the least resource-intensive option. Among the many existing methods, some focus on parameter alignment before actual merging. In this article, we propose a new method within this research area, based on Procrustes analysis. We evaluate this method for merging fine-tuned models for the same task, derived from the same encoder-based model. 
Considering nine tasks from the GLUE benchmark, three Named Entity Recognition tasks, and six reference merging methods, we show that our proposal can improve upon existing merging methods in most tested configurations.",{"paper_id":18312,"title":18313,"year":7,"month":188,"day":63,"doi":18314,"resource_url":18315,"first_page":18316,"last_page":18317,"pdf_url":18318,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18319,"paper_type":860,"authors":18320,"abstract":18330},"lrec2026-main-784","MetaCORA: A Meta-Learned Curriculum for Adversarial and Contrastive Robustness in Speech Recognition","10.63317\u002F55npni82hjf5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-784","9999","10008","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.784.pdf","dai-etal-2026-metacora",[18321,18323,18325,18327],{"paper_id":18312,"author_seq":247,"given_name":18322,"surname":5170,"affiliation":63,"orcid":63},"Yuqian",{"paper_id":18312,"author_seq":232,"given_name":18324,"surname":16220,"affiliation":63,"orcid":63},"Chun Fai",{"paper_id":18312,"author_seq":218,"given_name":18326,"surname":4798,"affiliation":63,"orcid":63},"Ying Ki",{"paper_id":18312,"author_seq":203,"given_name":18328,"surname":18329,"affiliation":63,"orcid":63},"Tsz Ho","Pun","Pre-trained speech models like Whisper demonstrate impressive performance under ideal conditions but still face robustness challenges in low-resource language scenarios. We introduce Meta Curriculum Optimization for Robust ASR (MetaCORA), a novel meta-curriculum adaptive framework that improves speech recognition for low-resource Hong Kong Cantonese by integrating adversarial training with feature contrastive learning. 
Our approach dynamically adjusts three critical hyperparameters: adversarial perturbation magnitude, optimization step size, and contrastive learning temperature, allowing the model to adapt to varying training difficulties throughout the learning process. Unlike traditional meta-learning approaches, our framework does not rely on end-to-end differentiability but instead utilizes validation performance as a signal to guide hyperparameter adjustments. Experimental results demonstrate that our approach achieves lower WER than standard Whisper fine-tuning, commercial speech recognition systems, and LLM-based methods. Ablation studies confirm the necessity of each component, as removing any single element leads to a measurable drop in performance. The model also exhibits robustness under noisy conditions, achieving consistently lower WER than baseline systems. Further analysis shows that MetaCORA effectively compresses the distance between adversarial feature representations while maintaining well-separated class boundaries in the embedding space, providing a mechanistic explanation for its improvement.",{"paper_id":18332,"title":18333,"year":7,"month":188,"day":63,"doi":18334,"resource_url":18335,"first_page":18336,"last_page":18337,"pdf_url":18338,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18339,"paper_type":860,"authors":18340,"abstract":18347},"lrec2026-main-785","Insights from Transfer Learning Experiments with Word-in-Context and Word Sense Disambiguation 
Models","10.63317\u002F2nnxnn5jfjan","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-785","10009","10019","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.785.pdf","mujko-etal-2026-insights",[18341,18344],{"paper_id":18332,"author_seq":247,"given_name":18342,"surname":18343,"affiliation":63,"orcid":63},"Alp","Mujko",{"paper_id":18332,"author_seq":232,"given_name":18345,"surname":18346,"affiliation":63,"orcid":63},"Dominik","Schlechtweg","We investigate the relationship between Word-in-Context (WiC) and Word Sense Disambiguation (WSD) by examining how training on one or both tasks affects performance on the other. Using established English datasets we train a unified sentence transformer (xlm-roberta-large) with target-word highlighting and contrastive loss. Models are evaluated on WiC and WSD benchmarks across single-task, joint, and combined dataset configurations. Results show that joint training consistently improves or maintains WiC performance, particularly in low-resource settings, while WSD benefits mainly when annotated data is limited. Cross-task experiments demonstrate strong transfer: WSD-trained models generalize effectively to WiC, and WiC-trained models outperform baselines on WSD, indicating shared context-sensitive lexical representations. Combining multiple WiC datasets further enhances accuracy and stability. These findings highlight the complementary nature of WiC and WSD and demonstrate that unified training strategies can yield more robust and generalizable sense disambiguation models. 
The results provide practical guidance for designing datasets and models in multilingual and low-resource contexts, emphasizing the value of leveraging shared semantic representations.",{"paper_id":18349,"title":18350,"year":7,"month":188,"day":63,"doi":18351,"resource_url":18352,"first_page":18353,"last_page":18354,"pdf_url":18355,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18356,"paper_type":860,"authors":18357,"abstract":18371},"lrec2026-main-786","Joint Identification and Induction of Semantic Frames with Scalable Semi-Supervised Graph Clustering","10.63317\u002F5q7o3fgim7pb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-786","10020","10030","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.786.pdf","barteld-etal-2026-joint",[18358,18360,18362,18365,18367,18369],{"paper_id":18349,"author_seq":247,"given_name":2109,"surname":18359,"affiliation":63,"orcid":63},"Barteld",{"paper_id":18349,"author_seq":232,"given_name":6140,"surname":18361,"affiliation":63,"orcid":63},"Remus",{"paper_id":18349,"author_seq":218,"given_name":18363,"surname":18364,"affiliation":63,"orcid":63},"Saba","Anwar",{"paper_id":18349,"author_seq":203,"given_name":1296,"surname":18366,"affiliation":63,"orcid":63},"Stawecki",{"paper_id":18349,"author_seq":188,"given_name":869,"surname":18368,"affiliation":63,"orcid":63},"Ziem",{"paper_id":18349,"author_seq":172,"given_name":10524,"surname":18370,"affiliation":63,"orcid":63},"Biemann","Current methods for automatically assigning frames to their evoking words can be divided into frame identification and frame induction. In frame identification, frame names coming from a labeled dataset are assigned to unseen instances, a classical supervised labeling task. However, the training datasets are known to be incomplete in terms of real-world frames, resulting in an issue with potentially new frame labels. 
In frame induction, instances are clustered regarding the frames they evoke, a classical unsupervised clustering task. However, existing training data is not used to identify known frames. To overcome these shortcomings, we propose to use semi-supervised clustering for combined frame identification and frame induction. By using constrained clustering with hard constraints coming from labeled data, the resulting clusters contain only labeled instances with the same label. Thus, frame names can be easily assigned. We show for English and German datasets that using semi-supervised clustering improves the quality of frame induction compared to unsupervised clustering methods and results in notably good performance regarding frame identification.",{"paper_id":18373,"title":18374,"year":7,"month":188,"day":63,"doi":18375,"resource_url":18376,"first_page":18377,"last_page":18378,"pdf_url":18379,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18380,"paper_type":860,"authors":18381,"abstract":18389},"lrec2026-main-787","Low-Rank Compression of Language Models via Differentiable Rank Selection","10.63317\u002F2xbs948bhby9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-787","10031","10045","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.787.pdf","sundrani-etal-2026-low",[18382,18385,18387],{"paper_id":18373,"author_seq":247,"given_name":18383,"surname":18384,"affiliation":63,"orcid":63},"Sidhant","Sundrani",{"paper_id":18373,"author_seq":232,"given_name":7651,"surname":18386,"affiliation":63,"orcid":63},"Tudisco",{"paper_id":18373,"author_seq":218,"given_name":14664,"surname":18388,"affiliation":63,"orcid":63},"Minervini","Approaches for compressing large-language models using low-rank decomposition have made strides, particularly with the introduction of activation and loss-aware SVD, which improves the trade-off between decomposition rank and downstream task performance. 
Despite these advancements, a persistent challenge remains–selecting the optimal ranks for each layer to jointly optimise compression rate and downstream task accuracy. Current methods either rely on heuristics that can yield sub-optimal results due to their limited discrete search space or are gradient-based but are not as performant as heuristic approaches without post-compression fine-tuning. To address these issues, we propose Learning to Low-Rank Compress (LLRC), a gradient-based approach that directly learns the weights of masks that select singular values in a fine-tuning-free setting. Using a calibration dataset, we train only the mask weights to select fewer and fewer singular values while minimising the divergence of intermediate activations from the original model. Our approach outperforms competing methods that similarly require no post-compression fine-tuning across various compression rates on common-sense reasoning and open-domain question-answering tasks. For instance, with a compression rate of 20% on Llama-2-13B, LLRC outperforms the competitive Sensitivity-based Truncation Rank Searching (STRS) on MMLU, BoolQ, and OpenbookQA by 12%, 3.5%, and 4.4%, respectively. Compared to other compression techniques, our approach consistently outperforms fine-tuning-free variants of SVD-LLM and LLM-Pruner across datasets and compression rates. 
Our approach also performs competitively with LLM-Pruner after fine-tuning on Llama-2-7B and Llama-2-13B.",{"paper_id":18391,"title":18392,"year":7,"month":188,"day":63,"doi":18393,"resource_url":18394,"first_page":18395,"last_page":18396,"pdf_url":18397,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18398,"paper_type":860,"authors":18399,"abstract":18405},"lrec2026-main-788","Self-supervised Data Augmentation for Text Classification in Low-Data Settings","10.63317\u002F2zryuih2ucnr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-788","10046","10056","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.788.pdf","ding-etal-2026-self",[18400,18402,18404],{"paper_id":18391,"author_seq":247,"given_name":18401,"surname":12906,"affiliation":63,"orcid":63},"Deyu",{"paper_id":18391,"author_seq":232,"given_name":18403,"surname":3676,"affiliation":63,"orcid":63},"Mengying",{"paper_id":18391,"author_seq":218,"given_name":4651,"surname":15770,"affiliation":63,"orcid":63},"Due to data sparsity and high annotation cost, data augmentation has established itself as an effective tool for boosting model performance on supervised NLP tasks. Where task-agnostic augmentation methods tend to act as simple regularizers for the data, task-aware methods also leverage labels for the generation of data that are most suitable for downstream tasks. While prior work has investigated generation and sampling strategies individually, the potential of a self-supervised approach that leverages multiple pre-trained models in generation and sampling remains underexplored. To address this issue, we present an ensemble-based framework of language models that proposes augmentation candidates and internally reviews their suitability for low-resource text classification tasks. 
We evaluate our model on six classification benchmarks and find that it consistently outperforms state-of-the-art data augmentation baselines in classification accuracy by an average of 0.97 points in low-data scenarios.",{"paper_id":18407,"title":18408,"year":7,"month":188,"day":63,"doi":18409,"resource_url":18410,"first_page":18411,"last_page":18412,"pdf_url":18413,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18414,"paper_type":860,"authors":18415,"abstract":18422},"lrec2026-main-789","Distribution-aware Low-bitwidth Quantization for Large Language Models","10.63317\u002F3mnfp3i37gy2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-789","10057","10070","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.789.pdf","huynh-etal-2026-distribution",[18416,18418,18420],{"paper_id":18407,"author_seq":247,"given_name":18417,"surname":10897,"affiliation":63,"orcid":63},"Bao Tan Duy",{"paper_id":18407,"author_seq":232,"given_name":2796,"surname":18419,"affiliation":63,"orcid":63},"Tsunakawa",{"paper_id":18407,"author_seq":218,"given_name":18421,"surname":18236,"affiliation":63,"orcid":63},"Masafumi","The increasing scale and complexity of large language models (LLMs) present significant computational and memory challenges, limiting their widespread deployment. Post-training quantization (PTQ) has emerged as a key technique for mitigating these challenges without costly retraining. However, compressing models to ultra-low bitwidths (e.g., 2-3 bits) while maintaining accuracy remains a major challenge. 
In this study, we present a comprehensive PTQ framework that addresses this problem by compressing LLM weights through three core innovations: (1) a calibration process guided by Kullback-Leibler divergence minimization to preserve the original weight distribution, (2) a learnable codebook optimization mechanism employing noise substitution for vector quantization to enable robust gradient estimation, and (3) a layer-grouping strategy based on statistical distribution similarity to improve parameter efficiency. Experimental evaluations on large-scale models show that the proposed framework achieves competitive performance compared with state-of-the-art quantization techniques. Importantly, these results are obtained without any post-quantization fine-tuning, highlighting the efficiency and practical applicability of our approach for deploying highly compressed LLMs.",{"paper_id":18424,"title":18425,"year":7,"month":188,"day":63,"doi":18426,"resource_url":18427,"first_page":18428,"last_page":18429,"pdf_url":18430,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18431,"paper_type":860,"authors":18432,"abstract":18442},"lrec2026-main-790","TG-ASR: Translation-Guided Learning with Parallel Gated Cross Attention for Low-Resource Automatic Speech 
Recognition","10.63317\u002F2ne34rzfnfgz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-790","10071","10081","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.790.pdf","yang-etal-2026-tg",[18433,18435,18437,18439,18440,18441],{"paper_id":18424,"author_seq":247,"given_name":18434,"surname":6675,"affiliation":63,"orcid":63},"ChengYeh",{"paper_id":18424,"author_seq":232,"given_name":18436,"surname":3676,"affiliation":63,"orcid":63},"Chien-Chun",{"paper_id":18424,"author_seq":218,"given_name":18438,"surname":1840,"affiliation":63,"orcid":63},"Li-Wei",{"paper_id":18424,"author_seq":203,"given_name":11874,"surname":1359,"affiliation":63,"orcid":63},{"paper_id":18424,"author_seq":188,"given_name":11876,"surname":3676,"affiliation":63,"orcid":63},{"paper_id":18424,"author_seq":172,"given_name":10616,"surname":1840,"affiliation":63,"orcid":63},"Low-resource automatic speech recognition remains a critical challenge due to the scarcity of transcribed data for many languages. Taiwanese Hokkien exemplifies this problem as, although extensive speech content exists in television dramas and online videos, transcriptions are scarce and most available subtitles are in Mandarin. To address this gap, this paper presents TG-ASR for Taiwanese drama speech recognition, a translation-guided ASR framework that leverages multilingual translation embeddings to enhance recognition in low-resource conditions. The framework centers on the parallel gated cross-attention (PGCA) mechanism, which adaptively integrates embeddings from multiple auxiliary languages into the ASR decoder. This mechanism enables robust cross-linguistic semantic guidance while maintaining stable optimization and avoiding interference between languages. To support future research, we release YT-THDC, a 30-hour corpus of Taiwanese drama speech with aligned Mandarin subtitles and manually verified Taiwanese transcriptions. Extensive experiments and analysis identify which 
auxiliary languages most effectively improve Taiwanese ASR, achieving a 13.51% relative reduction in character error rate and demonstrating the potential of translation-guided learning for underrepresented languages in real-world scenarios.",{"paper_id":18444,"title":18445,"year":7,"month":188,"day":63,"doi":18446,"resource_url":18447,"first_page":18448,"last_page":18449,"pdf_url":18450,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18451,"paper_type":860,"authors":18452,"abstract":18464},"lrec2026-main-791","Harnessing Synergy in Context and Emoji for Joint Detection of Harmful Online Content in Multi-turn Conversations","10.63317\u002F4rii4qtzbpew","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-791","10082","10092","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.791.pdf","hu-etal-2026-harnessing",[18453,18455,18458,18459,18462],{"paper_id":18444,"author_seq":247,"given_name":18454,"surname":12243,"affiliation":63,"orcid":63},"Feiyan",{"paper_id":18444,"author_seq":232,"given_name":18456,"surname":18457,"affiliation":63,"orcid":63},"Ciara Anne","Byrne",{"paper_id":18444,"author_seq":218,"given_name":3284,"surname":1039,"affiliation":63,"orcid":63},{"paper_id":18444,"author_seq":203,"given_name":18460,"surname":18461,"affiliation":63,"orcid":63},"Rena","Maycock",{"paper_id":18444,"author_seq":188,"given_name":8134,"surname":18463,"affiliation":63,"orcid":63},"Langan","Detecting harmful content, such as cyberbullying, self-harm, and grooming, in self-generated content or conversations is an emerging research area with significant potential for positive social impact. However, challenges such as the scarcity of real-world conversational data, labor-intensive annotation processes, and inconsistent content policies hinder understanding and evaluating the performance of harmful content detection systems. 
In this study, we utilize openly available forum data to construct conversation proxies, facilitating the analysis and detection of harmful content. We undertook extensive efforts to label the conversational data using a consistent content policy developed by experts, with ten annotators contributing to the labeling process. Our experiments investigated the impact of context window size and found that performance in joint detection improved gradually up to a context window of 16 sentences, after which performance plateaued. Additionally, experiments with emojis demonstrated that using a tokenizer capable of decoding emojis yielded the best performance, while either removing emojis or converting them to text resulted in inferior outcomes.",{"paper_id":18466,"title":18467,"year":7,"month":188,"day":63,"doi":18468,"resource_url":18469,"first_page":18470,"last_page":18471,"pdf_url":18472,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18473,"paper_type":860,"authors":18474,"abstract":18483},"lrec2026-main-792","Dynamic Layer Selection for Efficient Tone Recognition in Self-Supervised Speech Models","10.63317\u002F3hi6ut7agp4y","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-792","10093","10101","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.792.pdf","obiang-etal-2026-dynamic",[18475,18478,18480],{"paper_id":18466,"author_seq":247,"given_name":18476,"surname":18477,"affiliation":63,"orcid":63},"Saint Germes B. Bengono","Obiang",{"paper_id":18466,"author_seq":232,"given_name":13746,"surname":18479,"affiliation":63,"orcid":63},"Tsopze",{"paper_id":18466,"author_seq":218,"given_name":18481,"surname":18482,"affiliation":63,"orcid":63},"Paulin Melatagia","Yonta","Low-resource tonal languages present significant challenges to speech processing technologies, due to limited training data and the critical role of pitch variation in expressing meaning. 
This paper applies established weighted layer combination methods to tone recognition in such languages, with a specific focus on Yoruba and Yemba. Building on our previous work with Wav2vec 2.0 representations and the weighted-sum methodology from Yang et al. (2024), we investigate layer specialisation in the SSA-HuBERT self-supervised speech model for tonal tasks. Our systematic analysis reveals significant performance differences between different layers, with middle layers generally outperforming both lower and upper layers for tonal recognition tasks. While typical approaches only use the output of the last layer, our experiments show that weighted layer combination outperforms the last layer by 20.4% and 15.8% relative improvement in tone error rate (TER) for Yoruba and Yemba, respectively. In addition to performance improvements, our approach provides dramatic computational efficiency gains, reducing the resources required by over 90% compared to evaluating each layer separately. Analysis of the learned layer weights reveals language-specific patterns, with Yoruba favouring middle layers and Yemba giving more weight to early layers. 
These results provide valuable insights into how tonal information is encoded in self-supervised speech models, and demonstrate a practical application of established layer combination methods in low-resource language contexts.",{"paper_id":18485,"title":18486,"year":7,"month":188,"day":63,"doi":18487,"resource_url":18488,"first_page":18489,"last_page":18490,"pdf_url":18491,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18492,"paper_type":860,"authors":18493,"abstract":18503},"lrec2026-main-793","Intent Recognition in Speech-to-Text Processing in the Context of Natural Interaction with Cognitive Assistive Systems","10.63317\u002F2ekx6bohnzso","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-793","10102","10113","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.793.pdf","ensan-etal-2026-intent",[18494,18496,18498,18500],{"paper_id":18485,"author_seq":247,"given_name":1755,"surname":18495,"affiliation":63,"orcid":63},"Ensan",{"paper_id":18485,"author_seq":232,"given_name":18497,"surname":8696,"affiliation":63,"orcid":63},"Magnus",{"paper_id":18485,"author_seq":218,"given_name":961,"surname":18499,"affiliation":63,"orcid":63},"Busch",{"paper_id":18485,"author_seq":203,"given_name":18501,"surname":18502,"affiliation":63,"orcid":63},"Andreas","Wendemuth","This study investigates efficient speech-to-intent recognition for human–robot interaction in elderly-care environments in German, targeting deployment on resource-constrained platforms such as the Jetson AGX Orin. To benchmark performance, we created a domain-specific German dataset with two sub-datasets (PaSID and PaSynTex) that simulate specific nursing home communication scenarios. Two alternative speech-to-intent pipelines were developed and evaluated: a two-stage system combining automatic speech recognition (ASR) with a large language model (LLM), and an end-to-end large audio–language model (LALM) architecture. 
The performance of Whisper-based ASR systems was evaluated across a wide variety of LLMs and several LALMs, comparing intent-classification accuracy, latency, and resource efficiency. The results indicate that optimized ASR + LLM configurations, particularly Whisper Turbo coupled with Phi-3.5-mini or Qwen 2.5-7B, outperform unified LALM approaches while maintaining substantially lower memory and inference costs. Also, the analysis shows that the unified LALM models outperform the two-step integration of ASR + LLM in the same configuration, but at the cost of higher resource utilization, likely due to limited optimization for edge deployment. Overall, the findings provide initial evidence that modular ASR + LLM pipelines provide a more practical solution for real-time, on-device intent recognition in assistive robotics in German, offering an effective trade-off between performance and deployability on resource-constrained platforms.",{"paper_id":18505,"title":18506,"year":7,"month":188,"day":63,"doi":18507,"resource_url":18508,"first_page":18509,"last_page":18510,"pdf_url":18511,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18512,"paper_type":860,"authors":18513,"abstract":18522},"lrec2026-main-794","Merging Continual Pretraining Models for Domain-Specialized LLMs: A Case Study in 
Finance","10.63317\u002F3cyxer74uvyp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-794","10114","10129","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.794.pdf","ueda-etal-2026-merging",[18514,18516,18517,18520],{"paper_id":18505,"author_seq":247,"given_name":10865,"surname":18515,"affiliation":63,"orcid":63},"Ueda",{"paper_id":18505,"author_seq":232,"given_name":1219,"surname":10995,"affiliation":63,"orcid":63},{"paper_id":18505,"author_seq":218,"given_name":18518,"surname":18519,"affiliation":63,"orcid":63},"Hirohiko","Suwa",{"paper_id":18505,"author_seq":203,"given_name":11890,"surname":18521,"affiliation":63,"orcid":63},"Yasumoto","While LLMs excel at general tasks, they struggle in specialized domains like finance, requiring diverse skills in domain knowledge, mathematical reasoning, and multilingual processing. Merging domain-specific Continual Pre-training (CPT) \"experts\" offers a practical alternative to costly and unstable multi-skill training. However, unlike established Supervised Fine-Tuning (SFT) model-based merging, CPT model merging remains largely unexplored. We address this gap by creating financial LLMs from experts in finance, math, and Japanese. We propose a three-stage evaluation focusing on knowledge recovery, complementarity, and emergence, and assess three merging methods (Task Arithmetic, TIES, and DARE-TIES) on a comprehensive financial benchmark curated from 18 tasks across 8 established datasets. Results show that merging an expert with its base model recovers general knowledge lost during CPT, while merging experts improves performance and can yield emergent cross-domain skills. Among the methods, Task Arithmetic performs strongly but is hyperparameter-sensitive, whereas TIES is more robust. Our findings also suggest that while model similarity correlates with merging success, emergent skills depend on more complex factors. 
This work presents the first foundational analysis of CPT model merging, establishing a principled framework and providing clear guidance for building multi-skill LLMs from existing assets.",{"paper_id":18524,"title":18525,"year":7,"month":188,"day":63,"doi":18526,"resource_url":18527,"first_page":18528,"last_page":18529,"pdf_url":18530,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18531,"paper_type":860,"authors":18532,"abstract":18551},"lrec2026-main-795","Phonetic-based Ranking for Improved Pseudo-Labeling in Low-Resource ASR","10.63317\u002F338dnb8n7e85","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-795","10130","10139","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.795.pdf","matassoni-etal-2026-phonetic",[18533,18535,18537,18539,18542,18543,18544,18547,18548,18550],{"paper_id":18524,"author_seq":247,"given_name":2146,"surname":18534,"affiliation":63,"orcid":63},"Matassoni",{"paper_id":18524,"author_seq":232,"given_name":9117,"surname":18536,"affiliation":63,"orcid":63},"Gretter",{"paper_id":18524,"author_seq":218,"given_name":18538,"surname":2894,"affiliation":63,"orcid":63},"Falavigna",{"paper_id":18524,"author_seq":203,"given_name":18540,"surname":18541,"affiliation":63,"orcid":63},"Mohamed Nabih Ali Mohamed","Nawar",{"paper_id":18524,"author_seq":188,"given_name":7692,"surname":4089,"affiliation":63,"orcid":63},{"paper_id":18524,"author_seq":172,"given_name":5045,"surname":5046,"affiliation":63,"orcid":63},{"paper_id":18524,"author_seq":155,"given_name":18545,"surname":18546,"affiliation":63,"orcid":63},"Mauro","Cettolo",{"paper_id":18524,"author_seq":138,"given_name":2146,"surname":5043,"affiliation":63,"orcid":63},{"paper_id":18524,"author_seq":121,"given_name":2548,"surname":18549,"affiliation":63,"orcid":63},"Papi",{"paper_id":18524,"author_seq":104,"given_name":5050,"surname":5051,"affiliation":63,"orcid":63},"The rise of large language models has boosted speech 
and language technologies; however, where transcripts of audio data are limited, the performance of current technology is not yet satisfactory. One common strategy to tackle data scarcity is leveraging pseudo-labels, for example automatically transcribing data with a pre-trained ASR. One critical issue of this approach is assessing the quality of the automatic transcriptions, that may be rather bad for low-resourced languages. While several filtering approaches exist in literature, they typically work with decent pre-trained ASR models but may fail otherwise. In this work we propose a phonetic-based ranking, enabling an effective selection with controllable computational resources; the resulting subset of pseudo-labels serves as additional material for fine-tuning the source ASR models. Experiments on common benchmarks in three low-resource languages demonstrate the effectiveness of the proposed approach, yielding up to a 3-point reduction in WER.",{"paper_id":18553,"title":18554,"year":7,"month":188,"day":63,"doi":18555,"resource_url":18556,"first_page":18557,"last_page":18558,"pdf_url":18559,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18560,"paper_type":860,"authors":18561,"abstract":18569},"lrec2026-main-796","Privacy-Preserving Information Extraction with Local LLMs: A Comparative Study on Dutch Debt Collection Letters","10.63317\u002F3s2rpqsud2um","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-796","10140","10149","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.796.pdf","celep-etal-2026-privacy",[18562,18565,18567],{"paper_id":18553,"author_seq":247,"given_name":18563,"surname":18564,"affiliation":63,"orcid":63},"Beyza","Celep",{"paper_id":18553,"author_seq":232,"given_name":8892,"surname":18566,"affiliation":63,"orcid":63},"Amat-Lefort",{"paper_id":18553,"author_seq":218,"given_name":18568,"surname":13660,"affiliation":63,"orcid":63},"Joost","For individuals in financial 
distress, understanding debt collection letters is critical. These documents are often unstructured, use complex legal language, and contain highly sensitive personal data. Automating information extraction is essential for assisting caseworkers, who currently perform this task manually; a slow and error-prone process. The sensitive nature of this data requires efficient, privacy-preserving, locally-deployed solutions. This paper compares the feasibility of various local NLP models for this task. We evaluated a feature-engineered Conditional Random Field (CRF), a fine-tuned spaCy NER model, and several Large Language Models (LLMs) (1.1B to 14B parameters) on a new synthetic dataset of 1,000 Dutch debt letters. Models were compared using accuracy (F1-score) and deployment metrics (CPU runtime, memory usage). Our results show a clear performance-resource trade-off. Lightweight CRF and spaCy models efficiently extracted structured data but failed in many critical unstructured fields. In contrast, LLM performance scaled directly with model size. The 14B DeepSeek model achieved the highest accuracy (95.2% average F1), successfully handling all field types. In conclusion, larger local LLMs are the most viable solution for accurate, private document processing. 
Alternatively, a hybrid approach using lightweight models for structured data and LLMs only for complex, unstructured fields, would also be adequate.",{"paper_id":18571,"title":18572,"year":7,"month":188,"day":63,"doi":18573,"resource_url":18574,"first_page":18575,"last_page":18576,"pdf_url":18577,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18578,"paper_type":860,"authors":18579,"abstract":18585},"lrec2026-main-797","Forewarned Is Forearmed: When Non-Sequential Embedding Turns into an Anomaly Detector","10.63317\u002F58vxg7q9649q","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-797","10150","10156","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.797.pdf","allesiardo-etal-2026-forewarned",[18580,18583,18584],{"paper_id":18571,"author_seq":247,"given_name":18581,"surname":18582,"affiliation":63,"orcid":63},"Elys","Allesiardo",{"paper_id":18571,"author_seq":232,"given_name":2409,"surname":5782,"affiliation":63,"orcid":63},{"paper_id":18571,"author_seq":218,"given_name":5784,"surname":5785,"affiliation":63,"orcid":63},"This paper offers an in-depth analysis of non-sequential multimodal sentence-level embeddings, with a particular focus on the SONAR model. We demonstrate that certain embedding dimensions are sensitive to perturbations and can serve as indicators of decoding anomalies. By leveraging the consistency between successive encoding and decoding, we successfully build an accurate detector. Additionally, we explore modifying specific dimensions of interest to attempt to correct them. 
This work underscores the importance of understanding and analyzing the embeddings themselves to enhance the reliability of multimodal representations.",{"paper_id":18587,"title":18588,"year":7,"month":188,"day":63,"doi":18589,"resource_url":18590,"first_page":18591,"last_page":18592,"pdf_url":18593,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18594,"paper_type":860,"authors":18595,"abstract":18603},"lrec2026-main-798","A Joint Detection Framework for Latvian Loanwords and Calques Using Monolingual Data","10.63317\u002F2f6e35y4dgkn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-798","10157","10167","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.798.pdf","zhang-etal-2026-joint",[18596,18598,18601],{"paper_id":18587,"author_seq":247,"given_name":18597,"surname":1519,"affiliation":63,"orcid":63},"Yelingyun",{"paper_id":18587,"author_seq":232,"given_name":18599,"surname":18600,"affiliation":63,"orcid":63},"Atis","Kapenieks",{"paper_id":18587,"author_seq":218,"given_name":4854,"surname":18602,"affiliation":63,"orcid":63},"Platonova","Lexical borrowing is pervasive across languages with extensive cultural contact, yet its automatic detection remains challenging for low-resource languages, especially regarding calques. Existing methods depend heavily on bilingual resources and focus almost exclusively on phonological loanwords, leaving structural borrowing phenomena like calques largely unaddressed by automated tools. This paper proposes a novel joint binary classification pipeline based solely on monolingual data and mBERT, introducing the first large-scale annotated Latvian borrowing dataset with over 3,000 manually labeled entries across three categories: loanwords, calques, and local words. The pipeline adopts a staged decision process grounded in language contact theory, separating surface-level loanwords before tackling the more ambiguous calque category. 
Experiments demonstrate that our semi-supervised strategy with pseudo-labeling achieves a macro-F1 of 0.854 on an external test set, outperforming both a direct three-way classifier and a GPT-4o zero-shot baseline. These results establish a performance benchmark for the previously unaddressed task of automatic borrowing detection in Latvian, providing empirical tools for borrowing detection in resource-scarce contexts.",{"paper_id":18605,"title":18606,"year":7,"month":188,"day":63,"doi":18607,"resource_url":18608,"first_page":18609,"last_page":18610,"pdf_url":18611,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18612,"paper_type":860,"authors":18613,"abstract":18667},"lrec2026-main-799","Pantagruel: Unified Self-Supervised Encoders for French Text and Speech","10.63317\u002F573q4exhmpgd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-799","10168","10191","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.799.pdf","le-etal-2026-pantagruel",[18614,18616,18617,18618,18621,18623,18624,18626,18627,18630,18632,18635,18638,18639,18640,18641,18643,18644,18645,18646,18648,18649,18651,18654,18655,18657,18660,18662,18665,18666],{"paper_id":18605,"author_seq":247,"given_name":18615,"surname":6468,"affiliation":63,"orcid":63},"Phuong-Hang",{"paper_id":18605,"author_seq":232,"given_name":5784,"surname":11229,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":218,"given_name":2308,"surname":2309,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":203,"given_name":18619,"surname":18620,"affiliation":63,"orcid":63},"Maryem","Bouziane",{"paper_id":18605,"author_seq":188,"given_name":4846,"surname":18622,"affiliation":63,"orcid":63},"Ghennai",{"paper_id":18605,"author_seq":172,"given_name":2314,"surname":2315,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":155,"given_name":6765,"surname":18625,"affiliation":63,"orcid":63},"Milintsevich",{"paper_id":18605,"author_seq":138,"gi
ven_name":3730,"surname":3731,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":121,"given_name":18628,"surname":18629,"affiliation":63,"orcid":63},"Aidan","Mannion",{"paper_id":18605,"author_seq":104,"given_name":12496,"surname":18631,"affiliation":63,"orcid":63},"Defauw",{"paper_id":18605,"author_seq":87,"given_name":18633,"surname":18634,"affiliation":63,"orcid":63},"Shuyue","Gu",{"paper_id":18605,"author_seq":73,"given_name":18636,"surname":18637,"affiliation":63,"orcid":63},"Alexandre Daniel","Audibert",{"paper_id":18605,"author_seq":55,"given_name":2146,"surname":4117,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":38,"given_name":3733,"surname":3734,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":17,"given_name":2320,"surname":2321,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":2971,"given_name":6140,"surname":18642,"affiliation":63,"orcid":63},"Lalande",{"paper_id":18605,"author_seq":2974,"given_name":2331,"surname":2332,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":857,"given_name":6521,"surname":6522,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":877,"given_name":1219,"surname":10995,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":2984,"given_name":18647,"surname":2312,"affiliation":63,"orcid":63},"Étienne",{"paper_id":18605,"author_seq":2988,"given_name":1938,"surname":2329,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":2992,"given_name":5346,"surname":18650,"affiliation":63,"orcid":63},"Peyrard",{"paper_id":18605,"author_seq":2996,"given_name":18652,"surname":18653,"affiliation":63,"orcid":63},"Solange","Rossato",{"paper_id":18605,"author_seq":3000,"given_name":4797,"surname":6524,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":3004,"given_name":15784,"surname":18656,"affiliation":63,"orcid":63},"Nardy",{"paper_id":18605,"author_seq":3008,"given_name":18658,"surname":18659,"affiliation":63,"orcid":63},"Gilles","Sérasset",{"paper_id":18605,"autho
r_seq":3478,"given_name":2232,"surname":18661,"affiliation":63,"orcid":63},"Segonne",{"paper_id":18605,"author_seq":3482,"given_name":18663,"surname":18664,"affiliation":63,"orcid":63},"Solène","Evain",{"paper_id":18605,"author_seq":3486,"given_name":2317,"surname":2318,"affiliation":63,"orcid":63},{"paper_id":18605,"author_seq":12954,"given_name":1044,"surname":1045,"affiliation":63,"orcid":63},"We release Pantagruel models, a new family of self-supervised encoder models for French text and speech. Instead of predicting modality-tailored targets such as textual tokens or speech units, Pantagruel learns contextualized target representations in the feature space, allowing modality-specific encoders to capture linguistic and acoustic regularities more effectively. Separate models are pre-trained on large-scale French corpora, including Wikipedia, OSCAR and CroissantLLM for text, together with MultilingualLibriSpeech, LeBenchmark, and INA-100k for speech. INA-100k is a newly introduced 100,000-hour corpus of French audio derived from the archives of the Institut National de l’Audiovisuel (INA), the national repository of French radio and television broadcasts, providing highly diverse audio data. We evaluate Pantagruel across a broad range of downstream tasks spanning both modalities, including those from the standard French benchmarks such as FLUE or LeBenchmark. Across these tasks, Pantagruel models show competitive or superior performance compared to strong French baselines such as CamemBERT, FlauBERT, and LeBenchmark2.0, while maintaining a shared architecture that can seamlessly handle either speech or text inputs. 
These results confirm the effectiveness of feature-space self-supervised objectives for French representation learning and highlight Pantagruel as a robust foundation for multimodal speech-text understanding.",{"paper_id":18669,"title":18670,"year":7,"month":188,"day":63,"doi":18671,"resource_url":18672,"first_page":18673,"last_page":18674,"pdf_url":18675,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18676,"paper_type":860,"authors":18677,"abstract":18687},"lrec2026-main-800","Merge and Conquer: Instructing Multilingual Models by Adding Target Language Weights","10.63317\u002F4k6cgmb8djof","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-800","10192","10207","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.800.pdf","valero-etal-2026-merge",[18678,18681,18684,18685,18686],{"paper_id":18669,"author_seq":247,"given_name":18679,"surname":18680,"affiliation":63,"orcid":63},"Eneko","Valero",{"paper_id":18669,"author_seq":232,"given_name":18682,"surname":18683,"affiliation":63,"orcid":63},"Maria Ribalta i","Albado",{"paper_id":18669,"author_seq":218,"given_name":9043,"surname":9044,"affiliation":63,"orcid":63},{"paper_id":18669,"author_seq":203,"given_name":9041,"surname":1180,"affiliation":63,"orcid":63},{"paper_id":18669,"author_seq":188,"given_name":9046,"surname":9047,"affiliation":63,"orcid":63},"Large Language Models (LLMs) remain heavily centered on English, with limited performance in low-resource languages. Existing adaptation approaches, such as continual pre-training, demand significant computational resources. In the case of instructed models, high-quality instruction data is also required, both of which are often inaccessible for low-resource language communities. Under these constraints, model merging offers a lightweight alternative, but its potential in low-resource contexts has not been systematically explored. 
In this work, we explore whether it is possible to transfer language knowledge to an instruction-tuned LLM by merging it with a language-specific base model, thereby eliminating the need of language-specific instructions and repeated fine-tuning processes whenever stronger instructed variants become available. Through experiments covering four Iberian languages (Basque, Catalan, Galician, and Spanish) and two model families, we show that merging enables effective instruction-following behavior in new languages and even supports multilingual capability through the combination of multiple language-specific models. Our results indicate that model merging is a viable and efficient alternative to traditional adaptation methods for low-resource languages, achieving competitive performance while greatly reducing computational cost.",{"paper_id":18689,"title":18690,"year":7,"month":188,"day":63,"doi":18691,"resource_url":18692,"first_page":18693,"last_page":18694,"pdf_url":18695,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18696,"paper_type":860,"authors":18697,"abstract":18700},"lrec2026-main-801","SemiAdapt: Semi-Supervised and Efficient LoRA-Based Domain Adaptation for Low-Resource Irish Machine Translation with Transformers","10.63317\u002F2gfkah2nq6uv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-801","10208","10220","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.801.pdf","mcgiff-etal-2026-semiadapt",[18698,18699],{"paper_id":18689,"author_seq":247,"given_name":8969,"surname":8970,"affiliation":63,"orcid":63},{"paper_id":18689,"author_seq":232,"given_name":8991,"surname":8992,"affiliation":63,"orcid":63},"Fine-tuning is widely used to adapt multilingual Transformer models for machine translation (MT) in specific domains. 
However, full-parameter fine-tuning of large multilingual models with billions of parameters is computationally expensive, thus creating a barrier to entry for researchers working on low-resource tasks such as Irish translation. Parameter-efficient fine-tuning (PEFT) addresses this by updating a fraction of the original model parameters, with the Low-Rank Adaptation approach (LoRA) introducing small, trainable adapter layers. We introduce SemiAdapt-Full and SemiAdapt-LoRA as semi-supervised approaches that leverage inferred domains to improve overall performance in MT. SemiAdapt-LoRA employs dynamic routing at inference time, eliminating the need to load multiple separately fine-tuned models. Instead, a single shared base model is maintained while lightweight domain-specific adapters, updating only 1.39% of the model parameters in our case, are activated dynamically. We demonstrate that SemiAdapt-Full can outperform full-model fine-tuning and SemiAdapt-LoRA can propel PEFT methods to compete with full-model fine-tuning. We further evaluate corpus-level domain fine-tuning and demonstrate that our embedding-based inference methods perform especially well on larger and noisier corpora. Code and training configurations are released to support reproducibility. 
Ultimately, our approach narrows the performance gap between PEFT and full-parameter fine-tuning, offering resource-constrained researchers a computationally efficient alternative.",{"paper_id":18702,"title":18703,"year":7,"month":188,"day":63,"doi":18704,"resource_url":18705,"first_page":18706,"last_page":18707,"pdf_url":18708,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18709,"paper_type":860,"authors":18710,"abstract":18717},"lrec2026-main-802","Data Selection Effects on Self-Supervised Learning of Audio Representations for French Audiovisual Broadcasts","10.63317\u002F4kdn23nttrh4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-802","10221","10232","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.802.pdf","pelloin-etal-2026-data",[18711,18712,18714,18716],{"paper_id":18702,"author_seq":247,"given_name":5784,"surname":11229,"affiliation":63,"orcid":63},{"paper_id":18702,"author_seq":232,"given_name":5037,"surname":18713,"affiliation":63,"orcid":63},"Bekkali",{"paper_id":18702,"author_seq":218,"given_name":18715,"surname":8328,"affiliation":63,"orcid":63},"Reda",{"paper_id":18702,"author_seq":203,"given_name":1061,"surname":11221,"affiliation":63,"orcid":63},"Audio and speech self-supervised encoder models are now widely used for a lot of different tasks. Many of these models are often trained on clean segmented speech content such as LibriSpeech. In this paper, we look into how the pretraining datasets of such SSL (Self-Supervised Learning) models impact their downstream results. We build a large pretraining corpus of highly diverse TV and Radio broadcast audio content, which we describe with automatic tools. We use these annotations to build smaller subsets, which we use to train audio SSL models. Then, we evaluate the models on multiple downstream tasks such as automatic speech recognition, voice activity and music detection, or speaker recognition. 
The results show the potential of pretraining SSL models on diverse audio content without restricting it to speech. We also perform a membership inference attack to evaluate the encoder ability to memorize their training datasets, which highlight the importance of data deduplication. This unified training could bridge speech and music machine learning communities.",{"paper_id":18719,"title":18720,"year":7,"month":188,"day":63,"doi":18721,"resource_url":18722,"first_page":18723,"last_page":18724,"pdf_url":18725,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18726,"paper_type":860,"authors":18727,"abstract":18736},"lrec2026-main-803","SENS-ASR: Semantic Embedding Injection in Neural-transducer for Streaming Automatic Speech Recognition","10.63317\u002F2bpj98q88wzi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-803","10233","10241","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.803.pdf","dkhissi-etal-2026-sens",[18728,18731,18732,18733],{"paper_id":18719,"author_seq":247,"given_name":18729,"surname":18730,"affiliation":63,"orcid":63},"Youness","Dkhissi",{"paper_id":18719,"author_seq":232,"given_name":5784,"surname":5785,"affiliation":63,"orcid":63},{"paper_id":18719,"author_seq":218,"given_name":18581,"surname":18582,"affiliation":63,"orcid":63},{"paper_id":18719,"author_seq":203,"given_name":18734,"surname":18735,"affiliation":63,"orcid":63},"Anthony","Larcher","Many Automatic Speech Recognition (ASR) applications require streaming processing of the audio data. In streaming mode, ASR systems need to start transcribing the input stream before it is complete, i.e., the systems have to process a stream of inputs with a limited (or no) future context. Compared to offline mode, this reduction of the future context degrades the performance of Streaming-ASR systems, especially while working with low-latency constraint. 
In this work, we present SENS-ASR, an approach to enhance the transcription quality of Streaming-ASR by reinforcing the acoustic information with semantic information. This semantic information is extracted from the available past frame-embeddings by a context module. This module is trained using knowledge distillation from a sentence embedding Language Model fine-tuned on the training dataset transcriptions. Experiments on standard datasets show that SENS-ASR significantly improves the Word Error Rate on small-chunk streaming scenarios.",{"paper_id":18738,"title":18739,"year":7,"month":188,"day":63,"doi":18740,"resource_url":18741,"first_page":18742,"last_page":18743,"pdf_url":18744,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18745,"paper_type":860,"authors":18746,"abstract":18752},"lrec2026-main-804","Efficient Financial Language Understanding via Distillation with Synthetic Data","10.63317\u002F3b3zxy5qrw8s","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-804","10242","10254","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.804.pdf","huang-etal-2026-efficient",[18747,18749],{"paper_id":18738,"author_seq":247,"given_name":18748,"surname":1837,"affiliation":63,"orcid":63},"Wen-Fong (Xavier)",{"paper_id":18738,"author_seq":232,"given_name":18750,"surname":18751,"affiliation":63,"orcid":63},"Edwin","Simpson","Large instruction-following models are powerful but costly to deploy, particularly in finance, where labelled data are limited by confidentiality and expert annotation cost. We present an efficient framework for financial sentiment analysis through distillation with synthetic data, transferring knowledge from a large instruction-tuned teacher to compact student models. The framework is designed for low-resource conditions, where a small set of real examples are collected and labelled by hand. 
The framework then clusters the examples and uses the clusters to select seeds for generating synthetic examples via structured few-shot prompting. Experiments show that clustering-based seed selection yields more representative synthetic data than random sampling, enabling compact models to achieve strong performance with minimal supervision. Notably, on a more complex and noisy text domain, the compact model trained on the complete synthetic–seed corpus even outperforms the teacher model, while remaining competitive on formal text. The framework provides a practical route toward resource-efficient domain adaptation in financial NLP with minimal human labelling effort.",{"paper_id":18754,"title":18755,"year":7,"month":188,"day":63,"doi":18756,"resource_url":18757,"first_page":18758,"last_page":18759,"pdf_url":18760,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18761,"paper_type":860,"authors":18762,"abstract":18774},"lrec2026-main-805","Rubric-Guided Fine-tuning of SpeechLLMs for Multi-Aspect, Multi-Rater L2 Reading-Speech Assessment","10.63317\u002F4dgvijh3226x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-805","10255","10265","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.805.pdf","parikh-etal-2026-rubric",[18763,18766,18768,18771],{"paper_id":18754,"author_seq":247,"given_name":18764,"surname":18765,"affiliation":63,"orcid":63},"Aditya Kamlesh","Parikh",{"paper_id":18754,"author_seq":232,"given_name":13880,"surname":18767,"affiliation":63,"orcid":63},"Tejedor-García",{"paper_id":18754,"author_seq":218,"given_name":18769,"surname":18770,"affiliation":63,"orcid":63},"Catia","Cucchiarini",{"paper_id":18754,"author_seq":203,"given_name":18772,"surname":18773,"affiliation":63,"orcid":63},"Helmer","Strik","Reliable and interpretable automated assessment of second-language (L2) speech remains a central challenge, as large speech-language models (SpeechLLMs) often struggle to 
align with the nuanced variability of human raters. To address this, we introduce a rubric-guided reasoning framework that explicitly encodes multi-aspect human assessment criteria: accuracy, fluency, and prosody, while calibrating model uncertainty to capture natural rating variability. We fine-tune the Qwen2-Audio-7B-Instruct model using multi-rater human judgments and develop an uncertainty-calibrated regression approach supported by conformal calibration for interpretable confidence intervals. Our Gaussian uncertainty modeling and conformal calibration approach achieves the strongest alignment with human ratings, outperforming regression and classification baselines. The model reliably assesses fluency and prosody while highlighting the inherent difficulty of assessing accuracy. Together, these results demonstrate that rubric-guided, uncertainty-calibrated reasoning offers a principled path toward trustworthy and explainable SpeechLLM-based speech assessment.",{"paper_id":18776,"title":18777,"year":7,"month":188,"day":63,"doi":18778,"resource_url":18779,"first_page":18780,"last_page":18781,"pdf_url":18782,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18783,"paper_type":860,"authors":18784,"abstract":18799},"lrec2026-main-806","Leveraging Semi-Supervised Learning for Multimodal Hate Speech Data Annotation and Detection","10.63317\u002F4un2wjkpdn2m","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-806","10266","10275","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.806.pdf","rammohan-etal-2026-leveraging",[18785,18788,18790,18792,18794,18796],{"paper_id":18776,"author_seq":247,"given_name":18786,"surname":18787,"affiliation":63,"orcid":63},"Rathi 
Adarshi","Rammohan",{"paper_id":18776,"author_seq":232,"given_name":18789,"surname":11555,"affiliation":63,"orcid":63},"Zhao",{"paper_id":18776,"author_seq":218,"given_name":18345,"surname":18791,"affiliation":63,"orcid":63},"Puchała",{"paper_id":18776,"author_seq":203,"given_name":1313,"surname":18793,"affiliation":63,"orcid":63},"Świderska",{"paper_id":18776,"author_seq":188,"given_name":5040,"surname":18795,"affiliation":63,"orcid":63},"Küster",{"paper_id":18776,"author_seq":172,"given_name":18797,"surname":18798,"affiliation":63,"orcid":63},"Tanja","Schultz","While the Internet and social media have fundamentally transformed our lives, they can also rapidly spread hate speech, i.e., derogatory statements targeting individuals or groups based on their immutable characteristics. Automatic detection systems could help limit this harmful phenomenon. However, the lack of large-scale annotated datasets remains a major bottleneck for developing better algorithms. In this work, we employ semi-supervised learning (SSL) to leverage the advantages of limited labeled data alongside large amounts of unlabeled data. We apply three SSL approaches, Fix-match, Full-match, and All-match learning, to enhance the performance of end-to-end pre-trained speech and text models for hate speech detection. Our findings indicate that SSL methods enhance the performance, achieving F1 scores of 0.851 on speech, 0.957 on text, and 0.959 with multimodal fusion. 
Furthermore, we analyze the impact of different weak augmentation strategies on labeled data and assess the quality of generated pseudo-labels to evaluate their potential use in data annotation.",{"paper_id":18801,"title":18802,"year":7,"month":188,"day":63,"doi":18803,"resource_url":18804,"first_page":18805,"last_page":18806,"pdf_url":18807,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18808,"paper_type":860,"authors":18809,"abstract":18813},"lrec2026-main-807","Lexicalized Constituency Parsing for Middle Dutch: Low-resource Training and Cross-Domain Generalization","10.63317\u002F3bg3jhcbj8in","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-807","10276","10290","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.807.pdf","liang-etal-2026-lexicalized",[18810,18812],{"paper_id":18801,"author_seq":247,"given_name":18811,"surname":9376,"affiliation":63,"orcid":63},"Yiming",{"paper_id":18801,"author_seq":232,"given_name":17770,"surname":18789,"affiliation":63,"orcid":63},"Recent years have seen growing interest in applying neural networks and contextualized word embeddings to the parsing of historical languages. However, most advances have focused on dependency parsing, while constituency parsing for low-resource historical languages like Middle Dutch has received little attention. In this paper, we adapt a transformer-based constituency parser to Middle Dutch, a highly heterogeneous and low-resource language, and investigate methods to improve both its in-domain and cross-domain performance. We show that joint training with higher-resource auxiliary languages increases F1 scores by up to 0.73, with the greatest gains achieved from languages that are geographically and temporally closer to Middle Dutch. 
We further evaluate strategies for leveraging newly annotated data from additional domains, finding that fine-tuning and data combination yield comparable improvements, and our neural parser consistently outperforms the currently used PCFG-based parser for Middle Dutch. We further explore feature-separation techniques for domain adaptation and demonstrate that a minimum threshold of approximately 200 examples per domain is needed to effectively enhance cross-domain performance.",{"paper_id":18815,"title":18816,"year":7,"month":188,"day":63,"doi":18817,"resource_url":18818,"first_page":18819,"last_page":18820,"pdf_url":18821,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18822,"paper_type":860,"authors":18823,"abstract":18830},"lrec2026-main-808","Quantifying the Accuracy and Cost Impact of Design Decisions in Budget-Constrained Agentic LLM Search","10.63317\u002F3wfsiry9yjog","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-808","10291","10300","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.808.pdf","mccleary-etal-2026-quantifying",[18824,18827],{"paper_id":18815,"author_seq":247,"given_name":18825,"surname":18826,"affiliation":63,"orcid":63},"Kyle A.","McCleary",{"paper_id":18815,"author_seq":232,"given_name":18828,"surname":18829,"affiliation":63,"orcid":63},"James M.","Ghawaly","Agentic Retrieval-Augmented Generation (RAG) systems combine iterative search, planning prompts, and retrieval backends, but deployed settings impose explicit budgets on tool calls and completion tokens. We present a controlled measurement study of how search depth, retrieval strategy, and completion budget affect accuracy and cost under fixed constraints. Using Budget-Constrained Agentic Search (BCAS), a model-agnostic evaluation harness that surfaces remaining budget and gates tool use, we run comparisons across six LLMs and three question-answering benchmarks. 
Across models and datasets, accuracy improves with additional searches up to a small cap, hybrid lexical and dense retrieval with lightweight re-ranking produces the largest average gains in our ablation grid, and larger completion budgets are most helpful on HotpotQA-style synthesis. These results provide practical guidance for configuring budgeted agentic retrieval pipelines and are accompanied by reproducible prompts and evaluation settings.",{"paper_id":18832,"title":18833,"year":7,"month":188,"day":63,"doi":18834,"resource_url":18835,"first_page":18836,"last_page":18837,"pdf_url":18838,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18839,"paper_type":860,"authors":18840,"abstract":18851},"lrec2026-main-809","Reason-to-Learn (R2L): Multi-Agent Knowledge Distillation for Lightweight LLMs in Sentiment Analysis","10.63317\u002F3aygmawej3my","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-809","10301","10312","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.809.pdf","tu-etal-2026-reason",[18841,18843,18845,18847,18849],{"paper_id":18832,"author_seq":247,"given_name":18842,"surname":7773,"affiliation":63,"orcid":63},"Le-Huy",{"paper_id":18832,"author_seq":232,"given_name":18844,"surname":2395,"affiliation":63,"orcid":63},"Quan",{"paper_id":18832,"author_seq":218,"given_name":2232,"surname":18846,"affiliation":63,"orcid":63},"NGUYEN",{"paper_id":18832,"author_seq":203,"given_name":8211,"surname":18848,"affiliation":63,"orcid":63},"Bjorklund",{"paper_id":18832,"author_seq":188,"given_name":18850,"surname":5245,"affiliation":63,"orcid":63},"Xuan-Son","Large Language Models (LLMs) boast remarkable capabilities but face deployment challenges due to computational demands. We introduce Reason-to-Learn (R2L), a novel multi-agent collaborative knowledge distillation framework enabling small LLMs to learn from a distributed system of specialized agent models. 
Our architecture employs multiple autonomous teacher agents, each with distinct expertise and reasoning capabilities, coordinated by a meta-agent that orchestrates knowledge synthesis and conflict resolution. Unlike prior methods, our flexible four-phase process (Detection, Processing, Rationale Generation, Aggregation) leverages agent-based communication protocols and consensus mechanisms for cross-architecture knowledge transfer, demonstrated primarily on Vietnamese sentiment analysis. Experimental results are definitive: our lightweight R2L-Students (1-1.5B) consistently outperform the individual specialized agents (Qwen32B, Llama70B) and the GPT-4o meta-agent coordinator, especially on complex ABSA tasks. Ablation studies confirm our multi-agent collaborative approach outperformed traditional fine-tuning and single-agent distillation. Furthermore, R2L enhance generalizability of lightweight LLMs: our Vietnamese-trained student achieves strong zero-shot cross-lingual performance on Swedish ABSA (Svensk ABSAbank-Imm), with Krippendorff’s Alpha scores competitive with the specialized agents. 
R2L offers an efficient path to compact, high-performing specialist models through coordinated multi-agent learning.",{"paper_id":18853,"title":18854,"year":7,"month":188,"day":63,"doi":18855,"resource_url":18856,"first_page":18857,"last_page":18858,"pdf_url":18859,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18860,"paper_type":860,"authors":18861,"abstract":18873},"lrec2026-main-810","PRiSM: Partial Ranking via Inter-layer Semantic Measurement for Efficient Fine-tuning of Language Models","10.63317\u002F3eyz8rr5qun6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-810","10313","10323","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.810.pdf","biswas-etal-2026-prism",[18862,18864,18867,18869,18871],{"paper_id":18853,"author_seq":247,"given_name":18863,"surname":12633,"affiliation":63,"orcid":63},"Aldrin Kabya",{"paper_id":18853,"author_seq":232,"given_name":18865,"surname":18866,"affiliation":63,"orcid":63},"MD","Fahim",{"paper_id":18853,"author_seq":218,"given_name":18868,"surname":13467,"affiliation":63,"orcid":63},"Md. Ashraful",{"paper_id":18853,"author_seq":203,"given_name":18870,"surname":2207,"affiliation":63,"orcid":63},"Amin Ahsan",{"paper_id":18853,"author_seq":188,"given_name":18872,"surname":7714,"affiliation":63,"orcid":63},"Akm Mahbubur","The growing scale of pre-trained language models poses a challenge in fine-tuning for downstream tasks, especially in resource-constrained settings. Recent studies highlight that not all layers in transformer-based language models contribute equally to downstream task performance, giving rise to various partial fine-tuning strategies. However, current methods often introduce significant training overhead or rely on simple heuristics that yield suboptimal performance and poor generalization. 
We propose PRiSM (Partial Ranking via inter-layer Semantic Measurement), a training-free approach for layer-wise partial fine-tuning that leverages the cosine similarity between pre-trained aggregate token representations across layers to identify inter-layer relationships. PRiSM comprises two stages: (i) scoring layers based on their relevance to the task via a single forward pass, and (ii) fine-tuning a subset of block-wise highest-scoring layers, while keeping others frozen. We conduct experiments on 15 diverse NLP datasets, including single-sentence and sentence-pair classification tasks. Our method achieves competitive performance compared to full fine-tuning, with an average training speedup of 1.5× and a reduction of trainable parameters by 75%, and outperforms all the comparative baselines. Additionally, our approach does not cause any notable drop in performance when the domain is changed for the evaluation tasks, demonstrating robust cross-domain generalizability.",{"paper_id":18875,"title":18876,"year":7,"month":188,"day":63,"doi":18877,"resource_url":18878,"first_page":18879,"last_page":18880,"pdf_url":18881,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18882,"paper_type":860,"authors":18883,"abstract":18898},"lrec2026-main-811","SEFL: A Framework for Generating Synthetic Educational Assignment Feedback with LLM Agents","10.63317\u002F3gqx9z5n3zsu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-811","10324","10340","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.811.pdf","zhang-etal-2026-sefl",[18884,18885,18888,18891,18894,18897],{"paper_id":18875,"author_seq":247,"given_name":1518,"surname":1519,"affiliation":63,"orcid":63},{"paper_id":18875,"author_seq":232,"given_name":18886,"surname":18887,"affiliation":63,"orcid":63},"Amalie 
Pernille","Dilling",{"paper_id":18875,"author_seq":218,"given_name":18889,"surname":18890,"affiliation":63,"orcid":63},"Léon","Gondelman",{"paper_id":18875,"author_seq":203,"given_name":18892,"surname":18893,"affiliation":63,"orcid":63},"Niels Erik Ruan","Lyngdorf",{"paper_id":18875,"author_seq":188,"given_name":18895,"surname":18896,"affiliation":63,"orcid":63},"Euan D.","Lindsay",{"paper_id":18875,"author_seq":172,"given_name":1521,"surname":1522,"affiliation":63,"orcid":63},"Providing high-quality feedback on student assignments is crucial for student success, but it is heavily limited by time and budgetary constraints. In this work, we introduce Synthetic Educational Feedback Loops (SEFL), a synthetic data framework designed to generate data that resembles immediate, on-demand feedback at scale without relying on extensive, real-world student assignments and teacher feedback. To obtain this type of data, two large language models (LLMs) operate in a teacher-student role to simulate assignment completion and formative feedback, generating 19.8K synthetic pairs of student work and corresponding critiques and actionable improvements from a teacher. With this data, we fine-tune smaller, more computationally efficient LLMs on these synthetic pairs, enabling them to replicate key features of high-quality, goal-oriented feedback. Through comprehensive evaluations with three LLM judges and three human experts, across a subset of 900 outputs, we demonstrate that SEFL-tuned models outperform both their untuned counterparts and an existing baseline in terms of feedback quality. The potential for societal impact is reinforced by extensive qualitative comments and ratings from human stakeholders — both students and higher education instructors. 
SEFL has the potential to transform feedback processes for higher education and beyond.",{"paper_id":18900,"title":18901,"year":7,"month":188,"day":63,"doi":18902,"resource_url":18903,"first_page":18904,"last_page":18905,"pdf_url":18906,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18907,"paper_type":860,"authors":18908,"abstract":18916},"lrec2026-main-812","LGSE: Lexically Grounded Subword Embedding Initialization for Low-Resource Language Adaptation","10.63317\u002F5i7acx7v348w","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-812","10341","10352","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.812.pdf","teklehaymanot-etal-2026-lgse",[18909,18912,18915],{"paper_id":18900,"author_seq":247,"given_name":18910,"surname":18911,"affiliation":63,"orcid":63},"Hailay Kidu","Teklehaymanot",{"paper_id":18900,"author_seq":232,"given_name":18913,"surname":18914,"affiliation":63,"orcid":63},"Dren","Fazlija",{"paper_id":18900,"author_seq":218,"given_name":4552,"surname":10036,"affiliation":63,"orcid":63},"Adapting pretrained language models to low-resource, morphologically rich languages remains a significant challenge. Existing vocabulary expansion methods typically rely on arbitrarily segmented subword units, resulting in fragmented lexical representations and loss of critical morphological information. To address this limitation, we propose the Lexically Grounded Subword Embedding Initialization (LGSE) framework, which introduces morphologically informed segmentation for initializing embeddings of novel tokens. Instead of using random vectors or arbitrary subwords, LGSE decomposes words into their constituent morphemes and constructs semantically coherent embeddings by averaging pretrained subword or FastText-based morpheme representations. 
When a token cannot be segmented into meaningful morphemes, its embedding is constructed using character n-gram representations to capture structural information. During Language-Adaptive Pretraining, we apply a regularization term that penalizes large deviations of newly introduced embeddings from their initialized values, preserving alignment with the original pretrained embedding space while enabling adaptation to the target language. To isolate the effect of initialization, we retain the original pre-trained model vocabulary and tokenizer and update only the new embeddings during adaptation. We evaluate LGSE on three NLP tasks: Question Answering, Named Entity Recognition, and Text Classification, in two morphologically rich, low-resource languages: Amharic and Tigrinya, where morphological segmentation resources are available. Experimental results show that LGSE consistently outperforms baseline methods across all tasks, demonstrating the effectiveness of morphologically grounded embedding initialization for improving representation quality in underrepresented languages. 
Project resources are available¹.",{"paper_id":18918,"title":18919,"year":7,"month":188,"day":63,"doi":18920,"resource_url":18921,"first_page":18922,"last_page":18923,"pdf_url":18924,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18925,"paper_type":860,"authors":18926,"abstract":18930},"lrec2026-main-813","A Cheap Lunch: Synthetic Annotation With Reduced Human Effort for Medical Text Mining","10.63317\u002F43rc447ycaeu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-813","10353","10364","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.813.pdf","chen-etal-2026-cheap",[18927,18929],{"paper_id":18918,"author_seq":247,"given_name":18928,"surname":1840,"affiliation":63,"orcid":63},"Shutao",{"paper_id":18918,"author_seq":232,"given_name":7672,"surname":7673,"affiliation":63,"orcid":63},"Electronic Health Records are rich resources of patient knowledge and information among which knowledge about the functioning of patients as defined in the International Classification of Functioning (ICF) by the WHO. However, the patient notes have yet to be explored as the knowledge is packaged in sometimes cryptic language exchanged between caretakers. Recent research started to use NLP techniques to extract this knowledge but often requires laborious annotation. In this paper, we report on how the annotation can (partly) be done by a generative LLM, both for ICF categories that were previously manually annotated and for new ICF categories for which there was no annotation. We show that a domain specific encoder finetuned with both manual and synthetic annotations outperforms finetuning with just the manual annotations on a dedicated test set that was adapted for the new categories with minimal manual effort. We also assessed the quality of the synthetic annotations of the training data. 
Our process shows how competitive text classifiers for medical text mining can be developed and extended to new categories with minimal manual effort by experts.",{"paper_id":18932,"title":18933,"year":7,"month":188,"day":63,"doi":18934,"resource_url":18935,"first_page":18936,"last_page":18937,"pdf_url":18938,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18939,"paper_type":860,"authors":18940,"abstract":18948},"lrec2026-main-814","Supervised Contrastive Fine-Tuning for Active Few-Shot Learning","10.63317\u002F5p4u2sjsmrcm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-814","10365","10375","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.814.pdf","zhang-etal-2026-supervised",[18941,18943,18946],{"paper_id":18932,"author_seq":247,"given_name":18942,"surname":1519,"affiliation":63,"orcid":63},"Zirui",{"paper_id":18932,"author_seq":232,"given_name":18944,"surname":18945,"affiliation":63,"orcid":63},"Lei","Ge",{"paper_id":18932,"author_seq":218,"given_name":18947,"surname":14086,"affiliation":63,"orcid":63},"Shengyu","Active Few-Shot Learning (AFSL) is an effective paradigm for improving the performance of large language models under limited annotation budgets. To address the inefficiency of conventional fine-tuning objectives in AFSL, this paper proposes a supervised contrastive fine-tuning framework specifically designed for natural language processing (NLP) text classification tasks. By integrating Supervised Contrastive Learning (SCL) with Hard Negative Mining (HNM), the proposed framework optimizes the embedding space through an enhanced hybrid loss function, thereby improving the utilization efficiency of labeled samples. 
Extensive experiments on five benchmark datasets show that, under a fixed state-of-the-art (SOTA) query strategy, our method consistently outperforms baseline models in text classification performance, and exhibits strong generalizability across different backbone architectures and acquisition functions. These findings demonstrate that optimizing how to learn—through improved learning objectives—provides a complementary direction to existing query strategies in advancing AFSL.",{"paper_id":18950,"title":18951,"year":7,"month":188,"day":63,"doi":18952,"resource_url":18953,"first_page":18954,"last_page":18955,"pdf_url":18956,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18957,"paper_type":860,"authors":18958,"abstract":18967},"lrec2026-main-815","Simulating Student Interactions for Virtual Pretesting with In-Context Learning","10.63317\u002F5m57qocfb6ph","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-815","10376","10389","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.815.pdf","thuy-etal-2026-simulating",[18959,18961,18963,18965],{"paper_id":18950,"author_seq":247,"given_name":1995,"surname":18960,"affiliation":63,"orcid":63},"Thuy",{"paper_id":18950,"author_seq":232,"given_name":1107,"surname":18962,"affiliation":63,"orcid":63},"Benedetto",{"paper_id":18950,"author_seq":218,"given_name":6029,"surname":18964,"affiliation":63,"orcid":63},"Loginova",{"paper_id":18950,"author_seq":203,"given_name":18966,"surname":5599,"affiliation":63,"orcid":63},"Dries F.","Recent research has experimented with using Large Language Models (LLMs) for simulating student responses to exam questions. This approach, known as virtual pretesting, potentially offers a scalable alternative to traditional pretesting, which is costly and time-intensive, by enabling the creation of datasets of virtual students’ responses. 
Prior studies focused on zero-shot role-playing, prompting one LLM to imitate students of different levels, but showed limited alignment with response patterns of real students. This work introduces a framework that improves the alignment of LLM-based student simulations through in-context learning (ICL), leveraging previous question-answer records to provide the model with richer information about students’ skills and misconceptions. Our experiments show that not all models can leverage the additional contextual information. However, a multi-model approach, which combines simulations from several models, significantly improves alignment of the simulated responses when provided with relevant context: we observe a reduction of up to 30% in difficulty estimation RMSE with respect to the non contextual and individual contextual models. Overall, our findings indicate that LLMs can be used with ICL to create synthetic datasets of student responses approximating some patterns of learner behavior, however their ability to align with authentic student performance remains limited.",{"paper_id":18969,"title":18970,"year":7,"month":188,"day":63,"doi":18971,"resource_url":18972,"first_page":18973,"last_page":18974,"pdf_url":18975,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":18976,"paper_type":860,"authors":18977,"abstract":18984},"lrec2026-main-816","An Exploration-Analysis-Disambiguation Reasoning Framework for Word Sense Disambiguation with Low-Parameter LLMs","10.63317\u002F3oun2fvikwt5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-816","10390","10404","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.816.pdf","sumanathilaka-etal-2026-exploration",[18978,18981,18982],{"paper_id":18969,"author_seq":247,"given_name":18979,"surname":18980,"affiliation":63,"orcid":63},"Deshan 
Koshala","Sumanathilaka",{"paper_id":18969,"author_seq":232,"given_name":7301,"surname":15171,"affiliation":63,"orcid":63},{"paper_id":18969,"author_seq":218,"given_name":1296,"surname":18983,"affiliation":63,"orcid":63},"Hough","Word Sense Disambiguation (WSD) remains a key challenge in Natural Language Processing (NLP), especially when dealing with rare or domain-specific senses that are often misinterpreted. While modern high-parameter Large Language Models (LLMs) such as GPT-4-Turbo have shown state-of-the-art WSD performance, their computational and energy demands limit scalability. This study investigates whether low-parameter LLMs (\u003C4B parameters) can achieve comparable results through fine-tuning strategies that emphasize reasoning-driven sense identification. Using the FEWS dataset augmented with semi-automated, rationale-rich annotations, we fine-tune eight small-scale open-source LLMs (e.g. Gemma and Qwen). Our results reveal that Chain-of-Thought (CoT)-based reasoning combined with neighbour-word analysis achieves performance comparable to GPT-4-Turbo in zero-shot settings. Importantly, Gemma-3-4B and Qwen-3-4B models consistently outperform all medium-parameter baselines and state-of-the-art models on FEWS, with robust generalization to unseen senses. Furthermore, evaluation on the unseen \"Fool Me If You Can” dataset confirms strong cross-domain adaptability without task-specific fine-tuning. 
This work demonstrates that with carefully crafted reasoning-centric fine-tuning, low-parameter LLMs can deliver accurate WSD while substantially reducing computational and energy demands.",{"paper_id":18986,"title":18987,"year":7,"month":188,"day":63,"doi":18988,"resource_url":18989,"first_page":18990,"last_page":18991,"pdf_url":18992,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":18993,"bibkey":18994,"paper_type":860,"authors":18995,"abstract":19027},"lrec2026-main-817","Building Effective Japanese Medical LLMs with an Open Recipe for Domain Adaptation through Continued Pre-training","10.63317\u002F47uvbxqph5ph","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-817","10405","10423","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.817.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.817_OptionalSupplementaryMaterial.zip","aizawa-etal-2026-building",[18996,18998,19000,19001,19003,19005,19007,19010,19011,19013,19014,19015,19017,19020,19022,19024],{"paper_id":18986,"author_seq":247,"given_name":2043,"surname":18997,"affiliation":63,"orcid":63},"Aizawa",{"paper_id":18986,"author_seq":232,"given_name":1463,"surname":18999,"affiliation":63,"orcid":63},"Arase",{"paper_id":18986,"author_seq":218,"given_name":9846,"surname":4721,"affiliation":63,"orcid":63},{"paper_id":18986,"author_seq":203,"given_name":19002,"surname":1837,"affiliation":63,"orcid":63},"Jiahao",{"paper_id":18986,"author_seq":188,"given_name":19004,"surname":1837,"affiliation":63,"orcid":63},"Zhiyi",{"paper_id":18986,"author_seq":172,"given_name":19006,"surname":3284,"affiliation":63,"orcid":63},"Junfeng",{"paper_id":18986,"author_seq":155,"given_name":19008,"surname":19009,"affiliation":63,"orcid":63},"Teruhito","Kanazawa",{"paper_id":18986,"author_seq":138,"given_name":2790,"surname":3527,"affiliation":63,"orcid":63},{"paper_id":18986,"author_seq":121,"given_name":19012,"surnam
e":4613,"affiliation":63,"orcid":63},"Kazuma",{"paper_id":18986,"author_seq":104,"given_name":2796,"surname":3504,"affiliation":63,"orcid":63},{"paper_id":18986,"author_seq":87,"given_name":5009,"surname":5010,"affiliation":63,"orcid":63},{"paper_id":18986,"author_seq":73,"given_name":4606,"surname":19016,"affiliation":63,"orcid":63},"Oda",{"paper_id":18986,"author_seq":55,"given_name":19018,"surname":19019,"affiliation":63,"orcid":63},"Yuma","Tsuta",{"paper_id":18986,"author_seq":38,"given_name":7887,"surname":19021,"affiliation":63,"orcid":63},"Wan",{"paper_id":18986,"author_seq":17,"given_name":19023,"surname":6675,"affiliation":63,"orcid":63},"Zhishen",{"paper_id":18986,"author_seq":2971,"given_name":19025,"surname":19026,"affiliation":63,"orcid":63},"Rio","Yokota","In high-stakes domains such as medicine, ensuring transparency of the training corpus is essential, with careful consideration of local healthcare landscapes; however, the majority of existing medical large language models (LLMs) have not disclosed the details of their training corpora. Here, we introduce an open recipe for domain adaptation of LLMs to the Japanese medical domain. We employed fully open-source Japanese general-domain LLMs as base models, whose pre-training datasets are also disclosed. To establish effective corpora for domain adaptation through continued pre-training, we started with small-scale medical datasets and ultimately constructed a medical corpus consisting of 79.6B tokens, incorporating local clinical guidelines, medical textbooks, and other domain-specific resources. The resulting LLM from continued pre-training, namely SIP-med-llm-8x13B, with an active parameter count of 22B, demonstrated favorable accuracy on benchmarks including the Japanese National Medical Examination. This performance was comparable to that of 70B-parameter open-weight models whose construction details remain non-transparent. 
This represents the first case in the Japanese medical field where complete corpus details have been disclosed for fully from-scratch development, providing important insights for future efforts to construct medical LLMs tailored to the specific characteristics of local contexts. The model is available publicly at this Hugging Face repository: https:\u002F\u002Fhuggingface.co\u002FSIP-med-LLM\u002FSIP-jmed-llm-2-8x13b-OP-instruct.",{"paper_id":19029,"title":19030,"year":7,"month":188,"day":63,"doi":19031,"resource_url":19032,"first_page":19033,"last_page":19034,"pdf_url":19035,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19036,"paper_type":860,"authors":19037,"abstract":19049},"lrec2026-main-818","New Encoders for German Trained from Scratch: Comparing ModernGBERT with Converted LLM2Vec Models","10.63317\u002F4s26zh4i323y","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-818","10424","10446","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.818.pdf","wunderle-etal-2026-new",[19038,19040,19042,19044,19047],{"paper_id":19029,"author_seq":247,"given_name":5233,"surname":19039,"affiliation":63,"orcid":63},"Wunderle",{"paper_id":19029,"author_seq":232,"given_name":6272,"surname":19041,"affiliation":63,"orcid":63},"Ehrmanntraut",{"paper_id":19029,"author_seq":218,"given_name":1380,"surname":19043,"affiliation":63,"orcid":63},"Pfister",{"paper_id":19029,"author_seq":203,"given_name":19045,"surname":19046,"affiliation":63,"orcid":63},"Fotis","Jannidis",{"paper_id":19029,"author_seq":188,"given_name":4651,"surname":19048,"affiliation":63,"orcid":63},"Hotho","Encoders remain essential for efficient German NLP and NLU scenarios despite the rise of decoder-only LLMs. This work studies two routes to high-quality German encoders under identical data and training constraints: a) training from scratch and b) converting decoders via LLM2Vec. 
We introduce two resources: ModernGBERT (134M, 1B), fully transparent German encoders in the ModernBERT style, and LLäMmleinVec (120M, 1B, 7B), decoder-to-encoder conversions trained with masked next-token prediction, both undergoing a context extension to 8192 tokens. Across SuperGLEBer, ModernGBERT 1B sets a new state of the art (avg 0.808), surpassing GBERTlarge (+4%) and the seven-times larger converted 7B model (0.787). On German MTEB after supervised fine-tuning, ModernGBERT 1B (0.551) approaches the converted 7B model (0.557). We release all models, checkpoints, datasets, and full training records, and introduce an encoder-adapted QA-NIAH evaluation. All in all, our results provide actionable guidance: when parameter efficiency and latency matter, from-scratch encoders dominate. When a pre-trained decoder exists and compute is limited, conversion offers an effective alternative.",{"paper_id":19051,"title":19052,"year":7,"month":188,"day":63,"doi":19053,"resource_url":19054,"first_page":19055,"last_page":19056,"pdf_url":19057,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19058,"paper_type":860,"authors":19059,"abstract":19066},"lrec2026-main-819","Arabic ChartSumm: An English-to-Arabic Benchmark for Metadata-to-Text Summarization","10.63317\u002F5pxwxs9vtum4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-819","10447","10456","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.819.pdf","elchafei-etal-2026-arabic",[19060,19063],{"paper_id":19051,"author_seq":247,"given_name":19061,"surname":19062,"affiliation":63,"orcid":63},"Passant","Elchafei",{"paper_id":19051,"author_seq":232,"given_name":19064,"surname":19065,"affiliation":63,"orcid":63},"Amany","Fashwan","Generating summaries from chart metadata in Arabic presents unique challenges at the intersection of cross-lingual transfer and data-to-text generation. 
Chart-to-text benchmarks have advanced English-language research, yet Arabic remains without a comparable resource, underscoring its continued underrepresentation in NLP. To cover this gap, we construct the first Arabic ChartSumm benchmark by translating chart metadata and reference summaries from English into Modern Standard Arabic (MSA). Two high-quality machine translation models with contrasting architectures are employed: NLLB-200-distilled-600M, designed for low-resource coverage, and Qwen2.5-1.5B, an open large language model with general multilingual capabilities. A central contribution of this work is a translation quality evaluation that systematically assesses both systems using BLEU, chrF, COMET_ref, and COMET_QE metrics against a Google-Translate Arabic pivot. Results demonstrate that NLLB achieves markedly higher lexical and semantic fidelity. Building on this foundation, we fine-tune two models, mT5 (multilingual) and CAMeL-Lab’s AraBART (Arabic-specific), to generate Arabic summaries from structured chart metadata. 
Experimental results show that AraBART trained on NLLB translations outperforms other configurations, achieving ROUGE-L = 63.8 and BLEU = 33.1, highlighting the strong dependency of downstream summarization quality on translation accuracy and demonstrating its superior capacity for Arabic generation.",{"paper_id":19068,"title":19069,"year":7,"month":188,"day":63,"doi":19070,"resource_url":19071,"first_page":19072,"last_page":19073,"pdf_url":19074,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19075,"paper_type":860,"authors":19076,"abstract":19091},"lrec2026-main-820","Introducing a Bangla Sentence – Gloss Pair Dataset for Bangla Sign Language Translation and Research","10.63317\u002F38qenrwzegr9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-820","10457","10466","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.820.pdf","saha-etal-2026-introducing",[19077,19079,19082,19085,19088],{"paper_id":19068,"author_seq":247,"given_name":19078,"surname":1880,"affiliation":63,"orcid":63},"Neelavro",{"paper_id":19068,"author_seq":232,"given_name":19080,"surname":19081,"affiliation":63,"orcid":63},"Rafi","Shahriyar",{"paper_id":19068,"author_seq":218,"given_name":19083,"surname":19084,"affiliation":63,"orcid":63},"Nafis Ashraf","Roudra",{"paper_id":19068,"author_seq":203,"given_name":19086,"surname":19087,"affiliation":63,"orcid":63},"Saadman","Sakib",{"paper_id":19068,"author_seq":188,"given_name":19089,"surname":19090,"affiliation":63,"orcid":63},"Annajiat Alim","Rasel","Bangla Sign Language (BdSL) translation represents a low-resource NLP task due to the lack of large-scale datasets that address sentence-level translation. Correspondingly, existing research in this field has been limited to word and alphabet level detection. 
In this work, we introduce Bangla-SGP, a novel parallel dataset consisting of 1,000 human-annotated sentence–gloss pairs which was augmented with around 3,000 synthetically generated pairs using syntactic and morphological rules through a rule-based Retrieval-Augmented Generation (RAG) pipeline. The gloss sequences of the spoken Bangla sentences are made up of individual glosses which are Bangla sign supported words and serve as an intermediate representation for a continuous sign. Our dataset consists of 1000 high quality Bangla sentences that are manually annotated into a gloss sequence by a professional signer. The augmentation process incorporates rule-based linguistic strategies and prompt engineering techniques that we have adopted by critically analyzing our human annotated sentence-gloss pairs and by working closely with our professional signer. Furthermore, we fine-tune several transformer-based models such as mBart50, Google mT5, GPT4.1-nano and evaluate their sentence-to-gloss translation performance using BLEU scores, based on these evaluation metrics we compare the model’s gloss-translation consistency across our dataset and the RWTH-PHOENIX-2014T benchmark.",{"paper_id":19093,"title":19094,"year":7,"month":188,"day":63,"doi":19095,"resource_url":19096,"first_page":19097,"last_page":19098,"pdf_url":19099,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19100,"paper_type":860,"authors":19101,"abstract":19110},"lrec2026-main-821","Language Models as Semantic Augmenters for Sequential 
Recommenders","10.63317\u002F2fap9guysbm2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-821","10467","10484","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.821.pdf","valizadeh-etal-2026-language",[19102,19105,19106,19108],{"paper_id":19093,"author_seq":247,"given_name":19103,"surname":19104,"affiliation":63,"orcid":63},"Mahsa","Valizadeh",{"paper_id":19093,"author_seq":232,"given_name":5517,"surname":5518,"affiliation":63,"orcid":63},{"paper_id":19093,"author_seq":218,"given_name":17707,"surname":19107,"affiliation":63,"orcid":63},"Tuo",{"paper_id":19093,"author_seq":203,"given_name":4091,"surname":19109,"affiliation":63,"orcid":63},"Caverlee","Large Language Models (LLMs) excel at capturing latent semantics and contextual relationships across diverse modalities. However, in modeling user behavior from sequential interaction data, performance often suffers when such semantic context is limited or absent. We introduce LaMAR, a LLM-driven semantic enrichment framework designed to enrich such sequences automatically. LaMAR leverages LLMs in a few-shot setting to generate auxiliary contextual signals by inferring latent semantic aspects of a user’s intent and item relationships from existing metadata. These generated signals, such as inferred usage scenarios, item intents, or thematic summaries, augment the original sequences with greater contextual depth. We demonstrate the utility of this generated resource by integrating it into benchmark sequential modeling tasks, where it consistently improves performance. Further analysis shows that LLM-generated signals exhibit high semantic novelty and diversity, enhancing the representational capacity of the downstream models. 
This work represents a new data-centric paradigm where LLMs serve as intelligent context generators, contributing a new method for the semi-automatic creation of training data and language resources.",{"paper_id":19112,"title":19113,"year":7,"month":188,"day":63,"doi":19114,"resource_url":19115,"first_page":19116,"last_page":19117,"pdf_url":19118,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19119,"paper_type":860,"authors":19120,"abstract":19135},"lrec2026-main-822","Efficient Adaptation of English Language Models for Morphologically Rich and Underrepresented Languages: The Case of Arabic","10.63317\u002F3xdz933rn47i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-822","10485","10496","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.822.pdf","eldamaty-etal-2026-efficient",[19121,19124,19127,19130,19132],{"paper_id":19112,"author_seq":247,"given_name":19122,"surname":19123,"affiliation":63,"orcid":63},"Ahmed Samy","Eldamaty",{"paper_id":19112,"author_seq":232,"given_name":19125,"surname":19126,"affiliation":63,"orcid":63},"Mohamed Maher Zenhom","Abdelrahman",{"paper_id":19112,"author_seq":218,"given_name":19128,"surname":19129,"affiliation":63,"orcid":63},"Mohamed Mostafa Ibrahim","Elbehery",{"paper_id":19112,"author_seq":203,"given_name":4108,"surname":19131,"affiliation":63,"orcid":63},"Ashraf",{"paper_id":19112,"author_seq":188,"given_name":19133,"surname":19134,"affiliation":63,"orcid":63},"Radwa","Elshawi","Transformer-based language models have revolutionized NLP, yet their adaptation to morphologically rich and dialectally diverse languages such as Arabic remains non-trivial. We introduce ModernAraBERT, a resource-efficient adaptation of the English-pretrained ModernBERT for Arabic, employing continued pretraining on large Arabic corpora followed by lightweight head-only fine-tuning with a frozen encoder. 
This strategy retains cross-lingual knowledge while capturing Arabic morphology and orthographic variation, offering a scalable alternative to training monolingual models from scratch. We evaluate ModernAraBERT on three representative Arabic NLP tasks, sentiment analysis, named entity recognition, and extractive question answering, against strong Arabic-specific and multilingual baselines (AraBERTv1, AraBERTv2, MARBERT, mBERT). Across all tasks, ModernAraBERT achieves consistent and often substantial improvements, particularly for sentence and token-level understanding, demonstrating that modern English encoder architectures can be efficiently transferred to Arabic through language-adaptive pretraining. Beyond Arabic, our findings highlight a generalizable paradigm for extending state-of-the-art models to morphologically complex and underrepresented languages with reduced computational overhead.",{"paper_id":19137,"title":19138,"year":7,"month":188,"day":63,"doi":19139,"resource_url":19140,"first_page":19141,"last_page":19142,"pdf_url":19143,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19144,"paper_type":860,"authors":19145,"abstract":19151},"lrec2026-main-823","GhostWriter: Hidden AI-Generated Texts over Multiple Languages, Domains and Generators","10.63317\u002F57fd7juh5zek","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-823","10497","10516","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.823.pdf","schaaf-etal-2026-ghostwriter",[19146,19148,19150],{"paper_id":19137,"author_seq":247,"given_name":2152,"surname":19147,"affiliation":63,"orcid":63},"Schaaf",{"paper_id":19137,"author_seq":232,"given_name":4481,"surname":19149,"affiliation":63,"orcid":63},"Bönisch",{"paper_id":19137,"author_seq":218,"given_name":869,"surname":2349,"affiliation":63,"orcid":63},"The advent of Transformer-based Large Language Models (LLMs) has led to an unprecedented surge of AI-generated text (AIGT) 
across online platforms and academic domains. While these models exhibit near-human fluency and stylistic coherence, their widespread adoption has raised concerns about authorship integrity, research quality, and the recursive contamination of training corpora with synthetic data. These developments underscore the need for reliable AIGT detection methods and benchmark datasets, particularly for malicious or deceptive *ghostwriting* scenarios where AIGT is intentionally crafted to evade detection. To address this, we present **GhostWriter**, a large-scale, bilingual (German and English), multi-generator, and multi-domain dataset for AIGT detection. The dataset comprises human- and AI-authored texts produced under domain-specific *ghostwriting* conditions, including examples intentionally embedded within otherwise human-written texts to obscure their AI origin. With **GhostWriter**, we (i) aim to expand the resources available for German AIGT datasets, (ii) emphasize mixed or fused synthesizations—since most existing corpora are limited to the document level—and (iii) introduce specifically crafted malicious ghostwriting scenarios across multiple domains and generators.",{"paper_id":19153,"title":19154,"year":7,"month":188,"day":63,"doi":19155,"resource_url":19156,"first_page":19157,"last_page":19158,"pdf_url":19159,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19160,"paper_type":860,"authors":19161,"abstract":19176},"lrec2026-main-824","Using LLMs to Extract Instances of Schematic Constructions from Unannotated L2 Learner 
Corpora","10.63317\u002F3ieeohd75wkj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-824","10517","10524","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.824.pdf","kallas-etal-2026-llms",[19162,19164,19167,19170,19173],{"paper_id":19153,"author_seq":247,"given_name":1729,"surname":19163,"affiliation":63,"orcid":63},"Kallas",{"paper_id":19153,"author_seq":232,"given_name":19165,"surname":19166,"affiliation":63,"orcid":63},"Ahto","Kiil",{"paper_id":19153,"author_seq":218,"given_name":19168,"surname":19169,"affiliation":63,"orcid":63},"Heete","Sahkai",{"paper_id":19153,"author_seq":203,"given_name":19171,"surname":19172,"affiliation":63,"orcid":63},"Geda","Paulsen",{"paper_id":19153,"author_seq":188,"given_name":19174,"surname":19175,"affiliation":63,"orcid":63},"Kertu","Saul","Our previous study found that generative LLMs can be successfully used to identify instances of schematic constructions (as defined in Construction Grammar) in unannotated L1 corpus data. This study tests the applicability of LLMs to also identify instances of constructions in unannotated L2 data. L2 learner corpora are notoriously difficult to annotate and query since they contain errors. Using LLMs can thus simplify the retrieval of construction data from L2 corpora. The identification of instances of constructions in L2 learner data has many possible uses in pedagogical applications of Construction Grammar and constructicography, like the identification of error-prone (properties of) constructions and the distribution of constructional instances across CEFR levels. Using the Estonian Nominal Quantifier Construction as the example construction and an Estonian CEFR-graded learner corpus as the source of L2 data, we tested several prompts and several models (OpenAI’s o3-mini, o3, gpt-5-mini and gpt-5, Google DeepMind’s Gemini Flash 2.5, Anthropic’s Claude Sonnet 4.5 and Opus 4.1). 
We found that the best model, gpt-5, achieved F1-scores from 0.90 to 0.96, depending on the level of detail of the prompt.",{"paper_id":19178,"title":19179,"year":7,"month":188,"day":63,"doi":19180,"resource_url":19181,"first_page":19182,"last_page":19183,"pdf_url":19184,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19185,"paper_type":860,"authors":19186,"abstract":19191},"lrec2026-main-825","Corruption-Based Data Augmentation for Arabic Essay Scoring: A Preliminary Study on the Organization Trait","10.63317\u002F5kt6mmyaumav","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-825","10525","10531","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.825.pdf","bashendy-etal-2026-corruption",[19187,19190],{"paper_id":19178,"author_seq":247,"given_name":19188,"surname":19189,"affiliation":63,"orcid":63},"May Saed","Bashendy",{"paper_id":19178,"author_seq":232,"given_name":1492,"surname":1493,"affiliation":63,"orcid":63},"Despite significant advances in Automated Essay Scoring (AES), progress in Arabic AES remains limited by the scarcity and imbalance of publicly available datasets. Manual curation of such data is labor-intensive and lacks scalability. To address this, we introduce COrE, a corruption-based data augmentation method that targets the organization trait of Arabic essays. COrE generates synthetic essays by intentionally disrupting the organization of well-written essays through controlled, distance-aware sentence swapping. Our experiments are conducted on TAQAE, a dataset of 620 essays across 4 distinct writing prompts. We evaluate the effectiveness of COrE using two widely-adopted pre-trained models: AraBERTv2 and CAMeLBERT-mix. Both models show improved performance with COrE, achieving gains of 9-17% over the no-augmentation baseline. 
These results highlight the potential of trait-specific augmentation to address data scarcity and enhance AES performance for low-resource languages.",{"paper_id":19193,"title":19194,"year":7,"month":188,"day":63,"doi":19195,"resource_url":19196,"first_page":19197,"last_page":19198,"pdf_url":19199,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19200,"paper_type":860,"authors":19201,"abstract":19208},"lrec2026-main-826","Structured Prompting for Arabic Essay Proficiency: A Trait-Centric Evaluation Approach","10.63317\u002F4st85zet2p6h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-826","10532","10544","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.826.pdf","mandhari-etal-2026-structured",[19202,19205,19206,19207],{"paper_id":19193,"author_seq":247,"given_name":19203,"surname":19204,"affiliation":63,"orcid":63},"Salim Al","Mandhari",{"paper_id":19193,"author_seq":232,"given_name":12041,"surname":12042,"affiliation":63,"orcid":63},{"paper_id":19193,"author_seq":218,"given_name":12047,"surname":12048,"affiliation":63,"orcid":63},{"paper_id":19193,"author_seq":203,"given_name":1216,"surname":12462,"affiliation":63,"orcid":63},"This paper presents a novel prompt engineering framework for trait specific Automatic Essay Scoring (AES) in Arabic, leveraging large language models (LLMs) under zero-shot and few-shot configurations. Addressing the scarcity of scalable, linguistically informed AES tools for Arabic, we introduce a three-tier prompting strategy (standard, hybrid, and rubric-guided) that guides LLMs in evaluating distinct language proficiency traits such as organization, vocabulary, development, and style. The hybrid approach simulates multi-agent evaluation with trait specialist raters, while the rubric-guided method incorporates scored exemplars to enhance model alignment. 
In zero and few-shot settings, we evaluate eight LLMs on the QAES dataset, the first publicly available Arabic AES resource with trait level annotations. Experimental results using Quadratic Weighted Kappa (QWK) and Confidence Intervals show that Fanar-1-9B-Instruct achieves the highest trait level agreement in both zero and few-shot prompting (QWK = 0.28 and CI = 0.41), with rubric-guided prompting yielding consistent gains across all traits and models. Discourse-level traits such as Development and Style showed the greatest improvements. These findings confirm that structured prompting, not model scale alone, enables effective AES in Arabic. Our study presents the first comprehensive framework for proficiency oriented Arabic AES and sets the foundation for scalable assessment in low resource educational contexts.",{"paper_id":19210,"title":19211,"year":7,"month":188,"day":63,"doi":19212,"resource_url":19213,"first_page":19214,"last_page":19215,"pdf_url":19216,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19217,"paper_type":860,"authors":19218,"abstract":19224},"lrec2026-main-827","ManufactuBERT: Efficient Continual Pretraining for Manufacturing","10.63317\u002F3c7yx4ewnj5m","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-827","10545","10555","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.827.pdf","armingaud-etal-2026-manufactubert",[19219,19221],{"paper_id":19210,"author_seq":247,"given_name":11295,"surname":19220,"affiliation":63,"orcid":63},"Armingaud",{"paper_id":19210,"author_seq":232,"given_name":19222,"surname":19223,"affiliation":63,"orcid":63},"Romaric","Besancon","While large general-purpose Transformer-based encoders excel at general language understanding, their performance diminishes in specialized domains like manufacturing due to a lack of exposure to domain-specific terminology and semantics. 
In this paper, we address this gap by introducing ManufactuBERT, a RoBERTa model continually pretrained on a large-scale corpus curated for the manufacturing domain. We present a comprehensive data processing pipeline to create this corpus from web data, involving an initial domain-specific filtering step followed by a multi-stage deduplication process that removes redundancies. Our experiments show that ManufactuBERT establishes a new state-of-the-art on a range of manufacturing-related NLP tasks, outperforming strong specialized baselines. More importantly, we demonstrate that training on our carefully deduplicated corpus significantly accelerates convergence, leading to a 33% reduction in training time and computational cost compared to training on the non-deduplicated dataset. The proposed pipeline offers a reproducible example for developing high-performing encoders in other specialized domains. Our model, code and curated corpus will be publicly available.",{"paper_id":19226,"title":19227,"year":7,"month":188,"day":63,"doi":19228,"resource_url":19229,"first_page":19230,"last_page":19231,"pdf_url":19232,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19233,"paper_type":860,"authors":19234,"abstract":19242},"lrec2026-main-828","Śmigiel Dataset: Laying Foundations for Investigating Machine-Generated Text Detection in Polish","10.63317\u002F3p7ghe9pfm8v","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-828","10556","10568","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.828.pdf","strebeyko-etal-2026-Åmigiel",[19235,19237,19239],{"paper_id":19226,"author_seq":247,"given_name":5422,"surname":19236,"affiliation":63,"orcid":63},"Strebeyko",{"paper_id":19226,"author_seq":232,"given_name":5684,"surname":19238,"affiliation":63,"orcid":63},"Wróblewska",{"paper_id":19226,"author_seq":218,"given_name":19240,"surname":19241,"affiliation":63,"orcid":63},"Piotr","Przybyła","We present Śmigiel, 
the first open dataset for training and evaluating machine-generated text (MGT) in Polish. The dataset includes a collection of human-written text fragments from six domains, which are used to prompt text generation by eight language models capable of producing credible Polish text. In addition to the raw corpus of over 462K generated texts, we also release a cleaned source- and domain-balanced dataset suitable for training and evaluating MGT detectors. Finally, we conduct preliminary experiments with text classifiers, showing that task difficulty depends on the text domain, the generating language model, and the availability of similar data in training. The results indicate that MGT detection in Polish can be approached with general-purpose classifiers that generalize well to new LLMs, but struggle to adapt to genres not represented in the training data.",{"paper_id":19244,"title":19245,"year":7,"month":188,"day":63,"doi":19246,"resource_url":19247,"first_page":19248,"last_page":19249,"pdf_url":19250,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19251,"paper_type":860,"authors":19252,"abstract":19275},"lrec2026-main-829","Extracting Medical Image-Related Entities from Spanish Electronic Health Records Using NER 
Methods","10.63317\u002F4t6agzu5ygqr","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-829","10569","10578","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.829.pdf","platas-etal-2026-extracting",[19253,19255,19257,19259,19262,19264,19267,19270,19272],{"paper_id":19244,"author_seq":247,"given_name":869,"surname":19254,"affiliation":63,"orcid":63},"Platas",{"paper_id":19244,"author_seq":232,"given_name":6445,"surname":19256,"affiliation":63,"orcid":63},"Merino",{"paper_id":19244,"author_seq":218,"given_name":2968,"surname":19258,"affiliation":63,"orcid":63},"Zotova",{"paper_id":19244,"author_seq":203,"given_name":19260,"surname":19261,"affiliation":63,"orcid":63},"Montse","Cuadros",{"paper_id":19244,"author_seq":188,"given_name":5349,"surname":19263,"affiliation":63,"orcid":63},"López-Linares",{"paper_id":19244,"author_seq":172,"given_name":19265,"surname":19266,"affiliation":63,"orcid":63},"Mikel Pérez de","Mendiola",{"paper_id":19244,"author_seq":155,"given_name":19268,"surname":19269,"affiliation":63,"orcid":63},"María","Gálvez",{"paper_id":19244,"author_seq":138,"given_name":5732,"surname":19271,"affiliation":63,"orcid":63},"Barba",{"paper_id":19244,"author_seq":121,"given_name":19273,"surname":19274,"affiliation":63,"orcid":63},"Antón","Asla","This paper presents a novel corpus in Spanish tailored for the extraction of medical image-related entities from radiological reports using Named Entity Recognition (NER) methods. The dataset was created by aggregating and refining multiple existing corpora, focusing on entities that can be visually interpreted in associated medical images. This resource aims to bridge the gap between natural language processing and computer vision in the biomedical domain. The study evaluates various NER methods, including encoder-only, encoder-decoder, and decoder-only architectures. 
It explores fine-tuning, zero-shot, and few-shot In-Context Learning (ICL) strategies to determine the most effective approach for entity extraction. The resulting dataset is publicly available.",{"paper_id":19277,"title":19278,"year":7,"month":188,"day":63,"doi":19279,"resource_url":19280,"first_page":19281,"last_page":19282,"pdf_url":19283,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19284,"paper_type":860,"authors":19285,"abstract":19291},"lrec2026-main-830","A Novel Synthetic Dataset for Few-Shot Legal Relation Extraction in German","10.63317\u002F5oaz3mdtekn9","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-830","10579","10591","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.830.pdf","nouri-etal-2026-novel",[19286,19288,19289,19290],{"paper_id":19277,"author_seq":247,"given_name":19287,"surname":5367,"affiliation":63,"orcid":63},"Shiva Banasaz",{"paper_id":19277,"author_seq":232,"given_name":2968,"surname":2969,"affiliation":63,"orcid":63},{"paper_id":19277,"author_seq":218,"given_name":1296,"surname":14307,"affiliation":63,"orcid":63},{"paper_id":19277,"author_seq":203,"given_name":3009,"surname":3010,"affiliation":63,"orcid":63},"The legal domain is particularly challenging for natural language processing due to the personal and confidential information it contains. Despite the significant advances of large language models (LLMs), applying them to relation extraction (RE) in legal texts remains challenging, not only because of the task’s linguistic and semantic complexity, but also due to privacy, compliance, and infrastructure constraints under regulations such as the EU AI Act. To address these challenges, we propose a novel synthetic dataset for German legal relation extraction, created using LLMs through a controlled, privacy-preserving, template-based pipeline. The dataset allows for reproducible and legally compliant experimentation. 
We benchmark it using two few-shot learning paradigms, a description-enhanced Model-Agnostic Meta-Learning (MAML) framework and Prototypical Networks with supervised contrastive loss and curriculum-aware prototype enrichment. Our results demonstrate that combining few-shot learning with structured semantic knowledge achieves robust and interpretable results, with the curriculum-aware Proto-Contrastive model reaching an F1-score of 99.83%.",{"paper_id":19293,"title":19294,"year":7,"month":188,"day":63,"doi":19295,"resource_url":19296,"first_page":19297,"last_page":19298,"pdf_url":19299,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19300,"paper_type":860,"authors":19301,"abstract":19307},"lrec2026-main-831","LLM-Based Data Generation and Clinical Skills Evaluation for Low-Resource French OSCEs","10.63317\u002F3wfd8ey9mgaa","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-831","10592","10602","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.831.pdf","huang-etal-2026-llm",[19302,19303,19305],{"paper_id":19293,"author_seq":247,"given_name":11714,"surname":1837,"affiliation":63,"orcid":63},{"paper_id":19293,"author_seq":232,"given_name":17216,"surname":19304,"affiliation":63,"orcid":63},"Bourgeade",{"paper_id":19293,"author_seq":218,"given_name":6855,"surname":19306,"affiliation":63,"orcid":63},"Illina","Objective Structured Clinical Examinations (OSCEs) are the standard method for assessing medical students’ clinical and communication skills through structured patient interviews. In France, however, the organization of training sessions is limited by human and logistical constraints, restricting students’ access to repeated practice and structured feedback. Recent advances in Natural Language Processing (NLP) and Large Language Models (LLMs) now offer the opportunity to automatically evaluate such medical interviews, thereby alleviating the need for human examiners during training. 
Yet, real French OSCE annotated transcripts remain extremely scarce, limiting reproducible research and reliable benchmarking. To address these challenges, we investigate the use of LLMs for both generating and evaluating French OSCE dialogues in a low-resource context. We introduce a controlled pipeline that produces synthetic doctor–patient interview transcripts guided by scenario-specific evaluation criteria, combining ideal and perturbed performances to simulate varying student skill levels. The resulting dialogues are automatically silver-labeled through an LLM-assisted framework supporting adjustable evaluation strictness. Benchmarking multiple open-source and proprietary LLMs shows that mid-size models (≤32B parameters) achieve accuracies comparable to GPT-4o ( 90%) on synthetic data, highlighting the feasibility of locally deployable, privacy-preserving evaluation systems for medical education.",{"paper_id":19309,"title":19310,"year":7,"month":188,"day":63,"doi":19311,"resource_url":19312,"first_page":19313,"last_page":19314,"pdf_url":19315,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19316,"paper_type":860,"authors":19317,"abstract":19324},"lrec2026-main-832","Instruction-Tuned Urdu LLMs: Efficient Adaptation of Llama Models and Evaluation Resources for Urdu","10.63317\u002F49yadrbey2cj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-832","10603","10616","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.832.pdf","tahir-etal-2026-instruction",[19318,19321,19322,19323],{"paper_id":19309,"author_seq":247,"given_name":19319,"surname":19320,"affiliation":63,"orcid":63},"Munief 
Hassan","Tahir",{"paper_id":19309,"author_seq":232,"given_name":3953,"surname":3954,"affiliation":63,"orcid":63},{"paper_id":19309,"author_seq":218,"given_name":3968,"surname":3969,"affiliation":63,"orcid":63},{"paper_id":19309,"author_seq":203,"given_name":3821,"surname":3971,"affiliation":63,"orcid":63},"This paper presents UrduLLaMA 1.1 and UrduLLaMA 1.1 Tiny, two instruction-tuned large language models (LLMs) designed to advance natural language processing for Urdu, a low-resource language with limited representation in multilingual corpora. These instruction-tuned models are derived from Llama-3.1-8B-Instruct and Llama-3.2-3B-Instruct architectures, respectively by conducting continual pretraining on 800 million diverse Urdu tokens curated from public and proprietary sources, followed by Supervised Fine-Tuning (SFT) using LoRA on 432K Urdu instructions spanning diverse NLP tasks. Rigorous evaluation across 14 culturally-specific domains using our novel Urdu LLM Evaluation Dataset demonstrates superior performance. UrduLLaMA 1.1 achieves 65.3 average accuracy (GPT-5 Nano evaluation), outperforming its Llama-3.1-8B-Instruct base (50.7) across all categories and surpassing Llama-3.3-70B-Instruct (62.7) in 8 out of 14 domains. UrduLLaMA 1.1 Tiny transforms Llama-3.2-3B-Instruct (38.8) into a (61.2) performer. Human evaluation by native Urdu linguists confirms these gains (3.51\u002F5 vs. 2.61\u002F5 base). 
Our results validate targeted adaptation strategies combining continual pretraining with instruction tuning as computationally efficient solutions for low-resource languages, enabling state-of-the-art Urdu LLM models with accessible hardware.",{"paper_id":19326,"title":19327,"year":7,"month":188,"day":63,"doi":19328,"resource_url":19329,"first_page":19330,"last_page":19331,"pdf_url":19332,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19333,"paper_type":860,"authors":19334,"abstract":19349},"lrec2026-main-833","Is Biomedical Specialization Still Worth It? Insights from Domain-Adaptive Language Modelling with a New French Health Corpus","10.63317\u002F5aujtfs5wq6b","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-833","10617","10633","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.833.pdf","mannion-etal-2026-is",[19335,19336,19339,19342,19344,19346,19347,19348],{"paper_id":19326,"author_seq":247,"given_name":18628,"surname":18629,"affiliation":63,"orcid":63},{"paper_id":19326,"author_seq":232,"given_name":19337,"surname":19338,"affiliation":63,"orcid":63},"Cécile","Macaire",{"paper_id":19326,"author_seq":218,"given_name":19340,"surname":19341,"affiliation":63,"orcid":63},"Armand","Violle",{"paper_id":19326,"author_seq":203,"given_name":18136,"surname":19343,"affiliation":63,"orcid":63},"Ohayon",{"paper_id":19326,"author_seq":188,"given_name":13091,"surname":19345,"affiliation":63,"orcid":63},"Tannier",{"paper_id":19326,"author_seq":172,"given_name":1044,"surname":1045,"affiliation":63,"orcid":63},{"paper_id":19326,"author_seq":155,"given_name":2320,"surname":2321,"affiliation":63,"orcid":63},{"paper_id":19326,"author_seq":138,"given_name":1219,"surname":10995,"affiliation":63,"orcid":63},"Large language models (LLMs) have demonstrated remarkable capabilities across diverse domains, yet their adaptation to specialized fields remains challenging, particularly for non-English languages. 
This study investigates domain-adaptive pre-training (DAPT) as a strategy for specializing small to mid-sized LLMs in the French biomedical domain through continued pre-training. We address two key research questions: the viability of specialized continued pre-training for domain adaptation and the relationship between domain-specific performance gains and general capability degradation. Our contributions include the release of a fully open-licensed French biomedical corpus suitable for commercial and open-source applications, the training and release of specialized French biomedical LLMs, and novel insights for DAPT implementation. Our methodology encompasses the collection and refinement of high-quality French biomedical texts, the exploration of causal language modeling approaches using DAPT, and conducting extensive comparative evaluations. Our results cast doubt on the efficacy of DAPT, in contrast to previous works, but we highlight its viability in smaller-scale, resource-constrained scenarios under the right conditions. 
Our findings further suggest that model merging post-DAPT is essential to mitigate generalization trade-offs, and in some cases even improves performance on specialized tasks at which the DAPT was directed.",{"paper_id":19351,"title":19352,"year":7,"month":188,"day":63,"doi":19353,"resource_url":19354,"first_page":19355,"last_page":19356,"pdf_url":19357,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19358,"paper_type":860,"authors":19359,"abstract":19383},"lrec2026-main-834","TildeOpen LLM: Leveraging Curriculum Learning to Achieve Equitable Language Representation","10.63317\u002F2emdserc8sfq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-834","10634","10652","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.834.pdf","bergmanis-etal-2026-tildeopen",[19360,19363,19366,19369,19371,19374,19377,19380],{"paper_id":19351,"author_seq":247,"given_name":19361,"surname":19362,"affiliation":63,"orcid":63},"Toms","Bergmanis",{"paper_id":19351,"author_seq":232,"given_name":19364,"surname":19365,"affiliation":63,"orcid":63},"Ingus Jānis","Pretkalniņš",{"paper_id":19351,"author_seq":218,"given_name":19367,"surname":19368,"affiliation":63,"orcid":63},"Martins","Kronis",{"paper_id":19351,"author_seq":203,"given_name":14112,"surname":19370,"affiliation":63,"orcid":63},"Nicmanis",{"paper_id":19351,"author_seq":188,"given_name":19372,"surname":19373,"affiliation":63,"orcid":63},"Jeļizaveta","Jelinska",{"paper_id":19351,"author_seq":172,"given_name":19375,"surname":19376,"affiliation":63,"orcid":63},"Roberts","Rozis",{"paper_id":19351,"author_seq":155,"given_name":19378,"surname":19379,"affiliation":63,"orcid":63},"Rinalds","Vīksna",{"paper_id":19351,"author_seq":138,"given_name":19381,"surname":19382,"affiliation":63,"orcid":63},"Marcis","Pinnis","Large language models often underperform in many European languages due to the dominance of English and a few high-resource languages in training data. 
This paper presents TildeOpen LLM, a 30-billion-parameter open-weight foundational model trained on 34 European languages to promote linguistic equity and improve performance for low-resource languages. To address the data imbalance, we combine dataset upsampling with a curriculum-based training schedule that alternates between uniform and natural language distributions. The resulting model performs favorably compared to other multilingual LLMs despite being trained with significantly fewer computing resources. Evaluation across multiple multilingual benchmarks shows that TildeOpen surpasses existing open-weight models in text generation and comprehension, particularly for Baltic, Finno-Ugric, and Slavic languages. Human evaluations confirm an up to tenfold reduction in linguistic errors relative to leading baselines. The model and associated resources are fully open-weight and publicly available at huggingface.co\u002FTildeAI\u002FTildeOpen-30b. These outcomes demonstrate that careful data curation and balanced training strategies can substantially enhance multilingual model quality without increasing model size or training volume.",{"paper_id":19385,"title":19386,"year":7,"month":188,"day":63,"doi":19387,"resource_url":19388,"first_page":19389,"last_page":19390,"pdf_url":19391,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19392,"paper_type":860,"authors":19393,"abstract":19403},"lrec2026-main-835","Common Sense vs. 
Morality: The Curious Case of Narrative Focus Bias in LLMs","10.63317\u002F23hsqksy9475","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-835","10653","10663","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.835.pdf","purkayastha-etal-2026-common",[19394,19397,19399,19401],{"paper_id":19385,"author_seq":247,"given_name":19395,"surname":19396,"affiliation":63,"orcid":63},"Saugata","Purkayastha",{"paper_id":19385,"author_seq":232,"given_name":5236,"surname":19398,"affiliation":63,"orcid":63},"Kushare",{"paper_id":19385,"author_seq":218,"given_name":19400,"surname":4533,"affiliation":63,"orcid":63},"Pragya Paramita",{"paper_id":19385,"author_seq":203,"given_name":19402,"surname":19396,"affiliation":63,"orcid":63},"Sukannya","Large Language Models (LLMs) are increasingly deployed across diverse real-world applications and user communities. As such, it is crucial that these models remain both morally grounded and knowledge-aware. In this work, we uncover a critical limitation of current LLMs—their tendency to prioritize moral reasoning over commonsense understanding. To investigate this phenomenon, we introduce COMORAL, a novel benchmark dataset containing commonsense contradictions embedded within moral dilemmas. Through extensive evaluation of ten LLMs across different model sizes, we find that existing models consistently struggle to identify such contradictions without prior signal. Furthermore, we observe a pervasive narrative focus bias, wherein LLMs more readily detect commonsense contradictions when they are attributed to a secondary character rather than the primary (narrator) character. 
Our comprehensive analysis underscores the need for enhanced reasoning-aware training to improve the commonsense robustness of large language models.",{"paper_id":19405,"title":19406,"year":7,"month":188,"day":63,"doi":19407,"resource_url":19408,"first_page":19409,"last_page":19410,"pdf_url":19411,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19412,"paper_type":860,"authors":19413,"abstract":19419},"lrec2026-main-836","Emphasizing the Commendable: A Study of Homogenized Transitive Verb Constructions in Machine Generated Peer Reviews","10.63317\u002F2a6746sy5cke","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-836","10664","10676","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.836.pdf","fung-etal-2026-emphasizing",[19414,19417,19418],{"paper_id":19405,"author_seq":247,"given_name":19415,"surname":19416,"affiliation":63,"orcid":63},"Hing-Yuet","Fung",{"paper_id":19405,"author_seq":232,"given_name":9589,"surname":9590,"affiliation":63,"orcid":63},{"paper_id":19405,"author_seq":218,"given_name":9569,"surname":9587,"affiliation":63,"orcid":63},"We present a study of machine generated text (MGT) output homogenization with a focus on the relative usage of the prototypical object construction of verbs (the O construction), which takes a noun phrase as its accusative argument. Verbs of different semantics have different tendencies of selecting a direct object or clausal complement; and hence lead to natural variation away from the prototypical usage. However, our results in the study between scientific peer reviews written by human and machines show a shift to unusually high usage of the O construction in MGT and greatly suppressing the frequency of other construction types. This is considered a serious case of syntactic homogenization. A major finding is that frequent verbs, like \"emphasize\", appear top on the list of such homogenized syntactic construction. 
This is more striking than identifying disproportionately more frequent usage of naturally rare words such as \"commendable\" in previous work. Our results will contribute to the prevention of further homogenization of MGT before they merge deeper into the ecosystem of human-written text.",{"paper_id":19421,"title":19422,"year":7,"month":188,"day":63,"doi":19423,"resource_url":19424,"first_page":19425,"last_page":19426,"pdf_url":19427,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19428,"paper_type":860,"authors":19429,"abstract":19442},"lrec2026-main-837","CoDAE: Adapting Large Language Models for Education via Chain-of-Thought Data Augmentation","10.63317\u002F447hbzyohzyf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-837","10677","10687","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.837.pdf","yuan-etal-2026-codae",[19430,19433,19436,19438,19441],{"paper_id":19421,"author_seq":247,"given_name":19431,"surname":19432,"affiliation":63,"orcid":63},"Shuzhou","Yuan",{"paper_id":19421,"author_seq":232,"given_name":19434,"surname":19435,"affiliation":63,"orcid":63},"Willliam","LaCroix",{"paper_id":19421,"author_seq":218,"given_name":938,"surname":19437,"affiliation":63,"orcid":63},"Ghoshal",{"paper_id":19421,"author_seq":203,"given_name":19439,"surname":19440,"affiliation":63,"orcid":63},"Ercong","Nie",{"paper_id":19421,"author_seq":188,"given_name":1732,"surname":9252,"affiliation":63,"orcid":63},"Large Language Models (LLMs) are increasingly employed as AI tutors in education due to their scalability and potential for personalized instruction. However, off-the-shelf LLMs often underperform in educational settings, exhibiting limitations such as providing answers too readily, failing to adapt their responses to students’ uncertainty, and remaining susceptible to emotionally manipulative prompts. 
To address these challenges, we introduce CoDAE, a framework that adapts LLMs for educational use through Chain-of-Thought (CoT) data augmentation. We collect real-world dialogues between students and a ChatGPT-based tutor and enrich them using CoT prompting to promote step-by-step reasoning and pedagogically aligned guidance. Furthermore, we design targeted dialogue cases to explicitly mitigate three key limitations: over-compliance, low response adaptivity, and threat vulnerability. We fine-tune four open-source LLMs on different variants of the augmented datasets and evaluate them in simulated educational scenarios using both automatic metrics and LLM-as-a-judge assessments. Our results show that models fine-tuned with CoDAE deliver more pedagogically appropriate guidance, promote student reflection and more effectively prevent premature answer disclosure.",{"paper_id":19444,"title":19445,"year":7,"month":188,"day":63,"doi":19446,"resource_url":19447,"first_page":19448,"last_page":19449,"pdf_url":19450,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19451,"paper_type":860,"authors":19452,"abstract":19473},"lrec2026-main-838","Synthetic Instruction Generation for Low-Resource Nordic Languages: Viability and Limitations in LLM 
Instruction-Tuning","10.63317\u002F3e73sy24wup3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-838","10688","10698","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.838.pdf","stenlund-etal-2026-synthetic",[19453,19456,19459,19461,19462,19464,19467,19470,19472],{"paper_id":19444,"author_seq":247,"given_name":19454,"surname":19455,"affiliation":63,"orcid":63},"Mathias","Stenlund",{"paper_id":19444,"author_seq":232,"given_name":19457,"surname":19458,"affiliation":63,"orcid":63},"Annika","Simonsen",{"paper_id":19444,"author_seq":218,"given_name":4323,"surname":19460,"affiliation":63,"orcid":63},"Bungum",{"paper_id":19444,"author_seq":203,"given_name":1380,"surname":4655,"affiliation":63,"orcid":63},{"paper_id":19444,"author_seq":188,"given_name":19463,"surname":3676,"affiliation":63,"orcid":63},"Jiangtao",{"paper_id":19444,"author_seq":172,"given_name":19465,"surname":19466,"affiliation":63,"orcid":63},"Oleg","Filatov",{"paper_id":19444,"author_seq":155,"given_name":19468,"surname":19469,"affiliation":63,"orcid":63},"Hemanadhan","Myneni",{"paper_id":19444,"author_seq":138,"given_name":19471,"surname":3802,"affiliation":63,"orcid":63},"Morris",{"paper_id":19444,"author_seq":121,"given_name":1410,"surname":1411,"affiliation":63,"orcid":63},"Pretrained large language models (LLMs) gain instruction-following abilities through instruction-tuning, a method which relies on datasets of instruction–response pairs. However, for low-resource languages, collecting human-authored instructions is costly, raising the question of whether synthetic instructions can substitute human-authored instructions for non-English languages. We compare instruction-tuning of a smaller pretrained LLM in four Nordic languages using (a) human-authored instructions paired with synthetic responses and (b) fully synthetic instruction–response pairs generated with a minimal-effort pipeline. 
Native-speaker evaluations show that models instruction-tuned on synthetic instructions perform on par with those trained on human-authored instructions for the largest Nordic languages, suggesting that minimal-effort synthetic instructions can serve as a practical alternative. In contrast, response quality deteriorates sharply for Icelandic, underscoring the limitations of current synthetic data generation pipelines when the LLM competence in the target language is weak. Overall, our results highlight that while synthetic instructions can enable cost-efficient instruction-tuning for the largest Nordic languages, they remain insufficient for Icelandic, clarifying when minimal-effort synthetic approaches suffice and when they fall short.",{"paper_id":19475,"title":19476,"year":7,"month":188,"day":63,"doi":19477,"resource_url":19478,"first_page":19479,"last_page":19480,"pdf_url":19481,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19482,"paper_type":860,"authors":19483,"abstract":19490},"lrec2026-main-839","AYN: A Tiny Yet Competitive Indian Legal Language Model Pretrained from Scratch","10.63317\u002F3c62f5f9g7q2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-839","10699","10722","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.839.pdf","niyogi-etal-2026-ayn",[19484,19487,19489],{"paper_id":19475,"author_seq":247,"given_name":19485,"surname":19486,"affiliation":63,"orcid":63},"Mitodru","Niyogi",{"paper_id":19475,"author_seq":232,"given_name":7522,"surname":19488,"affiliation":63,"orcid":63},"Gaussier",{"paper_id":19475,"author_seq":218,"given_name":12794,"surname":12795,"affiliation":63,"orcid":63},"Decoder-only Large Language Models (LLMs) are currently the model of choice for many Natural Language Processing (NLP) applications. Through instruction fine-tuning and prompting approaches, such LLMs have been efficiently used to solve both general and domain-specific tasks. 
However, they are costly to train and, to a certain extent, costly to use as well, and one can wonder whether LLMs can be replaced by domain-specific Tiny Language Models (TLMs), which typically contain less than 100M parameters. We address this question in this study by comparing the performance of an 88M TLM pretrained from scratch for 185 A100 hours on a specific domain with a domain-specific tokenizer (here, the Indian legal domain) with LLMs of various sizes between 1B and 8B for solving domain-specific tasks. We show in particular that our legal TLM, Ayn, can indeed outperform LLMs up to 80 times larger on the legal case judgment prediction task, rival LLMs up to 30 times larger on the summarization task, and still be competitive with these larger LLMs on general tasks.",{"paper_id":19492,"title":19493,"year":7,"month":188,"day":63,"doi":19494,"resource_url":19495,"first_page":19496,"last_page":19497,"pdf_url":19498,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19499,"paper_type":860,"authors":19500,"abstract":19514},"lrec2026-main-840","Low-Resource Dialect Adaptation of Large Language Models: A French Dialect Case-Study","10.63317\u002F3kg84h4wnshx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-840","10723","10734","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.840.pdf","khan-etal-2026-low",[19501,19503,19506,19509,19511],{"paper_id":19492,"author_seq":247,"given_name":19502,"surname":2909,"affiliation":63,"orcid":63},"Eeham",{"paper_id":19492,"author_seq":232,"given_name":19504,"surname":19505,"affiliation":63,"orcid":63},"Firas","Saidani",{"paper_id":19492,"author_seq":218,"given_name":19507,"surname":19508,"affiliation":63,"orcid":63},"Owen 
Van","Esbroeck",{"paper_id":19492,"author_seq":203,"given_name":5132,"surname":19510,"affiliation":63,"orcid":63},"Khoury",{"paper_id":19492,"author_seq":188,"given_name":19512,"surname":19513,"affiliation":63,"orcid":63},"Leila","Kosseim","Despite the widespread adoption of Large Language Models (LLMs), their strongest capabilities remain largely confined to a small number of high-resource languages for which there is abundant training data. Recently, continual pre-training (CPT) has emerged as a means to fine-tune these models to low-resource regional dialects. In this paper, we study the use of CPT for dialect learning under tight data and compute budgets. Using low-rank adaptation (LoRA) and compute-efficient continual pre-training, we adapt three LLMs to the Québec French dialect using a very small dataset and benchmark them on the COLE suite. Our experiments demonstrate an improvement on the minority dialect benchmarks with minimal regression on the prestige language benchmarks with around 1% of model parameters updated. Analysis of the results demonstrate that gains are highly contingent on corpus composition. These findings indicate that CPT with parameter-efficient fine-tuning (PEFT) can narrow the dialect gap by providing cost-effective and sustainable language resource creation, expanding high-quality LLM access to minority linguistic communities. 
To support reproducibility and broaden access, we release the first Québec French LLMs on Hugging Face.",{"paper_id":19516,"title":19517,"year":7,"month":188,"day":63,"doi":19518,"resource_url":19519,"first_page":19520,"last_page":19521,"pdf_url":19522,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19523,"paper_type":860,"authors":19524,"abstract":19532},"lrec2026-main-841","Reformulate and Create, Don't Translate: Creating Natural Prompts for Underserved Languages","10.63317\u002F4kz5jrk3j9kw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-841","10735","10749","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.841.pdf","simonsen-etal-2026-reformulate",[19525,19526,19527,19528,19531],{"paper_id":19516,"author_seq":247,"given_name":19457,"surname":19458,"affiliation":63,"orcid":63},{"paper_id":19516,"author_seq":232,"given_name":19454,"surname":19455,"affiliation":63,"orcid":63},{"paper_id":19516,"author_seq":218,"given_name":4323,"surname":19460,"affiliation":63,"orcid":63},{"paper_id":19516,"author_seq":203,"given_name":19529,"surname":19530,"affiliation":63,"orcid":63},"Marc Daníel Skipstað","Volhardt",{"paper_id":19516,"author_seq":188,"given_name":1410,"surname":1411,"affiliation":63,"orcid":63},"We present a methodology for creating high-quality instruction prompts for low-resource Germanic languages that addresses a critical challenge: small annotator pools risk producing datasets reflecting narrow individual interests rather than diverse user needs. In this work, native speakers reformulate existing English prompts from OpenAssistant or create entirely original prompts, adapting them to reflect local contexts and natural language patterns while preserving broad task and topic diversity. 
This approach produced high-quality prompt datasets totaling 6,950 prompts across seven Germanic languages (German, Dutch, Swedish, Norwegian Bokmål\u002FNynorsk, Danish, Icelandic and Faroese) with validated coverage of diverse tasks and topics. Blind evaluation demonstrates that human-reformulated prompts significantly outperform synthetically generated prompts in naturalness and comprehensibility, particularly for low-resource languages like Icelandic and Faroese. For the bigger Scandinavian language, Danish, the difference was less pronounced. The prompt dataset is released under an open-source license at https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FAnnikaSimonsen\u002FTrustLLM-reformulation-prompts.",{"paper_id":19534,"title":19535,"year":7,"month":188,"day":63,"doi":19536,"resource_url":19537,"first_page":19538,"last_page":19539,"pdf_url":19540,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19541,"paper_type":860,"authors":19542,"abstract":19550},"lrec2026-main-842","Generating High Quality Synthetic Data for Dutch Medical Conversations","10.63317\u002F52kv8b8eq52o","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-842","10750","10763","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.842.pdf","kuan-etal-2026-generating",[19543,19546,19547],{"paper_id":19534,"author_seq":247,"given_name":19544,"surname":19545,"affiliation":63,"orcid":63},"Cecilia","Kuan",{"paper_id":19534,"author_seq":232,"given_name":18764,"surname":18765,"affiliation":63,"orcid":63},{"paper_id":19534,"author_seq":218,"given_name":19548,"surname":19549,"affiliation":63,"orcid":63},"Henk van den","Heuvel","Medical conversations offer insights into clinical communication often absent from Electronic Health Records. 
However, developing reliable clinical Natural Language Processing (NLP) models is hampered by the scarcity of domain-specific datasets, as clinical data are typically inaccessible due to privacy and ethical constraints. To address these challenges, we present a pipeline for generating synthetic Dutch medical dialogues using a Dutch fine-tuned Large Language Model, with real medical conversations serving as linguistic and structural reference. The generated dialogues were evaluated through quantitative metrics and qualitative review by native speakers and medical practitioners. Quantitative analysis revealed strong lexical variety and overly regular turn-taking, suggesting scripted rather than natural conversation flow. Qualitative review produced slightly below-average scores, with raters noting issues in domain specificity and natural expression. The limited correlation between quantitative and qualitative results highlights that numerical metrics alone cannot fully capture linguistic quality. Our findings demonstrate that generating synthetic Dutch medical dialogues is feasible but requires domain knowledge and carefully structured prompting to balance naturalness and structure in conversation. 
This work provides a foundation for expanding Dutch clinical NLP resources through ethically generated synthetic data.",{"paper_id":19552,"title":19553,"year":7,"month":188,"day":63,"doi":19554,"resource_url":19555,"first_page":19556,"last_page":19557,"pdf_url":19558,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19559,"paper_type":860,"authors":19560,"abstract":19583},"lrec2026-main-843","DeepICD-R1: Medical Reasoning through Hierarchical Rewards and Unsupervised Distillation","10.63317\u002F5ntn3gnmv9cy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-843","10764","10775","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.843.pdf","rhr-etal-2026-deepicd",[19561,19563,19566,19568,19571,19574,19576,19578,19581],{"paper_id":19552,"author_seq":247,"given_name":17216,"surname":19562,"affiliation":63,"orcid":63},"Röhr",{"paper_id":19552,"author_seq":232,"given_name":19564,"surname":19565,"affiliation":63,"orcid":63},"Thomas Maximilian Josef","Steffek",{"paper_id":19552,"author_seq":218,"given_name":1064,"surname":19567,"affiliation":63,"orcid":63},"Teucher",{"paper_id":19552,"author_seq":203,"given_name":19569,"surname":19570,"affiliation":63,"orcid":63},"Keno","Bressem",{"paper_id":19552,"author_seq":188,"given_name":19572,"surname":19573,"affiliation":63,"orcid":63},"Alexei","Figueroa",{"paper_id":19552,"author_seq":172,"given_name":1216,"surname":19575,"affiliation":63,"orcid":63},"Grundmann",{"paper_id":19552,"author_seq":155,"given_name":1625,"surname":19577,"affiliation":63,"orcid":63},"Troeger",{"paper_id":19552,"author_seq":138,"given_name":19579,"surname":19580,"affiliation":63,"orcid":63},"Felix Alexander","Gers",{"paper_id":19552,"author_seq":121,"given_name":869,"surname":19582,"affiliation":63,"orcid":63},"Löser","Large language models (LLMs) show strong reasoning abilities, but full retraining for the medical domain is often infeasible because of lacking data or compute 
resources. We present DeepICD-R1, a framework for efficient medical reasoning fine-tuning that unites hierarchical rewards with distilled supervision. We reformulate ICD-10-CM prediction as a reinforcement learning problem and design a hierarchical outcome-based reward that reflects the ICD code structure across chapter, category, and full-code levels. In parallel, we publish a large-scale distilled dataset of over 90k reasoning traces derived from MIMIC-IV admission notes, integrating clinical validation and official coding guidelines. Fine-tuning smaller instruction-tuned LLMs with this data and GRPO reinforcement yields consistent gains in diagnostic accuracy and reasoning coherence. Extensive ablations confirm that hierarchical supervision and verifiable outcome rewards enable competitive, domain-specialized reasoning models without additional pretraining, providing a reproducible foundation for clinical NLP research. Keywords: Clinical NLP, Large Reasoning Model, GRPO, Supervised Fine-Tuning",{"paper_id":19585,"title":19586,"year":7,"month":188,"day":63,"doi":19587,"resource_url":19588,"first_page":19589,"last_page":19590,"pdf_url":19591,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19592,"paper_type":860,"authors":19593,"abstract":19602},"lrec2026-main-844","SynthLLM: An LLM-based Scalable Synthetic Data Generation Pipeline for Low-Resource 
Languages","10.63317\u002F36i5afj23ivf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-844","10776","10791","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.844.pdf","panahi-etal-2026-synthllm",[19594,19597,19600],{"paper_id":19585,"author_seq":247,"given_name":19595,"surname":19596,"affiliation":63,"orcid":63},"Solmaz","Panahi",{"paper_id":19585,"author_seq":232,"given_name":19598,"surname":19599,"affiliation":63,"orcid":63},"Vasudevan","Nedumpozhimana",{"paper_id":19585,"author_seq":218,"given_name":13618,"surname":19601,"affiliation":63,"orcid":63},"Kelleher","Large Language Models (LLMs) have enabled scalable synthetic data generation, yet their effective adaptation to low-resource languages remains underexplored. We introduce an LLM-based generate and annotate paradigm to create synthetic datasets for low-resource NLP classification tasks. The framework employs a smaller model for text generation and a stronger model for automatic annotation. Using Farsi Natural Language Inference (NLI) as a case study, we construct a large-scale synthetic dataset of 100,000 labeled instances. We provide a systematic empirical analysis of annotation quality, label-distribution effects, and training regimes. We compare GPT-4o-mini, Aya-23-35B, and DeBERTa as annotators and examine how annotation variability propagates to downstream performance. Our results show that a warm-up phase with synthetic data consistently outperforms data mixing and reversed ordering. Notably, open-source annotation (Aya-23-35B) achieves comparable downstream performance to the proprietary model (GPT-4o-mini), with significant cost implications for deploying pipelines in low-resource settings. 
The dataset and code are publicly available at https:\u002F\u002Fhuggingface.co\u002Fdatasets\u002FSolmazp\u002Ftext2entail.",{"paper_id":19604,"title":19605,"year":7,"month":188,"day":63,"doi":19606,"resource_url":19607,"first_page":19608,"last_page":19609,"pdf_url":19610,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19611,"paper_type":860,"authors":19612,"abstract":19619},"lrec2026-main-845","Persona-Conditioned Generation of Patient Self-Reports from EHRs","10.63317\u002F34prj6o9qfrk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-845","10792","10801","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.845.pdf","wu-etal-2026-persona",[19613,19615,19617],{"paper_id":19604,"author_seq":247,"given_name":19614,"surname":7319,"affiliation":63,"orcid":63},"Yuexin",{"paper_id":19604,"author_seq":232,"given_name":19616,"surname":3270,"affiliation":63,"orcid":63},"Jianming",{"paper_id":19604,"author_seq":218,"given_name":9434,"surname":19618,"affiliation":63,"orcid":63},"Rus","Accurate diagnosis depends not only on clinical expertise but also on how patients describe their symptoms at first contact. Yet large English corpora of patient-authored self-reports are scarce, limiting advances in natural, context-aware narrative modeling. We address this gap by generating first-person self-reports from structured EHR content conditioned on persona attributes that capture social and clinical context. Reports are produced by two generators and scored by two independent graders using a rubric with four dimensions, complemented by a rubric-free preference test. Across 10k stratified cases, we compare two generators under a reliable evaluation protocol and select the higher-scoring one based primarily on Clinical Correctness and Faithfulness, yielding a dataset composed of narratives from the stronger system. 
Our contributions are fourfold: (I) we developed and release a large, persona-conditioned dataset of patient-style self-reports grounded in patient-stated EHR facts, (II) we introduce a transparent evaluation framework that combines rubric-based scoring with rubric-free preference to mitigate grader bias and enable cross-validation, (III) we find that graders exhibit systematic stylistic preferences in rubric-free approach that influence scores independent of clinical content, and (IV) we study large language models for producing first-person self-reports from structured EHRs, highlighting where they succeed, where they fail, and how this affects use in telemedicine and triage.",{"paper_id":19621,"title":19622,"year":7,"month":188,"day":63,"doi":19623,"resource_url":19624,"first_page":19625,"last_page":19626,"pdf_url":19627,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19628,"paper_type":860,"authors":19629,"abstract":19638},"lrec2026-main-846","SocialStep: Fast Prediction of Social Determinants of Health","10.63317\u002F2t6bdjt7dvvi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-846","10802","10814","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.846.pdf","landes-etal-2026-socialstep",[19630,19632,19635],{"paper_id":19621,"author_seq":247,"given_name":1216,"surname":19631,"affiliation":63,"orcid":63},"Landes",{"paper_id":19621,"author_seq":232,"given_name":19633,"surname":19634,"affiliation":63,"orcid":63},"Adam Richard","Cross",{"paper_id":19621,"author_seq":218,"given_name":19636,"surname":19637,"affiliation":63,"orcid":63},"Jimeng","Sun","Given thousands of medical documents, how can we automatically uncover patients’ social risk factors? Social Determinants of Health (SDoH) constitute a growing class of non-clinical risk factors that shape patient trajectories. 
While clinically significant, automatic detection of SDoH from free text remains understudied due to scarce and imbalanced training data. Current approaches often rely on monolithic large language models. We present SocialStep, a two-step hybrid pipeline that first uses a lightweight classifier to triage sentences and then applies a Large Language Model (LLM) for multilabel classification to the relevant subset. On the Medical Information Mart for Intensive Care III (MIMIC-III) dataset, SocialStep improves macro F1 by 5 points over the state-of-the-art baseline while running 12.2× faster. These findings demonstrate that integrating compact neural encoders with large language models provides a scalable and highly accurate framework for clinical NLP tasks, including SDoH extraction. Notably, we also observe some unexpected patterns in LLM performance. SocialStep offers a practical blueprint for hybrid model deployment that identifies critical social risk factors without prohibitive computational cost.",{"paper_id":19640,"title":19641,"year":7,"month":188,"day":63,"doi":19642,"resource_url":19643,"first_page":19644,"last_page":19645,"pdf_url":19646,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19647,"paper_type":860,"authors":19648,"abstract":19653},"lrec2026-main-847","Dynamically Acquiring Text Content to Enable the Classification of Lesser-known Entities for Real-world Tasks","10.63317\u002F2sgctcg3bvf7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-847","10815","10825","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.847.pdf","alam-etal-2026-dynamically",[19649,19651],{"paper_id":19640,"author_seq":247,"given_name":19650,"surname":5462,"affiliation":63,"orcid":63},"Fahmida",{"paper_id":19640,"author_seq":232,"given_name":5802,"surname":19652,"affiliation":63,"orcid":63},"Riloff","Existing Natural Language Processing (NLP) resources often lack the task-specific information 
required for real-world problems and provide limited coverage of lesser-known or newly introduced entities. For example, business organizations and health care providers may need to be classified into a variety of different taxonomic schemes for specific application tasks. Our goal is to enable domain experts to easily create a task-specific classifier for entities by providing only entity names and gold labels as training data. Our framework then dynamically acquires descriptive text about each entity, which is subsequently used as the basis for producing a text-based classifier. We propose a novel text acquisition method that leverages both web and large language models (LLMs). We evaluate our proposed framework on two classification problems in distinct domains: (i) classifying organizations into Standard Industrial Classification (SIC) Codes, which categorize organizations based on their business activities; and (ii) classifying healthcare providers into healthcare provider taxonomy codes, which represent a provider’s medical specialty and area of practice. 
Our best-performing model achieved macro-averaged F1-scores of 82.3% and 72.9% on the SIC code and healthcare taxonomy code classification tasks, respectively.",{"paper_id":19655,"title":19656,"year":7,"month":188,"day":63,"doi":19657,"resource_url":19658,"first_page":19659,"last_page":19660,"pdf_url":19661,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19662,"paper_type":860,"authors":19663,"abstract":19668},"lrec2026-main-848","RILEC: Detection and Generation of L1 Russian Interference Errors in English Learner Texts","10.63317\u002F5bas35hntskv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-848","10826","10837","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.848.pdf","kharlamova-etal-2026-rilec",[19664,19667],{"paper_id":19655,"author_seq":247,"given_name":19665,"surname":19666,"affiliation":63,"orcid":63},"Darya","Kharlamova",{"paper_id":19655,"author_seq":232,"given_name":6855,"surname":8655,"affiliation":63,"orcid":63},"Many errors in student essays can be explained by influence from the native language (L1). L1 interference refers to errors influenced by a speaker’s first language, such as using stadion instead of stadium, reflecting lexical transliteration from Russian. In this work, we address the task of detecting such errors in English essays written by Russian-speaking learners. We introduce RILEC, a large-scale dataset of over 18,000 sentences, combining expert-annotated data from REALEC with synthetic examples generated through rule-based and neural augmentation. We propose a framework for generating L1-motivated errors using generative language models optimized with PPO, prompt-based control, and rule-based patterns. Models fine-tuned on RILEC achieve strong performance, particularly on word-level interference types such as transliteration and tense semantics. 
We find that the proposed augmentation pipeline leads to a significant performance improvement, making it a potentially valuable tool for learners and teachers to more effectively identify and address such errors.",{"paper_id":19670,"title":19671,"year":7,"month":188,"day":63,"doi":19672,"resource_url":19673,"first_page":19674,"last_page":19675,"pdf_url":19676,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19677,"paper_type":860,"authors":19678,"abstract":19694},"lrec2026-main-849","Critical Foreign Policy Decision (CFPD) Benchmark: Measuring Diplomatic Preferences of Large Language Models","10.63317\u002F2xw2yfabkain","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-849","10838","10852","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.849.pdf","jensen-etal-2026-critical",[19679,19680,19683,19686,19687,19690,19691],{"paper_id":19670,"author_seq":247,"given_name":4797,"surname":1507,"affiliation":63,"orcid":63},{"paper_id":19670,"author_seq":232,"given_name":19681,"surname":19682,"affiliation":63,"orcid":63},"Ian J.","Reynolds",{"paper_id":19670,"author_seq":218,"given_name":19684,"surname":19685,"affiliation":63,"orcid":63},"Yasir","Atalan",{"paper_id":19670,"author_seq":203,"given_name":1732,"surname":9018,"affiliation":63,"orcid":63},{"paper_id":19670,"author_seq":188,"given_name":19688,"surname":19689,"affiliation":63,"orcid":63},"Austin","Woo",{"paper_id":19670,"author_seq":172,"given_name":18734,"surname":1840,"affiliation":63,"orcid":63},{"paper_id":19670,"author_seq":155,"given_name":19692,"surname":19693,"affiliation":63,"orcid":63},"Trevor","Howarth","As national security institutions increasingly integrate Artificial Intelligence (AI) into decision-making and content generation processes, understanding the inherent biases of large language models (LLMs) is crucial. 
We present a novel benchmark designed to evaluate biases and preferences of models in the context of international relations (IR), which we apply to eight prominent foundation models: Llama 3.1 8B Instruct, Llama 3.1 70B Instruct, GPT-4o, Gemini 1.5 Pro-002, Mixtral 8x22B, Claude 3.5 Sonnet, DeepSeek V3, and Qwen2 72B. We designed a bias discovery study around core topics in IR using 400 expert-crafted scenarios to analyze results from our selected models. These scenarios focused on four topical domains: military escalation, military and humanitarian intervention, cooperative behavior, and alliance dynamics. Analysis reveals noteworthy variation among model recommendations based on the four tested domains. Particularly, DeepSeek V3, Qwen2 72B, Gemini 1.5 Pro-002, and Llama 3.1 8B Instruct models offered significantly more escalatory recommendations than Claude 3.5 Sonnet and GPT-4o models. All models exhibit some degree of country-specific biases. These findings highlight the necessity for controlled deployment of LLMs in high-stakes environments, emphasizing the need for domain-specific evaluations and model fine-tuning to align with institutional objectives.",{"paper_id":19696,"title":19697,"year":7,"month":188,"day":63,"doi":19698,"resource_url":19699,"first_page":19700,"last_page":19701,"pdf_url":19702,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19703,"paper_type":860,"authors":19704,"abstract":19715},"lrec2026-main-850","CrisisCL: A Domain Incremental Learning Benchmark for Crisis Management","10.63317\u002F5eem8gu9j9o8","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-850","10853","10865","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.850.pdf","kiem-etal-2026-crisiscl",[19705,19708,19710,19713],{"paper_id":19696,"author_seq":247,"given_name":19706,"surname":19707,"affiliation":63,"orcid":63},"Paul Le 
Van","Kiem",{"paper_id":19696,"author_seq":232,"given_name":19709,"surname":11065,"affiliation":63,"orcid":63},"Romain",{"paper_id":19696,"author_seq":218,"given_name":19711,"surname":19712,"affiliation":63,"orcid":63},"Farah","Benamara",{"paper_id":19696,"author_seq":203,"given_name":3263,"surname":19714,"affiliation":63,"orcid":63},"MORICEAU","This paper proposes CrisisCL, a domain incremental learning benchmark for crisis management. Based on previous crisis management protocols, it improves consistency by allowing continual learning (CL) of new crises. A set of experiments have been conducted on multilingual datasets relying on continual learning methods and transformers to improve performance and ensure model generalization. Results reveal that regularization methods are more effective on large, coherent domains, whereas replay strategies struggle under constrained memory. Additional experimental protocols further expose the limitations of current CL methods when generalizing to unforeseen crisis events.",{"paper_id":19717,"title":19718,"year":7,"month":188,"day":63,"doi":19719,"resource_url":19720,"first_page":19721,"last_page":19722,"pdf_url":19723,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19724,"paper_type":860,"authors":19725,"abstract":19731},"lrec2026-main-851","Towards Consistent Detection of Cognitive Distortions: LLM-Based Annotation and Dataset-Agnostic 
Evaluation","10.63317\u002F4joqxtgdgxq2","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-851","10866","10882","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.851.pdf","sharma-etal-2026-consistent",[19726,19728,19730],{"paper_id":19717,"author_seq":247,"given_name":19727,"surname":11395,"affiliation":63,"orcid":63},"Neha",{"paper_id":19717,"author_seq":232,"given_name":19729,"surname":5572,"affiliation":63,"orcid":63},"Navneet",{"paper_id":19717,"author_seq":218,"given_name":13547,"surname":13548,"affiliation":63,"orcid":63},"Text-based automated Cognitive Distortion detection is a challenging task due to its subjective nature, with low agreement scores observed even among expert human annotators, leading to unreliable annotations. We explore the use of Large Language Models (LLMs) as consistent and reliable annotators, and propose that multiple independent LLM runs can reveal stable labeling patterns despite the inherent subjectivity of the task. Furthermore, to fairly compare models trained on datasets with different characteristics, we introduce a dataset-agnostic evaluation framework using Cohen’s kappa as an effect size measure. This methodology allows for fair cross-dataset and cross-study comparisons where traditional metrics like F1 score fall short. Our results show that GPT-4 can produce consistent annotations (Fleiss’s Kappa = 0.78), resulting in improved test set performance for models trained on these annotations compared to those trained on human-labeled data. 
While human expert verification was inconclusive on our target dataset, our findings suggest that LLMs can offer a scalable and internally consistent alternative for generating training data that supports strong downstream performance in subjective NLP tasks.",{"paper_id":19733,"title":19734,"year":7,"month":188,"day":63,"doi":19735,"resource_url":19736,"first_page":19737,"last_page":19738,"pdf_url":19739,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19740,"paper_type":860,"authors":19741,"abstract":19749},"lrec2026-main-852","LLMs as Annotators: Evaluating Model–Human Alignment in Detecting Contentious Language in Historical Corpora","10.63317\u002F3dhy55mxo9zb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-852","10883","10896","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.852.pdf","zhao-etal-2026-llms",[19742,19744,19747],{"paper_id":19733,"author_seq":247,"given_name":19743,"surname":18789,"affiliation":63,"orcid":63},"Yahui",{"paper_id":19733,"author_seq":232,"given_name":19745,"surname":19746,"affiliation":63,"orcid":63},"Clemencia","Siro",{"paper_id":19733,"author_seq":218,"given_name":13890,"surname":19748,"affiliation":63,"orcid":63},"Hollink","Historical texts often contain terminology that reflects outdated or harmful social values. Identifying such contentious terms is essential for the Galleries, Libraries, Archives, and Museums (GLAM) community, but manual annotation requires cultural expertise and is difficult to scale. This study evaluates whether large language models (LLMs) can support this process by aligning with human judgments of contentiousness in historical Dutch corpora. Using the Dutch Contentious Contexts Corpus (ConConCor), we formalize the task as context-dependent binary classification and compare two LLMs across multiple prompt configurations and evaluation scenarios. 
The models achieve near-human-level agreement on explicit cases but diverge when contextual or historical reasoning is required. Analysis of disagreement patterns shows that LLMs capture overtly harmful expressions yet tend to over-predict contentiousness for identity-related and colonial terms and under-predict for semantically shifted or figurative uses. These findings suggest that LLMs can act as auxiliary annotators for sensitive language detection in historical materials, provided that human oversight and contextual interpretation remain central to annotation workflows.",{"paper_id":19751,"title":19752,"year":7,"month":188,"day":63,"doi":19753,"resource_url":19754,"first_page":19755,"last_page":19756,"pdf_url":19757,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19758,"paper_type":860,"authors":19759,"abstract":19767},"lrec2026-main-853","Widespread Gender and Pronoun Bias in Moral Judgments across LLMs","10.63317\u002F2qxmkmu9smbk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-853","10897","10911","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.853.pdf","fernandes-etal-2026-widespread",[19760,19762,19764],{"paper_id":19751,"author_seq":247,"given_name":19761,"surname":7996,"affiliation":63,"orcid":63},"Gustavo Lucius",{"paper_id":19751,"author_seq":232,"given_name":19763,"surname":1578,"affiliation":63,"orcid":63},"Jeiverson",{"paper_id":19751,"author_seq":218,"given_name":19765,"surname":19766,"affiliation":63,"orcid":63},"Pedro O.S","Vaz-de-Melo","Large language models (LLMs) are increasingly used to assess moral or ethical statements, yet their judgments may reflect social and linguistic biases. This work presents a controlled, sentence-level study of how grammatical person, number, and gender markers influence LLM moral classifications of fairness. 
Starting from 550 balanced base sentences from the ETHICS dataset, we generated 26 counterfactual variants per item, systematically varying pronouns and demographic markers to yield 14,850 semantically equivalent sentences. We evaluated six model families (Grok, GPT, LLaMA, Gemma, DeepSeek, and Mistral), and measured fairness judgments and inter-group disparities using Statistical Parity Difference (SPD). Results show statistically significant biases: sentences written in the singular form and third person are more often judged as “fair”, while those in the second person are penalized. Gender markers produce the strongest effects, with non-binary subjects consistently favored and male subjects disfavored. We conjecture that these patterns reflect distributional and alignment biases learned during training, emphasizing the need for targeted fairness interventions in moral LLM applications.",{"paper_id":19769,"title":19770,"year":7,"month":188,"day":63,"doi":19771,"resource_url":19772,"first_page":19773,"last_page":19774,"pdf_url":19775,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19776,"paper_type":860,"authors":19777,"abstract":19782},"lrec2026-main-854","Frame2KG: A Benchmark and Evaluation Toolkit for Interpretable Frame-to-Graph Generation","10.63317\u002F4ys6kofrzoc5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-854","10912","10926","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.854.pdf","watson-etal-2026-frame2kg",[19778,19779,19780,19781],{"paper_id":19769,"author_seq":247,"given_name":5980,"surname":5981,"affiliation":63,"orcid":63},{"paper_id":19769,"author_seq":232,"given_name":5983,"surname":5984,"affiliation":63,"orcid":63},{"paper_id":19769,"author_seq":218,"given_name":5986,"surname":5987,"affiliation":63,"orcid":63},{"paper_id":19769,"author_seq":203,"given_name":5989,"surname":2998,"affiliation":63,"orcid":63},"Interpretable frame-to-knowledge-graph (Frame2KG) 
generation enables structured visual scene representation while supporting on-device inference to enhance privacy, improve interpretability, and minimise compute. We introduce Frame2KG-YC2, a synthetic, reproducible dataset derived from YouCook2 that pairs keyframes with schema-valid JSON knowledge graphs containing typed, spatially grounded entities and semantic predicates, alongside faithful textual paraphrases. Using this corpus, we fine-tune Qwen2.5-VL models (3B and 7B) with parameter-efficient LoRA adapters on attention layers (QKVO), with and without GateProj\u002FUp\u002FDown MLP projections. For evaluation and benchmarking, we propose a deterministic toolkit featuring two-stage node matching, an IoU gate followed by Hungarian assignment on blended spatial-semantic similarity, and comprehensive metrics spanning node\u002Fedge precision-recall-F1, matched-pair IoU, and structural validity. On a held-out test set, our models achieve Node F1μ up to 0.621 and Edge F1μ up to 0.208, with mean matched IoU of ≈0.61 and >98% schema conformity. We show that MLP gating consistently improves predicate accuracy and spatial grounding, while post-training quantisation maintains accuracy and improves deployability on edge hardware. We release the dataset, code, adapters, and evaluation toolkit to establish an open, interpretable baseline for future temporal and multi-view extensions.",{"paper_id":19784,"title":19785,"year":7,"month":188,"day":63,"doi":19786,"resource_url":19787,"first_page":19788,"last_page":19789,"pdf_url":19790,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19791,"paper_type":860,"authors":19792,"abstract":19808},"lrec2026-main-855","Injecting Structured Biomedical Knowledge into Language Models: Continual Pretraining vs. 
GraphRAG","10.63317\u002F35ddm5i6bjyd","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-855","10927","10936","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.855.pdf","klila-etal-2026-injecting",[19793,19796,19799,19802,19805],{"paper_id":19784,"author_seq":247,"given_name":19794,"surname":19795,"affiliation":63,"orcid":63},"Jaafer","Klila",{"paper_id":19784,"author_seq":232,"given_name":19797,"surname":19798,"affiliation":63,"orcid":63},"Sondes Bannour","Souihi",{"paper_id":19784,"author_seq":218,"given_name":19800,"surname":19801,"affiliation":63,"orcid":63},"Rahma","Boujelbane",{"paper_id":19784,"author_seq":203,"given_name":19803,"surname":19804,"affiliation":63,"orcid":63},"Nasredine","Semmar",{"paper_id":19784,"author_seq":188,"given_name":19806,"surname":19807,"affiliation":63,"orcid":63},"Lamia","Hadrich-Belguith","The injection of domain-specific knowledge is crucial for adapting language models (LMs) to specialized fields such as biomedicine. While most current approaches rely on unstructured text corpora, this study explores two complementary strategies for leveraging structured knowledge from the UMLS Metathesaurus: (i) Continual pretraining that embeds knowledge into model parameters, and (ii) Graph Retrieval-Augmented Generation (GraphRAG) that consults a knowledge graph at inference time. We first construct a large-scale biomedical knowledge graph from UMLS (3.4 million concepts and 34.2 million relations), stored in Neo4j for efficient querying. We then derive a  100-million-token textual corpus from this graph to continually pretrain two models: BERTUMLS (from BERT) and BioBERTUMLS (from BioBERT). We evaluate these models on six BLURB (Biomedical Language Understanding and Reasoning Benchmark) datasets spanning five task types and evaluate GraphRAG on the two QA (Question Answering) datasets (PubMedQA, BioASQ). 
On BLURB tasks, BERTUMLS improves over BERT, with the largest gains on knowledge-intensive QA. Effects on BioBERT are more nuanced, suggesting diminishing returns when the base model already encodes substantial biomedical text knowledge. Finally, augmenting LLaMA 3-8B with our GraphRAG pipeline yields an accuracy gain of over 3 points on PubMedQA and 5 points on BioASQ without any retraining, delivering transparent, multi-hop, and easily updated knowledge access. We release the processed UMLS Neo4j graph to support reproducibility.",{"paper_id":19810,"title":19811,"year":7,"month":188,"day":63,"doi":19812,"resource_url":19813,"first_page":19814,"last_page":19815,"pdf_url":19816,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19817,"paper_type":860,"authors":19818,"abstract":19831},"lrec2026-main-856","Linguistic Knowledge Graphs for Sense Prediction: A Case-study on Latin","10.63317\u002F2b2274srsgkf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-856","10937","10952","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.856.pdf","ghizzota-etal-2026-linguistic",[19819,19821,19824,19826,19829],{"paper_id":19810,"author_seq":247,"given_name":7656,"surname":19820,"affiliation":63,"orcid":63},"Ghizzota",{"paper_id":19810,"author_seq":232,"given_name":19822,"surname":19823,"affiliation":63,"orcid":63},"Paola","Marongiu",{"paper_id":19810,"author_seq":218,"given_name":19825,"surname":6937,"affiliation":63,"orcid":63},"Pierpaolo",{"paper_id":19810,"author_seq":203,"given_name":19827,"surname":19828,"affiliation":63,"orcid":63},"Stefano","Ferilli",{"paper_id":19810,"author_seq":188,"given_name":3175,"surname":19830,"affiliation":63,"orcid":63},"McGillivray","This paper investigates the integration of the Linguistic Knowledge Graph (LKG) and Large Language Models (LLMs) for word sense prediction in Latin, a morphologically rich and low-resource historical language. 
Building on recent work in word sense disambiguation (WSD) and semantic change detection, we use a LKG that integrates information from a diachronic Latin corpus, a sense-annotated dataset of Latin, Latin WordNet, and Wikidata, as a structured representation of semantic and contextual relations. We present sense prediction as a binary classification task over the Latin dataset, using a Graph Retrieval-Augmented Generation approach that combines knowledge graph retrieval with LLM prompting. Two types of graph metadata are tested: author-related information (work, period, occupation) and linguistic metadata (synset and hypernyms derived from WordNet for each word sense). Experiments conducted on GPT-4o-mini, LLaMA-3.1-8B and LLaMA-3.3-70B show varying performance, with F1 scores ranging from 0.53 to 0.77. While GPT-4o-mini achieves the best overall accuracy, LLaMA-3.3-70B benefits the most from graph-based metadata, improving its F1 score by up to 3 points. Analysis by word type reveals that concrete and semantically shifting words are more easily disambiguated than abstract and semantically stable words. 
Results highlight both the promise and the challenges of combining graph-structured linguistic knowledge with LLMs for historical WSD.",{"paper_id":19833,"title":19834,"year":7,"month":188,"day":63,"doi":19835,"resource_url":19836,"first_page":19837,"last_page":19838,"pdf_url":19839,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19840,"paper_type":860,"authors":19841,"abstract":19856},"lrec2026-main-857","ACID: On the Perception of Online Classism","10.63317\u002F2myisgn9aju6","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-857","10953","10969","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.857.pdf","muti-etal-2026-acid",[19842,19843,19845,19848,19850,19853],{"paper_id":19833,"author_seq":247,"given_name":3060,"surname":3061,"affiliation":63,"orcid":63},{"paper_id":19833,"author_seq":232,"given_name":2155,"surname":19844,"affiliation":63,"orcid":63},"Bassignana",{"paper_id":19833,"author_seq":218,"given_name":19846,"surname":19847,"affiliation":63,"orcid":63},"Amanda Cercas","Curry",{"paper_id":19833,"author_seq":203,"given_name":3205,"surname":19849,"affiliation":63,"orcid":63},"Durante",{"paper_id":19833,"author_seq":188,"given_name":19851,"surname":19852,"affiliation":63,"orcid":63},"Dirk","Hovy",{"paper_id":19833,"author_seq":172,"given_name":19854,"surname":19855,"affiliation":63,"orcid":63},"Debora","Nozza","Socioeconomic status (SES) structures social inequality and underlies class-based discrimination that is often rationalised through stereotypes expressed in public discourse. However, despite extensive research on hate speech detection in Natural Language Processing, classism detection remains an underexplored phenomenon. We introduce ACID, a cross-cultural corpus with over 1.15 million instances, to investigate classism across YouTube and Twitter from 14 English-speaking countries. 
We examine (i) which stereotypes are invoked towards lower-SES, (ii) whether blame for lower-SES is attributed to individuals or structural factors, and (iii) whether these people are portrayed offensively. Across platforms, explanations are predominantly framed in terms of individual responsibility. Across countries, class stereotypes consistently revolve around moralized notions of dependency, laziness, and ignorance, revealing a shared global structure of class-based stigma. Our dataset and analysis are a foundation to advance research on class-based discrimination and its representation in online discourse.",{"paper_id":19858,"title":19859,"year":7,"month":188,"day":63,"doi":19860,"resource_url":19861,"first_page":19862,"last_page":19863,"pdf_url":19864,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19865,"paper_type":860,"authors":19866,"abstract":19872},"lrec2026-main-858","The Spectrum of Sentiment: Optimistic, Pessimistic, and Neutral Voices in Online Depression Discourse","10.63317\u002F3eqsahrxy75e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-858","10970","10981","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.858.pdf","tabusca-etal-2026-spectrum",[19867,19870,19871],{"paper_id":19858,"author_seq":247,"given_name":19868,"surname":19869,"affiliation":63,"orcid":63},"Stefana Arina","Tabusca",{"paper_id":19858,"author_seq":232,"given_name":7181,"surname":7182,"affiliation":63,"orcid":63},{"paper_id":19858,"author_seq":218,"given_name":5687,"surname":5688,"affiliation":63,"orcid":63},"The relationship between depression and the concepts of optimism and pessimism has been extensively researched by psychologists. In this paper, we use computational approaches to study how optimism and pessimism are expressed in the online discourse of people with a depression diagnosis. 
Publicly available datasets are used for the development of an optimism\u002Fpessimism detection model, as well as for the analyses performed on social media posts of individuals with depression, as measured by BDI-II, a validated depression questionnaire. To analyze the optimistic and pessimistic posts by individuals with depression, we use LIWC features and perform topic modeling. We also investigate specific words driving mislabeling using SHAP. Our results show that while there may not be significant differences in the number of optimistic versus pessimistic posts between individuals in the depression and control groups, the content of the posts differs meaningfully, both in terms of linguistic features and approached topics.",{"paper_id":19874,"title":19875,"year":7,"month":188,"day":63,"doi":19876,"resource_url":19877,"first_page":19878,"last_page":19879,"pdf_url":19880,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19881,"paper_type":860,"authors":19882,"abstract":19888},"lrec2026-main-859","A Benchmark Dataset and Comparative Evaluation of Phonemized and Romanized Urdu for Text-to-Speech","10.63317\u002F2avnr98mgbre","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-859","10982","10993","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.859.pdf","shahid-etal-2026-benchmark",[19883,19886],{"paper_id":19874,"author_seq":247,"given_name":19884,"surname":19885,"affiliation":63,"orcid":63},"M Kaab Bin","Shahid",{"paper_id":19874,"author_seq":232,"given_name":6768,"surname":19887,"affiliation":63,"orcid":63},"Izharuddin","Text-to-Speech (TTS) system for the Urdu language presents significant challenges, primarily due to the scarcity of high-quality datasets and an insufficient focus on modeling pronunciation. Urdu is spoken by 250 million people worldwide, but its research on computational linguistics remains underrepresented. 
In this paper, we introduce URDUTTS, a comprehensive and publicly available Urdu TTS dataset containing 89 hours of studio-quality speech, with accompanying transcriptions in three formats: Urdu Script, Phonemized Script, and Romanized Script. The dataset includes both mono-speaker and multi-speaker configurations. As Urdu relies heavily on phonetic features, accurate pronunciation is highly essential for the language. Therefore, we benchmark our dataset using VITS and GlowTTS models to compare the widely used Romanized script format with the Phonemized representation. To make the evaluation highly comprehensive, we combined both objective and subjective evaluation strategies. For objective evaluation, Mel-Cepstral Distortion (MCD with Plain, Dynamic Time-Warping, and Slope-Limitation variants), Signal-to-Noise Ratio (SNR), Word Error Rate (WER), and Character Error Rate (CER) were taken. Subjective evaluation was governed by Mean Opinion Score (MOS) ratings from 40 native speakers. Results show that using VITS and GlowTTS with Phonemized transcriptions performs significantly better than Romanized ones, with an improvement of 9.6% and 26.5% in MOS. 
The data and code are available at github.com\u002FKAABSHAHID\u002FURDUTTS.",{"paper_id":19890,"title":19891,"year":7,"month":188,"day":63,"doi":19892,"resource_url":19893,"first_page":19894,"last_page":19895,"pdf_url":19896,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19897,"paper_type":860,"authors":19898,"abstract":19909},"lrec2026-main-860","S-VoCAL: A Dataset and Evaluation Framework for Inferring Speaking Voice Character Attributes in Literature","10.63317\u002F4hnrkbo6dhi5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-860","10994","11009","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.860.pdf","berthepardo-etal-2026-vocal",[19899,19902,19905,19908],{"paper_id":19890,"author_seq":247,"given_name":19900,"surname":19901,"affiliation":63,"orcid":63},"Abigail","Berthe-Pardo",{"paper_id":19890,"author_seq":232,"given_name":19903,"surname":19904,"affiliation":63,"orcid":63},"Gaspard","Michel",{"paper_id":19890,"author_seq":218,"given_name":19906,"surname":19907,"affiliation":63,"orcid":63},"Elena V.","Epure",{"paper_id":19890,"author_seq":203,"given_name":5419,"surname":5420,"affiliation":63,"orcid":63},"With recent advances in Text-to-Speech (TTS) systems, synthetic audiobook narration has seen increased interest, reaching unprecedented levels of naturalness. However, larger gaps remain in synthetic narration systems’ ability to impersonate fictional characters, and convey complex emotions or prosody. A promising direction to enhance character identification is the assignment of plausible voices to each fictional character in a book. This step typically requires complex inference of attributes in book-length contexts, such as a character’s age, gender, origin or physical health, which in turn requires dedicated benchmark datasets to evaluate extraction systems’ performances. 
We present S-VoCAL (Speaking Voice Character Attributes in Literature), the first dataset and evaluation framework dedicated to evaluate the inference of voice-related fictional character attributes. S-VoCAL entails 8 attributes grounded in sociophonetic studies, and 952 character-book pairs derived from Project Gutenberg. Its evaluation framework addresses the particularities of each attribute, and includes a novel similarity metric based on recent Large Language Models embeddings. We demonstrate the applicability of S-VoCAL by applying a simple Retrieval-Augmented Generation (RAG) pipeline to the task of inferring character attributes. Our results suggest that the RAG pipeline reliably infers attributes such as Age or Gender, but struggles on others such as Origin or Physical Health. The dataset and evaluation code are available at https:\u002F\u002Fgithub.com\u002FAbigailBerthe\u002FS-VoCAL.",{"paper_id":19911,"title":19912,"year":7,"month":188,"day":63,"doi":19913,"resource_url":19914,"first_page":19915,"last_page":19916,"pdf_url":19917,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19918,"paper_type":860,"authors":19919,"abstract":19928},"lrec2026-main-861","BankMathBench: A Benchmark for Numerical Reasoning in Banking Scenarios","10.63317\u002F3uxnd7yxsmsb","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-861","11010","11027","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.861.pdf","lee-etal-2026-bankmathbench",[19920,19922,19924,19925],{"paper_id":19911,"author_seq":247,"given_name":19921,"surname":1359,"affiliation":63,"orcid":63},"Yunseung",{"paper_id":19911,"author_seq":232,"given_name":19923,"surname":5173,"affiliation":63,"orcid":63},"Subin",{"paper_id":19911,"author_seq":218,"given_name":16765,"surname":16766,"affiliation":63,"orcid":63},{"paper_id":19911,"author_seq":203,"given_name":19926,"surname":19927,"affiliation":63,"orcid":63},"Jaegul","Choo","Large language 
models (LLMs)-based chatbots are increasingly being adopted in the financial domain, particularly in digital banking, to handle customer inquiries about products such as deposits, savings, and loans. However, these models still exhibit low accuracy in core banking computations—including total payout estimation, comparison of products with varying interest rates, and interest calculation under early repayment conditions. Such tasks require multi-step numerical reasoning and contextual understanding of banking products, yet existing LLMs often make systematic errors—misinterpreting product types, applying conditions incorrectly, or failing basic calculations involving exponents and geometric progressions. However, such errors have rarely been captured by existing benchmarks. Mathematical datasets focus on fundamental math problems, whereas financial benchmarks primarily target financial documents, leaving everyday banking scenarios underexplored. To address this limitation, we propose BankMathBench, a domain-specific dataset that reflects realistic banking tasks. BankMathBench is organized in three levels of difficulty—basic, intermediate, and advanced—corresponding to single-product reasoning, multi-product comparison, and multi-condition scenarios, respectively. When trained on BankMathBench, open-source LLMs exhibited notable improvements in both formula generation and numerical reasoning accuracy, demonstrating the dataset’s effectiveness in enhancing domain-specific reasoning. With tool-augmented fine-tuning, the models achieved average accuracy increases of 57.6%p (basic), 75.1%p (intermediate), and 62.9%p (advanced), representing significant gains over zero-shot baselines. 
These findings highlight BankMathBench as a reliable benchmark for evaluating and advancing LLMs’ numerical reasoning in real-world banking scenarios.",{"paper_id":19930,"title":19931,"year":7,"month":188,"day":63,"doi":19932,"resource_url":19933,"first_page":19934,"last_page":19935,"pdf_url":19936,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19937,"paper_type":860,"authors":19938,"abstract":19961},"lrec2026-main-862","TR-TEB: Turkish Text Embedding Benchmark","10.63317\u002F3qway8hn6y53","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-862","11028","11044","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.862.pdf","arslan-etal-2026-tr",[19939,19941,19944,19947,19950,19953,19955,19958],{"paper_id":19930,"author_seq":247,"given_name":19940,"surname":17341,"affiliation":63,"orcid":63},"Omer",{"paper_id":19930,"author_seq":232,"given_name":19942,"surname":19943,"affiliation":63,"orcid":63},"Atalay","Celik",{"paper_id":19930,"author_seq":218,"given_name":19945,"surname":19946,"affiliation":63,"orcid":63},"Yusuf","Aslan",{"paper_id":19930,"author_seq":203,"given_name":19948,"surname":19949,"affiliation":63,"orcid":63},"Hasan Fatih","Durkaya",{"paper_id":19930,"author_seq":188,"given_name":19951,"surname":19952,"affiliation":63,"orcid":63},"Mustafa Furkan","Zenginoglu",{"paper_id":19930,"author_seq":172,"given_name":19954,"surname":3058,"affiliation":63,"orcid":63},"Musa Alperen",{"paper_id":19930,"author_seq":155,"given_name":19956,"surname":19957,"affiliation":63,"orcid":63},"Merve Gul","Kantarci",{"paper_id":19930,"author_seq":138,"given_name":19959,"surname":19960,"affiliation":63,"orcid":63},"Mehmet","Haklidir","Text embeddings are central to modern natural language processing, enabling several downstream tasks. Despite their significance, existing evaluation frameworks primarily target English and other high-resource languages, leaving critical gaps for languages such as Turkish. 
To address this, we present TR-TEB (Turkish Text Embedding Benchmark), the first comprehensive, standardized, and reproducible benchmark for Turkish text embeddings. TR-TEB spans five core task categories: classification, pair classification, clustering, retrieval, and semantic textual similarity. It is supported by a diverse dataset portfolio that integrates 14 curated open-source resources, 26 high-quality translated datasets, and 7 newly constructed Turkish-specific datasets designed to capture the language’s unique characteristics. We test our framework by comparing 45 well-known open-source embedding models. As the first unified evaluation suite, TR-TEB serves as a core tool for the Turkish embedding research community, establishing a systematic basis for model comparison and improvement. Furthermore, its benchmarking methodology and dataset creation process provide a blueprint for extending robust embedding evaluation to other low-resource languages.",{"paper_id":19963,"title":19964,"year":7,"month":188,"day":63,"doi":19965,"resource_url":19966,"first_page":19967,"last_page":19968,"pdf_url":19969,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19970,"paper_type":860,"authors":19971,"abstract":19983},"lrec2026-main-863","Simple Additions, Substantial Gains: Expanding Scripts, Languages, and Lineage Coverage in 
URIEL+","10.63317\u002F5e6qapi74t97","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-863","11045","11059","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.863.pdf","shipton-etal-2026-simple",[19972,19975,19976,19977,19979,19981,19982],{"paper_id":19963,"author_seq":247,"given_name":19973,"surname":19974,"affiliation":63,"orcid":63},"Mason","Shipton",{"paper_id":19963,"author_seq":232,"given_name":16233,"surname":2233,"affiliation":63,"orcid":63},{"paper_id":19963,"author_seq":218,"given_name":5520,"surname":2909,"affiliation":63,"orcid":63},{"paper_id":19963,"author_seq":203,"given_name":19978,"surname":9801,"affiliation":63,"orcid":63},"Phuong H.",{"paper_id":19963,"author_seq":188,"given_name":19980,"surname":16398,"affiliation":63,"orcid":63},"Xiang",{"paper_id":19963,"author_seq":172,"given_name":15592,"surname":15593,"affiliation":63,"orcid":63},{"paper_id":19963,"author_seq":155,"given_name":11727,"surname":1359,"affiliation":63,"orcid":63},"The URIEL+ linguistic knowledge base supports multilingual research by encoding languages through geographic, genetic, and typological vectors. However, data sparsity (e.g. missing feature types, incomplete language entries, and limited genealogical coverage) remains prevalent. This limits the usefulness of URIEL+ in cross-lingual transfer, particularly for supporting low-resource languages. To address this sparsity, we extend URIEL+ by introducing script vectors to represent writing system properties for 7,488 languages, integrating Glottolog to add 18,710 additional languages, and expanding lineage imputation for 26,449 languages by propagating typological and script features across genealogies. These improvements reduce feature sparsity by 14% for script vectors, increase language coverage by up to 19,015 languages (1,007%), and boost imputation quality metrics by up to 35%. 
Our benchmark on cross-lingual transfer tasks (oriented around low-resource languages) shows occasionally divergent performance compared to URIEL+, with performance gains up to 6% in certain setups.",{"paper_id":19985,"title":19986,"year":7,"month":188,"day":63,"doi":19987,"resource_url":19988,"first_page":19989,"last_page":19990,"pdf_url":19991,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":19992,"paper_type":860,"authors":19993,"abstract":20006},"lrec2026-main-864","SciClaimEval: Cross-modal Claim Verification in Scientific Papers","10.63317\u002F4ap9rg2gnwmf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-864","11060","11071","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.864.pdf","ho-etal-2026-sciclaimeval",[19994,19996,19998,20000,20002,20003,20005],{"paper_id":19985,"author_seq":247,"given_name":19995,"surname":919,"affiliation":63,"orcid":63},"Xanh",{"paper_id":19985,"author_seq":232,"given_name":19997,"surname":7319,"affiliation":63,"orcid":63},"Yun-Ang",{"paper_id":19985,"author_seq":218,"given_name":19999,"surname":2247,"affiliation":63,"orcid":63},"Sunisth",{"paper_id":19985,"author_seq":203,"given_name":20001,"surname":9847,"affiliation":63,"orcid":63},"Tian Cheng",{"paper_id":19985,"author_seq":188,"given_name":2175,"surname":8777,"affiliation":63,"orcid":63},{"paper_id":19985,"author_seq":172,"given_name":4550,"surname":20004,"affiliation":63,"orcid":63},"Greiner-Petter",{"paper_id":19985,"author_seq":155,"given_name":2043,"surname":18997,"affiliation":63,"orcid":63},"We present SciClaimEval, a new scientific dataset for the claim verification task. Unlike existing resources, SciClaimEval features authentic claims, including refuted ones, directly extracted from published papers. 
To create refuted claims, we introduce a novel approach that modifies the supporting evidence (figures and tables), rather than altering the claims or relying on large language models (LLMs) to fabricate contradictions. The dataset provides cross-modal evidence with diverse representations: figures are available as images, while tables are provided in multiple formats, including images, LaTeX source, HTML, and JSON. SciClaimEval contains 1,664 annotated samples from 180 papers across three domains, machine learning, natural language processing, and medicine, validated through expert annotation. We benchmark 11 multimodal foundation models, both open-source and proprietary, across the dataset. Results show that figure-based verification remains particularly challenging for all models, as a substantial performance gap remains between the best system and human baseline.",{"paper_id":20008,"title":20009,"year":7,"month":188,"day":63,"doi":20010,"resource_url":20011,"first_page":20012,"last_page":20013,"pdf_url":20014,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20015,"paper_type":860,"authors":20016,"abstract":20021},"lrec2026-main-865","Localizing Events in Space: Comparing Humans and AI Models","10.63317\u002F5hmw7fkruat3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-865","11072","11084","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.865.pdf","kim-etal-2026-localizing",[20017,20019,20020],{"paper_id":20008,"author_seq":247,"given_name":20018,"surname":5173,"affiliation":63,"orcid":63},"Derrick Eui Gyu",{"paper_id":20008,"author_seq":232,"given_name":2007,"surname":3861,"affiliation":63,"orcid":63},{"paper_id":20008,"author_seq":218,"given_name":4091,"surname":4092,"affiliation":63,"orcid":63},"Understanding how Large Language Models (LLMs) and Text-to-Image models (T2Is) acquire and apply implicit spatial knowledge remains an open challenge. 
In this paper, we present a novel dataset and evaluation framework designed to probe event localization capabilities in both humans, LLMs and T2Is. Our dataset includes 134 sentence pairs derived from Flickr30k captions, where explicit location information is systematically removed via Abstract Meaning Representation (AMR) parsing and manual refinement. Using this dataset, we analyze the effects of location ablation on spatial reasoning across human annotators, LLMs, and T2Is. Results show that while humans maintain robust location inferences after ablation, LLMs exhibit degraded performance, particularly for semantically polysemous verbs. T2Is demonstrate similar limitations, often generating visually inconsistent spatial contexts when locative cues are missing. Our findings highlight the gap between human and LLMs and T2Is in recovering implicit situational knowledge and suggest future directions for improving spatial reasoning in multimodal AI systems. This dataset contribution work serves as a proof-of-concept for systematic evaluation of implicit spatial reasoning and paves the way for larger-scale studies.",{"paper_id":20023,"title":20024,"year":7,"month":188,"day":63,"doi":20025,"resource_url":20026,"first_page":20027,"last_page":20028,"pdf_url":20029,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20030,"paper_type":860,"authors":20031,"abstract":20038},"lrec2026-main-866","STRUDEL: Unrolling a Benchmark for Evaluating Vision-Language Models on Structured Diagram Understanding across 
Domains","10.63317\u002F33jqjf2wspgp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-866","11085","11107","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.866.pdf","steinigen-etal-2026-strudel",[20032,20034,20036],{"paper_id":20023,"author_seq":247,"given_name":1668,"surname":20033,"affiliation":63,"orcid":63},"Steinigen",{"paper_id":20023,"author_seq":232,"given_name":3460,"surname":20035,"affiliation":63,"orcid":63},"Flek",{"paper_id":20023,"author_seq":218,"given_name":4763,"surname":20037,"affiliation":63,"orcid":63},"Houben","Vision-Language Models (VLMs) have achieved impressive progress across diverse multimodal tasks, yet their ability to interpret structured diagrams, such as circuit schematics, molecular structures, musical notation, business process flow charts or class diagrams, which are central to scientific and engineering communication, remains underexplored. We introduce STRUDEL (STRUctured Diagram EvaLuation), a benchmark for evaluating VLMs on structured diagram understanding across 8 domains and 20 image categories. STRUDEL leverages Large-Language Models (LLMs) to synthesize code in domain-specific formal representation languages (FRLs) (e.g. circuit netlists, SMILES, ABC-Notation, BPMN or PlantUML), which are rendered into valid diagrams and paired with generated tasks, functional descriptions, and captions. A multi-stage pipeline filters invalid, cluttered, or redundant samples and employs LLM-as-a-judge scoring to ensure correctness. Through targeted experiments, we evaluate the ability of LLMs to generate valid code in distinct FRLs, demonstrating their capability to successfully perform this task. The resulting benchmark comprises diverse task types covering identification, quantification, structural analysis, image-text association, and image-to-code translation. 
Evaluating 35 VLMs using STRUDEL reveals that models excel at association tasks, demonstrating strong visual-textual alignment, yet struggle with quantification and identification, where precise structural understanding is required. Performance varies markedly in image-to-code translation, reflecting significant differences in how models connect visual inputs to formal representations. Overall, STRUDEL establishes a scalable foundation for assessing and advancing VLMs toward deeper and more systematic understanding of structured visual information across domains.",{"paper_id":20040,"title":20041,"year":7,"month":188,"day":63,"doi":20042,"resource_url":20043,"first_page":20044,"last_page":20045,"pdf_url":20046,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20047,"paper_type":860,"authors":20048,"abstract":20056},"lrec2026-main-867","VG-CoT: Towards Trustworthy Visual Reasoning via Grounded Chain-of-Thought","10.63317\u002F28jn9v88qbod","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-867","11108","11118","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.867.pdf","lim-etal-2026-vg",[20049,20052,20054,20055],{"paper_id":20040,"author_seq":247,"given_name":20050,"surname":20051,"affiliation":63,"orcid":63},"Byeonggeuk","Lim",{"paper_id":20040,"author_seq":232,"given_name":20053,"surname":5173,"affiliation":63,"orcid":63},"Kyeonghyun",{"paper_id":20040,"author_seq":218,"given_name":10122,"surname":10123,"affiliation":63,"orcid":63},{"paper_id":20040,"author_seq":203,"given_name":6880,"surname":5173,"affiliation":63,"orcid":63},"The advancement of Large Vision-Language Models (LVLMs) requires precise local region-based reasoning that faithfully grounds the model’s logic in actual visual evidence. 
However, existing datasets face limitations in scalability due to extensive manual annotation and lack explicit alignment between multi-step reasoning and corresponding image regions, which constrains the evaluation of model trustworthiness. To address these challenges, we propose the Visual Grounding Chain-of-Thought (VG-CoT) dataset, which explicitly links each reasoning step to real visual evidence within the image through a fully automated three-stage pipeline. The pipeline first extracts object- and text-level visual evidence using state-of-the-art detection and OCR models, then generates step-by-step grounded reasoning with GPT-4o, and finally refines the grounding through a rationale-driven open-set detection process. In addition, we introduce a new benchmark that comprehensively evaluates LVLMs reasoning across three complementary dimensions: Rationale Quality, Answer Accuracy, and Reasoning–Answer Alignment. Experiments with representative LVLMs, including LLaVA-1.5 and Qwen2-VL, demonstrate consistent improvements across all evaluation metrics, confirming that VG-CoT effectively enhances trustworthy, evidence-based reasoning while maintaining scalable and cost-efficient dataset construction. 
The dataset and code will be released publicly upon acceptance to facilitate further research.",{"paper_id":20058,"title":20059,"year":7,"month":188,"day":63,"doi":20060,"resource_url":20061,"first_page":20062,"last_page":20063,"pdf_url":20064,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20065,"paper_type":860,"authors":20066,"abstract":20075},"lrec2026-main-868","VectorEdits: A Dataset and Benchmark for Instruction-Based Editing of Vector Graphics","10.63317\u002F5gc5ibtb5k8i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-868","11119","11124","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.868.pdf","kuchar-etal-2026-vectoredits",[20067,20069,20071,20073],{"paper_id":20058,"author_seq":247,"given_name":1648,"surname":20068,"affiliation":63,"orcid":63},"Kuchar",{"paper_id":20058,"author_seq":232,"given_name":7398,"surname":20070,"affiliation":63,"orcid":63},"Kadlcik",{"paper_id":20058,"author_seq":218,"given_name":1631,"surname":20072,"affiliation":63,"orcid":63},"Spiegel",{"paper_id":20058,"author_seq":203,"given_name":1631,"surname":20074,"affiliation":63,"orcid":63},"Stefanik","We introduce a large-scale dataset for instruction-guided vector image editing, consisting of over 270,000 pairs of SVG images paired with natural language edit instructions. Our dataset enables training and evaluation of models that modify vector graphics based on textual commands. We describe the data collection process, including image pairing via CLIP similarity and instruction generation with vision-language models. Initial experiments with state-of-the-art large language models reveal that current methods struggle to produce accurate and valid edits, underscoring the challenge of this task. 
To foster research in natural language-driven vector graphic generation and editing, we make our resources created within this work publicly available.",{"paper_id":20077,"title":20078,"year":7,"month":188,"day":63,"doi":20079,"resource_url":20080,"first_page":20081,"last_page":20082,"pdf_url":20083,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20084,"paper_type":860,"authors":20085,"abstract":20093},"lrec2026-main-869","ViWikiFC: Fact-Checking for Vietnamese Wikipedia-Based Textual Knowledge Source","10.63317\u002F5ew98tfds6jc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-869","11125","11140","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.869.pdf","le-etal-2026-viwikifc",[20086,20087,20090,20092],{"paper_id":20077,"author_seq":247,"given_name":6467,"surname":6468,"affiliation":63,"orcid":63},{"paper_id":20077,"author_seq":232,"given_name":20088,"surname":20089,"affiliation":63,"orcid":63},"Long Truong","To",{"paper_id":20077,"author_seq":218,"given_name":20091,"surname":2395,"affiliation":63,"orcid":63},"Manh Trong",{"paper_id":20077,"author_seq":203,"given_name":6476,"surname":2395,"affiliation":63,"orcid":63},"Fact-checking is essential due to the explosion of misinformation in the media ecosystem. Although false information exists in every language and country, most research to solve the problem has mainly concentrated on huge communities like English and Chinese. Low-resource languages like Vietnamese are necessary to explore corpora and models for fact verification. To bridge this gap, we construct ViWikiFC, the first manually annotated open-domain corpus for Vietnamese Wikipedia Fact Checking more than 20K claims generated by converting evidence sentences extracted from Wikipedia articles. We analyze our corpus through many linguistic aspects, from the new dependency rate, the new n-gram rate, and the new word rate. 
We conducted various experiments for Vietnamese fact-checking, including evidence retrieval and verdict prediction. BM25 and InfoXLM_(Large) achieved the best results in two tasks, with BM25 achieving an accuracy of 88.30% for SUPPORTS, 86.93% for REFUTES, and only 56.67% for the NEI label in the evidence retrieval task. InfoXLM_(Large) achieved an F₁ score of 86.51%. Furthermore, we also conducted a pipeline approach, which only achieved a strict accuracy of 67.00% when using InfoXLM_(Large) and BM25. These results demonstrate that our dataset is challenging for the Vietnamese language model in fact-checking tasks.",{"paper_id":20095,"title":20096,"year":7,"month":188,"day":63,"doi":20097,"resource_url":20098,"first_page":20099,"last_page":20100,"pdf_url":20101,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20102,"paper_type":860,"authors":20103,"abstract":20111},"lrec2026-main-870","Automated Extraction of Answer Candidates for Question Generation","10.63317\u002F5on33ymypqu7","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-870","11141","11151","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.870.pdf","preda-etal-2026-automated",[20104,20106,20107,20108],{"paper_id":20095,"author_seq":247,"given_name":4283,"surname":20105,"affiliation":63,"orcid":63},"Preda",{"paper_id":20095,"author_seq":232,"given_name":12147,"surname":12146,"affiliation":63,"orcid":63},{"paper_id":20095,"author_seq":218,"given_name":2813,"surname":12144,"affiliation":63,"orcid":63},{"paper_id":20095,"author_seq":203,"given_name":20109,"surname":20110,"affiliation":63,"orcid":63},"Danielle S.","McNamara","Answering questions based on a reference text is a frequently employed comprehension assessment method that enables teachers to effectively and efficiently evaluate students. 
Various tools and methods were developed to tackle automated question generation, however, selecting valid answer candidates as a first step is less addressed. Thus, we introduce a solution built on top of FairytaleQA and tailored for training a DeBERTa-based model to classify the quality of each candidate to be part of a strong answer-question pair. First, we extract answer candidates by syntactically parsing the context (i.e., selecting text spans from the reference text based on the nodes in the constituency tree); then, questions are generated for the extracted candidates using a pre-trained LLM model on this task. Next, we assess a candidate’s quality by relying on another fine-tuned model’s capability to answer the previously generated question for that candidate. This enables us to categorize answers using a four-class system: very good, good, average, and unusable. A significant advantage of our method is that the encoder classifier can score all potential answer candidates in a single inference step for the entire context. We compare our selection against both the answers from explicit questions in the original dataset and a fine-tuned LLM for answer selection using an Elo ranking system. 
In addition, we propose three strategies based on semantic similarity and text position to ensure coverage and diversity of candidates’ selection.",{"paper_id":20113,"title":20114,"year":7,"month":188,"day":63,"doi":20115,"resource_url":20116,"first_page":20117,"last_page":20118,"pdf_url":20119,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20120,"paper_type":860,"authors":20121,"abstract":20132},"lrec2026-main-871","Green Bots versus Red Bots: Evaluating Large Language Models for Simulating Persuasion Dynamics in Online Influence Campaigns","10.63317\u002F3ucmt3dnin5n","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-871","11152","11171","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.871.pdf","ali-etal-2026-green",[20122,20124,20127,20129],{"paper_id":20113,"author_seq":247,"given_name":20123,"surname":2207,"affiliation":63,"orcid":63},"Majd Eddin Al",{"paper_id":20113,"author_seq":232,"given_name":20125,"surname":20126,"affiliation":63,"orcid":63},"Filip Mihai","Muntean",{"paper_id":20113,"author_seq":218,"given_name":12528,"surname":20128,"affiliation":63,"orcid":63},"Donatelli",{"paper_id":20113,"author_seq":203,"given_name":20130,"surname":20131,"affiliation":63,"orcid":63},"Jurriaan van","Diggelen","Large language models (LLMs) are increasingly used to simulate social interaction and persuasion dynamics, yet their validity as proxies for human cognition and behavior remains unverified. We propose a dual-level evaluation framework to assess LLM-based agents at both the individual and collective levels. At the individual level, we examine agent fidelity by comparing LLM-generated political personas to human benchmark data. We find that while agents capture broad partisan orientations, they underestimate within-group variability and reproduce stereotypical ideological biases. 
At the collective level, we deploy Big Five personality-differentiated agents in 1080 structured dialogues to test the effect of rhetorical strategy on persuasive success. Our simulations reproduce theoretically expected interaction patterns; nevertheless, belief shifts are exaggerated relative to human baselines, supporting LLMs’ tendency toward over-responsiveness. These findings suggest a trade-off between engagement-optimized training objectives and psychological realism, confirming the need to use LLMs with caution to simulate human behavior. We contribute three resources: a persuasion dynamics dataset, a standardized agent taxonomy of \"red\" and \"green\" bots, and a framework for evaluating both individual-agent fidelity and emergent group-level behavior.",{"paper_id":20134,"title":20135,"year":7,"month":188,"day":63,"doi":20136,"resource_url":20137,"first_page":20138,"last_page":20139,"pdf_url":20140,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20141,"paper_type":860,"authors":20142,"abstract":20149},"lrec2026-main-872","Towards Expectation Detection in Language: A Case Study on Treatment Expectations in Reddit","10.63317\u002F3d6bmefz4xyw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-872","11172","11187","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.872.pdf","velutharambath-etal-2026-expectation",[20143,20146],{"paper_id":20134,"author_seq":247,"given_name":20144,"surname":20145,"affiliation":63,"orcid":63},"Aswathy","Velutharambath",{"paper_id":20134,"author_seq":232,"given_name":20147,"surname":20148,"affiliation":63,"orcid":63},"Amelie","Wührl","Patients’ expectations towards their treatment have a substantial effect on the treatments’ success. While primarily studied in clinical settings, online patient platforms like medical subreddits may hold complementary insights: treatment expectations that patients feel unnecessary or uncomfortable to share elsewhere. 
Despite this, no studies examine what type of expectations users discuss online and how they express them. Presumably this is because expectations have not been studied in natural language processing (NLP) before. Therefore, we introduce the task of Expectation Detection, arguing that expectations are relevant for many applications, including opinion mining and product design. Subsequently, we present a case study for the medical domain, where expectations are particularly crucial to extract. We contribute RedHOTExpect, a corpus of Reddit posts (4.5K posts) to study expectations in this context. We use a large language model (LLM) to silver-label the data and validate its quality manually (label accuracy 78%). Based on this, we analyze which linguistic patterns characterize expectations and explore what patients expect and why. We find that optimism and proactive framing are more pronounced in posts about physical or treatment-related illnesses compared to mental-health contexts, and that in our dataset, patients mostly discuss benefits rather than negative outcomes. 
The RedHOTExpect corpus can be obtained from https:\u002F\u002Fwww.ims.uni-stuttgart.de\u002Fdata\u002FRedHOTExpect",{"paper_id":20151,"title":20152,"year":7,"month":188,"day":63,"doi":20153,"resource_url":20154,"first_page":20155,"last_page":20156,"pdf_url":20157,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20158,"paper_type":860,"authors":20159,"abstract":20165},"lrec2026-main-873","Empathy Speaks in Metaphors: The Empathy-Metaphor Corpus of Figurative Language in Empathetic Text","10.63317\u002F5aj9dv5nn5nf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-873","11188","11198","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.873.pdf","lee-etal-2026-empathy",[20160,20162],{"paper_id":20151,"author_seq":247,"given_name":20161,"surname":1359,"affiliation":63,"orcid":63},"Gyeongeun",{"paper_id":20151,"author_seq":232,"given_name":20163,"surname":20164,"affiliation":63,"orcid":63},"Natalie","Parde","Metaphorical language is a powerful vehicle for expressing empathy, yet it has received limited attention in computational studies of supportive communication. We introduce Empathy-Metaphor, the first corpus that explicitly annotates metaphorical spans in empathetic online peer-support. Building on 2,492 empathetic posts from an acne support forum, the dataset contains over 2,100 manually identified metaphorical spans with strong inter-annotator agreement (κ=0.85). Analyses show that metaphors are frequent, diverse, and strategically positioned, often framing acne as a battle, journey, or shared struggle. Lexical and semantic clustering highlight recurring themes of encouragement and emotional hardship, while psycholinguistic analysis emphasizes the prominence of conflict and negative emotion framings. 
Benchmark experiments demonstrate that transformer models, especially DeBERTa-v3, substantially outperform linear and recurrent baselines, achieving a token-level macro F1 of 0.634 and a span-level macro F1 of 0.440 under relaxed evaluation. These contributions establish a new resource for studying figurative language in empathetic text, providing insights into the creative role of metaphors in online support.",{"paper_id":20167,"title":20168,"year":7,"month":188,"day":63,"doi":20169,"resource_url":20170,"first_page":20171,"last_page":20172,"pdf_url":20173,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20174,"paper_type":860,"authors":20175,"abstract":20177},"lrec2026-main-874","A Computational Diachronic Analysis of Gen Z Mental Health Discourse: A Large-scale Reddit Corpus Study from Pre- to Post-COVID","10.63317\u002F4g79vhy3fcqz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-874","11199","11208","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.874.pdf","mao-2026-computational",[20176],{"paper_id":20167,"author_seq":247,"given_name":12079,"surname":12080,"affiliation":63,"orcid":63},"Generation Z’s mental health discourse has been uniquely shaped by digital saturation and the COVID-19 pandemic. This study introduces a large-scale corpus of Gen Z mental health discourse on Reddit, comprising over 3 million posts across 11 subreddits (2017–2025), identified through behavioral cross-posting between mental health and Gen Z-identified communities. Using a hybrid methodology that integrates statistical corpus linguistics with NLP techniques, we conduct diachronic keyness analysis, sentiment tracking, and topic modeling to examine lexical, syntactic, and semantic patterns across pre-, during-, and post-COVID periods. 
Our analysis reveals: (1) ritualized support exchanges more pronounced in Gen Z where highly negative self-disclosure functions as an authenticity signal; (2) a pandemic-induced reframing of existing mental health topics, particularly a rise in physical symptoms, followed by a sustained post-pandemic sentiment decline; and (3) a generational divergence where Gen Z favors abstract, existential concerns, unlike the pragmatic focus of non-Gen Z users. This study contributes a replicable approach for analyzing youth discourse and underscores the importance of culturally and linguistically informed digital mental health interventions, which can support Gen Z’s modes of expressing distress rather than pathologizing them.",{"paper_id":20179,"title":20180,"year":7,"month":188,"day":63,"doi":20181,"resource_url":20182,"first_page":20183,"last_page":20184,"pdf_url":20185,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20186,"paper_type":860,"authors":20187,"abstract":20199},"lrec2026-main-875","Oat Milk Vegan Chocolate Taste Great!: Monitoring the Food Transition Debate in Reddit","10.63317\u002F3botxmxbq64i","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-875","11209","11220","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.875.pdf","zella-etal-2026-oat",[20188,20190,20193,20195,20198],{"paper_id":20179,"author_seq":247,"given_name":7128,"surname":20189,"affiliation":63,"orcid":63},"Zella",{"paper_id":20179,"author_seq":232,"given_name":20191,"surname":20192,"affiliation":63,"orcid":63},"Jan Willem","Bolderdijk",{"paper_id":20179,"author_seq":218,"given_name":13518,"surname":20194,"affiliation":63,"orcid":63},"Peels",{"paper_id":20179,"author_seq":203,"given_name":20196,"surname":20197,"affiliation":63,"orcid":63},"Gerry","Wakker",{"paper_id":20179,"author_seq":188,"given_name":2072,"surname":2073,"affiliation":63,"orcid":63},"We present DRiFT (Debates on Reddit involving Food Transition), a 
new large-scale corpus and set of computational methods for using language as an early indicator of social change in the protein transition, i.e., the shift from a diet predominantly based on animal proteins to one based mainly on plant sources. DRiFT comprises 17.5M Reddit comments (2010–2022) from 29 subreddits grouped into two speaker communities: SUSTAINABLE (early adopters\u002Finnovators) and GENERIC (general public). Building on neologism analysis, lexical semantic change detection, and connotative profiling, we introduce three linguistic measures of innovation awareness, meaning shift, and attitudinal valence. We extract neonyms and retronyms to quantify awareness; apply static and contextual embedding-based Lexical Semantic Change methods (PPMI, SGNS, BERT substitutions) to probe semantic reconceptualization; and adapt an embedding-based connotation hyperplane to measure polarity changes for targeted terms. Results show marked diastratic differences, with SUSTAINABLE users both using innovation-specific lexicon more frequently and having reconceptualized core food terms in ethical\u002Fenvironmental frames, while the GENERIC community exhibits rapid proportional growth in neologism use and emerging positive connotations for some plant-based products. Diachronic denotational shifts over the 12-year window are weak, suggesting shortcoming of embedding-based methods to capture subtle meaning changes. 
DRiFT and our analyses demonstrate that language can function as a sensitive \"thermometer\" of subtle social change, revealing attitudinal dynamics before observable behavioral shifts.",{"paper_id":20201,"title":20202,"year":7,"month":188,"day":63,"doi":20203,"resource_url":20204,"first_page":20205,"last_page":20206,"pdf_url":20207,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20208,"paper_type":860,"authors":20209,"abstract":20217},"lrec2026-main-876","ClimateChat-300K: A Multi-Modal Facebook Dataset for Understanding Diverse Perspectives in Climate Communication","10.63317\u002F38nwpwivdq2x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-876","11221","11229","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.876.pdf","zaghouani-etal-2026-climatechat",[20210,20211,20212,20213,20214],{"paper_id":20201,"author_seq":247,"given_name":2365,"surname":2366,"affiliation":63,"orcid":63},{"paper_id":20201,"author_seq":232,"given_name":12632,"surname":12633,"affiliation":63,"orcid":63},{"paper_id":20201,"author_seq":218,"given_name":12629,"surname":12630,"affiliation":63,"orcid":63},{"paper_id":20201,"author_seq":203,"given_name":12635,"surname":12636,"affiliation":63,"orcid":63},{"paper_id":20201,"author_seq":188,"given_name":20215,"surname":20216,"affiliation":63,"orcid":63},"George","Mikros","We present ClimateChat-300K, a large-scale dataset of 299,329 public Facebook posts about climate change collected between May 2020 and May 2024 through the CrowdTangle platform. The dataset contains 41 metadata features including post content, engagement metrics, and page attributes, covering material from more than 26,000 global pages. Each post includes rich contextual information such as language, timestamp, page category, and interaction counts, enabling comprehensive analyses of public discourse around climate communication. 
Using topic modeling and sentiment analysis, we identify ten main themes grouped into five domains: policy, activism, cooperation, science, and conservation. The results reveal that emotional tone, post format, and page identity strongly influence audience engagement, with visually rich and emotionally charged content receiving the highest levels of interaction. The dataset also demonstrates how online discussions evolved in response to major events such as international climate summits and the COVID-19 pandemic period. ClimateChat-300K provides an open resource for reproducible and interdisciplinary research on polarization, misinformation, and the dynamics of digital climate discourse. By releasing this dataset, we aim to support transparent, data-driven research and contribute to a deeper understanding of how public engagement with climate issues develops across time, geography, and institutional contexts.",{"paper_id":20219,"title":20220,"year":7,"month":188,"day":63,"doi":20221,"resource_url":20222,"first_page":20223,"last_page":20224,"pdf_url":20225,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20226,"paper_type":860,"authors":20227,"abstract":20237},"lrec2026-main-877","HateMirage: An Explainable Multi-Dimensional Dataset for Decoding Faux Hate and Subtle Online Abuse","10.63317\u002F3zwvwrw5wiiu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-877","11230","11240","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.877.pdf","kasu-etal-2026-hatemirage",[20228,20231,20234,20236],{"paper_id":20219,"author_seq":247,"given_name":20229,"surname":20230,"affiliation":63,"orcid":63},"Sai Kartheek 
Reddy","Kasu",{"paper_id":20219,"author_seq":232,"given_name":20232,"surname":20233,"affiliation":63,"orcid":63},"Shankar","Biradar",{"paper_id":20219,"author_seq":218,"given_name":7268,"surname":20235,"affiliation":63,"orcid":63},"Saumya",{"paper_id":20219,"author_seq":203,"given_name":6204,"surname":6205,"affiliation":63,"orcid":63},"Subtle and indirect hate speech remains an underexplored challenge in online safety research, particularly when harmful intent is embedded within misleading or manipulative narratives. Existing hate speech datasets primarily capture overt toxicity, underrepresenting the nuanced ways misinformation can incite or normalize hate. To address this gap, we present HateMirage, a novel dataset of Faux Hate comments designed to advance reasoning and explainability research on hate emerging from fake or distorted narratives. The dataset was constructed by identifying widely debunked misinformation claims from fact-checking sources and tracing related YouTube discussions, resulting in 4,530 user comments. Each comment is annotated along three interpretable dimensions: Target (who is affected), Intent (the underlying motivation or goal behind the comment), and Implication (its potential social impact). Unlike prior explainability datasets such as HateXplain and HARE, which offer token-level or single-dimensional reasoning, HateMirage introduces a multi-dimensional explanation framework that captures the interplay between misinformation, harm, and social consequence. We benchmark multiple open-source language models on HateMirage using ROUGE-L F1 and Sentence-BERT similarity to assess explanation coherence. Results suggest that explanation quality may depend more on pretraining diversity and reasoning-oriented data rather than on model scale alone. 
By coupling misinformation reasoning with harm attribution, HateMirage establishes a new benchmark for interpretable hate detection and responsible AI research.",{"paper_id":20239,"title":20240,"year":7,"month":188,"day":63,"doi":20241,"resource_url":20242,"first_page":20243,"last_page":20244,"pdf_url":20245,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20246,"paper_type":860,"authors":20247,"abstract":20257},"lrec2026-main-878","MindSET: Advancing Mental Health Benchmarking through Large-Scale Social Media Data","10.63317\u002F4cdunjq3bziz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-878","11241","11251","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.878.pdf","mankarious-etal-2026-mindset",[20248,20250,20251,20252,20253,20254],{"paper_id":20239,"author_seq":247,"given_name":18295,"surname":20249,"affiliation":63,"orcid":63},"Mankarious",{"paper_id":20239,"author_seq":232,"given_name":10323,"surname":14084,"affiliation":63,"orcid":63},{"paper_id":20239,"author_seq":218,"given_name":1668,"surname":14079,"affiliation":63,"orcid":63},{"paper_id":20239,"author_seq":203,"given_name":14081,"surname":14082,"affiliation":63,"orcid":63},{"paper_id":20239,"author_seq":188,"given_name":2998,"surname":14086,"affiliation":63,"orcid":63},{"paper_id":20239,"author_seq":172,"given_name":20255,"surname":20256,"affiliation":63,"orcid":63},"Ayah","Zirikly","Social media data has become a vital resource for studying mental health, offering real-time insights into thoughts, emotions, and behaviors that traditional methods often miss. Progress in this area has been facilitated by benchmark datasets for mental health analysis; however, most existing benchmarks have become outdated due to limited data availability, inadequate cleaning, and the inherently diverse nature of social media content (e.g., multilingual and harmful material). 
We present a new benchmark dataset, MindSET, curated from Reddit using self-reported diagnoses to address these limitations. The annotated dataset contains over 13M annotated posts across seven mental health conditions—more than twice the size of previous benchmarks. To ensure data quality, we applied rigorous preprocessing steps, including language filtering, and removal of Not Safe for Work (NSFW) and duplicate content. We further performed a linguistic analysis using LIWC to examine psychological term frequencies across the eight groups represented in the dataset. To demonstrate the dataset’s utility, we conducted binary classification experiments for diagnosis detection using both fine-tuned language models and Bag-of-Words (BoW) features. Models trained on MindSET consistently outperformed those trained on previous benchmarks, achieving up to an 18-point improvement in F1 for Autism detection. Overall, MindSET provides a robust foundation for researchers exploring the intersection of social media and mental health, supporting both early risk detection and deeper analysis of emerging psychological trends.",{"paper_id":20259,"title":20260,"year":7,"month":188,"day":63,"doi":20261,"resource_url":20262,"first_page":20263,"last_page":20264,"pdf_url":20265,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20266,"paper_type":860,"authors":20267,"abstract":20274},"lrec2026-main-879","A Corpus of Misunderstood Irony on Turkish Social Media","10.63317\u002F3kehaa7yjjqc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-879","11252","11259","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.879.pdf","ltekin-etal-2026-corpus",[20268,20271],{"paper_id":20259,"author_seq":247,"given_name":20269,"surname":20270,"affiliation":63,"orcid":63},"Çağrı","Çöltekin",{"paper_id":20259,"author_seq":232,"given_name":20272,"surname":20273,"affiliation":63,"orcid":63},"Güliz","Güneş","We present a new Turkish 
social media corpus annotated for verbal irony. The ironic post candidates are identified by a distant supervision method relying on reports of misunderstood irony in social media platforms. The data collected through this method, as well as irony-tagged posts and a random sample of posts are annotated by three annotators, resulting in a corpus of 3000 tweets with high quality annotations that may be useful for linguistic analysis as well as for training automatic irony detection systems or testing irony understanding of large language models. Since irony interpretation typically involves context, our dataset also includes the preceding conversational context of the potentially ironic expression. Besides the description of the corpus and the annotation process, this paper presents an analysis of the corpus. Our findings indicate that relying on distant supervision alone may result in suboptimal labels for irony\u002Fsarcasm corpora. We also investigate the usefulness of context for the annotators in identifying irony.",{"paper_id":20276,"title":20277,"year":7,"month":188,"day":63,"doi":20278,"resource_url":20279,"first_page":20280,"last_page":20281,"pdf_url":20282,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20283,"paper_type":860,"authors":20284,"abstract":20295},"lrec2026-main-880","A Corpus of Joint EEG and Self-Paced Reading of Natural Dutch Texts","10.63317\u002F49tvxys2q4zc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-880","11260","11271","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.880.pdf","stergaard-etal-2026-corpus",[20285,20288,20291,20293],{"paper_id":20276,"author_seq":247,"given_name":20286,"surname":20287,"affiliation":63,"orcid":63},"Sara Møller","Østergaard",{"paper_id":20276,"author_seq":232,"given_name":20289,"surname":20290,"affiliation":63,"orcid":63},"Lenneke 
Doris","Lichtenberg",{"paper_id":20276,"author_seq":218,"given_name":13890,"surname":20292,"affiliation":63,"orcid":63},"Boon",{"paper_id":20276,"author_seq":203,"given_name":3696,"surname":20294,"affiliation":63,"orcid":63},"Nicenboim","We present the Tilburg corpus of Natural Dutch Texts (TiNT): A corpus of joint electroencephalography (EEG) and self-paced reading (SPR) of natural, medium-length, Dutch texts. The corpus contains recordings from 71 native Dutch speakers reading eight naturally occurring texts of around 600 words each. The texts are of varying genres and were chosen based on overall fluency and comprehensibility. To assess the quality of the corpus, we examined participant responses to comprehension questions, self-reported familiarity with the texts, and whether well-established effects replicated for both reading times and event-related potentials (ERPs) (N400 and P600). The corpus contributes to a small collection of corpora with simultaneous recording of reading times and EEG. While this is often achieved using eye-tracking, the use of SPR offers methodological advantages, particularly in aligning neural signals with word-level processing. In addition, the use of natural texts with longer dependencies makes the corpus a unique resource for psycholinguistic research. 
The corpus enables research into the relationship between neural and behavioral responses in naturalistic reading contexts.",{"paper_id":20297,"title":20298,"year":7,"month":188,"day":63,"doi":20299,"resource_url":20300,"first_page":20301,"last_page":20302,"pdf_url":20303,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20304,"paper_type":860,"authors":20305,"abstract":20319},"lrec2026-main-881","Evaluation Drift in LLM Personality Induction: Are We Moving the Goalpost?","10.63317\u002F4zdw5tmzx58h","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-881","11272","11285","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.881.pdf","rajput-etal-2026-evaluation",[20306,20309,20311,20314,20316],{"paper_id":20297,"author_seq":247,"given_name":20307,"surname":20308,"affiliation":63,"orcid":63},"Prateek Kumar","Rajput",{"paper_id":20297,"author_seq":232,"given_name":20310,"surname":1913,"affiliation":63,"orcid":63},"Yewei",{"paper_id":20297,"author_seq":218,"given_name":20312,"surname":20313,"affiliation":63,"orcid":63},"Iyiola Emmanuel","Olatunji",{"paper_id":20297,"author_seq":203,"given_name":20315,"surname":6295,"affiliation":63,"orcid":63},"Jacques",{"paper_id":20297,"author_seq":188,"given_name":20317,"surname":20318,"affiliation":63,"orcid":63},"Tegawendé","Bissyande","Can large language models reliably express a human-like personality, or are they merely mimicking surface cues without a stable underlying profile? We study this question on the long-form Essays Dataset, preferred over short, mood-driven text to target stable traits. Using a questionnaire-based (self-evaluation) test: IPIP-NEO, we ask: (i) does post-training (SFT, DPO, ORPO) stabilize questionnaire scores under prompt rephrasings, and (ii) can it induce target Big Five profiles from unguided essays? 
Across five models, fine-tuning consistently reduces variance in questionnaire responses, mitigating the fragility seen in pre-trained models. Yet accuracy on the full five-dimensional profile remains near chance even when single-trait scores improve, indicating that unguided essays lack the cues needed for faithful personality expression. We argue for scenario-grounded datasets or interactive elicitation that accumulates test-aligned evidence over time.",{"paper_id":20321,"title":20322,"year":7,"month":188,"day":63,"doi":20323,"resource_url":20324,"first_page":20325,"last_page":20326,"pdf_url":20327,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20328,"paper_type":860,"authors":20329,"abstract":20337},"lrec2026-main-882","A Multi-Dialectal, Longitudinal Corpus of Human-AI Hybrid Language Production","10.63317\u002F23wdinvr5ynf","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-882","11286","11299","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.882.pdf","gan-etal-2026-multi",[20330,20331,20333,20335],{"paper_id":20321,"author_seq":247,"given_name":14086,"surname":6055,"affiliation":63,"orcid":63},{"paper_id":20321,"author_seq":232,"given_name":2837,"surname":20332,"affiliation":63,"orcid":63},"Dunn",{"paper_id":20321,"author_seq":218,"given_name":1104,"surname":20334,"affiliation":63,"orcid":63},"Nini",{"paper_id":20321,"author_seq":203,"given_name":4797,"surname":20336,"affiliation":63,"orcid":63},"Adams","This paper presents a multi-dialectal, longitudinal corpus of human-AI hybrid language production, comprising purely human-written texts, purely LLM-generated texts, and hybrid texts produced under different LLM-assistance modes (e.g., stylistic suggestions, short continuations, partial essay generation). The corpus includes 693 participants from five national English dialects, with natural and hybrid samples paired within individuals over a four-week period. 
This design enables investigation of both short- and longer-term effects of LLM assistance on language use across geographic and social contexts. To illustrate the corpus’s utility, we analyze linguistic features across three dimensions: lexical diversity, syntactic complexity, and stylistic variation. The results show that LLM assistance enhances lexical diversity without a corresponding increase in syntactic complexity, revealing distinct effects across linguistic dimensions. Overall, this corpus offers a valuable resource for studying human-AI interaction, dialectal variation, and the influence of AI assistance on written language.",{"paper_id":20339,"title":20340,"year":7,"month":188,"day":63,"doi":20341,"resource_url":20342,"first_page":20343,"last_page":20344,"pdf_url":20345,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20346,"paper_type":860,"authors":20347,"abstract":20354},"lrec2026-main-883","Semantic Information: A Difference That Makes a Difference","10.63317\u002F4kfwe8i7ux33","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-883","11300","11308","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.883.pdf","philipp-etal-2026-semantic",[20348,20350,20352],{"paper_id":20339,"author_seq":247,"given_name":20349,"surname":12859,"affiliation":63,"orcid":63},"J. Nathanael",{"paper_id":20339,"author_seq":232,"given_name":2525,"surname":20351,"affiliation":63,"orcid":63},"Kölbl",{"paper_id":20339,"author_seq":218,"given_name":1732,"surname":20353,"affiliation":63,"orcid":63},"Richter","In the framework of distributional semantics, we introduce a novel notion and operationalisation of semantic information for natural language. The key idea is as follows: a linguistic sign carries semantic information about a document if it reduces the amount of surprisal for a language processor. 
We consider two systems, an informed one and an uninformed one, and describe semantic information in their terms. Processing effort is quantified via surprisal where the informed system is ‘aware’ of the linguistic sign and the uninformed one is not. On an English fairy tale corpus and on two German news corpora, we tested successfully the prediction that if the linguistic sign in question carries pre-information through semantic surprisal, the current level of surprisal for the language processor is reduced. The conclusion is that the degree of semantic information results from the degree of semantic prior information.",{"paper_id":20356,"title":20357,"year":7,"month":188,"day":63,"doi":20358,"resource_url":20359,"first_page":20360,"last_page":20361,"pdf_url":20362,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20363,"paper_type":860,"authors":20364,"abstract":20375},"lrec2026-main-884","Modeling the Memory-Surprisal Trade-Off over Time: Communicative Efficiency Decreases with Lexico-Grammatical Change in Scientific English","10.63317\u002F4txotk7fwkhp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-884","11309","11319","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.884.pdf","steuer-etal-2026-modeling",[20365,20368,20371,20373,20374],{"paper_id":20356,"author_seq":247,"given_name":20366,"surname":20367,"affiliation":63,"orcid":63},"Julius","Steuer",{"paper_id":20356,"author_seq":232,"given_name":20369,"surname":20370,"affiliation":63,"orcid":63},"Marie-Pauline","Krielke",{"paper_id":20356,"author_seq":218,"given_name":2975,"surname":20372,"affiliation":63,"orcid":63},"Degaetano-Ortlieb",{"paper_id":20356,"author_seq":203,"given_name":5326,"surname":5327,"affiliation":63,"orcid":63},{"paper_id":20356,"author_seq":188,"given_name":3626,"surname":3627,"affiliation":63,"orcid":63},"The memory-surprisal trade-off (MST) has been shown to hold cross-linguistically as a general 
principle of communicative efficiency: languages that exhibit information locality tend to have word orders that allow for efficient memory use, i.e., lower surprisal at a fixed memory budget. In this paper, we explore the influence of diachronic variation on the MST. We compare scientific English in the Royal Society Corpus (RSC, 18thc. – 20thc.) to \"general language\" in the Corpus of Historical American English (COHA) to assess the impact of intra-linguistic variation (register). We find that both time and register influence the shape of the tradeoff: Over time, vocabulary expansion raises minimal surprisal, while the shape of the MST curves changes. Decreasing distances between syntactic dependencies due to more local nominal encodings change how predictive information is distributed across memory scales. The effects are stronger for the RSC than for COHA.",{"paper_id":20377,"title":20378,"year":7,"month":188,"day":63,"doi":20379,"resource_url":20380,"first_page":20381,"last_page":20382,"pdf_url":20383,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20384,"paper_type":860,"authors":20385,"abstract":20393},"lrec2026-main-885","Mechanistic Interpretability Meets Cognitive Linguistics: Modelling Locative Image Schemas in the Circuit Framework","10.63317\u002F4iwnnycar4sc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-885","11320","11331","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.885.pdf","proietti-etal-2026-mechanistic",[20386,20388,20389,20392],{"paper_id":20377,"author_seq":247,"given_name":5439,"surname":20387,"affiliation":63,"orcid":63},"Proietti",{"paper_id":20377,"author_seq":232,"given_name":16026,"surname":16027,"affiliation":63,"orcid":63},{"paper_id":20377,"author_seq":218,"given_name":20390,"surname":20391,"affiliation":63,"orcid":63},"Grzegorz","Chrupała",{"paper_id":20377,"author_seq":203,"given_name":1712,"surname":9696,"affiliation":63,"orcid":63},"Large 
Language Models are often considered the best computational testbeds for linguistic theorisation at our disposal. However, their inner workings remain largely opaque, and the mechanisms behind their behaviour cannot always be easily connected with theoretical linguistic assumptions. Mechanistic Interpretability (MI) is surging as a specialised field to reverse engineer models’ internals and shed light on the causal relationships happening under the hood. Nevertheless, MI is predominantly focused on AI-Safety problems, and the attempts to understand linguistically motivated behaviours with these tools are still limited. In this work, we investigate whether an LLM, namely LlaMA-3.2-1b, has developed specialised mechanisms governing the selection of the locative preposition in simple copular clauses. To frame the problem as a next-token prediction objective, we introduce the Stranded Locative Preposition Selection task along with a small dataset aptly curated to test it. We make use of several MI tools to scan the model’s internals and relate their mechanisms to classic theory in Cognitive Linguistics, which assumes that the two basic locative prepositions in and on are the respective linguistic encoding of two different Image Schemas: Containment and Surface",{"paper_id":20395,"title":20396,"year":7,"month":188,"day":63,"doi":20397,"resource_url":20398,"first_page":20399,"last_page":20400,"pdf_url":20401,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20402,"paper_type":860,"authors":20403,"abstract":20412},"lrec2026-main-886","Variation Is the Norm: Embracing Sociolinguistics in 
NLP","10.63317\u002F3v3vhacxgfex","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-886","11332","11344","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.886.pdf","lutgen-etal-2026-variation",[20404,20407,20408,20409,20410],{"paper_id":20395,"author_seq":247,"given_name":20405,"surname":20406,"affiliation":63,"orcid":63},"Anne-Marie","Lutgen",{"paper_id":20395,"author_seq":232,"given_name":14807,"surname":14808,"affiliation":63,"orcid":63},{"paper_id":20395,"author_seq":218,"given_name":2810,"surname":3170,"affiliation":63,"orcid":63},{"paper_id":20395,"author_seq":203,"given_name":3175,"surname":3176,"affiliation":63,"orcid":63},{"paper_id":20395,"author_seq":188,"given_name":12218,"surname":20411,"affiliation":63,"orcid":63},"Purschke","In Natural Language Processing (NLP), variation is typically seen as noise and “normalised away” before processing, even though it is an integral part of language. Conversely, studying language variation in social contexts is central to sociolinguistics. We present a framework to combine the sociolinguistic dimension of language with the technical dimension of NLP. We argue that by embracing sociolinguistics, variation can actively be included in a research setup, in turn informing the NLP side. To illustrate this, we provide a case study on Luxembourgish, an evolving language featuring a large amount of orthographic variation, demonstrating how NLP performance is impacted. The results show large discrepancies in the performance of models tested and fine-tuned on data with a large amount of orthographic variation in comparison to data closer to the (orthographic) standard. Furthermore, we provide a possible solution to improve the performance by including variation in the fine-tuning process. This case study highlights the importance of including variation in the research setup, as models are currently not robust to occurring variation. 
Our framework facilitates the inclusion of variation in the thought-process while also being grounded in the theoretical framework of sociolinguistics.",{"paper_id":20414,"title":20415,"year":7,"month":188,"day":63,"doi":20416,"resource_url":20417,"first_page":20418,"last_page":20419,"pdf_url":20420,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20421,"paper_type":860,"authors":20422,"abstract":20435},"lrec2026-main-887","Appraisal Theory-Informed Emotion Prediction","10.63317\u002F3rx3wu7m6cgn","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-887","11345","11358","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.887.pdf","wang-etal-2026-appraisal",[20423,20425,20428,20429,20431,20434],{"paper_id":20414,"author_seq":247,"given_name":20424,"surname":3676,"affiliation":63,"orcid":63},"Xiaowei",{"paper_id":20414,"author_seq":232,"given_name":20426,"surname":20427,"affiliation":63,"orcid":63},"Jayant","Teotia",{"paper_id":20414,"author_seq":218,"given_name":17707,"surname":12080,"affiliation":63,"orcid":63},{"paper_id":20414,"author_seq":203,"given_name":20430,"surname":2253,"affiliation":63,"orcid":63},"Wandeep Kaur Ratan",{"paper_id":20414,"author_seq":188,"given_name":20432,"surname":20433,"affiliation":63,"orcid":63},"Sabrina Binti","Tiun",{"paper_id":20414,"author_seq":172,"given_name":2022,"surname":17709,"affiliation":63,"orcid":63},"Emotion Recognition in Conversation (ERC) focuses on identifying static emotional states, overlooking the cognitive mechanisms that drive emotional transitions. This work introduces a novel emotion prediction task grounded in Appraisal Theory, which conceptualizes emotion as a cognitive evaluation of expectations and their violations. 
To address this task, we develop a prompt-based reasoning framework that breaks emotional dynamics into three interpretable stages, e.g., expectation inference, violation detection, and emotion-shift prediction, thereby explaining not only which emotion is expressed, but also why it emerges. To examine whether LLMs exhibit human-like affective reasoning, we design six appraisal-informed prompting tasks and evaluate eight representative LLMs across four conversational corpora. A unified two-level evaluation, which measures both emotion classification and transition dynamics, reveals that explicit expectation cues improve accuracy by up to +2.4%, whereas violation-only cues often degrade performance. Our analysis uncovers a robust appraisal pattern across models and datasets: expectation construction is the primary contributor to accurate emotion prediction, while isolated violation cues tend to induce misattribution rather than improve causal reasoning. Beyond label accuracy, transition-level evaluation shows that LLMs capture emotion-shift direction above chance but exhibit a marked stability bias, over-predicting no-change trajectories and under-detecting fine-grained shifts. 
These findings demonstrate both the promise and the current limits of LLMs in appraisal-driven affective reasoning, and motivate a new cognitively-grounded research direction.",{"paper_id":20437,"title":20438,"year":7,"month":188,"day":63,"doi":20439,"resource_url":20440,"first_page":20441,"last_page":20442,"pdf_url":20443,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20444,"paper_type":860,"authors":20445,"abstract":20452},"lrec2026-main-888","The Evolution of Philosophy: A Metaphorical Cognition Perspective","10.63317\u002F3qdia7bp8cw3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-888","11359","11367","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.888.pdf","mao-etal-2026-evolution",[20446,20447,20449,20450,20451],{"paper_id":20437,"author_seq":247,"given_name":17707,"surname":12080,"affiliation":63,"orcid":63},{"paper_id":20437,"author_seq":232,"given_name":20448,"surname":1840,"affiliation":63,"orcid":63},"Dapeng",{"paper_id":20437,"author_seq":218,"given_name":3445,"surname":1837,"affiliation":63,"orcid":63},{"paper_id":20437,"author_seq":203,"given_name":17705,"surname":1519,"affiliation":63,"orcid":63},{"paper_id":20437,"author_seq":188,"given_name":2022,"surname":17709,"affiliation":63,"orcid":63},"We present a large-scale study of philosophical cognition through the lens of Conceptual Metaphor Theory. Using a computational metaphor processing system that extracts target concepts, source concepts, and concept mappings from a curated corpus of 50+ canonical texts (300k sentences) spanning ten schools from antiquity to the late twentieth century, we quantify how metaphor organizes philosophical argument. We model temporal dynamics with year-level cosine series, authorial neighborhoods with PCA projections, and school signatures with heatmaps of normalized frequencies. 
The study demonstrates that the history of philosophy is structured by stable cross-domain schemas that are selectively recombined to address new problems.",{"paper_id":20454,"title":20455,"year":7,"month":188,"day":63,"doi":20456,"resource_url":20457,"first_page":20458,"last_page":20459,"pdf_url":20460,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20461,"paper_type":860,"authors":20462,"abstract":20473},"lrec2026-main-889","Predicting States of Understanding in Explanatory Interactions Using Cognitive Load-Related Linguistic Cues","10.63317\u002F4tsmsshhd3ad","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-889","11368","11378","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.889.pdf","wang-etal-2026-predicting",[20463,20464,20467,20470],{"paper_id":20454,"author_seq":247,"given_name":2998,"surname":3676,"affiliation":63,"orcid":63},{"paper_id":20454,"author_seq":232,"given_name":20465,"surname":20466,"affiliation":63,"orcid":63},"Olcay","Türk",{"paper_id":20454,"author_seq":218,"given_name":20468,"surname":20469,"affiliation":63,"orcid":63},"Angela","Grimminger",{"paper_id":20454,"author_seq":203,"given_name":20471,"surname":20472,"affiliation":63,"orcid":63},"Hendrik","Buschmeier","We investigate how verbal and nonverbal linguistic features, exhibited by speakers and listeners in dialogue, can contribute to predicting the listener’s state of understanding in explanatory interactions on a moment-by-moment basis. Specifically, we examine three linguistic cues related to cognitive load and hypothesised to correlate with listener understanding: the information value (operationalised with surprisal) and syntactic complexity of the speaker’s utterances, and the variation in the listener’s interactive gaze behaviour. 
Based on statistical analyses of the MUNDEX corpus of face-to-face dialogic board game explanations, we find that individual cues vary with the listener’s level of understanding. Listener states (’Understanding’, ’Partial Understanding’, ’Non-Understanding’ and ’Misunderstanding’) were self-annotated by the listeners using a retrospective video-recall method. The results of a subsequent classification experiment, involving two off-the-shelf classifiers and a fine-tuned German BERT-based multimodal classifier, demonstrate that prediction of these four states of understanding is generally possible and improves when the three linguistic cues are considered alongside textual features.",{"paper_id":20475,"title":20476,"year":7,"month":188,"day":63,"doi":20477,"resource_url":20478,"first_page":20479,"last_page":20480,"pdf_url":20481,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20482,"paper_type":860,"authors":20483,"abstract":20494},"lrec2026-main-890","Figurative Language in Alzheimer's Discourse: Linguistic and Neural Alignment in Clinical Narratives","10.63317\u002F2yjp4743qieh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-890","11379","11389","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.890.pdf","kylymnyk-etal-2026-figurative",[20484,20486,20489,20490,20492,20493],{"paper_id":20475,"author_seq":247,"given_name":4631,"surname":20485,"affiliation":63,"orcid":63},"Kylymnyk",{"paper_id":20475,"author_seq":232,"given_name":20487,"surname":20488,"affiliation":63,"orcid":63},"Vitória 
Hilgert","Tomasel",{"paper_id":20475,"author_seq":218,"given_name":4074,"surname":4075,"affiliation":63,"orcid":63},{"paper_id":20475,"author_seq":203,"given_name":10323,"surname":20491,"affiliation":63,"orcid":63},"Watkins",{"paper_id":20475,"author_seq":188,"given_name":4071,"surname":10334,"affiliation":63,"orcid":63},{"paper_id":20475,"author_seq":172,"given_name":7059,"surname":10321,"affiliation":63,"orcid":63},"Figurative language, including multiword expressions and metaphors, provides a sensitive lens on cognitive functioning but remains largely overlooked in computational studies of Alzheimer’s Disease (AD). This work investigates figurative-language patterns in AD and whether they can help in distinguishing AD from non-clinical discourse and whether a neural model encodes comparable linguistic tendencies. We propose a two-step framework that combines relevant linguistic features with neural representations. Figurative expressions are automatically identified using Large Language Models focusing on idiomaticity and metaphor detection. These figurative language indicators are integrated with lexical, syntactic, and readability features and used to train classifiers on the ADReSS dataset. Correlation and proxy-model analyses reveal significant alignment between linguistic indicators and model predictions: participants with AD produce fewer figurative constructions, lower lexical diversity, and more concrete language. 
The results obtained demonstrate that contextual embeddings implicitly encode linguistic cues associated with cognitive decline and highlight the value of figurative-language metrics for transparent and linguistically grounded clinical NLP.",{"paper_id":20496,"title":20497,"year":7,"month":188,"day":63,"doi":20498,"resource_url":20499,"first_page":20500,"last_page":20501,"pdf_url":20502,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20503,"paper_type":860,"authors":20504,"abstract":20511},"lrec2026-main-891","Prompting Instruction-tuned LLMs for Semantic Similarity Values","10.63317\u002F3kbjxx6989dg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-891","11390","11403","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.891.pdf","snelder-etal-2026-prompting",[20505,20508,20510],{"paper_id":20496,"author_seq":247,"given_name":20506,"surname":20507,"affiliation":63,"orcid":63},"Xander Akiko","Snelder",{"paper_id":20496,"author_seq":232,"given_name":20509,"surname":1837,"affiliation":63,"orcid":63},"Yunchong",{"paper_id":20496,"author_seq":218,"given_name":981,"surname":982,"affiliation":63,"orcid":63},"The impressive few-shot performance of generative decoder transformer language models at novel tasks has raised interest in using them to estimate lexical-semantic properties of words, word pairs or multi-word expressions. We explore the task of eliciting semantic similarity scores between word pairs through prompting, comparing these scores to human benchmarks. We investigate different prompting approaches, different model architectures and different languages using the Dutch, English and Mandarin Chinese SimLex-999 benchmarks. The results show that prompting each word pair individually yields better correlations, and that models struggle with the distinction between similarity and relatedness, just as static and contextual word embedding models did. 
The new, open-weight gpt-oss-20b model yields the highest correlation with human ratings out of the models we evaluated.",{"paper_id":20513,"title":20514,"year":7,"month":188,"day":63,"doi":20515,"resource_url":20516,"first_page":20517,"last_page":20518,"pdf_url":20519,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20520,"paper_type":860,"authors":20521,"abstract":20525},"lrec2026-main-892","Rethinking Evaluation in Retrieval-Augmented Personalized Dialogue: A Cognitive and Linguistic Perspective","10.63317\u002F29xyfmxaqr72","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-892","11404","11416","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.892.pdf","zhang-etal-2026-rethinking",[20522,20524],{"paper_id":20513,"author_seq":247,"given_name":20523,"surname":1519,"affiliation":63,"orcid":63},"Tianyi",{"paper_id":20513,"author_seq":232,"given_name":1061,"surname":6278,"affiliation":63,"orcid":63},"In cognitive science and linguistic theory, dialogue is not seen as a chain of independent utterances but rather as a joint activity sustained by coherence, consistency, and shared understanding. However, many systems for open-domain and personalized dialogue use surface-level similarity metrics (e.g., BLEU, ROUGE, F1) as one of their main reporting measures, which fail to capture these deeper aspects of conversational quality. We re-examine a notable retrieval-augmented framework for personalized dialogue, LAPDOG, as a case study for evaluation methodology. Using both human and LLM-based judges, we identify limitations in current evaluation practices, including corrupted dialogue histories, contradictions between retrieved stories and persona, and incoherent response generation. Our results show that human and LLM judgments align closely but diverge from lexical similarity metrics, underscoring the need for cognitively grounded evaluation methods. 
Broadly, this work charts a path toward more reliable assessment frameworks for retrieval-augmented dialogue systems that better reflect the principles of natural human communication.",{"paper_id":20527,"title":20528,"year":7,"month":188,"day":63,"doi":20529,"resource_url":20530,"first_page":20531,"last_page":20532,"pdf_url":20533,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20534,"paper_type":860,"authors":20535,"abstract":20541},"lrec2026-main-893","Evaluating Multimodal Large Language Model Narrative Interpretation through the Lens of Appraisal Theory","10.63317\u002F3kybwvtko6uq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-893","11417","11426","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.893.pdf","teotia-etal-2026-evaluating",[20536,20537,20538,20539,20540],{"paper_id":20527,"author_seq":247,"given_name":20426,"surname":20427,"affiliation":63,"orcid":63},{"paper_id":20527,"author_seq":232,"given_name":20424,"surname":3676,"affiliation":63,"orcid":63},{"paper_id":20527,"author_seq":218,"given_name":17705,"surname":1519,"affiliation":63,"orcid":63},{"paper_id":20527,"author_seq":203,"given_name":17707,"surname":12080,"affiliation":63,"orcid":63},{"paper_id":20527,"author_seq":188,"given_name":2022,"surname":17709,"affiliation":63,"orcid":63},"Narrative interpretation is an essential aspect of human cognition, enabling individuals to comprehend complex sequences of events, form emotional connections, and engage in nuanced social reasoning. At the heart of this interpretive ability lies emotional understanding, which cognitive scientists often frame through Appraisal Theory, a model that views emotions as the outcome of subjective evaluations of events in relation to goals, values, and beliefs. In this study, we explore whether multimodal large language models (MLLMs) are able to replicate aspects of this human-like narrative and emotional reasoning. 
Specifically, we examine how well MLLMs interpret visual narratives, with a focus on their ability to identify and appraise emotional content within scenes. We also investigate whether these models can utilize additional narrative descriptions generated by them to enhance their emotional recognition capabilities, as humans often do. To probe these questions, we conducted a series of experiments using two publicly available datasets, EMOTIC and HECO. Contrary to our expectations, our results reveal a consistent and noteworthy pattern: rather than improving the models’ performance, the inclusion of supplementary narrative or contextual information frequently diminishes their ability to accurately recognize emotions. This counterintuitive finding suggests that current MLLMs face significant challenges in integrating multimodal information in a coherent, context-sensitive way. These findings underscore key limitations in the emotional and narrative reasoning capabilities of existing MLLMs and highlight a critical gap between human cognitive processes and current AI approaches.",{"paper_id":20543,"title":20544,"year":7,"month":188,"day":63,"doi":20545,"resource_url":20546,"first_page":20547,"last_page":20548,"pdf_url":20549,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20550,"paper_type":860,"authors":20551,"abstract":20559},"lrec2026-main-894","Mapping Liberty Metaphors across Cultures and 
Time","10.63317\u002F4dzoudvxnqwm","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-894","11427","11438","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.894.pdf","suen-etal-2026-mapping",[20552,20555,20556,20558],{"paper_id":20543,"author_seq":247,"given_name":20553,"surname":20554,"affiliation":63,"orcid":63},"Sidney","Suen",{"paper_id":20543,"author_seq":232,"given_name":17707,"surname":12080,"affiliation":63,"orcid":63},{"paper_id":20543,"author_seq":218,"given_name":2007,"surname":20557,"affiliation":63,"orcid":63},"Kwok",{"paper_id":20543,"author_seq":203,"given_name":2022,"surname":17709,"affiliation":63,"orcid":63},"Cognitive metaphors provide a lens for understanding how societies construct and negotiate ideas, including liberty discourse. This study explores conceptual metaphors in liberty discourse by applying a scalable, corpus‑driven approach for cognitive analysis. A curated list of thematic keywords related to liberty topics is used to extract relevant sentences from the Corpus of Historical American English (COHA) and the News on the Web (NOW) corpus. MetaPro, a framework grounded in Conceptual Metaphor Theory, processes these sentences to identify metaphorical mappings at scale. Embedding visualizations and frequency counts were applied to both corpora; in COHA, line graphs captured temporal shifts in metaphor usage across time, while in NOW, two‑dimensional heatmaps highlighted spatial variation across countries. Selected example phrases illustrate how metaphorical mappings extend across diverse issues and domains. Thus, metaphor distributions and shifts provide a useful empirical lens for identifying changing thematic concerns in liberty discourse, offering a scalable, cognitively grounded method for cultural analysis across time and space. 
This demonstrates the value of computational methods for large-scale culture research.",{"paper_id":20561,"title":20562,"year":7,"month":188,"day":63,"doi":20563,"resource_url":20564,"first_page":20565,"last_page":20566,"pdf_url":20567,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20568,"paper_type":860,"authors":20569,"abstract":20575},"lrec2026-main-895","The Sensorimotor Norms for the Chinese Classifiers","10.63317\u002F2ub8kenhmi8d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-895","11439","11450","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.895.pdf","shao-etal-2026-sensorimotor",[20570,20573,20574],{"paper_id":20561,"author_seq":247,"given_name":20571,"surname":20572,"affiliation":63,"orcid":63},"Yimei","Shao",{"paper_id":20561,"author_seq":232,"given_name":13010,"surname":13011,"affiliation":63,"orcid":63},{"paper_id":20561,"author_seq":218,"given_name":17915,"surname":1837,"affiliation":63,"orcid":63},"Sensorimotor information plays a crucial role in the conceptual representation of linguistic knowledge. While previous studies have established sensorimotor norms for nouns and adjectives, little is known about how Chinese numeral classifiers encode perceptual and action-based experiences. The present study constructs the first large-scale sensorimotor norms for Chinese classifiers, collecting perceptual and action ratings for 357 classifiers from 288 native Chinese speakers. Participants evaluated each classifier along six perceptual modalities (vision, hearing, taste, smell, touch, and interoception) and five action effectors (foot\u002Fleg, hand\u002Farm, mouth\u002Fthroat, head, and torso). The resulting dataset provides detailed sensorimotor profiles for each classifier and reveals systematic mappings between classifier semantics and embodied dimensions. 
The findings demonstrate that Chinese classifiers are not purely syntactic markers but encode distinct sensorimotor features grounded in perceptual and motor systems, highlighting the embodied foundation of the classifier system and offering valuable resources for future psycholinguistic and computational modelling studies of Chinese semantics.",{"paper_id":20577,"title":20578,"year":7,"month":188,"day":63,"doi":20579,"resource_url":20580,"first_page":20581,"last_page":20582,"pdf_url":20583,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":20584,"bibkey":20585,"paper_type":860,"authors":20586,"abstract":20601},"lrec2026-main-896","DeepQuestion: Systematic Generation of Real-World Challenges for Evaluating LLMs Performance","10.63317\u002F37w7pv6oaaeg","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-896","11451","11460","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.896.pdf","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fadditional\u002F2026.lrec2026-1.896_OptionalSupplementaryMaterial.zip","khoramfar-etal-2026-deepquestion",[20587,20589,20591,20594,20597,20600],{"paper_id":20577,"author_seq":247,"given_name":2207,"surname":20588,"affiliation":63,"orcid":63},"Khoramfar",{"paper_id":20577,"author_seq":232,"given_name":2207,"surname":20590,"affiliation":63,"orcid":63},"Ramezani",{"paper_id":20577,"author_seq":218,"given_name":20592,"surname":20593,"affiliation":63,"orcid":63},"Mohammad Mahdi","Mohajeri",{"paper_id":20577,"author_seq":203,"given_name":20595,"surname":20596,"affiliation":63,"orcid":63},"Mohammad Javad","Dousti",{"paper_id":20577,"author_seq":188,"given_name":20598,"surname":20599,"affiliation":63,"orcid":63},"Majid Nili","Ahmadabadi",{"paper_id":20577,"author_seq":172,"given_name":8637,"surname":8638,"affiliation":63,"orcid":63},"While Large Language Models (LLMs) achieve near-human performance on standard benchmarks, their capabilities often fail to 
generalize to complex, real-world problems. To bridge this gap, we introduce DeepQuestion, a scalable, automated framework that systematically elevates the cognitive complexity of existing datasets through controlled task transformations grounded in explicit cognitive hierarchies. Based on Bloom’s taxonomy, DeepQuestion generates (1) scenario-based problems to test the application of knowledge in noisy, realistic contexts, and (2) instruction-based prompts that require models to create new questions from a given solution path, assessing synthesis and evaluation skills. Our extensive evaluation across ten leading open-source and proprietary models, covering both general-purpose and reasoning LLMs, reveals a stark performance decline—with accuracy dropping by up to 70%—as tasks ascend the cognitive hierarchy across evaluation settings. These findings underscore that current benchmarks overestimate true reasoning abilities and highlight the critical need for cognitively diverse evaluations to guide future LLM development.",{"paper_id":20603,"title":20604,"year":7,"month":188,"day":63,"doi":20605,"resource_url":20606,"first_page":20607,"last_page":20608,"pdf_url":20609,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20610,"paper_type":860,"authors":20611,"abstract":20622},"lrec2026-main-897","Pragmatic Modelling in Language Learning: Caregiver Question-Answer Feedback in Child-Directed 
Dialogue","10.63317\u002F4g9ggjcrgfax","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-897","11461","11478","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.897.pdf","bala-etal-2026-pragmatic",[20612,20614,20616,20619],{"paper_id":20603,"author_seq":247,"given_name":12283,"surname":20613,"affiliation":63,"orcid":63},"Bala",{"paper_id":20603,"author_seq":232,"given_name":1521,"surname":20615,"affiliation":63,"orcid":63},"Heim",{"paper_id":20603,"author_seq":218,"given_name":20617,"surname":20618,"affiliation":63,"orcid":63},"Elspeth","Edelstein",{"paper_id":20603,"author_seq":203,"given_name":20620,"surname":20621,"affiliation":63,"orcid":63},"Arabella","Sinclair","In language development, children learn to form Question–Answer (QA) sequences through caregiver feedback that adapts dynamically to their evolving linguistic abilities. Using expert annotated child-caregiver interaction, we examine four feedback types that guide children’s acquisition of adult-like QA behaviour: caregiver instructions through reformulating and affirming a child’s output as well as caregiver demonstrations through exemplifying and modelling adult-like behaviour. Our analysis reveals that feedback incidence, frequency and complexity progress and adapt over the course of development, akin to a tailored curriculum for pragmatic development. 
We release our annotated dataset which offers a rich resource for studying pragmatic feedback and provides the first large-scale empirical evidence of adaptive, tailored caregiver feedback on QA behaviour.",{"paper_id":20624,"title":20625,"year":7,"month":188,"day":63,"doi":20626,"resource_url":20627,"first_page":20628,"last_page":20629,"pdf_url":20630,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20631,"paper_type":860,"authors":20632,"abstract":20638},"lrec2026-main-898","Modular Approach to Automating Morphological Components in Grammar Engineering","10.63317\u002F4xy6oka3pbfi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-898","11479","11494","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.898.pdf","voloshina-etal-2026-modular",[20633,20635],{"paper_id":20624,"author_seq":247,"given_name":6029,"surname":20634,"affiliation":63,"orcid":63},"Voloshina",{"paper_id":20624,"author_seq":232,"given_name":20636,"surname":20637,"affiliation":63,"orcid":63},"Krasimir","Angelov","Creating formal grammars is a time-consuming and complex task. We present a method to automatically create the morphological components of a formal grammar in Grammatical Framework. Our method is linguistically interpretable and modular, consisting of three stages: paradigm construction, extraction of inflectional classes, and prediction of inflectional classes. The modular structure allows human interventions after each stage. Moreover, our method supports encoding pre-existing language knowledge in form of Python APIs. Experiments show that automatically extracted morphological rules yield results comparable with manual grammars and that incorporating prior linguistic knowledge leads to improvement in low-resourced scenarios. 
Our findings show that our method simplifies the process of grammar development while preserving quality and interpretability.",{"paper_id":20640,"title":20641,"year":7,"month":188,"day":63,"doi":20642,"resource_url":20643,"first_page":20644,"last_page":20645,"pdf_url":20646,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20647,"paper_type":860,"authors":20648,"abstract":20658},"lrec2026-main-899","MorfFlex: Handling Rich Morphology","10.63317\u002F36ruwkgu8iex","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-899","11495","11505","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.899.pdf","hlavov-etal-2026-morfflex",[20649,20652,20654,20655,20657],{"paper_id":20640,"author_seq":247,"given_name":20650,"surname":20651,"affiliation":63,"orcid":63},"Jaroslava","Hlaváčová",{"paper_id":20640,"author_seq":232,"given_name":1938,"surname":20653,"affiliation":63,"orcid":63},"Mikulová",{"paper_id":20640,"author_seq":218,"given_name":1978,"surname":13440,"affiliation":63,"orcid":63},{"paper_id":20640,"author_seq":203,"given_name":4806,"surname":20656,"affiliation":63,"orcid":63},"Straka",{"paper_id":20640,"author_seq":188,"given_name":1380,"surname":3434,"affiliation":63,"orcid":63},"We present MorfFlex, a morphological dictionary architecture suitable for languages with extensive regularity in both inflection and derivation. As the primary example of MorfFlex in use we introduce MorfFlex CZ, a morphological dictionary of Czech. It is distributed as a simple, unstructured list of \u003Cwordform, lemma, tag> triplets, however, its manually maintained, unpublished source files and conversion scripts encode a sophisticated system of inflectional and derivational patterns. These patterns dramatically reduce the otherwise enormous size of the dictionary, which currently contains over 100 million wordforms and more than 1 million lemmas. 
The MorfFlex CZ dictionary serves as an essential resource for ensuring the consistency of manual morphological annotation in the Prague Dependency Treebanks and underpins state-of-the-art automatic tools such as MorphoDiTa. In this paper, we focus on: (i) presenting an effective method for managing the rich morphological system within the dictionary, and (ii) demonstrating the utility of such a language resource for maintaining annotation consistency in corpora and supporting the development of advanced NLP applications.",{"paper_id":20660,"title":20661,"year":7,"month":188,"day":63,"doi":20662,"resource_url":20663,"first_page":20664,"last_page":20665,"pdf_url":20666,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20667,"paper_type":860,"authors":20668,"abstract":20677},"lrec2026-main-900","Using Valency Inheritance in Building a Valency Lexicon","10.63317\u002F2juerwwf3m9x","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-900","11506","11515","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.900.pdf","kettnerov-etal-2026-valency",[20669,20672,20674,20675],{"paper_id":20660,"author_seq":247,"given_name":20670,"surname":20671,"affiliation":63,"orcid":63},"Václava","Kettnerová",{"paper_id":20660,"author_seq":232,"given_name":3442,"surname":20673,"affiliation":63,"orcid":63},"Kolářová",{"paper_id":20660,"author_seq":218,"given_name":1975,"surname":1976,"affiliation":63,"orcid":63},{"paper_id":20660,"author_seq":203,"given_name":1631,"surname":20676,"affiliation":63,"orcid":63},"Olbrich","Derived words often share certain characteristics with their base words, which leads to the idea that identical properties are inherited from the base words. These properties also cover valency. Valency inheritance has not been used to automatically build lexical resources providing information on valency, the manual annotation of which requires significant human effort. 
In this paper, we propose a procedure for generating valency frames of selected semantic categories of Czech nouns and adjectives exhibiting a significant level of valency inheritance, thus covering the productive and systemic core of the lexicon. Based on a semiautomatic comparison of the noun and adjectival valency frames from NomVallex and the verbal valency frames from VALLEX, rules describing valency changes in the valency frames of noun and adjectival derivatives are formulated. The conditions imposed by the rules on valency frames identify individual base lemmas in these lexicons for which direct noun and adjectival derivatives are searched in DeriNet. Based on the changes in valency determined in the rules, more than 23,000 valency frames assigned to more than 10,000 noun and adjectival derivatives were derived, achieving high accuracy. These valency frames were included in DeriVallex, a database providing a solid basis for extending current lexical resources.",{"paper_id":20679,"title":20680,"year":7,"month":188,"day":63,"doi":20681,"resource_url":20682,"first_page":20683,"last_page":20684,"pdf_url":20685,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20686,"paper_type":860,"authors":20687,"abstract":20690},"lrec2026-main-901","From CHAT to Coded CoNLL-U: A Reproducible Pipeline for the Syntactic Annotation and Querying of Child Language Data","10.63317\u002F498zo5heasd5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-901","11516","11523","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.901.pdf","stein-2026-chat",[20688],{"paper_id":20679,"author_seq":247,"given_name":20689,"surname":12353,"affiliation":63,"orcid":63},"Achim","The CHILDES database is a core resource for language acquisition research, yet its CHAT format poses significant challenges for modern computational analysis. 
To address this, we present a reproducible, open-source pipeline that transforms CHAT transcripts into annotated tabular (CSV) and CoNLL-U formats. Its core script, childes.py, automates the conversion and integrates part-of-speech tagging and dependency parsing. A key innovation is dql.py, a tool that uses a Grew dependency query language to systematically add user-defined linguistic codings to the parsed data. While the script is parametrised for various languages, the pipeline’s utility is demonstrated by applying it to the French CHILDES corpus to conduct a large-scale analysis of object clitic production. The resulting structured data reveals clear developmental trajectories, such as the gradual convergence of children’s dative clitic usage towards the adult input. The workflow and the resources it generates facilitate reproducible, data-driven research in language acquisition.",{"paper_id":20692,"title":20693,"year":7,"month":188,"day":63,"doi":20694,"resource_url":20695,"first_page":20696,"last_page":20697,"pdf_url":20698,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20699,"paper_type":860,"authors":20700,"abstract":20704},"lrec2026-main-902","TækTåk: Syntactic Analysis of Language Use on Danish TikTok","10.63317\u002F3v3ysvzdpy6g","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-902","11524","11534","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.902.pdf","kristensen-etal-2026-tæktåk",[20701,20703],{"paper_id":20692,"author_seq":247,"given_name":16046,"surname":20702,"affiliation":63,"orcid":63},"Kristensen",{"paper_id":20692,"author_seq":232,"given_name":7809,"surname":7810,"affiliation":63,"orcid":63},"Language use is different across different language communities. Social media provides a rich source for studying how language varies, as it contains large data for a wide variety of sub-communities. In this paper, we study language usage on Danish TikTok. 
TikTok is a video-based platform, but most users are mainly active in the text-based comment sections. With the goal of analyzing language usage on this language variety, we contribute: 1) the first Danish social media treebank annotated for Universal Dependencies 2) evaluation of a variety of parsers using the new treebank, showing that cross-lingual in-domain data provides a valuable signal 3) a comparison of syntactic trends on standard Danish languages and TikTok language.",{"paper_id":20706,"title":20707,"year":7,"month":188,"day":63,"doi":20708,"resource_url":20709,"first_page":20710,"last_page":20711,"pdf_url":20712,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20713,"paper_type":860,"authors":20714,"abstract":20722},"lrec2026-main-903","Adaptive Chunking: Optimizing Chunking-Method Selection for RAG","10.63317\u002F3n8eu2phsvmc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-903","11535","11551","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.903.pdf","jnior-etal-2026-adaptive",[20715,20718,20721],{"paper_id":20706,"author_seq":247,"given_name":20716,"surname":20717,"affiliation":63,"orcid":63},"Paulo Roberto de Moura","Júnior",{"paper_id":20706,"author_seq":232,"given_name":20719,"surname":20720,"affiliation":63,"orcid":63},"Jean","Lelong",{"paper_id":20706,"author_seq":218,"given_name":4803,"surname":4804,"affiliation":63,"orcid":63},"The effectiveness of Retrieval-Augmented Generation (RAG) is highly dependent on how documents are chunked, that is, segmented into smaller units for indexing and retrieval. Yet, commonly used \"one-size-fits-all\" approaches often fail to capture the nuanced structure and semantics of diverse texts. Despite its central role, chunking lacks a dedicated evaluation framework, making it difficult to assess and compare strategies independently of downstream performance. 
We challenge this paradigm by introducing Adaptive Chunking, a framework that selects the most suitable chunking strategy for each document based on a set of five novel intrinsic, document-based metrics: References Completeness (RC), Intrachunk Cohesion (ICC), Document Contextual Coherence (DCC), Block Integrity (BI), and Size Compliance (SC), which directly assess chunking quality across key dimensions. To support this framework, we also introduce two new chunkers, an LLM-regex splitter and a split-then-merge recursive splitter, alongside targeted post-processing techniques. On a diverse corpus spanning legal, technical, and social science domains, our metric-guided adaptive method significantly improves downstream RAG performance. Without changing models or prompts, our framework increases RAG outcomes, raising answers correctness to 72% (from 62-64%) and increasing the number of successfully answered questions by over 30% (65 vs. 49). These results demonstrate that adaptive, document-aware chunking, guided by a complementary suite of intrinsic metrics, offers a practical and effective path to more robust RAG systems. Code available at https:\u002F\u002Fgithub.com\u002Fekimetrics\u002Fadaptive-chunking.",{"paper_id":20724,"title":20725,"year":7,"month":188,"day":63,"doi":20726,"resource_url":20727,"first_page":20728,"last_page":20729,"pdf_url":20730,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20731,"paper_type":860,"authors":20732,"abstract":20754},"lrec2026-main-904","Do Large Language Models Grasp the Grammar? 
Evidence from Grammar-Book-Guided Probing in Luxembourgish","10.63317\u002F3f5b63f9q7xp","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-904","11552","11561","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.904.pdf","li-etal-2026-do",[20733,20735,20736,20739,20741,20743,20745,20748,20750,20753],{"paper_id":20724,"author_seq":247,"given_name":20734,"surname":3446,"affiliation":63,"orcid":63},"Lujun",{"paper_id":20724,"author_seq":232,"given_name":20310,"surname":1913,"affiliation":63,"orcid":63},{"paper_id":20724,"author_seq":218,"given_name":20737,"surname":20738,"affiliation":63,"orcid":63},"Lama","Sleem",{"paper_id":20724,"author_seq":203,"given_name":20740,"surname":3676,"affiliation":63,"orcid":63},"Yiqun",{"paper_id":20724,"author_seq":188,"given_name":20742,"surname":3290,"affiliation":63,"orcid":63},"Yangjie",{"paper_id":20724,"author_seq":172,"given_name":9551,"surname":20744,"affiliation":63,"orcid":63},"Lothritz",{"paper_id":20724,"author_seq":155,"given_name":20746,"surname":20747,"affiliation":63,"orcid":63},"Niccolò","Gentile",{"paper_id":20724,"author_seq":138,"given_name":6771,"surname":20749,"affiliation":63,"orcid":63},"State",{"paper_id":20724,"author_seq":121,"given_name":20751,"surname":20752,"affiliation":63,"orcid":63},"Tegawendé F.","Bissyandé",{"paper_id":20724,"author_seq":104,"given_name":20315,"surname":6295,"affiliation":63,"orcid":63},"Grammar refers to the system of rules that governs the structural organization and the semantic relations among linguistic units such as sentences, phrases, and words within a given language. In natural language processing, there remains a notable scarcity of grammar-focused evaluation protocols, a gap that is even more pronounced for low-resource languages. Moreover, the extent to which large language models genuinely comprehend grammatical structure, especially the mapping between syntactic structures and meanings remains under debate. 
To investigate this issue, we propose a Grammar-Book–Guided evaluation pipeline intended to provide a systematic and generalizable framework for grammar evaluation consisting of four key stages, and in this work we take Luxembourgish as a case study. The results show a weak positive correlation between translation performance and grammatical understanding, indicating that strong translations do not necessarily imply deep grammatical competence. Larger models perform well overall due to their semantic strength but remain weak in morphology and syntax, struggling particularly with Minimal Pair tasks, while strong reasoning ability offers a promising way to enhance their grammatical understanding.",{"paper_id":20756,"title":20757,"year":7,"month":188,"day":63,"doi":20758,"resource_url":20759,"first_page":20760,"last_page":20761,"pdf_url":20762,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20763,"paper_type":860,"authors":20764,"abstract":20768},"lrec2026-main-905","Survey of Tools for Manual Linguistic Annotation: Supporting Diversity through Interactive Exploration","10.63317\u002F4u4be6bbtj8e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-905","11562","11573","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.905.pdf","pannitto-etal-2026-survey",[20765,20766,20767],{"paper_id":20756,"author_seq":247,"given_name":12553,"surname":12554,"affiliation":63,"orcid":63},{"paper_id":20756,"author_seq":232,"given_name":10974,"surname":10975,"affiliation":63,"orcid":63},{"paper_id":20756,"author_seq":218,"given_name":3696,"surname":1150,"affiliation":63,"orcid":63},"Manual annotation tools are core infrastructure for corpus creation, enabling the development of linguistically informed language resources relevant for both linguistic discovery and computational applications. 
We present a comprehensive survey of 21 tools supporting morphosyntactic and multi-word expression annotation, systematically documenting more than 50 features relevant for annotation workflows—from software architecture and usability to linguistic coverage and annotation scope. The survey results are published as an open dataset and made accessible through an interactive online platform that allows users to filter and compare tools according to their specific needs. Our initial analysis highlights a robust and open ecosystem of annotation tools, but advanced needs for complex and language-independent annotation are inconsistently addressed.",{"paper_id":20770,"title":20771,"year":7,"month":188,"day":63,"doi":20772,"resource_url":20773,"first_page":20774,"last_page":20775,"pdf_url":20776,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20777,"paper_type":860,"authors":20778,"abstract":20789},"lrec2026-main-906","TextLens & LeTTuce: Automated Corpus Annotation and Multilingual Tagging as a Service","10.63317\u002F3jvno99c6y49","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-906","11574","11584","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.906.pdf","hee-etal-2026-textlens",[20779,20780,20782,20784,20786,20788],{"paper_id":20770,"author_seq":247,"given_name":13361,"surname":13362,"affiliation":63,"orcid":63},{"paper_id":20770,"author_seq":232,"given_name":3601,"surname":20781,"affiliation":63,"orcid":63},"Doumen",{"paper_id":20770,"author_seq":218,"given_name":2232,"surname":20783,"affiliation":63,"orcid":63},"Prins",{"paper_id":20770,"author_seq":203,"given_name":20785,"surname":2253,"affiliation":63,"orcid":63},"Pranaydeep",{"paper_id":20770,"author_seq":188,"given_name":2232,"surname":20787,"affiliation":63,"orcid":63},"Vandeghinste",{"paper_id":20770,"author_seq":172,"given_name":2294,"surname":2295,"affiliation":63,"orcid":63},"We present TextLens, a web-based platform for 
automated linguistic annotation designed to lower technical barriers for researchers in digital humanities, linguistics and translation studies. Hosted by the Dutch Language Institute (INT), TextLens allows users to upload and annotate corpora in a variety of formats (.txt, .tsv, CoNLL-U, FoLiA, TEI, and NAF) using state-of-the-art NLP tools, without the need for local installation or computational resources. The platform supports multilingual data processing and provides a persistent dashboard for managing, monitoring and sharing annotation projects. Alongside this service, we introduce the LeTTuce-PoS Dataset, a new multilingual, manually annotated dataset for part-of-speech tagging in English, French, Dutch and German, covering multiple genres and offering a valuable resource to the research community. This paper also reports benchmark results for different PoS taggers (LeTs Preprocess, LeTTuce, spaCy and Stanza) on the dataset. Together, TextLens and the LeTTuce-PoS Dataset provide an accessible, scalable platform for high-quality annotation and a robust multilingual dataset that support comparable and reproducible research in multilingual contexts.",{"paper_id":20791,"title":20792,"year":7,"month":188,"day":63,"doi":20793,"resource_url":20794,"first_page":20795,"last_page":20796,"pdf_url":20797,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20798,"paper_type":860,"authors":20799,"abstract":20812},"lrec2026-main-907","The Corpus of Contemporary Polish — a New Reference Corpus with Rich Syntactic 
Annotations","10.63317\u002F2e37nxvjrs42","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-907","11585","11592","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.907.pdf","kiera-etal-2026-corpus",[20800,20801,20804,20807,20810],{"paper_id":20791,"author_seq":247,"given_name":14132,"surname":14133,"affiliation":63,"orcid":63},{"paper_id":20791,"author_seq":232,"given_name":20802,"surname":20803,"affiliation":63,"orcid":63},"Małgorzata","Marciniak",{"paper_id":20791,"author_seq":218,"given_name":20805,"surname":20806,"affiliation":63,"orcid":63},"Marcin","Woliński",{"paper_id":20791,"author_seq":203,"given_name":20808,"surname":20809,"affiliation":63,"orcid":63},"Katarzyna","Krasnowska-Kieraś",{"paper_id":20791,"author_seq":188,"given_name":7398,"surname":20811,"affiliation":63,"orcid":63},"Łaziński","In the paper, we describe the Corpus of Contemporary Polish (KWJP) and its rich syntactic annotation. The corpus covers a wide range of text originally published between 2011 and 2020. Although it carries on the idea of providing up-to-date reference corpora of Polish initiated by the National Corpus of Polish (NKJP) project, the principles underlying its development are not the same. In this article, we outline the different choices that affect corpora content and give an explanation for them. The article focuses mainly on the description of annotation layers in KWJP which are generated with a neural network based tool specially developed for this purpose. We describe in details syntactic structure annotation, which is represented by hybrid trees combining information typical to constituency and dependency trees. 
Finally, we provide several examples showing how annotation with hybrid trees facilitates querying and effective searching for information in the corpus.",{"paper_id":20814,"title":20815,"year":7,"month":188,"day":63,"doi":20816,"resource_url":20817,"first_page":20818,"last_page":20819,"pdf_url":20820,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20821,"paper_type":860,"authors":20822,"abstract":20831},"lrec2026-main-908","Prague Dependency Treebank - Consolidated 2.0: Enriching a Complex Annotation Scheme","10.63317\u002F276qjpo35shu","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-908","11593","11605","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.908.pdf","mikulov-etal-2026-prague",[20823,20824,20825,20826,20827,20829,20830],{"paper_id":20814,"author_seq":247,"given_name":1938,"surname":20653,"affiliation":63,"orcid":63},{"paper_id":20814,"author_seq":232,"given_name":1975,"surname":1976,"affiliation":63,"orcid":63},{"paper_id":20814,"author_seq":218,"given_name":4806,"surname":20656,"affiliation":63,"orcid":63},{"paper_id":20814,"author_seq":203,"given_name":11970,"surname":11971,"affiliation":63,"orcid":63},{"paper_id":20814,"author_seq":188,"given_name":1380,"surname":20828,"affiliation":63,"orcid":63},"Štěpánek",{"paper_id":20814,"author_seq":172,"given_name":1978,"surname":13440,"affiliation":63,"orcid":63},{"paper_id":20814,"author_seq":155,"given_name":1380,"surname":3434,"affiliation":63,"orcid":63},"The Prague Dependency Treebank framework is unique in its attempt to systematically include and link different layers of language, including a meaning representation with several types of inter-sentential phenomena, especially coreference and discourse relation. 
We present its second consolidated version (PDT-C 2.0), which concludes almost 30-years long project of sustained development of the resource to a uniformly and coherently annotated, genre-diversified, almost 4 million token language resource of Czech language, with accompanying fully compatible lexicons. In addition to continuous linguistic research, the richly linguistically annotated corpus is also widely used in international comparisons of the development of traditional and novel NLP tools as well as in conversions into other formalisms. The corpus and the trained parsers are available under the CC BY-NC-SA licence.",{"paper_id":20833,"title":20834,"year":7,"month":188,"day":63,"doi":20835,"resource_url":20836,"first_page":20837,"last_page":20838,"pdf_url":20839,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20840,"paper_type":860,"authors":20841,"abstract":20849},"lrec2026-main-909","Meet UD_Czech-PDTC: A Large and Genre-Rich Treebank in Universal Dependencies","10.63317\u002F5dpqivk4h8qk","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-909","11606","11619","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.909.pdf","mikulov-etal-2026-meet",[20842,20843,20844,20846,20847,20848],{"paper_id":20833,"author_seq":247,"given_name":1938,"surname":20653,"affiliation":63,"orcid":63},{"paper_id":20833,"author_seq":232,"given_name":1978,"surname":13440,"affiliation":63,"orcid":63},{"paper_id":20833,"author_seq":218,"given_name":1668,"surname":20845,"affiliation":63,"orcid":63},"Zeman",{"paper_id":20833,"author_seq":203,"given_name":1380,"surname":20828,"affiliation":63,"orcid":63},{"paper_id":20833,"author_seq":188,"given_name":4806,"surname":20656,"affiliation":63,"orcid":63},{"paper_id":20833,"author_seq":172,"given_name":1380,"surname":3434,"affiliation":63,"orcid":63},"Czech has been part of Universal Dependencies since its first release in 2015. 
It has also been one of the best represented languages, with the Prague Dependency Treebank being order of magnitude larger than most other UD treebanks. More recently, three other datasets from the Prague family were added and the annotations thoroughly revisited, forming the \"Prague Dependency Treebank-Consolidated\" (PDT-C). In comparison to the original PDT, PDT-C is more than twice as large, but it is also much more diverse in terms of genres and domains. In this paper, we describe the conversion of the new resource to Universal Dependencies. While the two annotation schemes are relatively similar at the first sight, there are numerous small differences in topology of the dependency structures and in granularity of the POS and relation type inventories. We demonstrate a selection of such differences on examples, discuss the diverging motivations, as well as ways to overcome the differences during conversion. We argue that while PDT is less \"universal\" and more tightly bound to one language, its multi-layer annotation is rich and provides all information needed for basic UD trees, and much more.",{"paper_id":20851,"title":20852,"year":7,"month":188,"day":63,"doi":20853,"resource_url":20854,"first_page":20855,"last_page":20856,"pdf_url":20857,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20858,"paper_type":860,"authors":20859,"abstract":20864},"lrec2026-main-910","Encoding Logical Relations of Chinese Complex Sentences within the Universal Dependencies Framework","10.63317\u002F2uasimdgfqin","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-910","11620","11630","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.910.pdf","zhu-etal-2026-encoding",[20860,20862],{"paper_id":20851,"author_seq":247,"given_name":20861,"surname":9726,"affiliation":63,"orcid":63},"Hongpu",{"paper_id":20851,"author_seq":232,"given_name":20863,"surname":3290,"affiliation":63,"orcid":63},"Hongzhi","Clauses 
in complex sentences always entail certain logical relations such as conjunctive, causative, and concessive. Such logical relations, however, are not properly represented in the universal dependencies (UD) framework, being collapsed into a adverbial clause (advcl) or clausal complement (ccomp) relation between clausal heads. This study extends the UD framework by encoding 13 logical relations. With the new framework, which is structurally identical to UD, we construct a training corpus containing about 1,769 sentences extracted from Chinese newswire and annotated an existing Chinese corpus (GSD-simp test) in UD as a test set. We trained a BERT-based biaffine parser and fine-tuned the Qwen-3 model with the training corpus and evaluated the models on the UD test data. They are compared against four general purpose LLMs including GPT-4o, GPT-5, Claude 4 and DeepSeek V3.2. We find that the fine-tuned Qwen-3-8B model achieves a UAS\u002FLAS of 0.840\u002F0.757, higher than the BERT-based parser and the general purpose LLMs. 
The results confirm the feasibility of our framework and highlight the inherent challenges of parsing hierarchical and implicit inter-clause relations.",{"paper_id":20866,"title":20867,"year":7,"month":188,"day":63,"doi":20868,"resource_url":20869,"first_page":20870,"last_page":20871,"pdf_url":20872,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20873,"paper_type":860,"authors":20874,"abstract":20877},"lrec2026-main-911","Unsupervised Labelling of Mutation Triggers in Welsh","10.63317\u002F37oxwc9pnyfv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-911","11631","11641","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.911.pdf","gutirrezroln-etal-2026-unsupervised",[20875,20876],{"paper_id":20866,"author_seq":247,"given_name":2834,"surname":2835,"affiliation":63,"orcid":63},{"paper_id":20866,"author_seq":232,"given_name":2846,"surname":2847,"affiliation":63,"orcid":63},"Initial consonant mutation is a key feature of Welsh, but its complexity poses significant challenges for both language learners and natural language processing (NLP) systems. While existing tools can reliably detect mutated forms, they provide no information about why a mutation occurs, i.e. what grammatical or lexical factors trigger the change. This paper introduces the novel task of mutation trigger labelling, representing the first computational attempt to analyse and explain the reasons behind Welsh mutations. Two preliminary approaches are explored: (i) a linguistically-informed rule-based system integrating Constraint Grammar rules, and (ii) large language models (LLMs), prompted in few-shot settings. Our experiments test the feasibility of automatically identifying and labelling linguistic triggers behind Welsh mutations using a dataset constructed from grammar reference books and public corpora, and establish baseline insights into how context-aware mutation analysis can be achieved. 
By framing mutation trigger labelling as a linguistic computational problem, this work lays important groundwork within Welsh NLP and contributes to the broader development of explainable grammatical analysis for low-resource languages.",{"paper_id":20879,"title":20880,"year":7,"month":188,"day":63,"doi":20881,"resource_url":20882,"first_page":20883,"last_page":20884,"pdf_url":20885,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20886,"paper_type":860,"authors":20887,"abstract":20894},"lrec2026-main-912","UzUDT: Uzbek Universal Dependencies Treebank","10.63317\u002F2uedjqezjxn5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-912","11642","11649","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.912.pdf","matlatipov-etal-2026-uzudt",[20888,20891],{"paper_id":20879,"author_seq":247,"given_name":20889,"surname":20890,"affiliation":63,"orcid":63},"Sanatbek Gayratovich","Matlatipov",{"paper_id":20879,"author_seq":232,"given_name":20892,"surname":20893,"affiliation":63,"orcid":63},"Mersaid","Aripov","In this paper, we present a new Universal Dependencies treebank for Uzbek language(UzUDT) developed as a gold-standard resource with full manual annotation. The treebank includes 684 sentences (7,582 tokens) from Uzbek literary texts, and is larger and more domain-diverse than the existing Uzbek UD treebank. The corpus was developed through rigorous multi-annotator adjudication, achieving very high inter-annotator agreement (multi-rater agreement coefficients >0.90) across lemmatization, PoS tagging, and morphological features. Alongside comprehensive corpus profiling, we establish robust computational baselines by evaluating graph-based (Stanza) and transition-based (spaCy) parsing architectures using both static and monolingual contextual embeddings. 
Our evaluations reveal a critical architectural trade-off for low-resource agglutinative parsing: joint transition-based models excel at morphosyntactic tagging, whereas graph-based models remain strictly superior for resolving complex structural dependencies. Furthermore, we demonstrate that cross-treebank data augmentation yields substantial, synergistic accuracy gains. The resource provides a much-needed high-quality treebank for Uzbek to assist in developing better NLP tools and to enable linguistic research in the low-resource language",{"paper_id":20896,"title":20897,"year":7,"month":188,"day":63,"doi":20898,"resource_url":20899,"first_page":20900,"last_page":20901,"pdf_url":20902,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20903,"paper_type":860,"authors":20904,"abstract":20915},"lrec2026-main-913","BRAGD: Constrained Multi-Label POS Tagging for Faroese","10.63317\u002F4ibuz9yubt3c","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-913","11650","11668","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.913.pdf","simonsen-etal-2026-bragd",[20905,20906,20907,20910,20911,20912],{"paper_id":20896,"author_seq":247,"given_name":19457,"surname":19458,"affiliation":63,"orcid":63},{"paper_id":20896,"author_seq":232,"given_name":3175,"surname":8877,"affiliation":63,"orcid":63},{"paper_id":20896,"author_seq":218,"given_name":20908,"surname":20909,"affiliation":63,"orcid":63},"Uni","Johannesen",{"paper_id":20896,"author_seq":203,"given_name":8874,"surname":8875,"affiliation":63,"orcid":63},{"paper_id":20896,"author_seq":188,"given_name":1410,"surname":1411,"affiliation":63,"orcid":63},{"paper_id":20896,"author_seq":172,"given_name":20913,"surname":20914,"affiliation":63,"orcid":63},"Vésteinn","Snæbjarnarson","We present the first multi-label part-of-speech (POS) tagger for Faroese using linguistically-informed constraints, addressing the data sparsity problem inherent in compound tag 
approaches. We propose the BRAGD tagset, which decomposes compound morphological tags into independent features (word class, gender, number, case, etc.). The BRAGD tagset is the third iteration of a tagset previously released for Faroese, with substantial modifications that are better aligned with Faroese grammar. We annotate the previously released Sosialurin corpus with the tagset, as well as a new annotated out-of-domain test corpus of 500 sentences from more varied and contemporary texts. To train the tagger, we use a constrained loss function that dynamically masks morphologically invalid features based on the word class (noun, verb, adjective, etc.). We fine-tune a Scandinavian transformer language model using the constrained multi-label loss, achieving an overall accuracy of 97.5%. We find that models trained with multi-label loss perform better, converge faster, and show significantly lower error rates on out-of-domain data than single-label approaches or previously reported methods for Faroese POS tagging. This confirms that the multi-label approach learns robust morphological patterns rather than memorizing domain-specific tag distributions. 
We release models, code, and the systematically revised Sosialurin-BRAGD corpus, featuring the new BRAGD tagset and a new out-of-domain evaluation corpus from diverse and contemporary text types.",{"paper_id":20917,"title":20918,"year":7,"month":188,"day":63,"doi":20919,"resource_url":20920,"first_page":20921,"last_page":20922,"pdf_url":20923,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20924,"paper_type":860,"authors":20925,"abstract":20930},"lrec2026-main-914","Syntactic Sugar for Syntactic Queries: Sequential Representations for Dependency Queries","10.63317\u002F2vfu2ssa33us","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-914","11669","11678","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.914.pdf","deworetzki-etal-2026-syntactic",[20926,20928],{"paper_id":20917,"author_seq":247,"given_name":11557,"surname":20927,"affiliation":63,"orcid":63},"Deworetzki",{"paper_id":20917,"author_seq":232,"given_name":3060,"surname":20929,"affiliation":63,"orcid":63},"Masciolini","Syntactic query languages such as Grew and dep_search allow looking for grammatical patterns in linguistically annotated corpora. However, these languages are often unsupported by large-scale corpus management tools, where queries are of an essentially sequential nature. In this paper, we present CQP\u002FTree, a tool to convert syntactic queries into CQL, the Corpus Query Language used in Corpus Workbench, SketchEngine, Korp and several other such systems. In this framework, syntactic queries act as _syntactic sugar_: they allow expressing complex CQL queries in a more readable and concise fashion, thus bridging the gap between expressive linguistic search and large-scale corpora. 
CQP\u002FTree is available as a web and command-line tool, as well as an open source Python library.",{"paper_id":20932,"title":20933,"year":7,"month":188,"day":63,"doi":20934,"resource_url":20935,"first_page":20936,"last_page":20937,"pdf_url":20938,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20939,"paper_type":860,"authors":20940,"abstract":20944},"lrec2026-main-915","Context Is (Almost) Everything: Llama-3 on Structured Output and AMR Parsing","10.63317\u002F584xu46viahy","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-915","11679","11698","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.915.pdf","buljan-etal-2026-context",[20941,20942,20943],{"paper_id":20932,"author_seq":247,"given_name":3414,"surname":3415,"affiliation":63,"orcid":63},{"paper_id":20932,"author_seq":232,"given_name":3402,"surname":3403,"affiliation":63,"orcid":63},{"paper_id":20932,"author_seq":218,"given_name":14921,"surname":14922,"affiliation":63,"orcid":63},"This paper evaluates the ability of an open-source LLM (Llama-3.1) to compute sentence-level semantics and encode it in formal language. We here compare two versions of the model on the task of generating a meaning representation graph for a given English sentence in the form of Abstract Meaning Representation. We explore the model’s in-context learning capability, comparing zero-shot prompting to few-shot demonstrations of varying levels of specificity. We find that Llama-3.1 frequently makes errors when reproducing the syntactic structure of both seen and unseen structured output, and that it only achieves near-SotA parsing performance when shown highly specific demonstrations similar in structure to the target sentence graph. We include an in-depth analysis of the model output, considering performance through the lens of fine-grained semantic phenomena, graph properties (e.g. 
top node accuracy), and graph complexity.",{"paper_id":20946,"title":20947,"year":7,"month":188,"day":63,"doi":20948,"resource_url":20949,"first_page":20950,"last_page":20951,"pdf_url":20952,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20953,"paper_type":860,"authors":20954,"abstract":20956},"lrec2026-main-916","Towards the Morphological Annotation of North Markian (Low German)","10.63317\u002F54xnswqy73gs","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-916","11699","11714","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.916.pdf","chiarcos-2026-morphological",[20955],{"paper_id":20946,"author_seq":247,"given_name":3643,"surname":4737,"affiliation":63,"orcid":63},"Low German (Low Saxon, ISO 639-2 nds) is an underresourced West Germanic language spoken in Northern Germany (Plattdütsch), in the Netherlands (Nedersaksisch) and in an international diaspora (Plautdietsch, Pomerano, etc.). As a minority language, it is under pressure from the respective national languages, and considered threatened. Although NLP and digital language resources might play a role in facilitating the use of the language on the web and to support intergenerational transmission, no NLP tools are known to exist, and no adequate corpora that such tools could be trained on. 
This paper describes the construction of a novel corpus of North Markian, a dialect of East Low German, its morphosyntactic annotation and morphological analysis, and in particular explores methods to bootstrap and develop such resources in the face of a complete lack of training data.",{"paper_id":20958,"title":20959,"year":7,"month":188,"day":63,"doi":20960,"resource_url":20961,"first_page":20962,"last_page":20963,"pdf_url":20964,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20965,"paper_type":860,"authors":20966,"abstract":20970},"lrec2026-main-917","Cross-Dataset Inconsistencies in Morphological Annotation: Evidence from Universal Dependencies","10.63317\u002F55hiti2bjus3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-917","11715","11723","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.917.pdf","ohldalov-2026-cross",[20967],{"paper_id":20958,"author_seq":247,"given_name":20968,"surname":20969,"affiliation":63,"orcid":63},"Vlasta","Ohlídalová","Ensuring annotation consistency is a challenging task in language dataset development. While difficulty is typically increasing at higher levels of linguistic complexity, we show that it is a critical issue even for fundamental linguistic tasks such as morphological annotation. Contrary to previous research that targeted intra-dataset inconsistencies, this study investigates inconsistencies across various pre-existing datasets for the same language. On the example of Universal Dependencies datasets, we examined what morphological categories exhibit the most disagreement. The analysis revealed that there are specific categories with low inconsistency score that indicates good agreement on these features (namely Case, Gender, Number and to a lesser extent Animacy). On the other hand, the Part-of-Speech (UPOS) tag stands out as a \"red flag\" due to high inconsistency score. 
Analysis of the most frequent inconsistencies suggest that they are dataset-specific artifacts rather than inherently language-specific phenomena.",{"paper_id":20972,"title":20973,"year":7,"month":188,"day":63,"doi":20974,"resource_url":20975,"first_page":20976,"last_page":20977,"pdf_url":20978,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20979,"paper_type":860,"authors":20980,"abstract":20984},"lrec2026-main-918","Improving Latvian Morphosyntactic Parsing with Pretrained Encoders and Analyzer-Constrained Decoding","10.63317\u002F5khpzsaiqrzw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-918","11724","11734","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.918.pdf","znotins-2026-improving",[20981],{"paper_id":20972,"author_seq":247,"given_name":20982,"surname":20983,"affiliation":63,"orcid":63},"Arturs","Znotins","We present a systematic evaluation of Latvian morphosyntactic parsing with pretrained transformer encoders in a unified joint architecture for tagging, lemmatization, and dependency parsing. We benchmark multilingual and Latvian-specific models and show that language-specific adaptation, even with modest in-language data, substantially improves performance. We further demonstrate that factored morphological modeling improves robustness and that integrating a Latvian morphological analyzer through constrained decoding yields consistent gains in XPOS tagging and lemmatization. 
The best system achieves new state-of-the-art results, reaching 95.22% XPOS accuracy, 98.72% lemma accuracy, and 93.19% LAS.",{"paper_id":20986,"title":20987,"year":7,"month":188,"day":63,"doi":20988,"resource_url":20989,"first_page":20990,"last_page":20991,"pdf_url":20992,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":20993,"paper_type":860,"authors":20994,"abstract":21009},"lrec2026-main-919","CommonMorph: Participatory Morphological Documentation Platform","10.63317\u002F5gqigwzjjv4b","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-919","11735","11746","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.919.pdf","mahmudi-etal-2026-commonmorph",[20995,20998,20999,21002,21005,21007],{"paper_id":20986,"author_seq":247,"given_name":20996,"surname":20997,"affiliation":63,"orcid":63},"Aso","Mahmudi",{"paper_id":20986,"author_seq":232,"given_name":7147,"surname":7148,"affiliation":63,"orcid":63},{"paper_id":20986,"author_seq":218,"given_name":21000,"surname":21001,"affiliation":63,"orcid":63},"Kemal Maulana","Kurniawan",{"paper_id":20986,"author_seq":203,"given_name":21003,"surname":21004,"affiliation":63,"orcid":63},"Rico","Sennrich",{"paper_id":20986,"author_seq":188,"given_name":21006,"surname":19852,"affiliation":63,"orcid":63},"Eduard H.",{"paper_id":20986,"author_seq":172,"given_name":6029,"surname":21008,"affiliation":63,"orcid":63},"Vylomova","Collecting and annotating morphological data present significant challenges, requiring linguistic expertise, methodological rigour, and substantial resources. These barriers are particularly acute for low-resource languages and varieties. To accelerate this process, we introduce CommonMorph, a comprehensive platform that streamlines morphological data collection development through a three-tiered approach: expert linguistic definition, contributor elicitation, and community validation. 
The platform minimises manual work by incorporating active learning, annotation suggestions, and tools to import and adapt materials from related languages. It accommodates diverse morphological systems, including fusional, agglutinative, and root-and-pattern morphologies. Its open-source design and UniMorph-compatible outputs ensure accessibility and interoperability with NLP tools. Our platform is accessible at https:\u002F\u002Fcommon-morph.com, offering a replicable model for preserving linguistic diversity through collaborative technology.",{"paper_id":21011,"title":21012,"year":7,"month":188,"day":63,"doi":21013,"resource_url":21014,"first_page":21015,"last_page":21016,"pdf_url":21017,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21018,"paper_type":860,"authors":21019,"abstract":21024},"lrec2026-main-920","Datasets for Verb Alternations across Languages: BLM Templates and Data Augmentation Strategies","10.63317\u002F4t48qjruy2ce","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-920","11747","11760","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.920.pdf","samo-etal-2026-datasets",[21020,21022],{"paper_id":21011,"author_seq":247,"given_name":2709,"surname":21021,"affiliation":63,"orcid":63},"Samo",{"paper_id":21011,"author_seq":232,"given_name":19822,"surname":21023,"affiliation":63,"orcid":63},"Merlo","Large language models (LLMs) have shown remarkable performance across various sentence-based linguistic phenomena, yet their ability to capture cross-sentence paradigmatic patterns, such as verb alternations, remains underexplored. In this work, we present curated paradigm-based datasets for four languages, designed to probe systematic cross-sentence knowledge of verb alternations (change-of-state and object-drop constructions in English, German and Italian, and Hebrew binyanim). The datasets comprise thousands of the Blackbird Language Matrices (BLMs) problems. 
The BLM task – an RPM\u002FARC-like task devised specifically for language – is a controlled linguistic puzzle where models must select the sentence that completes a pattern according to syntactic and semantic rules. We introduce three types of templates varying in complexity and apply linguistically-informed data augmentation strategies across synthetic and natural data. We provide simple baseline performance results across English, Italian, German, and Hebrew, that demonstrate the diagnostic usefulness of the datasets.",{"paper_id":21026,"title":21027,"year":7,"month":188,"day":63,"doi":21028,"resource_url":21029,"first_page":21030,"last_page":21031,"pdf_url":21032,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21033,"paper_type":860,"authors":21034,"abstract":21045},"lrec2026-main-921","A Large and Balanced Multi-Domain Arabic Corpus Annotated for Morphology, Syntax, and Readability","10.63317\u002F45f2o6t8piyi","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-921","11761","11775","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.921.pdf","elmadani-etal-2026-large",[21035,21038,21041,21044],{"paper_id":21026,"author_seq":247,"given_name":21036,"surname":21037,"affiliation":63,"orcid":63},"Khalid N.","Elmadani",{"paper_id":21026,"author_seq":232,"given_name":21039,"surname":21040,"affiliation":63,"orcid":63},"Adel Mahmoud","Wizani",{"paper_id":21026,"author_seq":218,"given_name":21042,"surname":21043,"affiliation":63,"orcid":63},"Hanada Taha","Thomure",{"paper_id":21026,"author_seq":203,"given_name":4229,"surname":4230,"affiliation":63,"orcid":63},"We present BAREC-10M, an expanded version of the Balanced Arabic Readability Evaluation Corpus (BAREC). This new release extends the original 1M-word corpus to 10 million words and broadens its scope to include balanced multi-domain coverage annotated for morphology, syntax, and readability. 
The corpus integrates 45 sub-corpora drawn from diverse sources, including news, educational materials, literature, children’s texts, and religious discourse. Each text is labeled for domain, readership level, and genre, and automatically analyzed using state-of-the-art morphological and syntactic tools. To enhance coverage of underrepresented varieties, we manually digitized and included children’s materials, magazines, and curriculum-based content. The resulting dataset provides a balanced resource for studying Arabic linguistic variation across styles, audiences, and levels of complexity.",{"paper_id":21047,"title":21048,"year":7,"month":188,"day":63,"doi":21049,"resource_url":21050,"first_page":21051,"last_page":21052,"pdf_url":21053,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21054,"paper_type":860,"authors":21055,"abstract":21059},"lrec2026-main-922","The DELPH-IN Grammary: A Curated Repository of Grammars and Treebanks","10.63317\u002F3gs4hx77nwhw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-922","11776","11786","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.922.pdf","bond-etal-2026-delph",[21056,21057],{"paper_id":21047,"author_seq":247,"given_name":7075,"surname":15132,"affiliation":63,"orcid":63},{"paper_id":21047,"author_seq":232,"given_name":9100,"surname":21058,"affiliation":63,"orcid":63},"Flickinger","Precision computational grammars encode detailed linguistic analyses and compositional semantics that support rigorous investigation of grammatical phenomena, but their development requires substantial expertise and maintenance. To ensure long-term sustainability and accessibility of these resources, we present the DELPH-IN Grammary, a curated collection of twenty three HPSG grammars spanning seventeen languages and eight language families. 
The repository includes mature broad-coverage grammars (English, German, Japanese, Norwegian, Spanish) with associated treebanks, as well as grammars for typologically diverse and less-resourced languages. Each grammar is standardized with metadata, compiled using the ACE parser\u002Fgenerator, and loaded into the Linguistic Type Database for detailed inspection. Following FAIR principles, all resources are version-controlled and archived on Zenodo with annual releases synchronized to community development cycles. The Grammary enables reproducible grammar research, cross-linguistic typological studies, semantic parsing development, and grammar engineering pedagogy, providing the depth and theoretical grounding that complements data-driven approaches in computational linguistics. Our goal is to establish a sustainable model for preserving these valuable resources which bridge the critical gap between theoretical linguistics and empirical corpus based research. The Grammary is available at https:\u002F\u002Fgithub.com\u002Fdelph-in\u002Fgrammary (Zenodo doi: zenodo.18945956).",{"paper_id":21061,"title":21062,"year":7,"month":188,"day":63,"doi":21063,"resource_url":21064,"first_page":21065,"last_page":21066,"pdf_url":21067,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21068,"paper_type":860,"authors":21069,"abstract":21080},"lrec2026-main-923","Morphemes without Borders: Evaluating Root–Pattern Morphology in Arabic Tokenizers and LLMs","10.63317\u002F3bk9j7feheoq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-923","11787","11799","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.923.pdf","alakeel-etal-2026-morphemes",[21070,21073,21076,21077],{"paper_id":21061,"author_seq":247,"given_name":21071,"surname":21072,"affiliation":63,"orcid":63},"Yara 
Yousif","Alakeel",{"paper_id":21061,"author_seq":232,"given_name":21074,"surname":21075,"affiliation":63,"orcid":63},"Chatrine","Qwaider",{"paper_id":21061,"author_seq":218,"given_name":1775,"surname":1776,"affiliation":63,"orcid":63},{"paper_id":21061,"author_seq":203,"given_name":21078,"surname":21079,"affiliation":63,"orcid":63},"Sawsan","Alqahtani","This work investigates how effectively large language models (LLMs) and their tokenization schemes represent and generate Arabic root–pattern morphology, probing whether they capture genuine morphological structure or rely on surface memorization. Arabic morphological system provides a rich testbed for analyzing how LLMs handle complex, non-concatenative forms and how tokenization choices influence this process. Our study begins with an evaluation of morphological fidelity across Arabic and multilingual tokenizers against gold-standard segmentation, followed by an analysis of LLM performance in productive root–pattern generation using a newly developed benchmark. 
Our findings across seven Arabic-centric and multilingual LLMs and their respective tokenizers reveal that tokenizer morphological alignment is not necessary nor sufficient for morphological generation, which questions the role of morphological tokenization in downstream performance.",{"paper_id":21082,"title":21083,"year":7,"month":188,"day":63,"doi":21084,"resource_url":21085,"first_page":21086,"last_page":21087,"pdf_url":21088,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21089,"paper_type":860,"authors":21090,"abstract":21101},"lrec2026-main-924","APODICTUS: Automatic Processing of DICTionary Update candidateS","10.63317\u002F3rtgegtzmpmv","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-924","11800","11812","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.924.pdf","blessing-etal-2026-apodictus",[21091,21093,21096,21098,21099,21100],{"paper_id":21082,"author_seq":247,"given_name":12079,"surname":21092,"affiliation":63,"orcid":63},"Blessing",{"paper_id":21082,"author_seq":232,"given_name":21094,"surname":21095,"affiliation":63,"orcid":63},"Johannes S.","Sax",{"paper_id":21082,"author_seq":218,"given_name":1296,"surname":21097,"affiliation":63,"orcid":63},"Kaufmann",{"paper_id":21082,"author_seq":203,"given_name":3270,"surname":18789,"affiliation":63,"orcid":63},{"paper_id":21082,"author_seq":188,"given_name":3405,"surname":3406,"affiliation":63,"orcid":63},{"paper_id":21082,"author_seq":172,"given_name":18345,"surname":18346,"affiliation":63,"orcid":63},"Dictionaries have to be regularly updated. Some dictionary-makers gather proposals for updates of sense entries in internal databases. We automate the process of verifying and prioritizing such sense proposals, and facilitate their addition to a dictionary, by building a sophisticated processing pipeline relying on state-of-the-art language models. 
Our pipeline presents the first systematic, large-scale, and comprehensive solution for processing candidates for inclusion in a dictionary, which is tested in an industry-relevant context. We conduct several experiments to evaluate the pipeline and provide an annotated dataset for future work. Model performance is acceptable for words which are not yet in the dictionary, but low for in-dictionary words. Through an error analysis and model component ablation, we gain further insight on directions of future model improvements.",{"paper_id":21103,"title":21104,"year":7,"month":188,"day":63,"doi":21105,"resource_url":21106,"first_page":21107,"last_page":21108,"pdf_url":21109,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21110,"paper_type":860,"authors":21111,"abstract":21114},"lrec2026-main-925","A Test Collection for Part-of-Speech Tagging and Word Sense Disambiguation","10.63317\u002F5mukdiuk65f4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-925","11813","11821","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.925.pdf","krovetz-2026-test",[21112],{"paper_id":21103,"author_seq":247,"given_name":3172,"surname":21113,"affiliation":63,"orcid":63},"Krovetz","We evaluate a focused test collection at the intersection of part-of-speech tagging and word‑sense disambiguation. The collection targets words such as train, novel, and lean, where part-of-speech contrasts align with clear meaning differences. We use it to detect regressions across tagger versions, track quantitative and qualitative progress over time, and test robustness to orthographic variation. Experiments with the Stanford and TnT taggers show 68% accuracy, compared with 92% for a recent spaCy transformer model. Earlier taggers erred mainly on noun–verb distinctions; spaCy’s errors more often involve noun–adjective distinctions. Uppercase text roughly doubles error rates for all taggers. 
We discuss common problems and propose directions for future testing.",{"paper_id":21116,"title":21117,"year":7,"month":188,"day":63,"doi":21118,"resource_url":21119,"first_page":21120,"last_page":21121,"pdf_url":21122,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21123,"paper_type":860,"authors":21124,"abstract":21153},"lrec2026-main-926","Creating a Hybrid Rule and Neural Network Based Semantic Tagger Using Silver Standard Data: The PyMUSAS Framework for Multilingual Semantic Annotation","10.63317\u002F4ngkupgnrbgc","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-926","11822","11833","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.926.pdf","moore-etal-2026-creating",[21125,21127,21128,21130,21132,21133,21136,21139,21142,21144,21147,21149,21151],{"paper_id":21116,"author_seq":247,"given_name":14130,"surname":21126,"affiliation":63,"orcid":63},"Moore",{"paper_id":21116,"author_seq":232,"given_name":1216,"surname":12462,"affiliation":63,"orcid":63},{"paper_id":21116,"author_seq":218,"given_name":2843,"surname":21129,"affiliation":63,"orcid":63},"Archer",{"paper_id":21116,"author_seq":203,"given_name":9249,"surname":21131,"affiliation":63,"orcid":63},"Czerniak",{"paper_id":21116,"author_seq":188,"given_name":2843,"surname":2844,"affiliation":63,"orcid":63},{"paper_id":21116,"author_seq":172,"given_name":21134,"surname":21135,"affiliation":63,"orcid":63},"Daisy Monika","Lal",{"paper_id":21116,"author_seq":155,"given_name":21137,"surname":21138,"affiliation":63,"orcid":63},"Gearóid Ó","Donnchadha",{"paper_id":21116,"author_seq":138,"given_name":21140,"surname":21141,"affiliation":63,"orcid":63},"Mícheál J. 
Ó","Meachair",{"paper_id":21116,"author_seq":121,"given_name":1395,"surname":21143,"affiliation":63,"orcid":63},"Piao",{"paper_id":21116,"author_seq":104,"given_name":21145,"surname":21146,"affiliation":63,"orcid":63},"Elaine Uí","Dhonnchadha",{"paper_id":21116,"author_seq":87,"given_name":8211,"surname":21148,"affiliation":63,"orcid":63},"Vuorinen",{"paper_id":21116,"author_seq":73,"given_name":10609,"surname":21150,"affiliation":63,"orcid":63},"Yabo",{"paper_id":21116,"author_seq":55,"given_name":21152,"surname":6675,"affiliation":63,"orcid":63},"Xiaobin","Word Sense Disambiguation (WSD) has been widely evaluated using the semantic frameworks of WordNet, BabelNet, and the Oxford Dictionary of English. However, for the UCREL Semantic Analysis System (USAS) framework, no open extensive evaluation has been performed beyond lexical coverage or single language evaluation. In this work, we perform the largest semantic tagging evaluation of the rule based system that uses the lexical resources in the USAS framework covering five different languages using four existing datasets and one novel Chinese dataset. We create a new silver labelled English dataset, to overcome the lack of manually tagged training data, that we train and evaluate various mono and multilingual neural models in both mono and cross-lingual evaluation setups with comparisons to their rule based counterparts, and show how a rule based system can be enhanced with a neural network model. 
The resulting neural network models, including the data they were trained on, the Chinese evaluation dataset, and all of the code will be released as open resources.",{"paper_id":21155,"title":21156,"year":7,"month":188,"day":63,"doi":21157,"resource_url":21158,"first_page":21159,"last_page":21160,"pdf_url":21161,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21162,"paper_type":860,"authors":21163,"abstract":21174},"lrec2026-main-927","Scare Quotes as Markers of \"Questionable\" Word Usages and Misalignment in Conversation: An Annotation Study","10.63317\u002F4g6qf5k2z4pw","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-927","11834","11851","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.927.pdf","soler-etal-2026-scare",[21164,21167,21170,21173],{"paper_id":21155,"author_seq":247,"given_name":21165,"surname":21166,"affiliation":63,"orcid":63},"Aina Garí","Soler",{"paper_id":21155,"author_seq":232,"given_name":21168,"surname":21169,"affiliation":63,"orcid":63},"Juan Carlos Zevallos","Huaco",{"paper_id":21155,"author_seq":218,"given_name":21171,"surname":21172,"affiliation":63,"orcid":63},"Matthieu","Labeau",{"paper_id":21155,"author_seq":203,"given_name":5369,"surname":5370,"affiliation":63,"orcid":63},"Scare quotes are a subtle yet powerful device: they can mark irony, distance, or disagreement about word meaning or lexical choices. We present a large-scale manual annotation of quoted word usages focused on the scare versus non-scare quote distinction as well as on their role in managing (mis)alignment in conversation. Our analysis reveals that scare quotes can mark problematic word usages, and they are often used to contest or criticize other speakers’ word choices. 
However, non-scare, meta-linguistic usages of quotes are also often involved in explicit efforts toward lexico-semantic alignment.",{"paper_id":21176,"title":21177,"year":7,"month":188,"day":63,"doi":21178,"resource_url":21179,"first_page":21180,"last_page":21181,"pdf_url":21182,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21183,"paper_type":860,"authors":21184,"abstract":21199},"lrec2026-main-928","Modeling Clinical Uncertainty in Radiology Reports: From Explicit Uncertainty Markers to Implicit Reasoning Pathways","10.63317\u002F2i8nrgcnb52p","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-928","11852","11873","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.928.pdf","rabaey-etal-2026-modeling",[21185,21187,21190,21192,21194,21196,21198],{"paper_id":21176,"author_seq":247,"given_name":8604,"surname":21186,"affiliation":63,"orcid":63},"Rabaey",{"paper_id":21176,"author_seq":232,"given_name":21188,"surname":21189,"affiliation":63,"orcid":63},"Jong Hak","Moon",{"paper_id":21176,"author_seq":218,"given_name":21191,"surname":1359,"affiliation":63,"orcid":63},"Jung-Oh",{"paper_id":21176,"author_seq":203,"given_name":21193,"surname":5173,"affiliation":63,"orcid":63},"Min Gwan",{"paper_id":21176,"author_seq":188,"given_name":21195,"surname":11013,"affiliation":63,"orcid":63},"Hangyul",{"paper_id":21176,"author_seq":172,"given_name":1316,"surname":21197,"affiliation":63,"orcid":63},"Demeester",{"paper_id":21176,"author_seq":155,"given_name":10323,"surname":8691,"affiliation":63,"orcid":63},"Radiology reports are invaluable for clinical decision-making and hold great potential for automated analysis when structured into machine-readable formats. These reports often contain uncertainty, which we categorize into two distinct types: (i) Explicit uncertainty reflects doubt about the presence or absence of findings, conveyed through hedging phrases. 
These vary in meaning depending on the context, making rule-based systems insufficient to quantify the level of uncertainty for specific findings; (ii) Implicit uncertainty arises when radiologists omit parts of their reasoning, recording only key findings or diagnoses. Here, it is often unclear whether omitted findings are truly absent or simply unmentioned for brevity. We address these challenges with a two-part framework. We quantify explicit uncertainty by creating an expert-validated, LLM-based reference ranking of common hedging phrases, and mapping each finding to a probability value based on this reference. In addition, we model implicit uncertainty through an expansion framework that systematically adds characteristic sub-findings derived from expert-defined diagnostic pathways for 14 common diagnoses. Using these methods, we release Lunguage++, an expanded, uncertainty-aware version of the Lunguage benchmark of fine-grained structured radiology reports. This enriched resource enables uncertainty-aware image classification, faithful diagnostic reasoning, and new investigations into the clinical impact of diagnostic uncertainty.",{"paper_id":21201,"title":21202,"year":7,"month":188,"day":63,"doi":21203,"resource_url":21204,"first_page":21205,"last_page":21206,"pdf_url":21207,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21208,"paper_type":860,"authors":21209,"abstract":21214},"lrec2026-main-929","ArabDiscrim: A Decade-Long Arabic Facebook Corpus on Racism and 
Discrimination","10.63317\u002F363iym4fo7cx","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-929","11874","11884","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.929.pdf","zaghouani-etal-2026-arabdiscrim",[21210,21211,21212,21213],{"paper_id":21201,"author_seq":247,"given_name":2365,"surname":2366,"affiliation":63,"orcid":63},{"paper_id":21201,"author_seq":232,"given_name":12635,"surname":12636,"affiliation":63,"orcid":63},{"paper_id":21201,"author_seq":218,"given_name":12629,"surname":12630,"affiliation":63,"orcid":63},{"paper_id":21201,"author_seq":203,"given_name":12710,"surname":12711,"affiliation":63,"orcid":63},"We present ArabDiscrim, a decade-long lexical resource and corpus of 293K public Arabic Facebook posts (2014–2024) discussing racism and discrimination. Unlike existing Twitter-centric datasets, ArabDiscrim integrates platform-native engagement signals, including reactions, shares, comments, and page metadata, enabling joint analysis of language and audience response. The resource includes 200 curated terms (100 racism, 100 discrimination) with morphological regex families (13+ inflections per lemma), and 20 discrimination axes capturing identity-based grounds for unequal treatment. It also provides explicit attribution patterns. Released under a restricted research-use license for ethical compliance with platform terms, ArabDiscrim supports weak supervision, axis-aware sampling, and platform ecology research. 
By bridging lexical depth and ecological validity, it establishes a foundation for fairness-oriented, platform-aware Arabic NLP.",{"paper_id":21216,"title":21217,"year":7,"month":188,"day":63,"doi":21218,"resource_url":21219,"first_page":21220,"last_page":21221,"pdf_url":21222,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21223,"paper_type":860,"authors":21224,"abstract":21232},"lrec2026-main-930","DAMETA: An LLM Benchmark for Danish Metaphor Interpretation with Systematically Varied Distractors","10.63317\u002F5myuqkhnpm7e","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-930","11885","11895","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.930.pdf","schneidermann-etal-2026-dameta",[21225,21228,21229,21230,21231],{"paper_id":21216,"author_seq":247,"given_name":21226,"surname":21227,"affiliation":63,"orcid":63},"Nina Skovgaard","Schneidermann",{"paper_id":21216,"author_seq":232,"given_name":17472,"surname":17473,"affiliation":63,"orcid":63},{"paper_id":21216,"author_seq":218,"given_name":8547,"surname":8548,"affiliation":63,"orcid":63},{"paper_id":21216,"author_seq":203,"given_name":17491,"surname":14917,"affiliation":63,"orcid":63},{"paper_id":21216,"author_seq":188,"given_name":8879,"surname":8280,"affiliation":63,"orcid":63},"We present DAMETA, the first evaluation benchmark for Danish metaphor interpretation in language models, derived from the following sources: an annotated corpus (the Dafig Corpus), the Danish dictionary (DDO) and culture reviews in Danish newspapers. Each of the 900 data instances contains a sentence with a metaphorical target word and four human-created paraphrase options; one correct interpretation and three systematic errors or distractors: i) a false literal paraphrase (typically concrete), ii) a false figurative paraphrase (typically abstract), and iii) a false contradictory paraphrase. 
The benchmark is tested on seven language models, and 5% of the data is further tested on humans for comparison. Results show, among others, that when informed in the prompt that the target word is a metaphor, the models tend to be most distracted by the false figurative paraphrase; in contrast, when uninformed about the metaphorical setting, the models are more distracted by the false literal paraphrase. The dataset goes beyond standard by incorporating descriptive metadata regarding metaphor conventionality on a 3-graded scale (lexicalised, implicit, and ad-hoc), alongside a range of dictionary-derived source domains (military, gastronomy, health, meteorology, etc.). These metadata enable deeper analysis and potentially innovative insights of model performance regarding creativity, language change, and culture-sensitivity.",{"paper_id":21234,"title":21235,"year":7,"month":188,"day":63,"doi":21236,"resource_url":21237,"first_page":21238,"last_page":21239,"pdf_url":21240,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21241,"paper_type":860,"authors":21242,"abstract":21252},"lrec2026-main-931","A New Semantic Artifact Based Framework for Studying and Documenting Algospeak and Related 
Phenomena","10.63317\u002F2kpkun3trrua","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-931","11896","11906","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.931.pdf","khan-etal-2026-new",[21243,21244,21246,21248,21251],{"paper_id":21234,"author_seq":247,"given_name":2908,"surname":2909,"affiliation":63,"orcid":63},{"paper_id":21234,"author_seq":232,"given_name":2155,"surname":21245,"affiliation":63,"orcid":63},"Gugliotta",{"paper_id":21234,"author_seq":218,"given_name":2155,"surname":21247,"affiliation":63,"orcid":63},"Squadrito",{"paper_id":21234,"author_seq":203,"given_name":21249,"surname":21250,"affiliation":63,"orcid":63},"Maura","Tarquini",{"paper_id":21234,"author_seq":188,"given_name":1110,"surname":2903,"affiliation":63,"orcid":63},"In this paper we present a new framework for analysis, documenting and publishing resources about the recent linguistic phenomenon of algospeak. This proposed framework features the use of two semantic artifacts (both of which we make available as SKOS semantic artifacts in RDF), and a cross-lingual lexicon of algospeak terms which follows a schema intended to facilitate the comparison of algospeak across languages and cultural contexts. 
Our article also features a discussion of the use of algospeak in two non-anglophone contexts (Italian and Arabic) which resulted from a period of data collection which the authors undertook as preparation for the creation of our framework and the categories which underlie it.",{"paper_id":21254,"title":21255,"year":7,"month":188,"day":63,"doi":21256,"resource_url":21257,"first_page":21258,"last_page":21259,"pdf_url":21260,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21261,"paper_type":860,"authors":21262,"abstract":21274},"lrec2026-main-932","Creating a High Quality Abstract Meaning Representation Dataset Automatically","10.63317\u002F3qai4xpkg9v4","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-932","11907","11915","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.932.pdf","heinecke-etal-2026-creating",[21263,21265,21268,21271],{"paper_id":21254,"author_seq":247,"given_name":1521,"surname":21264,"affiliation":63,"orcid":63},"Heinecke",{"paper_id":21254,"author_seq":232,"given_name":21266,"surname":21267,"affiliation":63,"orcid":63},"Asadullah","Munshi",{"paper_id":21254,"author_seq":218,"given_name":21269,"surname":21270,"affiliation":63,"orcid":63},"Frédéric","Herledan",{"paper_id":21254,"author_seq":203,"given_name":21272,"surname":21273,"affiliation":63,"orcid":63},"Geraldine","Damnati","As only a few gold training datasets are available today, Abstract Meaning Representation (AMR) parsers are mainly trained on AMR 3.0, the largest dataset (Knight et al., 2020) which contains 55k sentences for training. Even if great progress has been made, leading to parsers that can reach Smatch scores higher than 83% on the AMR 3.0 test dataset, this is not accurate enough to be used in real world application pipelines. More data could help improve performance, but manually annotating sentences is costly. 
So, we have investigated an approach to automatically create synthetic data using different existing tools and models trained on AMR 3.0. This leads to better parsing performance with Smatch scores increased by 1 to 2 points (depending on the 3 gold test datasets used) with models trained on the augmented data.",{"paper_id":21276,"title":21277,"year":7,"month":188,"day":63,"doi":21278,"resource_url":21279,"first_page":21280,"last_page":21281,"pdf_url":21282,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21283,"paper_type":860,"authors":21284,"abstract":21289},"lrec2026-main-933","Towards a Comprehensive English Wordnet-Wikidata Mapping","10.63317\u002F2hv5jxde5mv5","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-933","11916","11925","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.933.pdf","mccrae-etal-2026-comprehensive",[21285,21286,21288],{"paper_id":21276,"author_seq":247,"given_name":2271,"surname":2272,"affiliation":63,"orcid":63},{"paper_id":21276,"author_seq":232,"given_name":5192,"surname":21287,"affiliation":63,"orcid":63},"Bergh",{"paper_id":21276,"author_seq":218,"given_name":20636,"surname":20637,"affiliation":63,"orcid":63},"In this study, we present a comprehensive investigation into the mapping of English Wordnet to Wikidata, focusing on the existing mappings created by different projects. We systematically analyze the current mapping methodologies and their effectiveness, highlighting the strengths and limitations of each approach. Through a comparative analysis, we identified overlaps and discrepancies among the mappings, revealing insights into the relationships between the data sets. Our findings underscore the need for a more unified dataset that consolidates disparate mappings into a comprehensive unified Wordnet-Wikidata mapping. 
We propose a novel construction methodology for this unified data set, taking advantage of existing mappings while addressing their shortcomings. In addition, we discuss future perspectives and advanced techniques for mapping the remaining unmapped records, such as machine learning algorithms. This work not only contributes to the enhancement of data interoperability between Wordnet and Wikidata but also sets the stage for future research aimed at refining mapping techniques and expanding coverage.",{"paper_id":21291,"title":21292,"year":7,"month":188,"day":63,"doi":21293,"resource_url":21294,"first_page":21295,"last_page":21296,"pdf_url":21297,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21298,"paper_type":860,"authors":21299,"abstract":21304},"lrec2026-main-934","AmDi - Ambiguous Words Diachronic Dataset","10.63317\u002F46xmhmibv9hz","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-934","11926","11941","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.934.pdf","thielen-etal-2026-amdi",[21300,21302],{"paper_id":21291,"author_seq":247,"given_name":12079,"surname":21301,"affiliation":63,"orcid":63},"Thielen",{"paper_id":21291,"author_seq":232,"given_name":3610,"surname":21303,"affiliation":63,"orcid":63},"Kugler","Two fundamental tasks in computational linguistics are Lexical Semantic Change Detection and Word Sense Disambiguation. Both commonly rely on large annotated datasets. Most available datasets cover only one of two areas: diachronic corpora used for Semantic Change Detection, or synchronic datasets for Word Sense Disambiguation. 
To address this gap, the AmDi dataset is introduced as a German-language resource that supports a more fine-grained diachronic analysis of word meanings, while also enabling the investigation of embeddings generated with corresponding models, as well as providing a foundation for Word Sense Disambiguation tasks.",{"paper_id":21306,"title":21307,"year":7,"month":188,"day":63,"doi":21308,"resource_url":21309,"first_page":21310,"last_page":21311,"pdf_url":21312,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21313,"paper_type":860,"authors":21314,"abstract":21320},"lrec2026-main-935","GerVLPro: A CEFR-Graded Vocabulary List of L2 Learners' Productive Vocabulary in German","10.63317\u002F3cokpao2odep","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-935","11942","11959","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.935.pdf","michael-etal-2026-gervlpro",[21315,21317,21319],{"paper_id":21306,"author_seq":247,"given_name":21316,"surname":1732,"affiliation":63,"orcid":63},"Noah-Manuel",{"paper_id":21306,"author_seq":232,"given_name":2742,"surname":21318,"affiliation":63,"orcid":63},"Huelsing",{"paper_id":21306,"author_seq":218,"given_name":1104,"surname":13571,"affiliation":63,"orcid":63},"CEFR-graded vocabulary lists are a valuable tool for second-language (L2) learners as they provide guidance on the order in which to acquire vocabulary items. Thus, they are essential for informing computer-assisted language learning solutions that target vocabulary development in learners. However, the vast majority of GVLs are prescriptive in that they determine which items learners should learn at each level, and they provide little information about which items learners actually know. Moreover, in the case of German, almost all established GVLs focus exclusively on learners’ receptive vocabulary. 
To remedy this, we introduce GerVLPro: A CEFR-Graded Vocabulary List of L2 learners’ Productive vocabulary in German. We derived GerVLPro from a comprehensive aggregation of available CEFR-annotated German L2 learner corpora to represent a wide range of learners and contexts. The resulting list comprises 4,015 lemma-POS entries (A1: 611; A2: 1,134; B1: 903; B2: 1,103; C1: 249; C2: 15), assigned via a normalized share-based method. We then conducted a large-scale cross-evaluation against seven established GVLs and six prominent frequency lists. Despite sizable lexical overlap among resources, we found only weak to moderate alignment with GerVLPro. Finally, we investigated whether Gpt-4o and Gpt-5 can reliably grade the productive vocabulary items in GerVLPro. Although both models exhibit roughly similar predictive capacity, they underperform most of the established GVLs on alignment and do not accurately capture productive difficulty. Overall, our findings suggest that established GVLs, frequency lists, and LLM grading insufficiently reflect the trajectory of learners’ productive vocabulary, underscoring the need for descriptive, learner-based resources such as GerVLPro.",{"paper_id":21322,"title":21323,"year":7,"month":188,"day":63,"doi":21324,"resource_url":21325,"first_page":21326,"last_page":21327,"pdf_url":21328,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21329,"paper_type":860,"authors":21330,"abstract":21343},"lrec2026-main-936","Building Bridges between Student and Curricular Language: Creating a Corpus of Abstract Meaning Representations for the 
Classroom","10.63317\u002F5hgw65vj8nxj","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-936","11960","11971","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.936.pdf","wrightbettner-etal-2026-building",[21331,21334,21335,21337,21339,21342],{"paper_id":21322,"author_seq":247,"given_name":21332,"surname":21333,"affiliation":63,"orcid":63},"Kristin","Wright-Bettner",{"paper_id":21322,"author_seq":232,"given_name":11194,"surname":14884,"affiliation":63,"orcid":63},{"paper_id":21322,"author_seq":218,"given_name":21336,"surname":18789,"affiliation":63,"orcid":63},"Zekun",{"paper_id":21322,"author_seq":203,"given_name":21338,"surname":3843,"affiliation":63,"orcid":63},"James H.",{"paper_id":21322,"author_seq":188,"given_name":21340,"surname":21341,"affiliation":63,"orcid":63},"Jeffrey","Flanigan",{"paper_id":21322,"author_seq":172,"given_name":17580,"surname":17581,"affiliation":63,"orcid":63},"The potential of AI conversational agents to foster student learning and reduce teacher strain in classroom settings has made the development of pedagogical agents a prime research target. An effective AI agent in particular must be able to understand both student language and the content they are learning and, furthermore, map between them. Curricular terminology and student speech, though topically and semantically related, differ significantly in surface-form expression. We present the JIA-AMRs Collection, a new resource for exploring whether Abstract Meaning Representations (AMRs) can optimize interventions by a conversational AI agent in a middle-school classroom by providing structured semantic representations of classroom language. This resource also provides an avenue by which we can verify interventions by the agent. 
We discuss the challenges of creating a corpus of meaning representations that map across highly-dissimilar classroom data (multimedia curriculum, student spoken language, and student written language) and our promising results of a nearly 30-point gain in trained-parser performance over the off-the-shelf model.",{"paper_id":21345,"title":21346,"year":7,"month":188,"day":63,"doi":21347,"resource_url":21348,"first_page":21349,"last_page":21350,"pdf_url":21351,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21352,"paper_type":860,"authors":21353,"abstract":21379},"lrec2026-main-937","Mu'jam Arriyadh: A Comprehensive Lexicon for Contemporary Arabic Language","10.63317\u002F5ftvqaetzu76","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-937","11972","11978","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.937.pdf","altamimi-etal-2026-mu",[21354,21355,21356,21357,21358,21359,21360,21361,21364,21365,21368,21369,21370,21373,21375,21378],{"paper_id":21345,"author_seq":247,"given_name":3879,"surname":3880,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":232,"given_name":3895,"surname":3896,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":218,"given_name":3887,"surname":3877,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":203,"given_name":3892,"surname":3893,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":188,"given_name":3882,"surname":3883,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":172,"given_name":1691,"surname":3885,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":155,"given_name":1775,"surname":3877,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":138,"given_name":21362,"surname":21363,"affiliation":63,"orcid":63},"Abdulrahman 
Saeed","Alshehri",{"paper_id":21345,"author_seq":121,"given_name":3889,"surname":3890,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":104,"given_name":21366,"surname":21367,"affiliation":63,"orcid":63},"Maryam H.","Algarny",{"paper_id":21345,"author_seq":87,"given_name":3898,"surname":3899,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":73,"given_name":3876,"surname":3877,"affiliation":63,"orcid":63},{"paper_id":21345,"author_seq":55,"given_name":21371,"surname":21372,"affiliation":63,"orcid":63},"Saleh Zaidan","Albalawi",{"paper_id":21345,"author_seq":38,"given_name":21374,"surname":3899,"affiliation":63,"orcid":63},"Fawziah Mohammed",{"paper_id":21345,"author_seq":17,"given_name":21376,"surname":21377,"affiliation":63,"orcid":63},"Sara Ali","Alhifthi",{"paper_id":21345,"author_seq":2971,"given_name":3901,"surname":3902,"affiliation":63,"orcid":63},"This paper provides an overview of Contemporary Arabic Lexicon (Mu’jam Arriyadh). It is a contemporary and inclusive Arabic dictionary that has been specifically developed to cater to the needs of both native and non-native Arabic speakers. The corpus utilized in this study is derived from the Arabic Contemporary Corpus for Analysis (ACCA), which encompasses a vast collection of 450 million words of Modern Standard Arabic spanning the previous century. Significantly, the lexicon in question prioritizes lemma-based entries over root forms, hence enhancing its user-friendliness and adaptability across different contexts. The resource offers comprehensive linguistic data pertaining to a wide array of Arabic vocabulary, encompassing morphological, morph-syntactic, and semantic aspects. The Lexicon has been developed in accordance with the ISO 24613 standard, which improves its ability to be processed by machines and facilitates the utilization of natural language processing systems. 
The database encompasses a range of linguistic aspects, such as synonyms, antonyms, and root forms, offering a comprehensive compilation. Mu’jam Arriyadh is a contemporary Arabic lexicon that is designed to be accessible to users, compatible with machine processing, and highly beneficial for anyone studying the language, conducting research, and utilizing natural language processing technologies.",{"paper_id":21381,"title":21382,"year":7,"month":188,"day":63,"doi":21383,"resource_url":21384,"first_page":21385,"last_page":21386,"pdf_url":21387,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21388,"paper_type":860,"authors":21389,"abstract":21397},"lrec2026-main-938","The Romanian Corpus Annotated with Multiword Expressions. PARSEME-Ro Version 2.0","10.63317\u002F3dnsaryv4kdh","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-938","11979","11991","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.938.pdf","mititelu-etal-2026-romanian",[21390,21391,21393,21395],{"paper_id":21381,"author_seq":247,"given_name":9441,"surname":9442,"affiliation":63,"orcid":63},{"paper_id":21381,"author_seq":232,"given_name":9153,"surname":21392,"affiliation":63,"orcid":63},"Cristescu",{"paper_id":21381,"author_seq":218,"given_name":2968,"surname":21394,"affiliation":63,"orcid":63},"Irimia",{"paper_id":21381,"author_seq":203,"given_name":21396,"surname":9434,"affiliation":63,"orcid":63},"Carmen Mîrzea","The Romanian journalistic corpus previously annotated with verbal multiword expressions (PARSEME-Ro) has been extended recently with other journalistic texts and annotated with multiword expressions of all parts of speech closely observing version 2.0 of the PARSEME guidelines. 
The corpus size has been increased by about 40%, it underwent automatic morpho-syntactic annotation following the Universal Dependencies principles, as well as extensive semi-automatic annotation of multiword expressions of all morphological types (nominal, adjectival, adverbial, determiner, pronominal, prepositional, conjunction, interjection, and verbal for the newly added texts). We present here our work methodology, which involves an automatic annotation phase, but the manual work prevails in checking the annotation and its consistency. We also offer quantitative data about the new version of the corpus, the types of multiword expressions existing in Romanian and occurring therein, and characteristics thereof. The new version of the PARSEME-Ro corpus contributes to the field of developing multiword expressions resources per se, i.e. describing this language phenomenon, as well as resources for training, tuning and testing the performance of tools and large language models when dealing with this linguistic phenomenon. The paper also discusses some remarks on the MWE paraphrasing subtask in which a part of the corpus was used. 
The corpus is released with a permissive license.",{"paper_id":21399,"title":21400,"year":7,"month":188,"day":63,"doi":21401,"resource_url":21402,"first_page":21403,"last_page":21404,"pdf_url":21405,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21406,"paper_type":860,"authors":21407,"abstract":21411},"lrec2026-main-939","Missing Links: LLM-Augmentation of Event Triggers of State Changes in the OpenPI Dataset","10.63317\u002F4ga5mnybeeam","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-939","11992","12006","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.939.pdf","rim-etal-2026-missing",[21408,21410],{"paper_id":21399,"author_seq":247,"given_name":21409,"surname":4355,"affiliation":63,"orcid":63},"Kyeongmin",{"paper_id":21399,"author_seq":232,"given_name":4091,"surname":4092,"affiliation":63,"orcid":63},"Effective computational understanding of procedural text requires modeling not just the state changes that occur (entity transformations), but also the specific actions that cause them (event triggers). A lack of datasets that explicitly link these two primary information sources has hindered progress in theory-oriented research and applications of NLP. This paper presents two primary contributions: (i) a new silver-standard dataset where event trigger annotations are added to existing state-change data on task-oriented procedural text, enabling both theoretical investigation and practical benchmarking; and (ii) inverse annotation, a framework for recovering missing linguistic annotations from existing semantic annotations—which we apply to recover event triggers from OpenPI’s state-change outcomes. We provide detailed pipeline analysis including error modes and quality filtering, and validate the dataset through comprehensive baseline evaluation of diverse trigger detection systems. 
Our work delivers both a reusable methodological framework applicable to other annotation recovery tasks and a new benchmark resource for modeling the relationship between linguistic actions and their semantic outcomes in procedural domains.",{"paper_id":21413,"title":21414,"year":7,"month":188,"day":63,"doi":21415,"resource_url":21416,"first_page":21417,"last_page":21418,"pdf_url":21419,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21420,"paper_type":860,"authors":21421,"abstract":21423},"lrec2026-main-940","VUPMC: A New Political Metaphor Corpus in Mandarin Chinese","10.63317\u002F5exd4mc9kh4d","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-940","12007","12018","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.940.pdf","tan-2026-vupmc",[21422],{"paper_id":21413,"author_seq":247,"given_name":8937,"surname":916,"affiliation":63,"orcid":63},"This article proposes the Conventional and Novel Metaphor Identification Procedure (CNMIP) for Mandarin Chinese and applies this replicable protocol to annotate the VUPMC dataset, a new Political Metaphor Corpus developed at VU University Amsterdam. The VUPMC corpus contains three Chinese political genres (Policy Documents, Remarks, News Reports) and includes over 220,000 tokens of concordance sentences for the node word 贸易 ‘trade’. The corpus analysis shows that 6.64% of lexical units in the VUPMC dataset are used as metaphor-related words (MRWs) to frame trade (e.g., using ‘war’ to frame trade as a war). Further tests show that distributions of MRWs differ significantly across genres and Parts of Speech. Similarities in MRW distributions between the VUPMC and other datasets confirm the reliability of the CNMIP procedure. The differences, however, highlight the methodological advances in manual annotation of conventional and novel MRWs as well as the distinctive features of Chinese political genres. 
The VUPMC dataset serves as a valuable language resource for computational detection of Chinese conventional and novel metaphors.",{"paper_id":21425,"title":21426,"year":7,"month":188,"day":63,"doi":21427,"resource_url":21428,"first_page":21429,"last_page":21430,"pdf_url":21431,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21432,"paper_type":860,"authors":21433,"abstract":21439},"lrec2026-main-941","Not All Disneys Are the Same: Making Coreference Metonymy-Aware","10.63317\u002F2xn8s7qim92p","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-941","12019","12030","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.941.pdf","ye-etal-2026-not",[21434,21436,21438],{"paper_id":21425,"author_seq":247,"given_name":21435,"surname":1896,"affiliation":63,"orcid":63},"Bingyang",{"paper_id":21425,"author_seq":232,"given_name":21437,"surname":7773,"affiliation":63,"orcid":63},"Jingxuan",{"paper_id":21425,"author_seq":218,"given_name":4091,"surname":4092,"affiliation":63,"orcid":63},"Metonymy, a type of referential transfer in which a name evokes a conceptually related entity (e.g., \"Disney\" for the theme park), is a pervasive and systematic feature of natural language. Yet, despite its impact on entity interpretation, coreference research has rarely treated metonymy explicitly. Computational models of metonymy, in turn, typically analyze local, sentence-level cases, leaving unexplored how metonymic reference interacts with discourse-level coreference phenomena. We bridge this gap by introducing CoNLL-Coref-Met, a metonymy-aware annotation layer on top of CoNLL-2012 that flags metonymic mentions in context. Using this lens, we show that state-of-the-art neural resolvers and LLMs systematically underperform on metonymic clusters relative to literal counterparts. 
We then (i) correct clusters affected by metonymy to reflect semantic reference rather than surface form and (ii) introduce a metonymy-aware LLM procedure to resolve semantic ambiguities introduced by metonymic shifts. Our pipeline introduces a novel way to see, measure, and mitigate metonymy effects on coreference.",{"paper_id":21441,"title":21442,"year":7,"month":188,"day":63,"doi":21443,"resource_url":21444,"first_page":21445,"last_page":21446,"pdf_url":21447,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21448,"paper_type":860,"authors":21449,"abstract":21458},"lrec2026-main-942","JSTS-Neg: Japanese Semantic Textual Similarity Dataset for Evaluating Negation Understanding Ability","10.63317\u002F4rywvofmkvff","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-942","12031","12041","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.942.pdf","yuasa-etal-2026-jsts",[21450,21453,21455],{"paper_id":21441,"author_seq":247,"given_name":21451,"surname":21452,"affiliation":63,"orcid":63},"Reiko","Yuasa",{"paper_id":21441,"author_seq":232,"given_name":21454,"surname":11797,"affiliation":63,"orcid":63},"Yoshihide",{"paper_id":21441,"author_seq":218,"given_name":21456,"surname":21457,"affiliation":63,"orcid":63},"Shigeki","Matsubara","Negation is a common linguistic phenomenon in natural language. Thus, datasets and benchmarks focused on negation are being constructed to evaluate the negation understanding abilities of language models. Negation is especially crucial when estimating the semantic similarity between sentences because it inverses their meaning. Although semantic textual similarity (STS) is one of the useful tasks to evaluate the abilities of large language models (LLMs), few STS datasets focus on negation. In this research, we introduce JSTS-Neg, a new Japanese STS dataset focusing on negation. 
Most instances in JSTS-Neg include negations and they are composed of both clausal and sub-clausal negations to reflect a variety of negation types. Moreover, JSTS-Neg consists of negation minimal pairs that only differ in the presence or absence of a negation cue. We evaluate the performance of existing LLMs on JSTS-Neg using negation minimal pairs to explore their abilities and limitations in understanding negation. LLMs tend to predict the similarity of two sentences ignoring negation cues in specific settings.",{"paper_id":21460,"title":21461,"year":7,"month":188,"day":63,"doi":21462,"resource_url":21463,"first_page":21464,"last_page":21465,"pdf_url":21466,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21467,"paper_type":860,"authors":21468,"abstract":21475},"lrec2026-main-943","Few-shot Prompting or Supervised Tuning? A Comparative Study of LLMs for Linguistically Distant Language Pairs in BDI","10.63317\u002F5aqjysq8avd3","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-943","12042","12053","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.943.pdf","naorem-etal-2026-few",[21469,21472,21473,21474],{"paper_id":21460,"author_seq":247,"given_name":21470,"surname":21471,"affiliation":63,"orcid":63},"Deepen","Naorem",{"paper_id":21460,"author_seq":232,"given_name":2252,"surname":2253,"affiliation":63,"orcid":63},{"paper_id":21460,"author_seq":218,"given_name":9620,"surname":2253,"affiliation":63,"orcid":63},{"paper_id":21460,"author_seq":203,"given_name":4467,"surname":4468,"affiliation":63,"orcid":63},"Bilingual Dictionary Induction (BDI) presents significant challenges in distant language pairs, particularly in light of the non-isomorphic nature and complexity of linguistic structures. 
This paper systematically evaluates the performance of unsupervised, supervised fine-tuning, and few-shot prompting approaches on BDI using Large Language Models (LLMs) on a diverse set of distant language pairs. The unsupervised approach explores the inherent multilingual capabilities of LLMs without fine-tuning, while the supervised fine-tuning method utilizes extensive labeled datasets to train models explicitly for BDI tasks. On the other hand, few-shot prompting leverages minimal examples to elicit accurate responses from the LLMs in a zero-shot or few-shot learning paradigm. Our experimental results reveal that the 5-shot prompting approach outperforms unsupervised and zero-shot settings in all cases and surpasses supervised settings in 82.86% of the cases. Few-shot prompting demonstrates robustness against overfitting, leveraging LLMs’ in-context learning and multilingual capabilities, making it particularly effective in target-to-source translation, even for morphologically complex language pairs. At the same time, few-shot prompting in LLM models, such as Llama, remains ineffective for morphologically rich language pairs like En-Mn and En-Ta in source-to-target BDI tasks. 
These findings suggest that few-shot prompting is a cost-effective and powerful alternative for BDI tasks, with future work enhancing BDI tasks in morphologically rich pairs.",{"paper_id":21477,"title":21478,"year":7,"month":188,"day":63,"doi":21479,"resource_url":21480,"first_page":21481,"last_page":21482,"pdf_url":21483,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":21484,"paper_type":860,"authors":21485,"abstract":21501},"lrec2026-main-944","When Structure Matters: Cross-Lingual Hyperbolic Embeddings for Chinese and English Wordnets","10.63317\u002F55a4sr9mfucq","https:\u002F\u002Flrec.elra.info\u002Flrec2026-main-944","12054","12071","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002Fpdf\u002F2026.lrec2026-1.944.pdf","ku-etal-2026-when",[21486,21489,21492,21494,21496,21498],{"paper_id":21477,"author_seq":247,"given_name":21487,"surname":21488,"affiliation":63,"orcid":63},"Mao-Chang","Ku",{"paper_id":21477,"author_seq":232,"given_name":21490,"surname":21491,"affiliation":63,"orcid":63},"da-Chen","Lian",{"paper_id":21477,"author_seq":218,"given_name":21493,"surname":1840,"affiliation":63,"orcid":63},"Pin-Er",{"paper_id":21477,"author_seq":203,"given_name":21495,"surname":3676,"affiliation":63,"orcid":63},"Po-Ya Angela",{"paper_id":21477,"author_seq":188,"given_name":21497,"surname":1840,"affiliation":63,"orcid":63},"Wei-Ling",{"paper_id":21477,"author_seq":172,"given_name":21499,"surname":21500,"affiliation":63,"orcid":63},"Shu-Kai","Hsieh","Hyperbolic embeddings such as the Poincaré model effectively represent lexical hierarchies with low distortion, yet their cross-lingual generalizability remains largely unexplored. 
This study investigates cross-lingual transfer by training 20-dimensional Poincaré embeddings exclusively on Open English WordNet (OEWN) hypernymy relations and evaluating on aligned Chinese Wordnet (CWN) synsets under a vocabulary-constrained transfer setting, where CWN-relevant synsets appear in OEWN training data but no Chinese-language supervision is used. We report robust statistical evidence based on the final 10 training checkpoints: Poincaré embeddings achieve 2.57× higher Mean Reciprocal Rank (MRR) than Euclidean embeddings on CWN (0.030 ± 0.001 vs 0.012 ± 0.000, p \u003C 0.001, Cohen’s d = 34.48) and 5.61× higher on OEWN (0.016 ± 0.000 vs 0.003 ± 0.000, p \u003C 0.001, d = 42.48). Furthermore, hierarchical filtering leveraging the radial dimension of hyperbolic space provides substantial additional gains: +74.6% MRR improvement on CWN and +25.8% on OEWN (both p \u003C 0.001). The model achieves higher absolute performance on the zero-shot CWN test set (MRR = 0.052 ± 0.002) than on the in-domain OEWN test set (MRR = 0.020 ± 0.001). We attribute this to structural alignment: CWN’s broader branching factor (4.32 vs 1.10) and moderate depth naturally suit hyperbolic geometry’s capacity to compactly represent hierarchies. Our findings demonstrate that geometric properties learned from English hypernymy transfer robustly across languages when semantic structures align. We release the aligned CWN–OEWN hypernymy evaluation dataset and complete evaluation framework to facilitate future research on geometry-based cross-lingual semantic modeling."]