[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"sidebar-data":3,"breadcrumb-conf-2004":849,"conference-2004":850,"papers-2004":851},{"conferences":4,"tutorials":254,"workshops":260},[5,27,47,65,79,96,113,130,147,164,180,195,211,225,240],{"conference_id":6,"year":7,"proceedings_title":8,"venue_ids":9,"isbn":10,"issn":11,"doi":12,"publisher":13,"editors":14,"conference_name":15,"conference_acronym":16,"conference_number":17,"conference_location":18,"conference_city":19,"conference_country":20,"conference_start_date":21,"conference_end_date":22,"conference_url":23,"pdf_url":24,"img_conf_url":25,"paperCount":26},"lrec2026","2026","Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026)","lrec","978-2-493814-49-4","2522-2686","10.63317\u002F4fxzgre27xzj","European Language Resources Association (ELRA)","Stelios Piperidis, Núria Bel, Henk van den Heuvel, Nancy Ide, Simon Krek, Antonio Toral","The Fifteenth Language Resources and Evaluation Conference (LREC 2026)","LREC","15","Palau de Congressos de Palma","Palma, Mallorca","Spain","2026-05-11","2026-05-16","https:\u002F\u002Flrec2026.info","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002FLREC-2026.pdf",null,944,{"conference_id":28,"year":29,"proceedings_title":30,"venue_ids":31,"isbn":32,"issn":11,"doi":33,"publisher":34,"editors":35,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_city":40,"conference_country":41,"conference_start_date":42,"conference_end_date":43,"conference_url":44,"pdf_url":45,"img_conf_url":25,"paperCount":46},"lrec2024","2024","Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)","lrec|coling","979-10-95546-34-4","10.63317\u002F375ba8vd9q2v","European Language Resources Association (ELRA) and ICCL","Nicoletta Calzolari, Min-Yen Kan, Veronique Hoste, Alessandro Lenci, Sakriani Sakti, Nianwen 
Xue","Joint International Conference on Computational Linguistics, Language Resources and Evaluation","LREC-COLING","14","Lingotto Conference Centre","Turin","Italy","2024-05-20","2024-05-25","https:\u002F\u002Flrec-coling-2024.org","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002FLREC-2024.pdf",1554,{"conference_id":48,"year":49,"proceedings_title":50,"venue_ids":9,"isbn":51,"issn":11,"doi":52,"publisher":13,"editors":53,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_city":57,"conference_country":58,"conference_start_date":59,"conference_end_date":60,"conference_url":61,"pdf_url":62,"img_conf_url":63,"paperCount":64},"lrec2022","2022","Proceedings of the Thirteenth International Conference on Language Resources and Evaluation (LREC 2022)","79-10-95546-38-2","10.63317\u002F296vkvmh42ye","Nicoletta Calzolari, Frédéric Béchet, Philippe Blache, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Jan Odijk, Stelios Piperidis2020","Thirteenth Language Resources and Evaluation Conference","13","Palais du Pharo","Marseille","France","2022-06-20","2022-06-25","https:\u002F\u002Flrec2022.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2022\u002FLREC-2022.pdf","",804,{"conference_id":66,"year":67,"proceedings_title":68,"venue_ids":9,"isbn":69,"issn":11,"doi":70,"publisher":13,"editors":71,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_city":57,"conference_country":58,"conference_start_date":74,"conference_end_date":75,"conference_url":76,"pdf_url":77,"img_conf_url":63,"paperCount":78},"lrec2020","2020","Proceedings of the Twelfth International Conference on Language Resources and Evaluation (LREC 2020)","79-10-95546-34-4","10.63317\u002F4j46u44gnpwr","Nicoletta Calzolari, Frédéric Béchet, Philippe Blache, Khalid 
Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Twelfth Language Resources and Evaluation Conference","12","2020-05-11","2020-05-16","https:\u002F\u002Flrec2020.lrec-conf.org\u002Fen\u002Findex.html","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002FLREC-2020.pdf",895,{"conference_id":80,"year":81,"proceedings_title":82,"venue_ids":9,"isbn":83,"issn":11,"doi":84,"publisher":13,"editors":85,"conference_name":86,"conference_acronym":16,"conference_number":87,"conference_location":88,"conference_city":89,"conference_country":90,"conference_start_date":91,"conference_end_date":92,"conference_url":93,"pdf_url":94,"img_conf_url":63,"paperCount":95},"lrec2018","2018","Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)","79-10-95546-00-9","10.63317\u002F25jzjyk647iz","Nicoletta Calzolari, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Koiti Hasida, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis, Takenobu Tokunaga","Eleventh International Conference on Language Resources and Evaluation","11","Phoenix Seagaia Resort","Miyazaki","Japan","2018-05-07","2018-05-12","http:\u002F\u002Flrec2018.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2018\u002FLREC2018_Proceedings.zip",728,{"conference_id":97,"year":98,"proceedings_title":99,"venue_ids":9,"isbn":100,"issn":11,"doi":101,"publisher":13,"editors":102,"conference_name":103,"conference_acronym":16,"conference_number":104,"conference_location":105,"conference_city":106,"conference_country":107,"conference_start_date":108,"conference_end_date":109,"conference_url":110,"pdf_url":111,"img_conf_url":63,"paperCount":112},"lrec2016","2016","Proceedings of the Tenth International Conference on Language Resources and Evaluation 
(LREC 2016)","978-2-9517408-9-1","10.63317\u002F5mruwrazrwbg","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Sara Goggi, Marko Grobelnik, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asunción Moreno, Jan Odijk, Stelios Piperidis","Tenth International Conference on Language Resources and Evaluation","10","Bernardinsko Naselje","Portorož","Slovenia","2016-05-23","2016-05-28","http:\u002F\u002Flrec2016.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2016\u002FLREC2016_Proceedings.zip",745,{"conference_id":114,"year":115,"proceedings_title":116,"venue_ids":9,"isbn":117,"issn":11,"doi":118,"publisher":13,"editors":119,"conference_name":120,"conference_acronym":16,"conference_number":121,"conference_location":122,"conference_city":123,"conference_country":124,"conference_start_date":125,"conference_end_date":126,"conference_url":127,"pdf_url":128,"img_conf_url":63,"paperCount":129},"lrec2014","2014","Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014)","978-2-9517408-8-4","10.63317\u002F3ebxpiqq4ikp","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Hrafn Loftsson, Bente Maegaard, Joseph Mariani, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Ninth International Conference on Language Resources and Evaluation","9","Harpa Concert Hall and Conference 
Centre","Reykjavik","Iceland","2014-05-26","2014-05-31","http:\u002F\u002Flrec2014.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2014\u002FLREC2014_Proceedings.zip",746,{"conference_id":131,"year":132,"proceedings_title":133,"venue_ids":9,"isbn":134,"issn":11,"doi":135,"publisher":13,"editors":136,"conference_name":137,"conference_acronym":16,"conference_number":138,"conference_location":139,"conference_city":140,"conference_country":141,"conference_start_date":142,"conference_end_date":143,"conference_url":144,"pdf_url":145,"img_conf_url":63,"paperCount":146},"lrec2012","2012","Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC 2012)","978-2-9517408-7-7","10.63317\u002F42za3jv29xvs","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Mehmet Doğan, Bente Maegaard, Joseph Mariani, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Eighth International Conference on Language Resources and Evaluation","8","Istanbul Convention & Exhibition Centre (ICEC) (Lütfi Kırdar)","Istanbul","Turkey","2012-05-21","2012-05-27","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2012\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2012\u002FLREC2012_Proceedings.zip",670,{"conference_id":148,"year":149,"proceedings_title":150,"venue_ids":9,"isbn":151,"issn":11,"doi":152,"publisher":13,"editors":153,"conference_name":154,"conference_acronym":16,"conference_number":155,"conference_location":156,"conference_city":157,"conference_country":158,"conference_start_date":159,"conference_end_date":160,"conference_url":161,"pdf_url":162,"img_conf_url":63,"paperCount":163},"lrec2010","2010","Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC 2010)","2-9517408-6-7","10.63317\u002F32m6vov78mmv","Nicoletta Calzolari, Khalid Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis, Mike Rosner, Daniel Tapias","Seventh International Conference 
on Language Resources and Evaluation","7","Mediterranean Conference Centre (MCC)","Valletta","Malta","2010-05-17","2010-05-23","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2010\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2010\u002FLREC2010_Proceedings.zip",645,{"conference_id":165,"year":166,"proceedings_title":167,"venue_ids":9,"isbn":168,"issn":11,"doi":169,"publisher":13,"editors":170,"conference_name":171,"conference_acronym":16,"conference_number":172,"conference_location":173,"conference_city":174,"conference_country":175,"conference_start_date":176,"conference_end_date":177,"conference_url":178,"pdf_url":63,"img_conf_url":63,"paperCount":179},"lrec2008","2008","Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC 2008)","2-9517408-4-0","10.63317\u002F3c6xa89msnta","Nicoletta Calzolari, Khalid Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis, Daniel Tapias","Sixth International Conference on Language Resources and Evaluation","6","Palais des Congrès","Marrakech","Morocco","2008-05-28","2008-05-30","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2008\u002F",620,{"conference_id":181,"year":182,"proceedings_title":183,"venue_ids":9,"isbn":184,"issn":11,"doi":185,"publisher":13,"editors":186,"conference_name":187,"conference_acronym":16,"conference_number":188,"conference_location":189,"conference_city":190,"conference_country":41,"conference_start_date":191,"conference_end_date":192,"conference_url":193,"pdf_url":63,"img_conf_url":63,"paperCount":194},"lrec2006","2006","Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC 2006)","2-9517408-2-4","10.63317\u002F2xx3x75ppppa","Nicoletta Calzolari, Khalid Choukri, Aldo Gangemi, Bente Maegaard, Joseph Mariani, Jan Odijk, Daniel Tapias","Fifth International Conference on Language Resources and Evaluation","5","Magazzini del 
Cotone","Genoa","2006-05-24","2006-05-26","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2006\u002F",513,{"conference_id":196,"year":197,"proceedings_title":198,"venue_ids":9,"isbn":199,"issn":11,"doi":200,"publisher":13,"editors":201,"conference_name":202,"conference_acronym":16,"conference_number":203,"conference_location":204,"conference_city":205,"conference_country":206,"conference_start_date":207,"conference_end_date":208,"conference_url":209,"pdf_url":63,"img_conf_url":63,"paperCount":210},"lrec2004","2004","Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004)","2-9517408-1-6","10.63317\u002F2s47745g6zhw","Maria Teresa Lino, Maria Francisca Xavier, Fatima Ferreira, Rute Costa, Raquel Silva","Fourth International Conference on Language Resources and Evaluation","4","Centro Cultural de Belém","Lisbon","Portugal","2004-05-26","2004-05-28","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2004\u002F",524,{"conference_id":212,"year":213,"proceedings_title":214,"venue_ids":9,"isbn":63,"issn":11,"doi":215,"publisher":13,"editors":216,"conference_name":217,"conference_acronym":16,"conference_number":218,"conference_location":219,"conference_city":220,"conference_country":20,"conference_start_date":221,"conference_end_date":222,"conference_url":223,"pdf_url":63,"img_conf_url":63,"paperCount":224},"lrec2002","2002","Proceedings of the Third International Conference on Language Resources and Evaluation (LREC 2002)","10.63317\u002F3ha6dpna2o97","Manuel González Rodríguez, Carmen Paz Suarez Araujo","Third International Conference on Language Resources and Evaluation","3","Auditorio Alfredo Kraus","Las 
Palmas","2002-05-29","2002-05-31","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2002\u002F",354,{"conference_id":226,"year":227,"proceedings_title":228,"venue_ids":9,"isbn":63,"issn":11,"doi":229,"publisher":13,"editors":230,"conference_name":231,"conference_acronym":16,"conference_number":232,"conference_location":233,"conference_city":234,"conference_country":235,"conference_start_date":236,"conference_end_date":237,"conference_url":238,"pdf_url":63,"img_conf_url":63,"paperCount":239},"lrec2000","2000","Proceedings of the Second International Conference on Language Resources and Evaluation (LREC 2000)","10.63317\u002F3yosukd7w6sn","Maria Gavrilidou, George Carayannis, Stella Markantonatou, Stelios Piperidis, Greg Stainhauer","Second International Conference on Language Resources and Evaluation","2","Zappeion Megaron","Athens","Greece","2000-05-31","2000-06-02","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2000\u002F",280,{"conference_id":241,"year":242,"proceedings_title":243,"venue_ids":9,"isbn":63,"issn":11,"doi":244,"publisher":13,"editors":245,"conference_name":246,"conference_acronym":16,"conference_number":247,"conference_location":248,"conference_city":249,"conference_country":20,"conference_start_date":250,"conference_end_date":251,"conference_url":252,"pdf_url":63,"img_conf_url":63,"paperCount":253},"lrec1998","1998","Proceedings of the First International Conference on Language Resources and Evaluation (LREC 1998)","10.63317\u002F5a986fnjefzm","Antonio Rubio,Natividad Gallardo, Rosa Castro, Antonio Tejada","Language Resources and Evaluation Conference","1","Palacio de Congresos de 
Granada","Granada","1998-05-28","1998-05-30","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec1998\u002F",212,[255],{"year":29,"proceedings_title":256,"paperCount":257,"doi":258,"pdf_url":259,"venue_ids":31,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024): Tutorial Summaries",13,"10.63317\u002F3piy8jnqffp3","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftutorials\u002FLREC-2024-Tutorials.pdf",{"2020":261,"2022":452,"2024":617},[262,269,276,282,289,296,302,308,315,322,328,335,341,348,354,359,364,370,376,382,387,392,397,402,407,413,419,425,431,436,442,447],{"workshop_id":263,"year":67,"full_workshop_id":264,"proceedings_title":265,"paperCount":266,"doi":267,"pdf_url":268,"venue_ids":263,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"aespen","lrec2020_ws_aespen","Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020",11,"10.63317\u002F58onsa8rnrrz","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FAESPEN2020\u002FAESPEN-2020.pdf",{"workshop_id":270,"year":67,"full_workshop_id":271,"proceedings_title":272,"paperCount":273,"doi":274,"pdf_url":275,"venue_ids":270,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"ai4hi","lrec2020_ws_ai4hi","Proceedings of the 1st International Workshop on Artificial Intelligence for Historical Image Enrichment and 
Access",5,"10.63317\u002F3m5ep69cw7jj","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FAI4HI2020\u002FAI4HI-2020.pdf",{"workshop_id":277,"year":67,"full_workshop_id":278,"proceedings_title":279,"paperCount":266,"doi":280,"pdf_url":281,"venue_ids":277,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"bucc","lrec2020_ws_bucc","Proceedings of the 13th Workshop on Building and Using Comparable Corpora","10.63317\u002F2fx83jms4c9r","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FBUCC2020\u002FBUCC-2020.pdf",{"workshop_id":283,"year":67,"full_workshop_id":284,"proceedings_title":285,"paperCount":286,"doi":287,"pdf_url":288,"venue_ids":283,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"calcs","lrec2020_ws_calcs","Proceedings of the 4th Workshop on Computational Approaches to Code Switching",9,"10.63317\u002F3jbxrkvj6qkv","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCS2020\u002FCALCS-2020.pdf",{"workshop_id":290,"year":67,"full_workshop_id":291,"proceedings_title":292,"paperCount":293,"doi":294,"pdf_url":295,"venue_ids":290,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"cllrd","lrec2020_ws_cllrd","Proceedings of the LREC 2020 Workshop on \"Citizen Linguistics in Language Resource 
Development\"",8,"10.63317\u002F3qo9e6q6vq5f","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002Fcllrd2020\u002FCLLRD-2020.pdf",{"workshop_id":297,"year":67,"full_workshop_id":298,"proceedings_title":299,"paperCount":266,"doi":300,"pdf_url":301,"venue_ids":297,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"clssts","lrec2020_ws_clssts","Proceedings of the workshop on Cross-Language Search and Summarization of Text and Speech (CLSSTS2020)","10.63317\u002F3p6twmv6mhc5","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCLSSTS2020\u002FCLSSTS-2020.pdf",{"workshop_id":303,"year":67,"full_workshop_id":304,"proceedings_title":305,"paperCount":286,"doi":306,"pdf_url":307,"venue_ids":303,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"cmlc","lrec2020_ws_cmlc","Proceedings of the 8th Workshop on Challenges in the Management of Large Corpora","10.63317\u002F236pt6g4g4s4","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCMLC-8\u002FCMLC-2020.pdf",{"workshop_id":309,"year":67,"full_workshop_id":310,"proceedings_title":311,"paperCount":312,"doi":313,"pdf_url":314,"venue_ids":309,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"computerm","lrec2020_ws_computerm","Proceedings of the 6th International Workshop on Computational 
Terminology",15,"10.63317\u002F4jp43md9xe2q","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCOMPUTERM2020\u002FCOMPUTERM-2020.pdf",{"workshop_id":316,"year":67,"full_workshop_id":317,"proceedings_title":318,"paperCount":319,"doi":320,"pdf_url":321,"venue_ids":316,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"framenet","lrec2020_ws_framenet","Proceedings of the International FrameNet Workshop 2020: Towards a Global, Multilingual FrameNet",12,"10.63317\u002F4tjynpg2ohf3","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002Fframenet2020\u002FFrameNet-2020.pdf",{"workshop_id":323,"year":67,"full_workshop_id":324,"proceedings_title":325,"paperCount":319,"doi":326,"pdf_url":327,"venue_ids":323,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"gamnlp","lrec2020_ws_gamnlp","Proceedings of the Workshop on Games and Natural Language Processing","10.63317\u002F5ahttrxdfnza","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FGames-NLP\u002FGAMNLP-2020.pdf",{"workshop_id":329,"year":67,"full_workshop_id":330,"proceedings_title":331,"paperCount":332,"doi":333,"pdf_url":334,"venue_ids":329,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"globalex","lrec2020_ws_globalex","Proceedings of the 2020 Globalex Workshop on Linked 
Lexicography",18,"10.63317\u002F34yjjfrnwvj8","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FGLOBALEX2020\u002FGLOBALEX-2020.pdf",{"workshop_id":336,"year":67,"full_workshop_id":337,"proceedings_title":338,"paperCount":319,"doi":339,"pdf_url":340,"venue_ids":336,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"isa","lrec2020_ws_isa","Proceedings of the 16th Joint ACL-ISO Workshop on Interoperable Semantic Annotation","10.63317\u002F5id7rv8izjcd","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FISA16\u002FISA-2020.pdf",{"workshop_id":342,"year":67,"full_workshop_id":343,"proceedings_title":344,"paperCount":345,"doi":346,"pdf_url":347,"venue_ids":342,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"iwltp","lrec2020_ws_iwltp","Proceedings of the 1st International Workshop on Language Technology Platforms",17,"10.63317\u002F4hc34do825yz","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FIWLTP2020\u002FIWLTP-2020.pdf",{"workshop_id":349,"year":67,"full_workshop_id":350,"proceedings_title":351,"paperCount":319,"doi":352,"pdf_url":353,"venue_ids":349,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"ldl","lrec2020_ws_ldl","Proceedings of the 7th Workshop on Linked Data in Linguistics 
(LDL-2020)","10.63317\u002F3mn9ttzvdbxs","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FLDL2020\u002FLDL-2020.pdf",{"workshop_id":355,"year":67,"full_workshop_id":356,"proceedings_title":357,"paperCount":293,"doi":358,"pdf_url":63,"venue_ids":355,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lincr","lrec2020_ws_lincr","Proceedings of the Second Workshop on Linguistic and Neurocognitive Resources","10.63317\u002F24gnv8q9cz94",{"workshop_id":360,"year":67,"full_workshop_id":361,"proceedings_title":362,"paperCount":286,"doi":363,"pdf_url":63,"venue_ids":360,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lr4sshoc","lrec2020_ws_lr4sshoc","Proceedings of the Workshop about Language Resources for the SSH Cloud","10.63317\u002F5j7vesdm7yia",{"workshop_id":365,"year":67,"full_workshop_id":366,"proceedings_title":367,"paperCount":368,"doi":369,"pdf_url":63,"venue_ids":365,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lt4gov","lrec2020_ws_lt4gov","Proceedings of the 1st Workshop on Language Technologies for Government and Public Administration (LT4Gov)",6,"10.63317\u002F5i8su82ish3i",{"workshop_id":371,"year":67,"full_workshop_id":372,"proceedings_title":373,"paperCount":374,"doi":375,"pdf_url":63,"venue_ids":371,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lt4hala","lrec2020_ws_lt4hala","Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient 
Languages",21,"10.63317\u002F4jnfg39ctsra",{"workshop_id":377,"year":67,"full_workshop_id":378,"proceedings_title":379,"paperCount":380,"doi":381,"pdf_url":63,"venue_ids":377,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"mmw","lrec2020_ws_mmw","Proceedings of the LREC 2020 Workshop on Multimodal Wordnets (MMW2020)",7,"10.63317\u002F5pcp4c88d6n8",{"workshop_id":383,"year":67,"full_workshop_id":384,"proceedings_title":385,"paperCount":368,"doi":386,"pdf_url":63,"venue_ids":383,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"multilingualbio","lrec2020_ws_multilingualbio","Proceedings of the LREC 2020 Workshop on Multilingual Biomedical Text Processing (MultilingualBIO 2020)","10.63317\u002F4pfckaywoxxa",{"workshop_id":388,"year":67,"full_workshop_id":389,"proceedings_title":390,"paperCount":273,"doi":391,"pdf_url":63,"venue_ids":388,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"onion","lrec2020_ws_onion","Proceedings of LREC2020 Workshop \"People in language, vision and the mind\" (ONION2020)","10.63317\u002F2oxdsr8tue27",{"workshop_id":393,"year":67,"full_workshop_id":394,"proceedings_title":395,"paperCount":332,"doi":396,"pdf_url":63,"venue_ids":393,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"osact","lrec2020_ws_osact","Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language 
Detection","10.63317\u002F2xjmcg9vsxcp",{"workshop_id":398,"year":67,"full_workshop_id":399,"proceedings_title":400,"paperCount":257,"doi":401,"pdf_url":63,"venue_ids":398,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"parlaclarin","lrec2020_ws_parlaclarin","Proceedings of the Second ParlaCLARIN Workshop","10.63317\u002F3qhkh6dmemmn",{"workshop_id":403,"year":67,"full_workshop_id":404,"proceedings_title":405,"paperCount":286,"doi":406,"pdf_url":63,"venue_ids":403,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"rail","lrec2020_ws_rail","Proceedings of the first workshop on Resources for African Indigenous Languages","10.63317\u002F3fjhbkudhcmc",{"workshop_id":408,"year":67,"full_workshop_id":409,"proceedings_title":410,"paperCount":411,"doi":412,"pdf_url":63,"venue_ids":408,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"readi","lrec2020_ws_readi","Proceedings of the 1st Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI)",14,"10.63317\u002F4p5m2euxriim",{"workshop_id":414,"year":67,"full_workshop_id":415,"proceedings_title":416,"paperCount":417,"doi":418,"pdf_url":63,"venue_ids":414,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"restup","lrec2020_ws_restup","Proceedings of the Workshop on Resources and Techniques for User and Author Profiling in Abusive 
Language",4,"10.63317\u002F3y3vzhsp3qb7",{"workshop_id":420,"year":67,"full_workshop_id":421,"proceedings_title":422,"paperCount":423,"doi":424,"pdf_url":63,"venue_ids":420,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"signlang","lrec2020_ws_signlang","Proceedings of the LREC2020 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives",36,"10.63317\u002F3nocn9xntuki",{"workshop_id":426,"year":67,"full_workshop_id":427,"proceedings_title":428,"paperCount":429,"doi":430,"pdf_url":63,"venue_ids":426,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"sltu","lrec2020_ws_sltu","Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)",52,"10.63317\u002F25p2yts6fk3q",{"workshop_id":432,"year":67,"full_workshop_id":433,"proceedings_title":434,"paperCount":293,"doi":435,"pdf_url":63,"venue_ids":432,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"stoc","lrec2020_ws_stoc","Proceedings for the First International Workshop on Social Threats in Online Conversations: Understanding and Management","10.63317\u002F4p7j6t9bjg8m",{"workshop_id":437,"year":67,"full_workshop_id":438,"proceedings_title":439,"paperCount":440,"doi":441,"pdf_url":63,"venue_ids":437,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"trac","lrec2020_ws_trac","Proceedings 
of the Second Workshop on Trolling, Aggression and Cyberbullying",25,"10.63317\u002F27yyhn22v2fc",{"workshop_id":443,"year":67,"full_workshop_id":444,"proceedings_title":445,"paperCount":293,"doi":446,"pdf_url":63,"venue_ids":443,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"wac","lrec2020_ws_wac","Proceedings of the 12th Web as Corpus Workshop","10.63317\u002F2va68regv5ni",{"workshop_id":448,"year":67,"full_workshop_id":449,"proceedings_title":450,"paperCount":319,"doi":451,"pdf_url":63,"venue_ids":448,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"wildre","lrec2020_ws_wildre","Proceedings of the WILDRE5– 5th Workshop on Indian Language Data: Resources and Evaluation","10.63317\u002F2ydivss2veo9",[453,458,463,467,472,478,483,488,494,499,504,509,514,520,525,530,535,540,545,550,554,559,564,569,573,577,582,587,593,598,603,608,613],{"workshop_id":277,"year":49,"full_workshop_id":454,"proceedings_title":455,"paperCount":286,"doi":456,"pdf_url":457,"venue_ids":277,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_bucc","Proceedings of the BUCC Workshop within LREC 2022","10.63317\u002F2mqwgvrp7zkn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2022\u002Fworkshops\u002FBUCC\u002F2022.bucc-1.0.pdf",{"workshop_id":459,"year":49,"full_workshop_id":460,"proceedings_title":461,"paperCount":332,"doi":462,"pdf_url":63,"venue_ids":459,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"cltw","lrec2022_ws_cltw","Proceedings of the 4th Celtic Language Technology 
Workshop within LREC2022","10.63317\u002F3x8fjtq6m25s",{"workshop_id":303,"year":49,"full_workshop_id":464,"proceedings_title":465,"paperCount":368,"doi":466,"pdf_url":63,"venue_ids":303,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_cmlc","Proceedings of the Workshop on Challenges in the Management of Large Corpora (CMLC-10)","10.63317\u002F2ajestpwy3c8",{"workshop_id":468,"year":49,"full_workshop_id":469,"proceedings_title":470,"paperCount":293,"doi":471,"pdf_url":63,"venue_ids":468,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"csrnlp","lrec2022_ws_csrnlp","Proceedings of the First Computing Social Responsibility Workshop within the 13th Language Resources and Evaluation Conference","10.63317\u002F3xphwxosghv8",{"workshop_id":473,"year":49,"full_workshop_id":474,"proceedings_title":475,"paperCount":476,"doi":477,"pdf_url":63,"venue_ids":473,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"dclrl","lrec2022_ws_dclrl","Proceedings of the Workshop on Dataset Creation for Lower-Resourced Languages within the 13th Language Resources and Evaluation Conference",10,"10.63317\u002F4652bsvzarmy",{"workshop_id":479,"year":49,"full_workshop_id":480,"proceedings_title":481,"paperCount":368,"doi":482,"pdf_url":63,"venue_ids":479,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"digitam","lrec2022_ws_digitam","Proceedings of the Workshop on Processing Language Variation: Digital Armenian (DigitAm) within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F369nz2tcm6qc",{"workshop_id":484,"year":49,"full_workshop_id":485,"proceedings_title":486,"paperCount":332,"doi":487,"pdf_url":63,"venue_ids":484,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"eurali","lrec2022_ws_eurali","Proceedings of the Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia within the 13th Language Resources and Evaluation Conference","10.63317\u002F4dhjcavy7q7y",{"workshop_id":489,"year":49,"full_workshop_id":490,"proceedings_title":491,"paperCount":492,"doi":493,"pdf_url":63,"venue_ids":489,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"fnp","lrec2022_ws_fnp","Proceedings of the 4th Financial Narrative Processing Workshop @LREC2022",24,"10.63317\u002F29xpoafy85p4",{"workshop_id":495,"year":49,"full_workshop_id":496,"proceedings_title":497,"paperCount":380,"doi":498,"pdf_url":63,"venue_ids":495,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"games","lrec2022_ws_games","Proceedings of the 9th Workshop on Games and Natural Language Processing within the 13th Language Resources and Evaluation Conference","10.63317\u002F5om6f5meam4s",{"workshop_id":500,"year":49,"full_workshop_id":501,"proceedings_title":502,"paperCount":257,"doi":503,"pdf_url":63,"venue_ids":500,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"gwll","lrec2022_ws_gwll","Proceedings of Globalex Workshop on Linked Lexicography within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F5knvvemaz9uw",{"workshop_id":336,"year":49,"full_workshop_id":505,"proceedings_title":506,"paperCount":507,"doi":508,"pdf_url":63,"venue_ids":336,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_isa","Proceedings of the 18th Joint ACL - ISO Workshop on Interoperable Semantic Annotation within LREC2022",19,"10.63317\u002F4h3ue6m3sam4",{"workshop_id":510,"year":49,"full_workshop_id":511,"proceedings_title":512,"paperCount":368,"doi":513,"pdf_url":63,"venue_ids":510,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lateraisse","lrec2022_ws_lateraisse","Proceedings of the First Workshop on Language Technology and Resources for a Fair, Inclusive, and Safe Society within the 13th Language Resources and Evaluation Conference","10.63317\u002F5osn5jjjbomp",{"workshop_id":515,"year":49,"full_workshop_id":516,"proceedings_title":517,"paperCount":518,"doi":519,"pdf_url":63,"venue_ids":515,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"law","lrec2022_ws_law","Proceedings of the 16th Linguistic Annotation Workshop (LAW-XVI) within LREC2022",20,"10.63317\u002F3fysdho22dbb",{"workshop_id":521,"year":49,"full_workshop_id":522,"proceedings_title":523,"paperCount":312,"doi":524,"pdf_url":63,"venue_ids":521,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"legal","lrec2022_ws_legal","Proceedings of the Workshop on Ethical and Legal Issues in Human Language Technologies and Multilingual De-Identification of Sensitive Data In Language Resources within the 
13th Language Resources and Evaluation Conference","10.63317\u002F273whfjsjapd",{"workshop_id":371,"year":49,"full_workshop_id":526,"proceedings_title":527,"paperCount":528,"doi":529,"pdf_url":63,"venue_ids":371,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_lt4hala","Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",31,"10.63317\u002F3dte53mz4zvu",{"workshop_id":531,"year":49,"full_workshop_id":532,"proceedings_title":533,"paperCount":345,"doi":534,"pdf_url":63,"venue_ids":531,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"mwe","lrec2022_ws_mwe","Proceedings of the 18th Workshop on Multiword Expressions @LREC2022","10.63317\u002F2fftdmypb747",{"workshop_id":536,"year":49,"full_workshop_id":537,"proceedings_title":538,"paperCount":286,"doi":539,"pdf_url":63,"venue_ids":536,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"nidcp","lrec2022_ws_nidcp","Proceedings of the 2nd Workshop on Novel Incentives in Data Collection from People: models, implementations, challenges and results within LREC 2022","10.63317\u002F2dox4kgfq3mg",{"workshop_id":541,"year":49,"full_workshop_id":542,"proceedings_title":543,"paperCount":312,"doi":544,"pdf_url":63,"venue_ids":541,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"nlperspectives","lrec2022_ws_nlperspectives","Proceedings of the 1st Workshop on Perspectivist Approaches to NLP 
@LREC2022","10.63317\u002F5nzs42fwjimz",{"workshop_id":393,"year":49,"full_workshop_id":546,"proceedings_title":547,"paperCount":548,"doi":549,"pdf_url":63,"venue_ids":393,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_osact","Proceedinsg of the 5th Workshop on Open-Source Arabic Corpora and Processing Tools with Shared Tasks on Qur'an QA and Fine-Grained Hate Speech Detection",28,"10.63317\u002F4u4quhegagc5",{"workshop_id":398,"year":49,"full_workshop_id":551,"proceedings_title":552,"paperCount":507,"doi":553,"pdf_url":63,"venue_ids":398,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_parlaclarin","Proceedings of the Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference","10.63317\u002F4zzb69hz9ebb",{"workshop_id":555,"year":49,"full_workshop_id":556,"proceedings_title":557,"paperCount":411,"doi":558,"pdf_url":63,"venue_ids":555,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"politicalnlp","lrec2022_ws_politicalnlp","Proceedings of the LREC 2022 workshop on Natural Language Processing for Political Sciences","10.63317\u002F5h778npybpti",{"workshop_id":560,"year":49,"full_workshop_id":561,"proceedings_title":562,"paperCount":368,"doi":563,"pdf_url":63,"venue_ids":560,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"pvlam","lrec2022_ws_pvlam","Proceedings of the 2nd Workshop on People in Vision, Language, and the 
Mind","10.63317\u002F52rissdzp475",{"workshop_id":565,"year":49,"full_workshop_id":566,"proceedings_title":567,"paperCount":319,"doi":568,"pdf_url":63,"venue_ids":565,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"rapid","lrec2022_ws_rapid","Proceedings of the RaPID Workshop - Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive\u002Fpsychiatric\u002Fdevelopmental impairments - within the 13th Language Resources and Evaluation Conference","10.63317\u002F4jch25mm92pa",{"workshop_id":408,"year":49,"full_workshop_id":570,"proceedings_title":571,"paperCount":286,"doi":572,"pdf_url":63,"venue_ids":408,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_readi","Proceedings of the 2nd Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI) within the 13th Language Resources and Evaluation Conference","10.63317\u002F56pm6ipunttk",{"workshop_id":414,"year":49,"full_workshop_id":574,"proceedings_title":575,"paperCount":417,"doi":576,"pdf_url":63,"venue_ids":414,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_restup","Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language 
Analysis","10.63317\u002F53due7cg7w44",{"workshop_id":578,"year":49,"full_workshop_id":579,"proceedings_title":580,"paperCount":368,"doi":581,"pdf_url":63,"venue_ids":578,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"salld","lrec2022_ws_salld","Proceedings of the 2nd Workshop on Sentiment Analysis and Linguistic Linked Data","10.63317\u002F2ueor4yvpz4s",{"workshop_id":420,"year":49,"full_workshop_id":583,"proceedings_title":584,"paperCount":585,"doi":586,"pdf_url":63,"venue_ids":420,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_signlang","Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources",32,"10.63317\u002F2rifm6bf4efz",{"workshop_id":588,"year":49,"full_workshop_id":589,"proceedings_title":590,"paperCount":591,"doi":592,"pdf_url":63,"venue_ids":588,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"sigul","lrec2022_ws_sigul","Proceedings of the 1st Annual Meeting of the ELRA\u002FISCA Special Interest Group on Under-Resourced Languages",27,"10.63317\u002F5nb3qu29q9zi",{"workshop_id":594,"year":49,"full_workshop_id":595,"proceedings_title":596,"paperCount":507,"doi":597,"pdf_url":63,"venue_ids":594,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"sltat","lrec2022_ws_sltat","Proceedings of the 7th International Workshop on Sign Language Translation and Avatar Technology: The Junction of the Visual and the Textual: Challenges and 
Perspectives","10.63317\u002F3xfoevzar6ig",{"workshop_id":599,"year":49,"full_workshop_id":600,"proceedings_title":601,"paperCount":476,"doi":602,"pdf_url":63,"venue_ids":599,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"smila","lrec2022_ws_smila","Proceedings of the Workshop on Smiling and Laughter across Contexts and the Life-span within the 13th Language Resources and Evaluation Conference","10.63317\u002F47g2oou8nqdu",{"workshop_id":604,"year":49,"full_workshop_id":605,"proceedings_title":606,"paperCount":368,"doi":607,"pdf_url":63,"venue_ids":604,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"tdle","lrec2022_ws_tdle","Proceedings of the Workshop Towards Digital Language Equality within the 13th Language Resources and Evaluation Conference","10.63317\u002F3cx3opcocn9i",{"workshop_id":609,"year":49,"full_workshop_id":610,"proceedings_title":611,"paperCount":380,"doi":612,"pdf_url":63,"venue_ids":609,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"term","lrec2022_ws_term","Proceedings of the Workshop on Terminology in the 21st century: many faces, many places","10.63317\u002F23pdrqa3onr3",{"workshop_id":448,"year":49,"full_workshop_id":614,"proceedings_title":615,"paperCount":345,"doi":616,"pdf_url":63,"venue_ids":448,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_wildre","Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F34agbocrmxe4",[618,623,630,638,644,649,656,663,670,677,683,690,696,703,711,717,723,729,735,742,748,755,762,768,774,780,786,792,798,805,812,818,825,831,837,843],{"workshop_id":277,"year":29,"full_workshop_id":619,"proceedings_title":620,"paperCount":312,"doi":621,"pdf_url":622,"venue_ids":277,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_bucc","Proceedings of the 17th Workshop on Building and Using Comparable Corpora (BUCC) @ LREC-COLING 2024","10.63317\u002F3tk8bqt3knqn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fbucc\u002F2024.bucc-1.0.pdf",{"workshop_id":624,"year":29,"full_workshop_id":625,"proceedings_title":626,"paperCount":293,"doi":627,"pdf_url":628,"venue_ids":629,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cawl","lrec2024_ws_cawl","Proceedings of the Second Workshop on Computation and Written Language (CAWL) @ LREC-COLING 2024","10.63317\u002F5jv5da4ct2px","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fcawl\u002F2024.cawl-1.0.pdf","cawl|ws",{"workshop_id":631,"year":29,"full_workshop_id":632,"proceedings_title":633,"paperCount":634,"doi":635,"pdf_url":636,"venue_ids":637,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cl4health","lrec2024_ws_cl4health","Proceedings of the First Workshop on Patient-Oriented Language Processing (CL4Health) @ LREC-COLING 
2024",33,"10.63317\u002F3keuurbv54de","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fpolp\u002F2024.cl4health-1.0.pdf","cl4health|ws",{"workshop_id":639,"year":29,"full_workshop_id":640,"proceedings_title":641,"paperCount":507,"doi":642,"pdf_url":643,"venue_ids":639,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cogalex","lrec2024_ws_cogalex","Proceedings of the Workshop on Cognitive Aspects of the Lexicon @ LREC-COLING 2024","10.63317\u002F2gq7359pqznx","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdelite\u002F2024.delite-1.0.pdf",{"workshop_id":645,"year":29,"full_workshop_id":646,"proceedings_title":647,"paperCount":380,"doi":648,"pdf_url":643,"venue_ids":645,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"delite","lrec2024_ws_delite","Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024","10.63317\u002F3pcpupr4j9wb",{"workshop_id":650,"year":29,"full_workshop_id":651,"proceedings_title":652,"paperCount":332,"doi":653,"pdf_url":654,"venue_ids":655,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"determit","lrec2024_ws_determit","Proceedings of the Workshop on DeTermIt! 
Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024","10.63317\u002F32qtrrr46eau","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdetermit\u002F2024.determit-1.0.pdf","determit|ws",{"workshop_id":657,"year":29,"full_workshop_id":658,"proceedings_title":659,"paperCount":293,"doi":660,"pdf_url":661,"venue_ids":662,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"dlnld","lrec2024_ws_dlnld","Proceedings of the Workshop on Deep Learning and Linked Data (DLnLD) @ LREC-COLING 2024","10.63317\u002F543pjjgkbst9","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdlnld\u002F2024.dlnld-1.0.pdf","dlnld|ws",{"workshop_id":664,"year":29,"full_workshop_id":665,"proceedings_title":666,"paperCount":345,"doi":667,"pdf_url":668,"venue_ids":669,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"dmr","lrec2024_ws_dmr","Proceedings of the Fifth International Workshop on Designing Meaning Representations @ LREC-COLING 2024","10.63317\u002F5q4wbidauaxn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdmr\u002F2024.dmr-1.0.pdf","dmr|ws",{"workshop_id":671,"year":29,"full_workshop_id":672,"proceedings_title":673,"paperCount":312,"doi":674,"pdf_url":675,"venue_ids":676,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"ecnlp","lrec2024_ws_ecnlp","Proceedings of the Seventh Workshop on e-Commerce and NLP @ LREC-COLING 
2024","10.63317\u002F4upp3i6m57nt","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fecnlp\u002F2024.ecnlp-1.0.pdf","ecnlp|ws",{"workshop_id":484,"year":29,"full_workshop_id":678,"proceedings_title":679,"paperCount":293,"doi":680,"pdf_url":681,"venue_ids":682,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_eurali","Proceedings of the 2nd Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia (EURALI) @ LREC-COLING 2024","10.63317\u002F3z633pd4tyg2","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Feurali\u002F2024.eurali-1.0.pdf","eurali|ws",{"workshop_id":684,"year":29,"full_workshop_id":685,"proceedings_title":686,"paperCount":687,"doi":688,"pdf_url":689,"venue_ids":684,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"finnlp","lrec2024_ws_finnlp","Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing",34,"10.63317\u002F46uvxxoj8prq","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ffinnlp\u002F2024.finnlp-1.0.pdf",{"workshop_id":495,"year":29,"full_workshop_id":691,"proceedings_title":692,"paperCount":319,"doi":693,"pdf_url":694,"venue_ids":695,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_games","Proceedings of the 10th Workshop on Games and Natural Language Processing @ LREC-COLING 
2024","10.63317\u002F4d46836qy76p","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fgames\u002F2024.games-1.0.pdf","games|ws",{"workshop_id":697,"year":29,"full_workshop_id":698,"proceedings_title":699,"paperCount":286,"doi":700,"pdf_url":701,"venue_ids":702,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"htres","lrec2024_ws_htres","Proceedings of the First Workshop on Holocaust Testimonies as Language Resources (HTRes) @ LREC-COLING 2024","10.63317\u002F47iakwwytvs8","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fhtres\u002F2024.htres-1.0.pdf","htres|ws",{"workshop_id":704,"year":29,"full_workshop_id":705,"proceedings_title":706,"paperCount":707,"doi":708,"pdf_url":709,"venue_ids":710,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"humeval","lrec2024_ws_humeval","Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024",26,"10.63317\u002F3jfrug2yvkgc","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fhumeval\u002F2024.humeval-1.0.pdf","humeval|ws",{"workshop_id":336,"year":29,"full_workshop_id":712,"proceedings_title":713,"paperCount":332,"doi":714,"pdf_url":715,"venue_ids":716,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_isa","Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @ LREC-COLING 
2024","10.63317\u002F5g5ddg8i3y47","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fisa\u002F2024.isa-1.0.pdf","isa|ws",{"workshop_id":349,"year":29,"full_workshop_id":718,"proceedings_title":719,"paperCount":312,"doi":720,"pdf_url":721,"venue_ids":722,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_ldl","Proceedings of the 9th Workshop on Linked Data in Linguistics @ LREC-COLING 2024","10.63317\u002F4gz96nfw2gdk","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fldl\u002F2024.ldl-1.0.pdf","ldl|ws",{"workshop_id":521,"year":29,"full_workshop_id":724,"proceedings_title":725,"paperCount":266,"doi":726,"pdf_url":727,"venue_ids":728,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_legal","Proceedings of the Workshop on Legal and Ethical Issues in Human Language Technologies @ LREC-COLING 2024","10.63317\u002F2wkziwv5fb97","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Flegal\u002F2024.legal-1.0.pdf","legal|ws",{"workshop_id":371,"year":29,"full_workshop_id":730,"proceedings_title":731,"paperCount":634,"doi":732,"pdf_url":733,"venue_ids":734,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_lt4hala","Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ 
LREC-COLING-2024","10.63317\u002F2vavxjcscp8z","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Flt4hala\u002F2024.lt4hala-1.0.pdf","lt4hala|ws",{"workshop_id":736,"year":29,"full_workshop_id":737,"proceedings_title":738,"paperCount":273,"doi":739,"pdf_url":740,"venue_ids":741,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"mathnlp","lrec2024_ws_mathnlp","Proceedings of the 2nd Workshop on Mathematical Natural Language Processing @ LREC-COLING 2024","10.63317\u002F2ydwrzo67zpj","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fmathnlp\u002F2024.mathnlp-1.0.pdf","mathnlp|ws",{"workshop_id":531,"year":29,"full_workshop_id":743,"proceedings_title":744,"paperCount":591,"doi":745,"pdf_url":746,"venue_ids":747,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_mwe","Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024","10.63317\u002F42csaq87z39r","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fmwe\u002F2024.mweud-1.0.pdf","mwe|udw|ws",{"workshop_id":749,"year":29,"full_workshop_id":750,"proceedings_title":751,"paperCount":273,"doi":752,"pdf_url":753,"venue_ids":754,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"neusymbridge","lrec2024_ws_neusymbridge","Proceedings of the Workshop: Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning (NeusymBridge) @ 
LREC-COLING-2024","10.63317\u002F2vsheftp3ti9","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fneusymbridge\u002F2024.neusymbridge-1.0.pdf","neusymbridge|ws",{"workshop_id":541,"year":29,"full_workshop_id":756,"proceedings_title":757,"paperCount":758,"doi":759,"pdf_url":760,"venue_ids":761,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_nlperspectives","Proceedings of the 3rd Workshop on Perspectivist Approaches to NLP (NLPerspectives) @ LREC-COLING 2024",16,"10.63317\u002F2cojnfknheph","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fnlperspectives\u002F2024.nlperspectives-1.0.pdf","nlperspectives|ws",{"workshop_id":393,"year":29,"full_workshop_id":763,"proceedings_title":764,"paperCount":345,"doi":765,"pdf_url":766,"venue_ids":767,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_osact","Proceedings of the 6th Workshop on Open-Source Arabic Corpora and Processing Tools (OSACT) with Shared Tasks on Arabic LLMs Hallucination and Dialect to MSA Machine Translation @ LREC-COLING 2024","10.63317\u002F5d5qxytkajay","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fosact\u002F2024.osact-1.0.pdf","osact|ws",{"workshop_id":398,"year":29,"full_workshop_id":769,"proceedings_title":770,"paperCount":440,"doi":771,"pdf_url":772,"venue_ids":773,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_parlaclarin","Proceedings of the IV Workshop on Creating, Analysing, and Increasing Accessibility of Parliamentary Corpora (ParlaCLARIN) @ LREC-COLING 
2024","10.63317\u002F46c8xka7m8f7","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fparlaclarin\u002F2024.parlaclarin-1.0.pdf","parlaclarin|ws",{"workshop_id":555,"year":29,"full_workshop_id":775,"proceedings_title":776,"paperCount":476,"doi":777,"pdf_url":778,"venue_ids":779,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_politicalnlp","Proceedings of the Second Workshop on Natural Language Processing for Political Sciences @ LREC-COLING 2024","10.63317\u002F3qf3r8pwtkvp","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fpoliticalnlp\u002F2024.politicalnlp-1.0.pdf","politicalnlp|ws",{"workshop_id":403,"year":29,"full_workshop_id":781,"proceedings_title":782,"paperCount":345,"doi":783,"pdf_url":784,"venue_ids":785,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_rail","Proceedings of the Fifth Workshop on Resources for African Indigenous Languages @ LREC-COLING 2024","10.63317\u002F2iyqymd34fup","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frail\u002F2024.rail-1.0.pdf","rail|ws",{"workshop_id":565,"year":29,"full_workshop_id":787,"proceedings_title":788,"paperCount":266,"doi":789,"pdf_url":790,"venue_ids":791,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_rapid","Proceedings of the Fifth Workshop on Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive\u002Fpsychiatric\u002Fdevelopmental impairments @LREC-COLING 
2024","10.63317\u002F5pc4wtot6r3x","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frapid\u002F2024.rapid-1.0.pdf","rapid|ws",{"workshop_id":408,"year":29,"full_workshop_id":793,"proceedings_title":794,"paperCount":286,"doi":795,"pdf_url":796,"venue_ids":797,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_readi","Proceedings of the 3rd Workshop on Tools and Resources for People with REAding DIfficulties (READI) @ LREC-COLING 2024","10.63317\u002F4b546asxrjr6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Freadi\u002F2024.readi-1.0.pdf","readi|ws",{"workshop_id":799,"year":29,"full_workshop_id":800,"proceedings_title":801,"paperCount":273,"doi":802,"pdf_url":803,"venue_ids":804,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"rfp","lrec2024_ws_rfp","Proceedings of the First Workshop on Reference, Framing, and Perspective @ LREC-COLING 2024","10.63317\u002F4xwx3twp9qoy","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frfp\u002F2024.rfp-1.0.pdf","rfp|ws",{"workshop_id":806,"year":29,"full_workshop_id":807,"proceedings_title":808,"paperCount":273,"doi":809,"pdf_url":810,"venue_ids":811,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"safety4convai","lrec2024_ws_safety4convai","Proceedings of Safety4ConvAI: The Third Workshop on Safety for Conversational AI @ LREC-COLING 
2024","10.63317\u002F4johe7jpagg6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsafeai\u002F2024.safety4convai-1.0.pdf","safety4convai|ws",{"workshop_id":420,"year":29,"full_workshop_id":813,"proceedings_title":814,"paperCount":815,"doi":816,"pdf_url":817,"venue_ids":420,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_signlang","Proceedings of the LREC-COLING 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources",45,"10.63317\u002F4e7aayu2htd6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsignlang\u002F2024.signlang-1.0.pdf",{"workshop_id":588,"year":29,"full_workshop_id":819,"proceedings_title":820,"paperCount":821,"doi":822,"pdf_url":823,"venue_ids":824,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_sigul","Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024",50,"10.63317\u002F55wjiy53vy99","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsigul\u002F2024.sigul-1.0.pdf","sigul|ws",{"workshop_id":604,"year":29,"full_workshop_id":826,"proceedings_title":827,"paperCount":368,"doi":828,"pdf_url":829,"venue_ids":830,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_tdle","Proceedings of the Second International Workshop Towards Digital Language Equality (TDLE): Focusing on Sustainability @ LREC-COLING 
2024","10.63317\u002F3p5nrhhwdhbe","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftdle\u002F2024.tdle-1.0.pdf","tdle|ws",{"workshop_id":437,"year":29,"full_workshop_id":832,"proceedings_title":833,"paperCount":345,"doi":834,"pdf_url":835,"venue_ids":836,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_trac","Proceedings of the Fourth Workshop on Threat, Aggression & Cyberbullying @ LREC-COLING-2024","10.63317\u002F2ev2ox49nijy","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftrac\u002F2024.trac-1.0.pdf","trac|ws",{"workshop_id":838,"year":29,"full_workshop_id":839,"proceedings_title":840,"paperCount":758,"doi":841,"pdf_url":842,"venue_ids":838,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"unlp","lrec2024_ws_unlp","Proceedings of the Third Ukrainian Natural Language Processing Workshop (UNLP) @ LREC-COLING 2024","10.63317\u002F5bwu58575ghh","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Funlp\u002F2024.unlp-1.0.pdf",{"workshop_id":448,"year":29,"full_workshop_id":844,"proceedings_title":845,"paperCount":266,"doi":846,"pdf_url":847,"venue_ids":848,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_wildre","Proceedings of the 7th Workshop on Indian Language Data: Resources and 
Evaluation","10.63317\u002F52j5bum2j3fk","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fwildre\u002F2024.wildre-1.0.pdf","wildre|ws",{"conference_id":196,"year":197,"proceedings_title":198,"venue_ids":9,"isbn":199,"issn":11,"doi":200,"publisher":13,"editors":201,"conference_name":202,"conference_acronym":16,"conference_number":203,"conference_location":204,"conference_city":205,"conference_country":206,"conference_start_date":207,"conference_end_date":208,"conference_url":209,"pdf_url":63,"img_conf_url":63,"paperCount":210},{"conference_id":196,"year":197,"proceedings_title":198,"venue_ids":9,"isbn":199,"issn":11,"doi":200,"publisher":13,"editors":201,"conference_name":202,"conference_acronym":16,"conference_number":203,"conference_location":204,"conference_city":205,"conference_country":206,"conference_start_date":207,"conference_end_date":208,"conference_url":209,"pdf_url":63,"img_conf_url":63,"paperCount":210},[852,865,876,887,898,909,920,931,949,964,1009,1036,1051,1066,1120,1135,1153,1163,1178,1188,1206,1221,1236,1254,1265,1280,1304,1319,1336,1348,1366,1378,1390,1405,1420,1437,1447,1469,1480,1493,1510,1522,1534,1551,1569,1587,1599,1614,1635,1653,1681,1696,1711,1734,1755,1802,1820,1836,1845,1858,1873,1888,1906,1921,1933,1945,1958,1970,1985,2009,2022,2034,2060,2078,2093,2107,2119,2139,2149,2172,2190,2208,2226,2248,2259,2274,2289,2316,2328,2356,2382,2394,2404,2419,2430,2445,2463,2485,2506,2521,2536,2550,2583,2595,2610,2625,2645,2660,2675,2693,2709,2724,2742,2759,2771,2795,2810,2825,2849,2867,2877,2893,2914,2929,2947,2962,2980,2996,3023,3035,3048,3063,3072,3093,3115,3132,3142,3160,3179,3188,3203,3221,3231,3243,3265,3291,3303,3317,3335,3359,3378,3390,3411,3435,3449,3467,3481,3494,3506,3519,3536,3562,3577,3592,3609,3623,3644,3667,3683,3698,3713,3734,3758,3781,3794,3806,3824,3839,3858,3875,3886,3905,3917,3930,3948,3959,3977,4006,4021,4032,4050,4070,4081,4106,4123,4141,4158,4173,4187,4235,4257,4274,4290,4312,4323,4339,4348,4359,4372,4386
,4408,4423,4440,4452,4473,4485,4495,4511,4529,4544,4561,4577,4605,4629,4645,4661,4678,4702,4719,4736,4747,4761,4776,4795,4814,4836,4854,4869,4895,4909,4922,4933,4951,4965,4977,5001,5013,5032,5051,5067,5079,5094,5111,5129,5139,5150,5174,5188,5211,5228,5256,5268,5285,5303,5319,5332,5345,5368,5395,5410,5425,5440,5457,5469,5484,5501,5517,5530,5545,5555,5571,5588,5608,5627,5641,5651,5671,5684,5698,5711,5728,5751,5767,5781,5794,5808,5820,5837,5857,5870,5885,5907,5923,5937,5951,5967,5978,5987,5998,6009,6021,6035,6049,6062,6077,6089,6104,6115,6129,6145,6170,6186,6200,6215,6230,6240,6257,6273,6286,6296,6309,6323,6339,6358,6370,6385,6411,6428,6453,6467,6487,6521,6532,6547,6561,6574,6585,6598,6616,6634,6648,6662,6675,6696,6710,6721,6732,6744,6762,6772,6790,6800,6821,6844,6865,6879,6892,6904,6924,6942,6955,6966,6977,6990,7006,7023,7035,7046,7072,7094,7109,7129,7152,7167,7182,7202,7222,7239,7251,7275,7287,7300,7333,7348,7366,7379,7404,7416,7428,7439,7450,7470,7484,7497,7512,7523,7534,7560,7571,7585,7597,7608,7631,7651,7662,7673,7685,7699,7718,7741,7752,7767,7779,7789,7827,7844,7857,7886,7900,7918,7938,7952,7992,8003,8018,8048,8071,8083,8099,8115,8130,8159,8174,8185,8199,8212,8225,8242,8254,8267,8282,8293,8305,8320,8343,8354,8374,8390,8401,8418,8439,8458,8475,8488,8503,8517,8529,8552,8569,8584,8595,8607,8619,8637,8657,8672,8689,8701,8712,8729,8742,8760,8791,8806,8820,8835,8847,8860,8877,8889,8900,8945,8969,8980,8992,9005,9015,9026,9039,9052,9064,9076,9091,9104,9122,9137,9155,9175,9185,9195,9211,9224,9234,9261,9275,9287,9297,9315,9331,9343,9361,9377,9394,9404],{"paper_id":853,"title":854,"year":197,"month":855,"day":63,"doi":856,"resource_url":857,"first_page":63,"last_page":63,"pdf_url":858,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":859,"paper_type":860,"authors":861,"abstract":63},"lrec2004-main-001","Can We Talk? 
Prospects for Automatically Training Spoken Dialogue Systems","05","10.63317\u002F5inqffv24rfv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-001","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002FkII.pdf","walker-2004-talk","main",[862],{"paper_id":853,"author_seq":247,"given_name":863,"surname":864,"affiliation":63,"orcid":63},"Marilyn","Walker",{"paper_id":866,"title":867,"year":197,"month":855,"day":63,"doi":868,"resource_url":869,"first_page":63,"last_page":63,"pdf_url":870,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":871,"paper_type":860,"authors":872,"abstract":63},"lrec2004-main-002","Strategic Directions of National and International Research Funding","10.63317\u002F236thr9tzydy","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-002","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002FIII.pdf","uszkoreit-2004-strategic",[873],{"paper_id":866,"author_seq":247,"given_name":874,"surname":875,"affiliation":63,"orcid":63},"Hans","Uszkoreit",{"paper_id":877,"title":878,"year":197,"month":855,"day":63,"doi":879,"resource_url":880,"first_page":63,"last_page":63,"pdf_url":881,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":882,"paper_type":860,"authors":883,"abstract":63},"lrec2004-main-003","Multilingual Content Processing","10.63317\u002F4suqfapi3vmc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-003","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002FkIV.pdf","thurmair-2004-multilingual",[884],{"paper_id":877,"author_seq":247,"given_name":885,"surname":886,"affiliation":63,"orcid":63},"Gregor","Thurmair",{"paper_id":888,"title":889,"year":197,"month":855,"day":63,"doi":890,"resource_url":891,"first_page":63,"last_page":63,"pdf_url":892,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":893,"paper_type":860,"authors":894,"abstract":63},"lrec2004-main-004","Collaborative 
Commentary: Opening Up Spoken Language Databases","10.63317\u002F2i9w64oqyrj5","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-004","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002FII.pdf","macwhinney-2004-collaborative",[895],{"paper_id":888,"author_seq":247,"given_name":896,"surname":897,"affiliation":63,"orcid":63},"Brian","MacWhinney",{"paper_id":899,"title":900,"year":197,"month":855,"day":63,"doi":901,"resource_url":902,"first_page":63,"last_page":63,"pdf_url":903,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":904,"paper_type":860,"authors":905,"abstract":63},"lrec2004-main-005","Getting to the Heart of the Matter; Speech is More than Just the Expression of Text or Language","10.63317\u002F5izvqvpapvc5","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-005","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002FkIII.pdf","campbell-2004-getting",[906],{"paper_id":899,"author_seq":247,"given_name":907,"surname":908,"affiliation":63,"orcid":63},"Nick","Campbell",{"paper_id":910,"title":911,"year":197,"month":855,"day":63,"doi":912,"resource_url":913,"first_page":63,"last_page":63,"pdf_url":914,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":915,"paper_type":860,"authors":916,"abstract":63},"lrec2004-main-006","Industrial Needs for Language 
Resources","10.63317\u002F33i4kjshofor","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-006","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002FI.pdf","maegaard-2004-industrial",[917],{"paper_id":910,"author_seq":247,"given_name":918,"surname":919,"affiliation":63,"orcid":63},"Bente","Maegaard",{"paper_id":921,"title":922,"year":197,"month":855,"day":63,"doi":923,"resource_url":924,"first_page":63,"last_page":63,"pdf_url":925,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":926,"paper_type":860,"authors":927,"abstract":63},"lrec2004-main-007","Thesaurus or Logical Ontology, Which do we Need for Mining Text?","10.63317\u002F2y7u3czf8pwc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-007","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002FkI.pdf","tsujii-2004-thesaurus",[928],{"paper_id":921,"author_seq":247,"given_name":929,"surname":930,"affiliation":63,"orcid":63},"Junichi","Tsujii",{"paper_id":932,"title":933,"year":197,"month":855,"day":63,"doi":934,"resource_url":935,"first_page":63,"last_page":63,"pdf_url":936,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":937,"paper_type":860,"authors":938,"abstract":948},"lrec2004-main-008","Information Extraction from Hindi Texts","10.63317\u002F4zo5feiim2hw","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-008","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F1.pdf","dutta-etal-2004-information",[939,942,945],{"paper_id":932,"author_seq":247,"given_name":940,"surname":941,"affiliation":63,"orcid":63},"Kamlesh","Dutta",{"paper_id":932,"author_seq":232,"given_name":943,"surname":944,"affiliation":63,"orcid":63},"Saroj","Kaushik",{"paper_id":932,"author_seq":218,"given_name":946,"surname":947,"affiliation":63,"orcid":63},"Nupur","Prakash","The paper presents an information extraction system that takes input from Hindi texts and improves the information content 
retrieved by using anaphor\u002Fpronoun resolution mechanism. The information extraction system developed consists of three major modules: The language Parser, Resolution System and Information Extractor. The language parser used is HPSG (Head-Driven Phrase Structure Grammar) based that provides both syntactic and semantic information to the anaphor resolution system. HPSG was chosen because it provides a set of constraint on the co-referential structures in the language, which bounds the search for an antecedent to a more precise location in the discourse. The semantic information included in its parsing may be helpful for removing ambiguity in anaphor\u002Fpronoun resolution. The anaphor resolution system uses few heuristic rules to resolve intrasentential references while centering theory is used for intersentential resolution.",{"paper_id":950,"title":951,"year":197,"month":855,"day":63,"doi":952,"resource_url":953,"first_page":63,"last_page":63,"pdf_url":954,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":955,"paper_type":860,"authors":956,"abstract":963},"lrec2004-main-009","The Language Belongs to the People!","10.63317\u002F4r7fcmuqvabr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-009","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F2.pdf","koster-gradmann-2004-language",[957,960],{"paper_id":950,"author_seq":247,"given_name":958,"surname":959,"affiliation":63,"orcid":63},"Cornelis H.A.","Koster",{"paper_id":950,"author_seq":232,"given_name":961,"surname":962,"affiliation":63,"orcid":63},"Stefan","Gradmann","For every natural language, computer-readable basic resources like grammars and linguistic lexica are increasingly needed for educational, scientific and economical reasons. Especially in countries with a strong and modern economy, enormous efforts are invested in developing such resources, without common purpose and synergy. 
Property rights are yealously guarded by industrial and academical developers alike. Enormous amounts of subvention are wasted on projects that can not make use of the results of others, and whose results either are hidden or just evaporate. We investigate by small case studies the predicaments faced by linguistic academic researchers, research consortia, traditional publishers and application developers, and show that the prevailing restrictive attitude is counterproductive. It is impossible to ever earn back the (public) investment in linguistic resources, even the costs of distribution and incasso exceed the budgets of academics and industries feel compelled to invest in developing their own resources rather than base themselves on resources made my others. The present situation of linguistic resources resembles that of software systems and applications in the eighties: on the one hand many people try in vain to make some money from their intellectual work, hampering free exchange and collaboration, on the other hand monopolies threaten to corner the market. The resemblance is large enough to suggest the same solution: basic linguistic resources which have been developed with public support should be made freely available in the public domain. We propose therefore to treat such basic resources as Free Software, with the following advantages: - distribution of the most recent version per ftp is very cheap - developers are invited to improve and extend the work of their predecessors, rather that repeat it, and share the results - the quality of the resources is continuously improved, the maintenance problem is solved - academic researchers are liberated from the problems of using proprietary resources - commercial use is possible without fuzz and (unpredictable) expenses. As a good example, we discuss the AGFL project. 
With the aid of the Dutch Research Organisation NWO and (later) the NLnet foundation, the AGFL parser generator system for natural languages was brought under the GNU public licence in 2001. Since then it has been freely distributed, along with the EP4IR grammar and lexicon of English, which was developed in the course of two European projects, along with a grammar and lexicon of Dutch. Similar grammars for Russian, German, Spanish and Modern Standard Arabic are under development by other groups.",{"paper_id":965,"title":966,"year":197,"month":855,"day":63,"doi":967,"resource_url":968,"first_page":63,"last_page":63,"pdf_url":969,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":970,"paper_type":860,"authors":971,"abstract":1008},"lrec2004-main-010","ALLES: Integrating NLP in ICALL Applications","10.63317\u002F46bz7a63t97u","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-010","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F3.pdf","schmidt-etal-2004-alles",[972,975,978,981,984,987,990,993,996,999,1002,1005],{"paper_id":965,"author_seq":247,"given_name":973,"surname":974,"affiliation":63,"orcid":63},"Paul","Schmidt",{"paper_id":965,"author_seq":232,"given_name":976,"surname":977,"affiliation":63,"orcid":63},"Sandrine","Garnier",{"paper_id":965,"author_seq":218,"given_name":979,"surname":980,"affiliation":63,"orcid":63},"Mike","Sharwood",{"paper_id":965,"author_seq":203,"given_name":982,"surname":983,"affiliation":63,"orcid":63},"Toni","Badia",{"paper_id":965,"author_seq":188,"given_name":985,"surname":986,"affiliation":63,"orcid":63},"Lourdes","Díaz",{"paper_id":965,"author_seq":172,"given_name":988,"surname":989,"affiliation":63,"orcid":63},"Martí","Quixal",{"paper_id":965,"author_seq":155,"given_name":991,"surname":992,"affiliation":63,"orcid":63},"Ana","Ruggia",{"paper_id":965,"author_seq":138,"given_name":994,"surname":995,"affiliation":63,"orcid":63},"Antonio 
S.","Valderrabanos",{"paper_id":965,"author_seq":121,"given_name":997,"surname":998,"affiliation":63,"orcid":63},"Alberto J.","Cruz",{"paper_id":965,"author_seq":104,"given_name":1000,"surname":1001,"affiliation":63,"orcid":63},"Enrique","Torrejon",{"paper_id":965,"author_seq":87,"given_name":1003,"surname":1004,"affiliation":63,"orcid":63},"Celia","Rico",{"paper_id":965,"author_seq":73,"given_name":1006,"surname":1007,"affiliation":63,"orcid":63},"Jorge","Jimenez","This paper describes how mature NLP that has been successfully applied in the area of controlled language checking can be used to deliver intelligent CALL applications. It describe how an autonomous, long-distance second-language learning system for advanced learners can be created. The architecture of the system consists of a multimodal user interface, a set of skill-specific learning tools, and a set of NLP-based evaluation tools. All modules are integrated in a flexible and scalable software architecture allowing for the use of NLP and ensuring a user-friendly environment based on advanced concepts in language didactics. The multimodal user interface is web-based and incorporates off-the-shelf ASR. The set of skill-specific learning tools consists of a reading, listening, speaking and a writing tool. 
The thrust of the project is to show the potential of NLP in evaluation of students' productions.",{"paper_id":1010,"title":1011,"year":197,"month":855,"day":63,"doi":1012,"resource_url":1013,"first_page":63,"last_page":63,"pdf_url":1014,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1015,"paper_type":860,"authors":1016,"abstract":1035},"lrec2004-main-011","The Automatic Content Extraction (ACE) Program – Tasks, Data, and Evaluation","10.63317\u002F4o6ysc5sicba","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-011","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F5.pdf","doddington-etal-2004-automatic",[1017,1020,1023,1026,1029,1032],{"paper_id":1010,"author_seq":247,"given_name":1018,"surname":1019,"affiliation":63,"orcid":63},"George","Doddington",{"paper_id":1010,"author_seq":232,"given_name":1021,"surname":1022,"affiliation":63,"orcid":63},"Alexis","Mitchell",{"paper_id":1010,"author_seq":218,"given_name":1024,"surname":1025,"affiliation":63,"orcid":63},"Mark","Przybocki",{"paper_id":1010,"author_seq":203,"given_name":1027,"surname":1028,"affiliation":63,"orcid":63},"Lance","Ramshaw",{"paper_id":1010,"author_seq":188,"given_name":1030,"surname":1031,"affiliation":63,"orcid":63},"Stephanie","Strassel",{"paper_id":1010,"author_seq":172,"given_name":1033,"surname":1034,"affiliation":63,"orcid":63},"Ralph","Weischedel","The objective of the ACE program is to develop technology to automatically infer from human language data the entities being mentioned, the relations among these entities that are directly expressed, and the events in which these entities participate. Data sources include audio and image data in addition to pure text, and Arabic and Chinese in addition to English. The effort involves defining the research tasks in detail, collecting and annotating data needed for training, development, and evaluation, and supporting the research with evaluation tools and research workshops. 
This program began with a pilot study in 1999. The next evaluation is scheduled for September 2004.",{"paper_id":1037,"title":1038,"year":197,"month":855,"day":63,"doi":1039,"resource_url":1040,"first_page":63,"last_page":63,"pdf_url":1041,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1042,"paper_type":860,"authors":1043,"abstract":1050},"lrec2004-main-012","Multilingual Corpus-based Approach to the Resolution of English –ing","10.63317\u002F3iro6vbtr5wb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-012","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F8.pdf","schwartz-aikawa-2004-multilingual",[1044,1047],{"paper_id":1037,"author_seq":247,"given_name":1045,"surname":1046,"affiliation":63,"orcid":63},"Lee","Schwartz",{"paper_id":1037,"author_seq":232,"given_name":1048,"surname":1049,"affiliation":63,"orcid":63},"Takako","Aikawa","Corpus data has proven to be useful for dealing with ambiguities in natural language processing (NLP). A number of studies, for example, have dealt with disambiguating English PP attachments, using corpus data. This paper explores a novel approach to resolving ambiguities associated with ing + Noun constructions in English. We use an aligned multilingual (English, Spanish, French, German and Japanese) corpus to extract lexical information necessary for disambiguation. Our premise is that while in English -ing constructions are highly ambiguous, corresponding constructions in other languages may not be ambiguous, and can thus provide English with disambiguating information. 
We argue that with aligned multilingual corpora, languages can learn non-trivial linguistic information from one another.",{"paper_id":1052,"title":1053,"year":197,"month":855,"day":63,"doi":1054,"resource_url":1055,"first_page":63,"last_page":63,"pdf_url":1056,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1057,"paper_type":860,"authors":1058,"abstract":1065},"lrec2004-main-013","On the Problems of Creating a Golden Standard of Inflected Forms in Portuguese","10.63317\u002F2uat8ifbd2zc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-013","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F9.pdf","santos-barreiro-2004-problems",[1059,1062],{"paper_id":1052,"author_seq":247,"given_name":1060,"surname":1061,"affiliation":63,"orcid":63},"Diana","Santos",{"paper_id":1052,"author_seq":232,"given_name":1063,"surname":1064,"affiliation":63,"orcid":63},"Anabela","Barreiro","This paper describes our attempt to build a consensus round the morphological analysis of a set of forms for Portuguese, to be used as a basis for the creation of a “golden list” in the first Morpholympics for Portuguese, Morfolimpíadas, an evaluation contest on Portuguese morphological analysis. This golden standard was used to rank participating morphological systems according to precision and coverage. The discussion in the paper is centered on the general choices made and the problems encountered. 
The paper ends with a short description of the (publicly available) resource.",{"paper_id":1067,"title":1068,"year":197,"month":855,"day":63,"doi":1069,"resource_url":1070,"first_page":63,"last_page":63,"pdf_url":1071,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1072,"paper_type":860,"authors":1073,"abstract":1119},"lrec2004-main-014","INSPIRE: Evaluation of a Smart-Home System for Infotainment Management and Device Control","10.63317\u002F4civyevayq8h","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-014","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F12.pdf","moller-etal-2004-inspire",[1074,1077,1080,1083,1086,1089,1092,1095,1098,1101,1104,1107,1110,1113,1116],{"paper_id":1067,"author_seq":247,"given_name":1075,"surname":1076,"affiliation":63,"orcid":63},"Sebastian","Möller",{"paper_id":1067,"author_seq":232,"given_name":1078,"surname":1079,"affiliation":63,"orcid":63},"Jan","Krebber",{"paper_id":1067,"author_seq":218,"given_name":1081,"surname":1082,"affiliation":63,"orcid":63},"Alexander","Raake",{"paper_id":1067,"author_seq":203,"given_name":1084,"surname":1085,"affiliation":63,"orcid":63},"Paula","Smeele",{"paper_id":1067,"author_seq":188,"given_name":1087,"surname":1088,"affiliation":63,"orcid":63},"Martin","Rajman",{"paper_id":1067,"author_seq":172,"given_name":1090,"surname":1091,"affiliation":63,"orcid":63},"Mirek","Melichar",{"paper_id":1067,"author_seq":155,"given_name":1093,"surname":1094,"affiliation":63,"orcid":63},"Vincenzo","Pallotta",{"paper_id":1067,"author_seq":138,"given_name":1096,"surname":1097,"affiliation":63,"orcid":63},"Gianna","Tsakou",{"paper_id":1067,"author_seq":121,"given_name":1099,"surname":1100,"affiliation":63,"orcid":63},"Basilis","Kladis",{"paper_id":1067,"author_seq":104,"given_name":1102,"surname":1103,"affiliation":63,"orcid":63},"Anestis","Vovos",{"paper_id":1067,"author_seq":87,"given_name":1105,"surname":1106,"affiliation":63,"orcid":63},"Jettie","Hoonh
out",{"paper_id":1067,"author_seq":73,"given_name":1108,"surname":1109,"affiliation":63,"orcid":63},"Dietmar","Schuchardt",{"paper_id":1067,"author_seq":55,"given_name":1111,"surname":1112,"affiliation":63,"orcid":63},"Nikos","Fakotakis",{"paper_id":1067,"author_seq":38,"given_name":1114,"surname":1115,"affiliation":63,"orcid":63},"Todor","Ganchev",{"paper_id":1067,"author_seq":17,"given_name":1117,"surname":1118,"affiliation":63,"orcid":63},"Ilyas","Potamitis","This paper gives an overview of the assessment and evaluation methods which have been used to determine the quality of the INSPIRE smart home system. The system allows different home appliances to be controlled via speech, and consists of speech and speaker recognition, speech understanding, dialogue management, and speech output components. The performance of these components is first assessed individually, and then the entire system is evaluated in an interaction experiment with test users. Initial results of the assessment and evaluation are given, in particular with respect to the transmission channel impact on speech and speaker recognition, and the assessment of speech output for different system metaphors.",{"paper_id":1121,"title":1122,"year":197,"month":855,"day":63,"doi":1123,"resource_url":1124,"first_page":63,"last_page":63,"pdf_url":1125,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1126,"paper_type":860,"authors":1127,"abstract":1134},"lrec2004-main-015","Evaluating Multimodal NLG Using Production Experiments","10.63317\u002F2ynx4iz6jqry","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-015","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F14.pdf","van-der-sluis-krahmer-2004-evaluating",[1128,1131],{"paper_id":1121,"author_seq":247,"given_name":1129,"surname":1130,"affiliation":63,"orcid":63},"Ielka","van der 
Sluis",{"paper_id":1121,"author_seq":232,"given_name":1132,"surname":1133,"affiliation":63,"orcid":63},"Emiel","Krahmer","In this paper we report on an evaluation study for the generation of multimodal referring expressions. To test our algorithm, which allows for various gradations of preciseness in pointing, subjects performed an object identification task in a strict experimental setting. 20 subjects participated and were instructed to always use a pointing gesture (they were led to believe they were testing a new kind of `digital pointing device'). The subjects performed their tasks on two distances: close (10 subjects) and at a distance of 2.5 meters (10 subjects). The assumption is that these conditions yield precise and imprecise pointing gestures respectively. In addition we varied the `type' of target objects (geometrical figures versus pictures of persons). This study resulted in a corpus of 600 multimodal referring expressions. A statistical analysis (ANOVA) revealed a main effect of distance (subjects adapt their language to the kind of pointing gesture) and also a main effect of target (persons are more difficult to describe than objects). 
The advantages and disadvantages of this evaluation method are discussed.",{"paper_id":1136,"title":1137,"year":197,"month":855,"day":63,"doi":1138,"resource_url":1139,"first_page":63,"last_page":63,"pdf_url":1140,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1141,"paper_type":860,"authors":1142,"abstract":1152},"lrec2004-main-016","Concept Creation in Lexical Ontologies","10.63317\u002F244zxninykcq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-016","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F15.pdf","seco-etal-2004-concept",[1143,1146,1149],{"paper_id":1136,"author_seq":247,"given_name":1144,"surname":1145,"affiliation":63,"orcid":63},"Nuno","Seco",{"paper_id":1136,"author_seq":232,"given_name":1147,"surname":1148,"affiliation":63,"orcid":63},"Tony","Veale",{"paper_id":1136,"author_seq":218,"given_name":1150,"surname":1151,"affiliation":63,"orcid":63},"Jer","Hayes","The compositional mechanisms involved in the comprehension and creation of concepts is of much interest to the communities studying Cognitive Science and Artificial Intelligence. Nevertheless, comprehension has been largely studied while the creation or production of novel concepts has been somewhat forgotten. We present a model for concept generation using a well known lexical ontology --- WordNet --- along with the results of our experiments that evaluate the creative characteristics of the generated concepts. 
We also explain how these ideas may be applied to other areas of research, namely to Information Retrieval systems.",{"paper_id":1154,"title":1155,"year":197,"month":855,"day":63,"doi":1156,"resource_url":1157,"first_page":63,"last_page":63,"pdf_url":1158,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1159,"paper_type":860,"authors":1160,"abstract":1162},"lrec2004-main-017","Polysemy and Category Structure in WordNet: An Evidential Approach","10.63317\u002F3fq4fzxjgfc5","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-017","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F16.pdf","veale-2004-polysemy",[1161],{"paper_id":1154,"author_seq":247,"given_name":1147,"surname":1148,"affiliation":63,"orcid":63},"While polysemy is a form of ambiguity that can complicate natural language processing, it is also a rich lexical resource that yields useful insights into the mapping between words and concepts. WordNet, a comprehensive lexical knowledge-base of English word meanings, is replete with instances of polysemy, but also contains many instances of homonymy, and fails to distinguish between both kinds of ambiguity. We propose in this paper an alternative to the distributional approach for recognizing polysemous sense-pairs in WordNet. 
Our approach does not rely on the systematicity of regular polysemy to identify the families of words that instantiate a particular metonymic pattern, but seeks instead local ontological evidence for each word, on a case by case basis, before polysemy is hypothesized.",{"paper_id":1164,"title":1165,"year":197,"month":855,"day":63,"doi":1166,"resource_url":1167,"first_page":63,"last_page":63,"pdf_url":1168,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1169,"paper_type":860,"authors":1170,"abstract":1177},"lrec2004-main-018","Towards a Reference Annotation Framework","10.63317\u002F23w949c4thtk","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-018","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F17.pdf","salmon-alt-romary-2004-towards",[1171,1174],{"paper_id":1164,"author_seq":247,"given_name":1172,"surname":1173,"affiliation":63,"orcid":63},"Susanne","Salmon-Alt",{"paper_id":1164,"author_seq":232,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},"Laurent","Romary","This paper discusses the main characteristics of a possible unified framework for specifying annotation schemes dedicated to the task of reference identification and linking on linguistic corpora. Built upon the foundation principles of the Linguistic Annotation Framework, the model (RAF, Reference Annotation Framework) is based on the combination of a simple meta-model (expressing markables and links between them) and a selection of data categories representing the information actually attached to each component of the meta-model. 
Based on the observation of existing practices we show how this model can be used in a variety of practical and theoretical configurations.",{"paper_id":1179,"title":1180,"year":197,"month":855,"day":63,"doi":1181,"resource_url":1182,"first_page":63,"last_page":63,"pdf_url":1183,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1184,"paper_type":860,"authors":1185,"abstract":1187},"lrec2004-main-019","A New ITU-T Recommendation on the Evaluation of Telephone-Based Spoken Dialogue Systems","10.63317\u002F26mm8pk8y9zr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-019","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F18.pdf","moller-2004-new",[1186],{"paper_id":1179,"author_seq":247,"given_name":1075,"surname":1076,"affiliation":63,"orcid":63},"This article describes efforts which have recently been undertaken by the International Telecommunication Union (ITU-T) to agree on common methods for evaluating telephone services based on spoken dialogue systems. As a result of these efforts, a new ITU-T Recommendation P.851 (2003) has been approved. It summarizes on the one hand the factors of the system, of the service and of the user which influence the service quality. On the other hand, guidelines are presented on how to evaluate services with the help of subjective interaction experiments, in order to determine the user’s quality perceptions. The relationships between influencing factors and perceived quality dimensions are displayed with the help of a taxonomy. This taxonomy puts different quality aspects into a logical relationship, and shows which factors have to be taken into account in the experimental set-up. 
The article discusses what has been reached in the new Recommendation, but also what is still missing in order to get more analytic information about the performance of system characteristics and their influence on overall service quality.",{"paper_id":1189,"title":1190,"year":197,"month":855,"day":63,"doi":1191,"resource_url":1192,"first_page":63,"last_page":63,"pdf_url":1193,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1194,"paper_type":860,"authors":1195,"abstract":1205},"lrec2004-main-020","Raising the Bar: Stacked Conservative Error Correction Beyond Boosting","10.63317\u002F4owx7w564o9k","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-020","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F19.pdf","wu-etal-2004-raising",[1196,1199,1202],{"paper_id":1189,"author_seq":247,"given_name":1197,"surname":1198,"affiliation":63,"orcid":63},"Dekai","Wu",{"paper_id":1189,"author_seq":232,"given_name":1200,"surname":1201,"affiliation":63,"orcid":63},"Grace","Ngai",{"paper_id":1189,"author_seq":218,"given_name":1203,"surname":1204,"affiliation":63,"orcid":63},"Marine","Carpuat","We introduce a conservative error correcting model, Stacked TBL, that is designed to improve the performance of even high-performing models like boosting, with little risk of accidentally degrading performance. Stacked TBL is particularly well suited for corpus-based natural language applications involving high-dimensional feature spaces, since it leverages the characteristics of the TBL paradigm that we appropriate. We consider here the task of automatically annotating named entities in text corpora. The task does pose a number of challenges for TBL, to which there are some simple yet effective solutions. 
We discuss the empirical behavior of Stacked TBL, and consider evidence that despite its simplicity, more complex and time-consuming variants are not generally required.",{"paper_id":1207,"title":1208,"year":197,"month":855,"day":63,"doi":1209,"resource_url":1210,"first_page":63,"last_page":63,"pdf_url":1211,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1212,"paper_type":860,"authors":1213,"abstract":1220},"lrec2004-main-021","An Analysis of the Relative Difficulty of Reuters-21578 Subsets","10.63317\u002F4qqwnaz86qg4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-021","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F21.pdf","debole-sebastiani-2004-analysis",[1214,1217],{"paper_id":1207,"author_seq":247,"given_name":1215,"surname":1216,"affiliation":63,"orcid":63},"Franca","Debole",{"paper_id":1207,"author_seq":232,"given_name":1218,"surname":1219,"affiliation":63,"orcid":63},"Fabrizio","Sebastiani","The existence, public availability, and widespread acceptance of a standard benchmark for a given information retrieval (IR) task are beneficial to research on this task, since they allow different researchers to experimentally compare their own systems by comparing the results they have obtained on this benchmark. The Reuters-21578 test collection, together with its earlier variants, has been such a standard benchmark for the text categorization (TC) task throughout the last ten years. However, the benefits that this has brought about have somehow been limited by the fact that different researchers have ``carved'' different subsets out of this collection, and tested their systems on one of these subsets only; systems that have been tested on different Reuters-21578 subsets are thus not readily comparable. In this paper we present a systematic, comparative experimental study of the three subsets of Reuters-21578 that have been most popular among TC researchers. 
The results we obtain allow us to determine the relative difficulty of these subsets, thus establishing an indirect means for comparing TC systems that have, or will be, tested on these different subsets.",{"paper_id":1222,"title":1223,"year":197,"month":855,"day":63,"doi":1224,"resource_url":1225,"first_page":63,"last_page":63,"pdf_url":1226,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1227,"paper_type":860,"authors":1228,"abstract":1235},"lrec2004-main-022","Experiences in Collection of Handwriting Data for Online Handwriting Recognition in Indic Scripts","10.63317\u002F58utgfkytti7","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-022","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F23.pdf","bhaskarabhatla-madhvanath-2004-experiences",[1229,1232],{"paper_id":1222,"author_seq":247,"given_name":1230,"surname":1231,"affiliation":63,"orcid":63},"Ajay S.","Bhaskarabhatla",{"paper_id":1222,"author_seq":232,"given_name":1233,"surname":1234,"affiliation":63,"orcid":63},"Sriganesh","Madhvanath","In this paper, we describe initial efforts at Hewlett-Packard Labs, Bangalore, to create datasets of online handwriting in Indic scripts to support research in online handwriting recognition for the Indic scripts. The term \"online\" here refers to the fact that handwriting is captured as a stream of (x,y) points using an appropriate pen position sensor (often called a digitizer), rather than as a bitmap (image). The paper describes the structure of Indic scripts in brief. It identifies different choices for segmenting characters into simpler shapes that can then be recognized using pattern recognition techniques. The paper discusses these issues in the context of the Tamil script. The remainder of the paper provides an overview of two distinct data collection efforts for the Tamil script - one at the isolated character level, and the other for isolated words. 
In the context of these efforts, we briefly describe the data collection procedure, tools for collection and subsequent annotation, user-interface issues, the annotation scheme, and the organization of the dataset. The paper concludes with the current status of the effort and future directions.",{"paper_id":1237,"title":1238,"year":197,"month":855,"day":63,"doi":1239,"resource_url":1240,"first_page":63,"last_page":63,"pdf_url":1241,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1242,"paper_type":860,"authors":1243,"abstract":1253},"lrec2004-main-023","Collocation Extraction Using Web Statistics","10.63317\u002F5eqinh6ndsjq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-023","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F24.pdf","chen-etal-2004-collocation",[1244,1247,1250],{"paper_id":1237,"author_seq":247,"given_name":1245,"surname":1246,"affiliation":63,"orcid":63},"Hsin-Hsi","Chen",{"paper_id":1237,"author_seq":232,"given_name":1248,"surname":1249,"affiliation":63,"orcid":63},"Yi-Cheng","Yu",{"paper_id":1237,"author_seq":218,"given_name":1251,"surname":1252,"affiliation":63,"orcid":63},"Chih-Long","Lin","This paper mines collocations from two different web usage corpora, NTU proxy log and TTS search log. The precisions for NTU and TTS test data are 61.76% and 57.50%, respectively, by human judgment for 2% sampling of extracted collocations. For automatic evaluation, we submit extracted collocation to Google search engine, and the resulting page counts are used to compute the mutual information of the collocation. 
Experimental results show that total 43.27% and 42.65% of collocations mined from NTU and TTS corpora passed the examination of MIs.",{"paper_id":1255,"title":1256,"year":197,"month":855,"day":63,"doi":1257,"resource_url":1258,"first_page":63,"last_page":63,"pdf_url":1259,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1260,"paper_type":860,"authors":1261,"abstract":1264},"lrec2004-main-024","An XML Representation for Annotated Handwriting Datasets for Online Handwriting Recognition","10.63317\u002F4cicesacbhmr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-024","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F26.pdf","bhaskarabhatla-madhvanath-2004-xml",[1262,1263],{"paper_id":1255,"author_seq":247,"given_name":1230,"surname":1231,"affiliation":63,"orcid":63},{"paper_id":1255,"author_seq":232,"given_name":1233,"surname":1234,"affiliation":63,"orcid":63},"In this paper, we briefly describe an XML representation for annotation of online handwriting data to support the development and evaluation of handwriting recognition algorithms, that is based on the emerging Digital Ink Markup Language (InkML) draft standard from W3C. In particular, we describe how the XML representation we have defined attempts to address issues of (i) support for different scripts, (ii) partial automation of labeling using recognition engines, (iii) planned as well as casual capture of handwriting data and (iv) semantic annotation of handwriting data at various levels such as character, word and phrase. The representation keeps the raw handwriting data (described by InkML) separate from its semantic interpretations. 
We also compare and contrast the XML representation with the extant UNIPEN representation for annotation of handwriting data.",{"paper_id":1266,"title":1267,"year":197,"month":855,"day":63,"doi":1268,"resource_url":1269,"first_page":63,"last_page":63,"pdf_url":1270,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1271,"paper_type":860,"authors":1272,"abstract":1279},"lrec2004-main-025","Reusing Language Resources for Speech Applications involving Emotion","10.63317\u002F3ghbaitfser6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-025","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F27.pdf","alexandris-fotinea-2004-reusing",[1273,1276],{"paper_id":1266,"author_seq":247,"given_name":1274,"surname":1275,"affiliation":63,"orcid":63},"Christina","Alexandris",{"paper_id":1266,"author_seq":232,"given_name":1277,"surname":1278,"affiliation":63,"orcid":63},"Stavroula-Evita","Fotinea","The present paper involves using a spoken corpus for the construction of a written corpus which in turn will be used for speech applications involving emotion, namely an emotional Text-to-Speech system or a Speech-to-Emotion system which requires emotional speech recognition and consequent text to emotion conversion. Such speech application systems, involve the construction of a corpus of written artificial dialogs, which is intended to depict\u002Fconvey the speakers emotional state. From the analysis of the sublanguage of the spoken corpus as well as the analysis and evaluation of the sublanguage of the (constructed) written corpus, we classified the extra-linguistic markers into three types ('Pause', 'Emphasis' and 'Hesitation') and we observe that, in Greek, extra-linguistic markers may behave as pointers to key-information. The correspondence between extra-linguistic markers and key-information was evaluated with an additional spoken corpus of recorded dialogs from selected Greek TV programs. 
The written corpus containing the inserted extra-linguistic markers was evaluated by native speakers with linguistic knowledge. The speakers were asked to classify them according to their acceptability in Greek (Alexandris & Fotinea, 2003).",{"paper_id":1281,"title":1282,"year":197,"month":855,"day":63,"doi":1283,"resource_url":1284,"first_page":63,"last_page":63,"pdf_url":1285,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1286,"paper_type":860,"authors":1287,"abstract":1303},"lrec2004-main-026","Designing and Recording an Audiovisual Database of Emotional Speech in Basque","10.63317\u002F2f62vzdunkv5","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-026","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F28.pdf","navas-etal-2004-designing",[1288,1291,1294,1297,1300],{"paper_id":1281,"author_seq":247,"given_name":1289,"surname":1290,"affiliation":63,"orcid":63},"Eva","Navas",{"paper_id":1281,"author_seq":232,"given_name":1292,"surname":1293,"affiliation":63,"orcid":63},"Amaia","Castelruiz",{"paper_id":1281,"author_seq":218,"given_name":1295,"surname":1296,"affiliation":63,"orcid":63},"Iker","Luengo",{"paper_id":1281,"author_seq":203,"given_name":1298,"surname":1299,"affiliation":63,"orcid":63},"Jon","Sánchez",{"paper_id":1281,"author_seq":188,"given_name":1301,"surname":1302,"affiliation":63,"orcid":63},"Inmaculada","Hernáez","This paper describes an emotional speech database recorded for standard Basque. This database was recorded in the framework of a project in which the goal was to develop an avatar. Therefore, the image corresponding to the expression of the different emotions was also needed. This is why an audiovisual database was developed. The designed database contains six basic emotions as well as the neutral speaking style. It consists in isolated words and sentences read by a professional dubbing actress. 
At present, this database is being used to study the prosodic models related with each emotion in standard Basque.",{"paper_id":1305,"title":1306,"year":197,"month":855,"day":63,"doi":1307,"resource_url":1308,"first_page":63,"last_page":63,"pdf_url":1309,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1310,"paper_type":860,"authors":1311,"abstract":1318},"lrec2004-main-027","Evaluation of Different Similarity Measures for the Extraction of Multiword Units in a Reinforcement Learning Environment","10.63317\u002F254wvts346u4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-027","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F29.pdf","dias-nunes-2004-evaluation",[1312,1315],{"paper_id":1305,"author_seq":247,"given_name":1313,"surname":1314,"affiliation":63,"orcid":63},"Gaël","Dias",{"paper_id":1305,"author_seq":232,"given_name":1316,"surname":1317,"affiliation":63,"orcid":63},"Sérgio","Nunes","In this paper, we present an evaluation of four different similarity measures for the extraction of multiword units in the context of the GALEMU software that proposes an innovative architecture based on a floating point representation genetic algorithm. 
In particular, we will show that the Bray and Curtis measure leads to improved extraction results both in precision and recall.",{"paper_id":1320,"title":1321,"year":197,"month":855,"day":63,"doi":1322,"resource_url":1323,"first_page":63,"last_page":63,"pdf_url":1324,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1325,"paper_type":860,"authors":1326,"abstract":63},"lrec2004-main-028","Terminal Device Oriented Comparable Corpora and its Alignment- Towards Extracting Paraphrasing Patterns","10.63317\u002F372a5iq8p8ef","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-028","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F30.pdf","nakagawa-etal-2004-terminal",[1327,1330,1333],{"paper_id":1320,"author_seq":247,"given_name":1328,"surname":1329,"affiliation":63,"orcid":63},"Hiroshi","Nakagawa",{"paper_id":1320,"author_seq":232,"given_name":1331,"surname":1332,"affiliation":63,"orcid":63},"Hidetaka","Masuda",{"paper_id":1320,"author_seq":218,"given_name":1334,"surname":1335,"affiliation":63,"orcid":63},"Dai","Sato",{"paper_id":1337,"title":1338,"year":197,"month":855,"day":63,"doi":1339,"resource_url":1340,"first_page":63,"last_page":63,"pdf_url":1341,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1342,"paper_type":860,"authors":1343,"abstract":1347},"lrec2004-main-029","Towards Basic Categories for Describing Properties of Texts in a Corpus","10.63317\u002F4xcyvey59cnv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-029","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F32.pdf","sharoff-2004-towards",[1344],{"paper_id":1337,"author_seq":247,"given_name":1345,"surname":1346,"affiliation":63,"orcid":63},"Serge","Sharoff","The paper discusses the basic principles for describing properties of texts to be stored in a corpus and suggests the standard that is used in the majority of corpora developed at the University of Leeds and can be 
potentially employed for describing texts in any corpus collecting activity. The standard defines the minimal subset of tags and attributes that are necessary for describing texts stored in a corpus. The proposed text typology helps to position a corpus under development with respect to a reference corpus covering all possible features by explicit selection of a subset of features to be considered in the study.",{"paper_id":1349,"title":1350,"year":197,"month":855,"day":63,"doi":1351,"resource_url":1352,"first_page":63,"last_page":63,"pdf_url":1353,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1354,"paper_type":860,"authors":1355,"abstract":1365},"lrec2004-main-030","Using Weighted Abduction to Align Term Variant Translations in Bilingual Texts","10.63317\u002F25o7r2sse4tk","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-030","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F34.pdf","carl-etal-2004-using",[1356,1359,1362],{"paper_id":1349,"author_seq":247,"given_name":1357,"surname":1358,"affiliation":63,"orcid":63},"Michael","Carl",{"paper_id":1349,"author_seq":232,"given_name":1360,"surname":1361,"affiliation":63,"orcid":63},"Ecaterina","Rascu",{"paper_id":1349,"author_seq":218,"given_name":1363,"surname":1364,"affiliation":63,"orcid":63},"Johann","Haller","In this paper we describe a method for detecting terminological variants and their translations in bilingual texts. Our approach is based on abductive reasoning and combines various monolingual and bilingual resources. A small scale experiment shows that precision and recall increase when using more resources and when the resources interfere in a less restricted way. In order to tune our system, we develop a weighing strategy based on the precision of term translation alignments in a reference text. We feed these weights back into the linguistic resources and repeat the experiment. 
The results show that precision values are considerably higher when weighing term alignments.",{"paper_id":1367,"title":1368,"year":197,"month":855,"day":63,"doi":1369,"resource_url":1370,"first_page":63,"last_page":63,"pdf_url":1371,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1372,"paper_type":860,"authors":1373,"abstract":1377},"lrec2004-main-031","Investigation on Semantics to Improve the COVAX System","10.63317\u002F43bwwn85mayj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-031","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F35.pdf","bordoni-2004-investigation",[1374],{"paper_id":1367,"author_seq":247,"given_name":1375,"surname":1376,"affiliation":63,"orcid":63},"Luciana","Bordoni","The purpose of COVAX (Contemporary Culture Virtual Archives in XML) financed by the European Commission in IST Programme was to analyse and draw up the technical solutions required to provide access through the Internet to homogeneously-encoded document descriptions of archive, library and museum collections based on the application of XML. The project demonstrated its feasibility through a prototype containing a meaningful sample of all the different types of documents to build a global system for search and retrieval. The aim of this paper is to create in the COVAX system a new presentation of the documents. A system capable of processing markup semantics declarations can act as an interactive environment for testing conjectures and validating hypotheses. Semantics is one of the ways of improving information retrieval performances; we will explore this problem in the COVAX case study. 
We will investigate the possibility to derive a semantic knowledge from COVAX repositories, in order to improve the site analysis process and the query answering process.",{"paper_id":1379,"title":1380,"year":197,"month":855,"day":63,"doi":1381,"resource_url":1382,"first_page":63,"last_page":63,"pdf_url":1383,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1384,"paper_type":860,"authors":1385,"abstract":1389},"lrec2004-main-032","Incremental Knowledge Acquisition from WordNet and EuroWordNet","10.63317\u002F5b2rtja8s92z","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-032","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F36.pdf","peters-2004-incremental",[1386],{"paper_id":1379,"author_seq":247,"given_name":1387,"surname":1388,"affiliation":63,"orcid":63},"Wim","Peters","This paper describes the process of the creation and extraction of implicit knowledge from WordNet (Fellbaum, 1998) and EuroWordNet (Vossen, 1998). This knowledge is an extension of the explicit knowledge structures already provided by the wordnets in the form of synsets and semantic relations, and is contained both within (Euro)WordNet's hierarchical structure and the glosses that are associated with each WordNet synset.",{"paper_id":1391,"title":1392,"year":197,"month":855,"day":63,"doi":1393,"resource_url":1394,"first_page":63,"last_page":63,"pdf_url":1395,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1396,"paper_type":860,"authors":1397,"abstract":1404},"lrec2004-main-033","Finding Semantic Associations on Express 
Lane","10.63317\u002F52wv2es2kv7z","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-033","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F38.pdf","nastase-mihalcea-2004-finding",[1398,1401],{"paper_id":1391,"author_seq":247,"given_name":1399,"surname":1400,"affiliation":63,"orcid":63},"Vivi","Năstase",{"paper_id":1391,"author_seq":232,"given_name":1402,"surname":1403,"affiliation":63,"orcid":63},"Rada","Mihalcea","We introduce a new codification scheme for efficient computation of measures in semantic networks. The scheme is particularly useful for fast computation of semantic associations between words and implementation of an informational retrieval operator for efficient search in semantic spaces. Other applications may also be possible.",{"paper_id":1406,"title":1407,"year":197,"month":855,"day":63,"doi":1408,"resource_url":1409,"first_page":63,"last_page":63,"pdf_url":1410,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1411,"paper_type":860,"authors":1412,"abstract":1419},"lrec2004-main-034","Infrastructure for Collaborative Annotation of Speech","10.63317\u002F2g68i5myhjks","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-034","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F39.pdf","gronroos-miettinen-2004-infrastructure",[1413,1416],{"paper_id":1406,"author_seq":247,"given_name":1414,"surname":1415,"affiliation":63,"orcid":63},"Mickel","Grönroos",{"paper_id":1406,"author_seq":232,"given_name":1417,"surname":1418,"affiliation":63,"orcid":63},"Manne","Miettinen","Vast amounts of digital language data (primary data) and increasingly complex linguistic annotations (secondary data) are being created around the world with accelerating speed. 
There is a real risk of losing much of this data unless the compilers of language resources (primary and secondary data) and creators of tools start to pay more attention to the reusability of the resources and the interoperability of the tools. In this poster we report our effort to create best practices for the creation and dissemination of reusable speech resources in Finland. Our suggested solution allows collaborative annotation, which means that researchers in different sites can work on the same speech data, adding different kinds of linguistic annotation and share their work with other researchers.",{"paper_id":1421,"title":1422,"year":197,"month":855,"day":63,"doi":1423,"resource_url":1424,"first_page":63,"last_page":63,"pdf_url":1425,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1426,"paper_type":860,"authors":1427,"abstract":1436},"lrec2004-main-035","Automatic Language-Independent Induction of Gazetteer Lists","10.63317\u002F56afcatbzppm","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-035","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F40.pdf","maynard-etal-2004-automatic",[1428,1430,1433],{"paper_id":1421,"author_seq":247,"given_name":1060,"surname":1429,"affiliation":63,"orcid":63},"Maynard",{"paper_id":1421,"author_seq":232,"given_name":1431,"surname":1432,"affiliation":63,"orcid":63},"Kalina","Bontcheva",{"paper_id":1421,"author_seq":218,"given_name":1434,"surname":1435,"affiliation":63,"orcid":63},"Hamish","Cunningham","Adaptation of existing Information Extraction (IE) systems to new languages and domains is the focus of much current research, but progress is often hindered by the lack of available resources to enable developers to get a new system up and running fast. It has previously been shown that a good set of gazetteer lists can have a vital role here, but creation of lists for a new language or domain can be time-consuming and laborious. 
In this paper we demonstrate a tool for inducing gazetteer lists from a small set of annotated corpora and creating a baseline IE system. We also describe an extension to this, using bootstrapping techniques in order to generate much larger volumes of noisy training texts. High quality results have been achieved in this way on Hindi, Chinese and Arabic.",{"paper_id":1438,"title":1439,"year":197,"month":855,"day":63,"doi":1440,"resource_url":1441,"first_page":63,"last_page":63,"pdf_url":1442,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1443,"paper_type":860,"authors":1444,"abstract":1446},"lrec2004-main-036","Corpus Design, Recording and Phonetic Analysis of Greek Emotional Database","10.63317\u002F3azt6qsacagi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-036","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F41.pdf","fakotakis-2004-corpus",[1445],{"paper_id":1438,"author_seq":247,"given_name":1111,"surname":1112,"affiliation":63,"orcid":63},"A Greek Emotional Speech Database has been recorded and analyzed. The scope of this work is to study through this database the prosodical phenomena as a function of each emotional state and the application to a text to speech synthesis system. Our database contains recordings of a female professional actress. We used an actress for this task because in order to faithfully simulate a number of emotions. The simulated emotions for our database were sadness, anger, fear, joy and a neutral session. The recordings consisted of ten single words, twenty short sentences, twenty five long sentences and twelve passages of fluent speech (ranging from three to five sentences each). Following the recordings, a listening test was performed to test whether normal listeners could identify the type of emotion that characterized the recorded utterances. 
Six qualified listeners were used, both men and women, of different ages, from several social environments.",{"paper_id":1448,"title":1449,"year":197,"month":855,"day":63,"doi":1450,"resource_url":1451,"first_page":63,"last_page":63,"pdf_url":1452,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1453,"paper_type":860,"authors":1454,"abstract":1468},"lrec2004-main-037","Human Dialogue Modelling Using Annotated Corpora","10.63317\u002F34tu5n4uwxcu","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-037","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F42.pdf","wilks-etal-2004-human",[1455,1458,1460,1463,1465],{"paper_id":1448,"author_seq":247,"given_name":1456,"surname":1457,"affiliation":63,"orcid":63},"Yorick","Wilks",{"paper_id":1448,"author_seq":232,"given_name":907,"surname":1459,"affiliation":63,"orcid":63},"Webb",{"paper_id":1448,"author_seq":218,"given_name":1461,"surname":1462,"affiliation":63,"orcid":63},"Andrea","Setzer",{"paper_id":1448,"author_seq":203,"given_name":1024,"surname":1464,"affiliation":63,"orcid":63},"Hepple",{"paper_id":1448,"author_seq":188,"given_name":1466,"surname":1467,"affiliation":63,"orcid":63},"Roberta","Catizone","We describe two major dialogue system segments: first we describe a Dialogue Manager which uses a representation of stereotypical dialogue patterns that we call Dialogue Action Frames and which, we believe, generate strong and novel constraints on later access to incomplete dialogue topics. 
Secondly, an analysis module that learns to assign dialogue acts from corpora, but on the basis of limited quantities of data, and up to what seems to be some kind of limit on this task, a fact we also discuss.",{"paper_id":1470,"title":1471,"year":197,"month":855,"day":63,"doi":1472,"resource_url":1473,"first_page":63,"last_page":63,"pdf_url":1474,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1475,"paper_type":860,"authors":1476,"abstract":1479},"lrec2004-main-038","CrossTowns: Automatically Generated Phonetic Lexicons of Cross-lingual Pronunciation Variants of European City Names","10.63317\u002F4muf7zobbm99","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-038","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F43.pdf","schaden-2004-crosstowns",[1477],{"paper_id":1470,"author_seq":247,"given_name":961,"surname":1478,"affiliation":63,"orcid":63},"Schaden","The CrossTowns lexicons are part of a study that focuses on the phonetic variants that occur when speakers of different native languages   (L1) with varying degrees of target language (L2) proficiency pronounce foreign city names. Based on a collection of speech   data from this domain, it is one of the aims to identify the most common pronunciation errors in a particular L1\u002FL2 pair (language direction)   and to model them by phonological rewrite rules. Although derived from only a small corpus of names, the rule sets already   generate plausible variants when applied to unseen material. Yet there is a need for improvement. To demonstrate the current state of   affairs, sample lexicons of 1.000 place names for English, French, and German were compiled and converted into various interlanguage   pronunciation lexicons using the accent rule sets. 
In the paper, the procedures involved in the data collection, an outline of the   rule-based accent generation technique, and a discussion of the problems involved in modelling non-native pronunciations on the lexicon   level will be presented.",{"paper_id":1481,"title":1482,"year":197,"month":855,"day":63,"doi":1483,"resource_url":1484,"first_page":63,"last_page":63,"pdf_url":1485,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1486,"paper_type":860,"authors":1487,"abstract":1492},"lrec2004-main-039","Pattern Discovery in Named Organization Corpus","10.63317\u002F4qtuqftbt4in","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-039","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F46.pdf","chen-chu-2004-pattern",[1488,1489],{"paper_id":1481,"author_seq":247,"given_name":1245,"surname":1246,"affiliation":63,"orcid":63},{"paper_id":1481,"author_seq":232,"given_name":1490,"surname":1491,"affiliation":63,"orcid":63},"Yi-Lin","Chu","This paper presents how to mine formulation rules from a named organization corpus. The TEIRESIAS algorithm, which is widely used in bioinformatics domain, is adopted. The experimental results based on MET2 test bed show that the approach of regarding the morpheme of a keyword as a cluster is the best, the approach of regarding all the keywords as the same cluster is the next, and the approach of regarding each keyword as a cluster is the worst. The performance using morpheme-based approach is a little better than that of hand-crafted approach. 
The methodology can be easily extended to other types of named entities.",{"paper_id":1494,"title":1495,"year":197,"month":855,"day":63,"doi":1496,"resource_url":1497,"first_page":63,"last_page":63,"pdf_url":1498,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1499,"paper_type":860,"authors":1500,"abstract":1509},"lrec2004-main-040","Connector Usage in the English Essay Writing of Japanese EFL Learners","10.63317\u002F4gwcjs5r93fo","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-040","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F48.pdf","narita-etal-2004-connector",[1501,1504,1506],{"paper_id":1494,"author_seq":247,"given_name":1502,"surname":1503,"affiliation":63,"orcid":63},"Masumi","Narita",{"paper_id":1494,"author_seq":232,"given_name":1505,"surname":1335,"affiliation":63,"orcid":63},"Chieko",{"paper_id":1494,"author_seq":218,"given_name":1507,"surname":1508,"affiliation":63,"orcid":63},"Masatoshi","Sugiura","In this paper we report on our quantitative analysis of 25 logical connectors in advanced Japanese university students' essay writing and compare it with the use in comparable types of native English writing. We also present a brief comparison of the Japanese learners' usage with that of advanced French, Swedish or Chinese learners of English. As our research targets, we chose 25 logical connectors and selected two sub-corpora of the ICLE (International Corpus of Learner English) project to obtain comparable data on the usage of these logical connectors by advanced Japanese EFL learners and English native speakers. Every instance of our target logical connectors was extracted from the two corpora, and not only the frequency counts but also the occurrence position of each connector was examined. 
Our research findings show that Japanese EFL learners significantly overuse these logical connectors in sentence-initial position and that they significantly overuse such connectors as 'for example,' 'of course,' and 'first,' whereas they significantly underuse such connectors as 'then,' 'yet,' and 'instead.' The findings also show that there exist certain similarities and differences among the four learner groups in the use of logical connectors.",{"paper_id":1511,"title":1512,"year":197,"month":855,"day":63,"doi":1513,"resource_url":1514,"first_page":63,"last_page":63,"pdf_url":1515,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1516,"paper_type":860,"authors":1517,"abstract":1521},"lrec2004-main-041","Detection of Domain Specific Terminology Using Corpora Comparison","10.63317\u002F2ccdwd2a9gnc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-041","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F49.pdf","drouin-2004-detection",[1518],{"paper_id":1511,"author_seq":247,"given_name":1519,"surname":1520,"affiliation":63,"orcid":63},"Patrick","Drouin","Identifying terms in specialized corpora is a central task in terminological work (compilation of domain-specific dictionaries), but is labour-intensive, especially when the corpora are voluminous which is often the case nowadays. For the past decade, terminologists and specialized lexicographers have been able to rely on term-extraction tools to assist them in the selection of terms. However, most term-extractors focus on the identification of complex terms. Although complex terms (cellular telephone) are central to terminology processing, retrieval of uniterms (telephone) is still a major challenge. 
This paper evaluates the usefulness of a corpora comparison approach to pinpoint corpus-specific words in order to identify uniterms in the field of telecommunications.",{"paper_id":1523,"title":1524,"year":197,"month":855,"day":63,"doi":1525,"resource_url":1526,"first_page":63,"last_page":63,"pdf_url":1527,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1528,"paper_type":860,"authors":1529,"abstract":1533},"lrec2004-main-042","Comparative Evaluation of a Stochastic Parser on Semantic and Syntactic-semantic Labels","10.63317\u002F4wndph756dxy","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-042","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F51.pdf","minker-2004-comparative",[1530],{"paper_id":1523,"author_seq":247,"given_name":1531,"surname":1532,"affiliation":63,"orcid":63},"Wolfgang","Minker","This paper deals with the evaluation of a stochastic component for natural language understanding alternatively trained on semantic and syntactic-semantic labels. The parser uses semantically-labeled speech data gathered using the LIMSI-ARISE interactive speech system for train travel information retrieval in French. The study shows that introducing additional and coherent information into the semantic corpus allows to relatively improve the semantic frame accuracy of the parser by up to 16.5%. 
The more complex models yielding a high number of parameters are justified, as long as they convey significant information.",{"paper_id":1535,"title":1536,"year":197,"month":855,"day":63,"doi":1537,"resource_url":1538,"first_page":63,"last_page":63,"pdf_url":1539,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1540,"paper_type":860,"authors":1541,"abstract":1550},"lrec2004-main-043","Sinica BOW (Bilingual Ontological Wordnet): Integration of Bilingual WordNet and SUMO","10.63317\u002F393fh8jxva28","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-043","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F53.pdf","huang-etal-2004-sinica",[1542,1545,1548],{"paper_id":1535,"author_seq":247,"given_name":1543,"surname":1544,"affiliation":63,"orcid":63},"Chu-Ren","Huang",{"paper_id":1535,"author_seq":232,"given_name":1546,"surname":1547,"affiliation":63,"orcid":63},"Ru-Yng","Chang",{"paper_id":1535,"author_seq":218,"given_name":1549,"surname":1045,"affiliation":63,"orcid":63},"Hsiang-Pin","The Academia Sinica Bilingual Ontological Wordnet (Sinica BOW) integrates three resources: WordNet, English-Chinese Translation Equivalents Database (ECTED), and SUMO (Suggested Upper Merged Ontology). The three resources were originally linked in two pairs: WordNet 1.6 was manually mapped to SUMO (Niles and Pease 2003) and also to ECTED (the English lemmas in WordNet were mapped to their Chinese lexical equivalents). ECTED encodes both equivalent pairs and their semantic relations (Huang et al. 2003). With the integration of these three key resources, Sinica BOW functions both as an English-Chinese bilingual wordnet and a bilingual lexical access to SUMO. Sinica BOW allows versatile access and facilitates a combination of lexical, semantic, and ontological information. Versatility is built in with its bilinguality, and the lemma-based merging of multiple resources. 
First, either English or Chinese can be used for the query, as well as for presenting the content of the resources. Second, the user can easily access the logical structure of both the WordNet and SUMO ontology using either words or conceptual nodes. Third, multiple linguistic indexing is built in to allow additional versatility. Fourth, domain information allows another dimension of knowledge manipulation.",{"paper_id":1552,"title":1553,"year":197,"month":855,"day":63,"doi":1554,"resource_url":1555,"first_page":63,"last_page":63,"pdf_url":1556,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1557,"paper_type":860,"authors":1558,"abstract":1568},"lrec2004-main-044","How to Disassemble Alphabetical Processions - Morphological Treatment of Unknown Words","10.63317\u002F25cwee6svqau","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-044","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F54.pdf","bopp-etal-2004-disassemble",[1559,1562,1565],{"paper_id":1552,"author_seq":247,"given_name":1560,"surname":1561,"affiliation":63,"orcid":63},"Stephan","Bopp",{"paper_id":1552,"author_seq":232,"given_name":1563,"surname":1564,"affiliation":63,"orcid":63},"Sandro","Pedrazzini",{"paper_id":1552,"author_seq":218,"given_name":1566,"surname":1567,"affiliation":63,"orcid":63},"Elisabeth","Maier","This paper describes an approach how to integrate the decomposition of non-lexicalized word compounds and derivations into the morphological analyzers of a NLP product line. 
The component employs word formation rules and filtering techniques to decompose words, which are not contained in the underlying dictionary database, thereby increasing the average word recognition rate of the morphological analyzers from 90.6% to 95.4%.",{"paper_id":1570,"title":1571,"year":197,"month":855,"day":63,"doi":1572,"resource_url":1573,"first_page":63,"last_page":63,"pdf_url":1574,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1575,"paper_type":860,"authors":1576,"abstract":1586},"lrec2004-main-045","Creating Slovenian Language Resources for Development of Speech-to-speech Translation Components","10.63317\u002F5jzk6skgvgvw","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-045","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F57.pdf","verdonik-etal-2004-creating",[1577,1580,1583],{"paper_id":1570,"author_seq":247,"given_name":1578,"surname":1579,"affiliation":63,"orcid":63},"Darinka","Verdonik",{"paper_id":1570,"author_seq":232,"given_name":1581,"surname":1582,"affiliation":63,"orcid":63},"Matej","Rojc",{"paper_id":1570,"author_seq":218,"given_name":1584,"surname":1585,"affiliation":63,"orcid":63},"Zdravko","Kačič","This article gives detailed information about the procedures for building Slovenian lexica within the LC-STAR project, and also detailed information about the size of those lexica. University of Maribor joined the LC-STAR project in order to provide appropriate language resources for developing speech-to-speech translation technology for Slovenian language. The lexica consist of three parts: 65.000 common words, 45.000 proper names and 6.000 special application domain words. All lexica will be morpho-syntactically tagged and phonetically transcribed. 
Quality of produced language resources is ensured by independent validation.",{"paper_id":1588,"title":1589,"year":197,"month":855,"day":63,"doi":1590,"resource_url":1591,"first_page":63,"last_page":63,"pdf_url":1592,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1593,"paper_type":860,"authors":1594,"abstract":1598},"lrec2004-main-046","Automatic Bilingual Lexicon Acquisition Using Random Indexing of Aligned Bilingual Data","10.63317\u002F4rpvvw3oeu6i","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-046","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F58.pdf","sahlgren-2004-automatic",[1595],{"paper_id":1588,"author_seq":247,"given_name":1596,"surname":1597,"affiliation":63,"orcid":63},"Magnus","Sahlgren","This paper presents a very simple and effective approach to automatic bilingual lexicon acquisition. The approach is cooccurrence-based, and uses the Random Indexing vector space methodology applied to aligned bilingual data. The approach is simple, efficient and scalable, and generates promising results when compared to a manually compiled lexicon. 
The paper also discusses some of the methodological problems with the preferred evaluation procedure.",{"paper_id":1600,"title":1601,"year":197,"month":855,"day":63,"doi":1602,"resource_url":1603,"first_page":63,"last_page":63,"pdf_url":1604,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1605,"paper_type":860,"authors":1606,"abstract":63},"lrec2004-main-047","The Development and Integration of the LDA-Toolkit Into COST249 SpeechDat(II) SIG Reference Recognizer","10.63317\u002F2orvjdesmfhr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-047","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F59.pdf","kotnik-etal-2004-development",[1607,1610,1611],{"paper_id":1600,"author_seq":247,"given_name":1608,"surname":1609,"affiliation":63,"orcid":63},"Bojan","Kotnik",{"paper_id":1600,"author_seq":232,"given_name":1584,"surname":1585,"affiliation":63,"orcid":63},{"paper_id":1600,"author_seq":218,"given_name":1612,"surname":1613,"affiliation":63,"orcid":63},"Bogomir","Horvat",{"paper_id":1615,"title":1616,"year":197,"month":855,"day":63,"doi":1617,"resource_url":1618,"first_page":63,"last_page":63,"pdf_url":1619,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1620,"paper_type":860,"authors":1621,"abstract":1634},"lrec2004-main-048","Duration Modeling For Turkish Text-to-Speech Synthesis 
System","10.63317\u002F4p9taobj665f","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-048","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F60.pdf","ozturk-etal-2004-duration",[1622,1625,1628,1631],{"paper_id":1615,"author_seq":247,"given_name":1623,"surname":1624,"affiliation":63,"orcid":63},"Özlem","Öztürk",{"paper_id":1615,"author_seq":232,"given_name":1626,"surname":1627,"affiliation":63,"orcid":63},"Özgul","Salor",{"paper_id":1615,"author_seq":218,"given_name":1629,"surname":1630,"affiliation":63,"orcid":63},"Tolga","Çiloğlu",{"paper_id":1615,"author_seq":203,"given_name":1632,"surname":1633,"affiliation":63,"orcid":63},"Mubeccel","Demirekler","Naturalness of synthetic speech depends on appropriate modeling of prosodic aspects. Mostly, three prosody components are modeled: segmental duration, pitch contour and intensity. In this study, we present our work on modeling segmental duration in Turkish by using machine-learning algorithms. The models predict phone durations based on attributes such as phone identity, neighboring phone identities, lexical stress, position of syllable in word, part-of-speech information, word length in number of syllables and position of word in utterance. 
Obtained models predict segment durations better than mean duration approximations.",{"paper_id":1636,"title":1637,"year":197,"month":855,"day":63,"doi":1638,"resource_url":1639,"first_page":63,"last_page":63,"pdf_url":1640,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1641,"paper_type":860,"authors":1642,"abstract":1652},"lrec2004-main-049","Clustering Concept Hierarchies from Text","10.63317\u002F4zz3gxi7kkbn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-049","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F62.pdf","cimiano-etal-2004-clustering",[1643,1646,1649],{"paper_id":1636,"author_seq":247,"given_name":1644,"surname":1645,"affiliation":63,"orcid":63},"Philipp","Cimiano",{"paper_id":1636,"author_seq":232,"given_name":1647,"surname":1648,"affiliation":63,"orcid":63},"Andreas","Hotho",{"paper_id":1636,"author_seq":218,"given_name":1650,"surname":1651,"affiliation":63,"orcid":63},"Steffen","Staab","We present a novel approach to learning taxonomies or concept hierarchies from text. The approach is based on Formal Concept Analysis, a method mainly used for the analysis of data, i.e. for investigating and processing explicitly given information. Our approach is based on the distributional hypothesis, i.e. that nouns or terms are similar to the extent to which they share contexts. Further, we assume that verbs pose more or less strong selectional restrictions on their arguments. The concept hierarchy is built via Formal Concept Analysis using syntactic dependencies as attributes. The approach is evaluated by comparing the produced concept hierarchies against two handcrafted taxonomies from two different domains: tourism and finance. 
We compare the results of our approach against a hierarchical bottom-up clustering algorithm as well as against Bi-Section-Kmeans as an instance of a top-down clustering algorithm.",{"paper_id":1654,"title":1655,"year":197,"month":855,"day":63,"doi":1656,"resource_url":1657,"first_page":63,"last_page":63,"pdf_url":1658,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1659,"paper_type":860,"authors":1660,"abstract":1680},"lrec2004-main-050","NIST Language Technology Evaluation Cookbook","10.63317\u002F3befofgvgqxg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-050","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F64.pdf","martin-etal-2004-nist",[1661,1663,1666,1669,1672,1675,1677],{"paper_id":1654,"author_seq":247,"given_name":1662,"surname":1087,"affiliation":63,"orcid":63},"Alvin F.",{"paper_id":1654,"author_seq":232,"given_name":1664,"surname":1665,"affiliation":63,"orcid":63},"John S.","Garofolo",{"paper_id":1654,"author_seq":218,"given_name":1667,"surname":1668,"affiliation":63,"orcid":63},"Jonathan C.","Fiscus",{"paper_id":1654,"author_seq":203,"given_name":1670,"surname":1671,"affiliation":63,"orcid":63},"Audrey N.","Le",{"paper_id":1654,"author_seq":188,"given_name":1673,"surname":1674,"affiliation":63,"orcid":63},"David S.","Pallett",{"paper_id":1654,"author_seq":172,"given_name":1676,"surname":1025,"affiliation":63,"orcid":63},"Mark A.",{"paper_id":1654,"author_seq":155,"given_name":1678,"surname":1679,"affiliation":63,"orcid":63},"Gregory A.","Sanders","We review some of the methodology applied to the various NIST language technology evaluations. We discuss the elements included in each evaluation plan, and suggest what we believe are key practices for successful evaluations, and what pitfalls should be avoided. 
A couple of lessons learned are noted.",{"paper_id":1682,"title":1683,"year":197,"month":855,"day":63,"doi":1684,"resource_url":1685,"first_page":63,"last_page":63,"pdf_url":1686,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1687,"paper_type":860,"authors":1688,"abstract":1695},"lrec2004-main-051","Definition, Dictionaries and Tagger for Extended Named Entity Hierarchy","10.63317\u002F3zc57kdyb4p6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-051","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F65.pdf","sekine-nobata-2004-definition",[1689,1692],{"paper_id":1682,"author_seq":247,"given_name":1690,"surname":1691,"affiliation":63,"orcid":63},"Satoshi","Sekine",{"paper_id":1682,"author_seq":232,"given_name":1693,"surname":1694,"affiliation":63,"orcid":63},"Chikashi","Nobata","The tagging of Named Entities, the names of particular things or classes, is regarded as an important component technology for many NLP applications. The first Named Entity set had 7 types, organization, location, person, date, time, money and percent expressions. Later, in the IREX project artifact was added and ACE added two, GPE and facility, to pursue the generalization of the technology. However, 7 or 8 kinds of NE are not broad enough to cover general applications. We proposed about 150 categories of NE (Sekine et al. 2002) and now we have extended it again to 200 categories. 
Also we have developed dictionaries and an automatic tagger for NEs in Japanese.",{"paper_id":1697,"title":1698,"year":197,"month":855,"day":63,"doi":1699,"resource_url":1700,"first_page":63,"last_page":63,"pdf_url":1701,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1702,"paper_type":860,"authors":1703,"abstract":1710},"lrec2004-main-052","Sejong Korean Corpora in the Making","10.63317\u002F5k6g83yq7toj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-052","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F66.pdf","kang-kim-2004-sejong",[1704,1707],{"paper_id":1697,"author_seq":247,"given_name":1705,"surname":1706,"affiliation":63,"orcid":63},"Beom-mo","Kang",{"paper_id":1697,"author_seq":232,"given_name":1708,"surname":1709,"affiliation":63,"orcid":63},"Hunggyu","Kim","We introduce a set of Korean corpora in the making. One of them is a corpus consisting of morphologically analyzed Korean words and it is called \"Sejong Morph Tagged Corpus\". It is a part of Sejong Corpora, which are the results of a government-sponsored language resources compiling project in Korea. We give an outline of the corpus building component of the project and describe in some detail \"Sejong Morph Tagged Corpus\". 
The latter is being further processed for disambiguation to be turned into \"Sejong Morph Sense Tagged Corpus\" and into a Korean Treebank of syntactically parsed sentences.",{"paper_id":1712,"title":1713,"year":197,"month":855,"day":63,"doi":1714,"resource_url":1715,"first_page":63,"last_page":63,"pdf_url":1716,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1717,"paper_type":860,"authors":1718,"abstract":1733},"lrec2004-main-053","Creation and Assessment of Korean Speech and Noise DB in Car Environment","10.63317\u002F4c8swpnfon55","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-053","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F68.pdf","lee-etal-2004-creation",[1719,1721,1723,1725,1728,1730],{"paper_id":1712,"author_seq":247,"given_name":1720,"surname":1045,"affiliation":63,"orcid":63},"Yong-Ju",{"paper_id":1712,"author_seq":232,"given_name":1722,"surname":1709,"affiliation":63,"orcid":63},"Bong-Wan",{"paper_id":1712,"author_seq":218,"given_name":1724,"surname":1709,"affiliation":63,"orcid":63},"Young-Il",{"paper_id":1712,"author_seq":203,"given_name":1726,"surname":1727,"affiliation":63,"orcid":63},"Dae-Lim","Choi",{"paper_id":1712,"author_seq":188,"given_name":1729,"surname":1045,"affiliation":63,"orcid":63},"Kwang-Hyun",{"paper_id":1712,"author_seq":172,"given_name":1731,"surname":1732,"affiliation":63,"orcid":63},"Yongnam","Um","Researches into robust recognition in noise environment, especially in car environment, are being carried out actively in speech community. In this paper we introduce three types of corpora that SITEC (Speech Information Technology & Industry Promotion Center) has created for research into speech recognition in car noise environment. The first is the recordings of 900 Korean native speakers, distributed according to gender, age, and region, who uttered command words in car environment. 
The second is the collection of mixed noise in 3 models of cars while setting up various noise patterns which can be obtained with the car engine on or off, at different driving speed, and in different road conditions with windows open or closed. The third is the recording of simulated speech by HATS (Head and Torso Simulator) in car environment with the internal and external noise factors added. These three types of recordings were all made through synchronized 7 channels and a separate hands-free channel fixed in a car. The creation and specifications of these corpora will be reported on in detail.",{"paper_id":1735,"title":1736,"year":197,"month":855,"day":63,"doi":1737,"resource_url":1738,"first_page":63,"last_page":63,"pdf_url":1739,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1740,"paper_type":860,"authors":1741,"abstract":1754},"lrec2004-main-054","Automatic Generation of Glosses in the OntoLearn System","10.63317\u002F3jr3omnkw65b","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-054","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F69.pdf","cucchiarelli-etal-2004-automatic",[1742,1745,1748,1751],{"paper_id":1735,"author_seq":247,"given_name":1743,"surname":1744,"affiliation":63,"orcid":63},"Alessandro","Cucchiarelli",{"paper_id":1735,"author_seq":232,"given_name":1746,"surname":1747,"affiliation":63,"orcid":63},"Roberto","Navigli",{"paper_id":1735,"author_seq":218,"given_name":1749,"surname":1750,"affiliation":63,"orcid":63},"Francesca","Neri",{"paper_id":1735,"author_seq":203,"given_name":1752,"surname":1753,"affiliation":63,"orcid":63},"Paola","Velardi","OntoLearn is a system for automatic acquisition of specialized ontologies from domain corpora, based on a syntactic pattern matching technique for word sense disambiguation, called structural semantic interconnection (SSI). We use SSI to extract from corpora complex domain concepts and create a specialized version of WordNet. 
In order to facilitate the task of domain specialists who inspect and evaluate the newly acquired domain ontology, we defined a method to automatically generate glosses for the learned concepts. Glosses provide an informal description, in natural language, of the formal specifications of a concept, facilitating a per-concept evaluation of the ontology by domain specialists, who are usually unfamiliar with the formal language used to describe a computational ontology. The proposed evaluation framework has been tested in a financial domain.",{"paper_id":1756,"title":1757,"year":197,"month":855,"day":63,"doi":1758,"resource_url":1759,"first_page":63,"last_page":63,"pdf_url":1760,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1761,"paper_type":860,"authors":1762,"abstract":1801},"lrec2004-main-055","The COST278 Pan-European Broadcast News Database","10.63317\u002F4wfiy7mczmz2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-055","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F70.pdf","vandecatseye-etal-2004-cost278",[1763,1766,1769,1772,1775,1778,1781,1783,1786,1788,1791,1794,1797,1800],{"paper_id":1756,"author_seq":247,"given_name":1764,"surname":1765,"affiliation":63,"orcid":63},"An","Vandecatseye",{"paper_id":1756,"author_seq":232,"given_name":1767,"surname":1768,"affiliation":63,"orcid":63},"Jean-Pierre","Martens",{"paper_id":1756,"author_seq":218,"given_name":1770,"surname":1771,"affiliation":63,"orcid":63},"Joao","Neto",{"paper_id":1756,"author_seq":203,"given_name":1773,"surname":1774,"affiliation":63,"orcid":63},"Hugo","Meinedo",{"paper_id":1756,"author_seq":188,"given_name":1776,"surname":1777,"affiliation":63,"orcid":63},"Carmen","Garcia-Mateo",{"paper_id":1756,"author_seq":172,"given_name":1779,"surname":1780,"affiliation":63,"orcid":63},"Javier","Dieguez",{"paper_id":1756,"author_seq":155,"given_name":58,"surname":1782,"affiliation":63,"orcid":63},"Mihelic",{"paper_id":1756,"author_seq":13
8,"given_name":1784,"surname":1785,"affiliation":63,"orcid":63},"Janez","Zibert",{"paper_id":1756,"author_seq":121,"given_name":1078,"surname":1787,"affiliation":63,"orcid":63},"Nouza",{"paper_id":1756,"author_seq":104,"given_name":1789,"surname":1790,"affiliation":63,"orcid":63},"Petr","David",{"paper_id":1756,"author_seq":87,"given_name":1792,"surname":1793,"affiliation":63,"orcid":63},"Matus","Pleva",{"paper_id":1756,"author_seq":73,"given_name":1795,"surname":1796,"affiliation":63,"orcid":63},"Anton","Cizmar",{"paper_id":1756,"author_seq":55,"given_name":1798,"surname":1799,"affiliation":63,"orcid":63},"Harris","Papageorgiou",{"paper_id":1756,"author_seq":38,"given_name":1274,"surname":1275,"affiliation":63,"orcid":63},"This paper describes a pan-European multilingual audio and video database of broadcast news shows. The database was constructed by seven institutions that are collaborating in the European COST278 action on Spoken Language Interaction in Telecommunications. At present, the database comprises broadcast news shows in seven languages, namely Dutch, Portuguese, Galician, Czech, Slovenian, Slovakian and Greek, but the policy is to attract new partners that bring in new data which are constructed and transcribed according to the rules and procedures outlined in this paper. 
The data comes with evaluation software that should facilitate a comparison of experiments.",{"paper_id":1803,"title":1804,"year":197,"month":855,"day":63,"doi":1805,"resource_url":1806,"first_page":63,"last_page":63,"pdf_url":1807,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1808,"paper_type":860,"authors":1809,"abstract":1819},"lrec2004-main-056","A Spoken Afrikaans Language Resource Designed for Research on Pronunciation Variations","10.63317\u002F3zryxigktcen","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-056","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F71.pdf","wissing-etal-2004-spoken",[1810,1813,1814,1817],{"paper_id":1803,"author_seq":247,"given_name":1811,"surname":1812,"affiliation":63,"orcid":63},"Daan","Wissing",{"paper_id":1803,"author_seq":232,"given_name":1767,"surname":1768,"affiliation":63,"orcid":63},{"paper_id":1803,"author_seq":218,"given_name":1815,"surname":1816,"affiliation":63,"orcid":63},"Ulrike","Janke",{"paper_id":1803,"author_seq":203,"given_name":1387,"surname":1818,"affiliation":63,"orcid":63},"Goedertier","In this contribution, the design, collection, annotation and planned distribution of a new spoken language resource of Afrikaans (SALAR) is discussed. The corpus contains speech of mother tongue speakers of Afrikaans, and is intended to become a primary national language resource for phonetic research and research on pronunciation variations. As such, the corpus is designed to expose pronunciation variations due to regional accents, speech rate (normal and fast speech) and speech mode (read and spontaneous speech). 
The corpus is collected by the Potchefstroom Campus of the North-West University, but in all phases of the corpus creation process there was a close collaboration with ELIS-UG (Belgium), one of the institutions that has been engaged in the creation of the Spoken Dutch Corpus (CGN).",{"paper_id":1821,"title":1822,"year":197,"month":855,"day":63,"doi":1823,"resource_url":1824,"first_page":63,"last_page":63,"pdf_url":1825,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1826,"paper_type":860,"authors":1827,"abstract":1835},"lrec2004-main-057","The BITS Speech Synthesis Corpus for German","10.63317\u002F2huz7uek5s54","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-057","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F72.pdf","ellbogen-etal-2004-bits",[1828,1831,1834],{"paper_id":1821,"author_seq":247,"given_name":1829,"surname":1830,"affiliation":63,"orcid":63},"Tania","Ellbogen",{"paper_id":1821,"author_seq":232,"given_name":1832,"surname":1833,"affiliation":63,"orcid":63},"Florian","Schiel",{"paper_id":1821,"author_seq":218,"given_name":1081,"surname":1650,"affiliation":63,"orcid":63},"In this paper we announce the new BITS1 Synthesis Corpus for German. The BITS project is funded by the German Ministry of Education and Science to provide a publicly available synthesis corpus for German. The corpus comprises the voices of four German speakers (two male and two female) and consists of two parts: a set of logatome recordings for controlled diphone synthesis and a set of sentence recordings for unit selection. The paper gives an overview about the basic specifications, the profiles of the speakers, the casting procedure and quality control. Annotation and its organisation are described in detail. 
The final BITS speech synthesis corpus will be available via BAS and ELDA probably end of 2005.",{"paper_id":1837,"title":1838,"year":197,"month":855,"day":63,"doi":1839,"resource_url":1840,"first_page":63,"last_page":63,"pdf_url":1841,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1842,"paper_type":860,"authors":1843,"abstract":63},"lrec2004-main-058","MAUS Goes Iterative","10.63317\u002F3xzek8yeuo2o","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-058","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F73.pdf","schiel-2004-maus",[1844],{"paper_id":1837,"author_seq":247,"given_name":1832,"surname":1833,"affiliation":63,"orcid":63},{"paper_id":1846,"title":1847,"year":197,"month":855,"day":63,"doi":1848,"resource_url":1849,"first_page":63,"last_page":63,"pdf_url":1850,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1851,"paper_type":860,"authors":1852,"abstract":1857},"lrec2004-main-059","EuroWordNet as a Resource for Cross-language Information Retrieval","10.63317\u002F5ks97t8frskz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-059","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F76.pdf","stevenson-clough-2004-eurowordnet",[1853,1855],{"paper_id":1846,"author_seq":247,"given_name":1024,"surname":1854,"affiliation":63,"orcid":63},"Stevenson",{"paper_id":1846,"author_seq":232,"given_name":973,"surname":1856,"affiliation":63,"orcid":63},"Clough","One of the aims of EuroWordNet (EWN) was to provide a resource for Cross-Language Information Retrieval (CLIR). In this paper we present experiments to test the usefulness of EWN for this purpose via a formal evaluation using the Spanish queries from the TREC6 CLIR test set. All CLIR systems using bilingual dictionaries must find a way of dealing with multiple translations and we employ a word sense disambiguation algorithm for this purpose. 
Retrieval performance when the disambiguation algorithm was used was 90% of that recorded using queries which had been disambiguated manually.",{"paper_id":1859,"title":1860,"year":197,"month":855,"day":63,"doi":1861,"resource_url":1862,"first_page":63,"last_page":63,"pdf_url":1863,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1864,"paper_type":860,"authors":1865,"abstract":1872},"lrec2004-main-060","Finding the Correct Interpretation of Swedish Compounds, a Statistical Approach","10.63317\u002F56987bva7ys6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-060","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F79.pdf","sjobergh-kann-2004-finding",[1866,1869],{"paper_id":1859,"author_seq":247,"given_name":1867,"surname":1868,"affiliation":63,"orcid":63},"Jonas","Sjöbergh",{"paper_id":1859,"author_seq":232,"given_name":1870,"surname":1871,"affiliation":63,"orcid":63},"Viggo","Kann","This paper treats compound splitting for Swedish, where compounding is productive and very common. A method for splitting compounds and several methods for choosing the correct interpretation of ambiguous compounds are presented. 99% of all compounds are split, 97% of these are correctly interpreted.",{"paper_id":1874,"title":1875,"year":197,"month":855,"day":63,"doi":1876,"resource_url":1877,"first_page":63,"last_page":63,"pdf_url":1878,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1879,"paper_type":860,"authors":1880,"abstract":63},"lrec2004-main-061","Automatic Extraction of Hyponyms from Japanese Newspapers. 
Using Lexico-syntactic Patterns","10.63317\u002F2dmtvnmb2vim","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-061","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F80.pdf","ando-etal-2004-automatic",[1881,1884,1885],{"paper_id":1874,"author_seq":247,"given_name":1882,"surname":1883,"affiliation":63,"orcid":63},"Maya","Ando",{"paper_id":1874,"author_seq":232,"given_name":1690,"surname":1691,"affiliation":63,"orcid":63},{"paper_id":1874,"author_seq":218,"given_name":1886,"surname":1887,"affiliation":63,"orcid":63},"Shun","Ishizaki",{"paper_id":1889,"title":1890,"year":197,"month":855,"day":63,"doi":1891,"resource_url":1892,"first_page":63,"last_page":63,"pdf_url":1893,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1894,"paper_type":860,"authors":1895,"abstract":1905},"lrec2004-main-062","Extending a Verb-lexicon Using a Semantically Annotated Corpus","10.63317\u002F4z9hipjipqqb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-062","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F84.pdf","kipper-etal-2004-extending",[1896,1899,1902],{"paper_id":1889,"author_seq":247,"given_name":1897,"surname":1898,"affiliation":63,"orcid":63},"Karin","Kipper",{"paper_id":1889,"author_seq":232,"given_name":1900,"surname":1901,"affiliation":63,"orcid":63},"Benjamin","Snyder",{"paper_id":1889,"author_seq":218,"given_name":1903,"surname":1904,"affiliation":63,"orcid":63},"Martha","Palmer","This paper describes the association of a hierarchical verb lexicon, VerbNet, with a semantically annotated corpus, the Proposition Bank. It focuses on comparisons of the syntactic coverage of the two resources as a method of evaluating their correspondence. Both VerbNet and PropBank have explicit syntactic frames associated with each verb, which allowed an automatic mapping between the two resources for almost 50,000 instances. 
We aim at both a quantitative and qualitative analysis of how the syntactic frames in VerbNet reflect the syntactic frames that actually occurred in PropBank. VerbNet accounts for about 78% exact matches to these frames and 81% somewhat more relaxed matches.",{"paper_id":1907,"title":1908,"year":197,"month":855,"day":63,"doi":1909,"resource_url":1910,"first_page":63,"last_page":63,"pdf_url":1911,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1912,"paper_type":860,"authors":1913,"abstract":1920},"lrec2004-main-063","The Centre for Dutch Language and Speech Technology (TST Centre)","10.63317\u002F4gfdxgh7mgc8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-063","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F85.pdf","beeken-van-der-kamp-2004-centre",[1914,1917],{"paper_id":1907,"author_seq":247,"given_name":1915,"surname":1916,"affiliation":63,"orcid":63},"J.C.T.","Beeken",{"paper_id":1907,"author_seq":232,"given_name":1918,"surname":1919,"affiliation":63,"orcid":63},"P.H.J.","van der Kamp","In September 2003, the Centre for Language and Speech Technology (TST Centre) was created. The TST Centre is responsible for the maintenance, management, distribution, availability and accessibility of basic digital language resources (LRs) of Dutch that are financed by public funding; the TST Centre is located at the Institute for Dutch Lexicology (INL) in Leiden, the Netherlands. In the first year of its existence, the TST Centre at the INL in Leiden and its dependance at the University of Antwerp (B) made agreements with various Dutch and Flemish academic as well as industrial subcontractors to assist them in performing the tasks linked to the maintenance, management and accessibility of the LRs. The partners involved are the Max Planck Institute in Nijmegen (NL), the University of Leuven (B), the Free University of Amsterdam (NL) and the company Technologie & Integratie in Ghent (B). 
The initial set of LRs entrusted to the Centre are the Spoken Dutch Corpus (CGN), the bilingual and monolingual lexicons of the CLVV (Commissie Lexicografische Vertaalvoorzieningen), and some of the major products of the INL.",{"paper_id":1922,"title":1923,"year":197,"month":855,"day":63,"doi":1924,"resource_url":1925,"first_page":63,"last_page":63,"pdf_url":1926,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1927,"paper_type":860,"authors":1928,"abstract":1932},"lrec2004-main-064","A Global Data Category Registry for Interoperable Language Resources","10.63317\u002F3afmqoh2ei58","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-064","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F87.pdf","wright-2004-global",[1929],{"paper_id":1922,"author_seq":247,"given_name":1930,"surname":1931,"affiliation":63,"orcid":63},"Sue Ellen","Wright","ISO TC 37 is creating a Data Category Registry (DCR) as an online open-source RDF-based resource for use by implementers of electronic language resources, including terminologies, presentational and non-presentational lexical resources, NLP lexica, etc. The DCR will allow dynamic generation of data category selections (DCSs), e.g., subsets of the collection reflecting various thematic domains and different data category classes and functions. The DCR will facilitate interchange and interoperability in heterogeneous environments. 
Participation of a wide range of experts from the broader computing community is important, as is provision for user-friendly guidance for implementers of databases and other resources.",{"paper_id":1934,"title":1935,"year":197,"month":855,"day":63,"doi":1936,"resource_url":1937,"first_page":63,"last_page":63,"pdf_url":1938,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1939,"paper_type":860,"authors":1940,"abstract":1944},"lrec2004-main-065","The Integrated Language Database of 8th - 21st-Century Dutch","10.63317\u002F2omzonqrdfty","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-065","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F88.pdf","kruyt-2004-integrated",[1941],{"paper_id":1934,"author_seq":247,"given_name":1942,"surname":1943,"affiliation":63,"orcid":63},"J. G.","Kruyt","The Institute for Dutch Lexicology (INL) has a long-standing tradition in corpus-based lexicography. The results include electronic scholarly dictionaries of Dutch covering the vocabulary from 1200 up to 1976, linguistically annotated electronic text corpora of historical and present-day Dutch, and computational lexica. Added value to these data is given in an on-going long-term INL project, the Integrated Language Database of 8th-21st-Century Dutch (ILD). The aim is to create a flexible linguistic research instrument by linking the dictionaries, a balanced diachronic text corpus and lexica of historical and present-day Dutch. We will link part of our data with data collections stored at other institutes, creating a supra-institutional research instrument. The paper reports on the overall ILD design and the user's perspective. Focus is on the ILD prototype which, when finished, will function as a demonstration model to verify and assess user needs. It now functions to test the design empirically for its applicability to 'real data', as well as to obtain figures on workload, etc. 
The conclusion is that the latter function proved the prototype to be an indispensable pilot for the ILD.",{"paper_id":1946,"title":1947,"year":197,"month":855,"day":63,"doi":1948,"resource_url":1949,"first_page":63,"last_page":63,"pdf_url":1950,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1951,"paper_type":860,"authors":1952,"abstract":1957},"lrec2004-main-066","From Acts and Topics to Transactions and Dialogue Smoothness","10.63317\u002F2fbznajsp4zs","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-066","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F92.pdf","dybkjaer-dybkjaer-2004-acts",[1953,1955],{"paper_id":1946,"author_seq":247,"given_name":874,"surname":1954,"affiliation":63,"orcid":63},"Dybkjær",{"paper_id":1946,"author_seq":232,"given_name":1956,"surname":1954,"affiliation":63,"orcid":63},"Laila","Measuring transaction success and dialogue smoothness is extremely time consuming and costly when done manually and on many dialogues but is the only possibility today for spoken dialogue systems without a very clear success state. 
This paper investigates the possibility of automatic derivation of transaction success for task-oriented dialogues based on simple act-topic annotations.",{"paper_id":1959,"title":1960,"year":197,"month":855,"day":63,"doi":1961,"resource_url":1962,"first_page":63,"last_page":63,"pdf_url":1963,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1964,"paper_type":860,"authors":1965,"abstract":1969},"lrec2004-main-067","Grouping Synonymous Sentences from a Parallel Corpus","10.63317\u002F5cxebhgvndau","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-067","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F93.pdf","kashioka-2004-grouping",[1966],{"paper_id":1959,"author_seq":247,"given_name":1967,"surname":1968,"affiliation":63,"orcid":63},"Hideki","Kashioka","Recently, natural language processing researches have focused on data or processing techniques for paraphrasing. Unfortunately, however, we have little data for paraphrasing. There are some research reports on collecting synonymous expressions with parallel corpus, though no suitable corpus for collecting a set of paraphrases is yet available. Therefore, we obtain a few variations of expression in paraphrase sets when we tried to apply this method with a parallel corpus. In this paper, we propose a grouping method based on the basic idea of grouping synonymous sentences related to the translation recursively, and decompose incorrect groups using the DM-decomposition algorithm. The incorrect groups include expressions that cannot be paraphrased because some words or expressions have different meanings in different situations. 
We discuss our method and experimental results with respect to BTEC, which is a multilingual parallel corpus.",{"paper_id":1971,"title":1972,"year":197,"month":855,"day":63,"doi":1973,"resource_url":1974,"first_page":63,"last_page":63,"pdf_url":1975,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1976,"paper_type":860,"authors":1977,"abstract":1984},"lrec2004-main-068","Discovery of (New) Knowledge and the Analysis of Text Corpora","10.63317\u002F33a5my9h89im","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-068","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F95.pdf","ahmad-musacchio-2004-discovery",[1978,1981],{"paper_id":1971,"author_seq":247,"given_name":1979,"surname":1980,"affiliation":63,"orcid":63},"Khurshid","Ahmad",{"paper_id":1971,"author_seq":232,"given_name":1982,"surname":1983,"affiliation":63,"orcid":63},"Maria Teresa","Musacchio","This paper describes how methods and techniques developed in corpus linguistics can be used to compare and contrast samples of language use over time and across genres. 
A diachronic Italian corpus of nuclear physics texts belonging to different genres is collected, organised, and analysed to demonstrate the use of language in shaping one of the key sciences of the 20th century.",{"paper_id":1986,"title":1987,"year":197,"month":855,"day":63,"doi":1988,"resource_url":1989,"first_page":63,"last_page":63,"pdf_url":1990,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1991,"paper_type":860,"authors":1992,"abstract":2008},"lrec2004-main-069","Evaluation of Microphone Array Front-Ends for ASR - an Extension of the AURORA Framework","10.63317\u002F38ko7u7bv659","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-069","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F96.pdf","hoge-etal-2004-evaluation",[1993,1996,1999,2002,2005],{"paper_id":1986,"author_seq":247,"given_name":1994,"surname":1995,"affiliation":63,"orcid":63},"Harald","Höge",{"paper_id":1986,"author_seq":232,"given_name":1997,"surname":1998,"affiliation":63,"orcid":63},"Josef G.","Bauer",{"paper_id":1986,"author_seq":218,"given_name":2000,"surname":2001,"affiliation":63,"orcid":63},"Christian","Geißler",{"paper_id":1986,"author_seq":203,"given_name":2003,"surname":2004,"affiliation":63,"orcid":63},"Panji","Setiawan",{"paper_id":1986,"author_seq":188,"given_name":2006,"surname":2007,"affiliation":63,"orcid":63},"Kai","Steinert","The paper is focused on evaluating recognizers for automotive applications using microphone arrays. We propose to extend the framework developed within the AURORA project. Based on measurements of the impulse responses within a given car between the position of the speaker and the microphone array and based on multi-channel in-car noises recorded at the microphone array output we propose to convert the close-talk speech recordings of the AURORA databases to multi-channel databases. The resulting data can be used to evaluate algorithms of microphone array front-ends. 
To compare the performance of these algorithms we start with the AURORA Advanced Front-End (AFE) as a baseline for mono-channel processing and demonstrate the evaluation with basic multi-channel algorithms. Our actual goal is to propose a certain evaluation procedure and to encourage others to support it.",{"paper_id":2010,"title":2011,"year":197,"month":855,"day":63,"doi":2012,"resource_url":2013,"first_page":63,"last_page":63,"pdf_url":2014,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2015,"paper_type":860,"authors":2016,"abstract":2021},"lrec2004-main-070","Development of Slovenian Broadcast News Speech Database","10.63317\u002F4vvihhxuwe5p","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-070","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F98.pdf","zibert-mihelic-2004-development",[2017,2019],{"paper_id":2010,"author_seq":247,"given_name":1784,"surname":2018,"affiliation":63,"orcid":63},"Žibert",{"paper_id":2010,"author_seq":232,"given_name":58,"surname":2020,"affiliation":63,"orcid":63},"Mihelič","The paper reviews the development of a new Slovenian broadcast news speech database. The database consists of audio, video and annotation transcripts of about 34 hours of television daily news program captured from the public TV station RTVSLO. 
The paper addresses issues concerning transcription and annotation of the collected data, provides information on content analysis and basic statistics of the collected material and reports about preliminary evaluation of automatic segmentation.",{"paper_id":2023,"title":2024,"year":197,"month":855,"day":63,"doi":2025,"resource_url":2026,"first_page":63,"last_page":63,"pdf_url":2027,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2028,"paper_type":860,"authors":2029,"abstract":2033},"lrec2004-main-071","A Named Entity Recognizer for Danish","10.63317\u002F434c6motvhdi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-071","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F99.pdf","bick-2004-named",[2030],{"paper_id":2023,"author_seq":247,"given_name":2031,"surname":2032,"affiliation":63,"orcid":63},"Eckhard","Bick","This paper describes how a preexisting Constraint Grammar based parser for Danish (DanGram, Bick 2002) has been adapted and semantically enhanced in order to accommodate for named entity recognition (NER), using rule based and lexical, rather than probabilistic methodology. The project is part of a multi-lingual Nordic initiative, Nomen Nescio, which targets 6 primary name types (human, organisation, place, event, title\u002Fsemantic product and brand\u002Fobject). Training data, examples and statistical text data specifics were taken from the Korpus90\u002F2000 annotation initiative (Bick 2003-1). The NER task is addressed following the progressive multi-level parsing architecture of DanGram, delegating different NER-subtasks to different specialised levels. Thus named entities are successively treated as first strings, words, types, and then as contextual units at the morphological, syntactic and semantic levels, consecutively. 
While lower levels mainly use pattern matching tools, the higher levels make increasing use of context based Constraint Grammar rules on the one hand, and lexical information, both morphological and semantic, on the other hand. Levels are implemented as a sequential chain of Perl-programs and CG-grammars. Two evaluation runs on Korpus90\u002F2000 data showed about 2% chunking errors and false positive\u002Ffalse negative proper noun readings (originating at the lower levels), while the NER-typer as such had a 5% error rate with 0.1 - 0.5% remaining ambiguity, if measured only for correctly chunked proper nouns.",{"paper_id":2035,"title":2036,"year":197,"month":855,"day":63,"doi":2037,"resource_url":2038,"first_page":63,"last_page":63,"pdf_url":2039,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2040,"paper_type":860,"authors":2041,"abstract":2059},"lrec2004-main-072","The GENOMA-KB Project: Towards the Integration of Concepts, Terms, Textual Corpora and Entities","10.63317\u002F4eaxcmdf88zd","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-072","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F100.pdf","cabre-etal-2004-genoma",[2042,2045,2048,2051,2054,2057],{"paper_id":2035,"author_seq":247,"given_name":2043,"surname":2044,"affiliation":63,"orcid":63},"M. 
Teresa","Cabré",{"paper_id":2035,"author_seq":232,"given_name":2046,"surname":2047,"affiliation":63,"orcid":63},"Carme","Bach",{"paper_id":2035,"author_seq":218,"given_name":2049,"surname":2050,"affiliation":63,"orcid":63},"Rosa","Estopà",{"paper_id":2035,"author_seq":203,"given_name":2052,"surname":2053,"affiliation":63,"orcid":63},"Judit","Feliu",{"paper_id":2035,"author_seq":188,"given_name":2055,"surname":2056,"affiliation":63,"orcid":63},"Gemma","Martínez",{"paper_id":2035,"author_seq":172,"given_name":1006,"surname":2058,"affiliation":63,"orcid":63},"Vivaldi","In the past twenty years much effort has been devoted to the development of ontologies and term bases for different fields. All this work has been done separately or with slight integration. The GENOMA-KB is a project whose main aim is to integrate, at least, both resources. In this paper, the most relevant aspects of the project are presented. Each module is individually described and the links among them are highlighted. Finally, a query system to interrogate the knowledge base is briefly introduced.",{"paper_id":2061,"title":2062,"year":197,"month":855,"day":63,"doi":2063,"resource_url":2064,"first_page":63,"last_page":63,"pdf_url":2065,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2066,"paper_type":860,"authors":2067,"abstract":2077},"lrec2004-main-073","Portuguese Large-scale Language Resources for NLP 
Applications","10.63317\u002F23hpx3ecr9b2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-073","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F102.pdf","ranchhod-etal-2004-portuguese",[2068,2071,2073,2076],{"paper_id":2061,"author_seq":247,"given_name":2069,"surname":2070,"affiliation":63,"orcid":63},"Elisabete","Ranchhod",{"paper_id":2061,"author_seq":232,"given_name":1084,"surname":2072,"affiliation":63,"orcid":63},"Carvalho",{"paper_id":2061,"author_seq":218,"given_name":2074,"surname":2075,"affiliation":63,"orcid":63},"Cristina","Mota",{"paper_id":2061,"author_seq":203,"given_name":1063,"surname":1064,"affiliation":63,"orcid":63},"The paper describes Portuguese large-scale linguistic resources, mainly computational lexicons and grammars, developed by LabEL. These resources are formalized and applied to texts by means of finite-state techniques, more and more acknowledged in Natural Language Processing. On the one hand, it illustrates methods on lexical representation for simple words and multi-word expressions; on the other hand, it provides examples (in form of concordances) of linguistic structures recognized after the application of disambiguation and parsing grammars to texts. 
The paper ends with a short reference to the publicly available data highlighting its contribution towards dissemination of LabEL’s knowledge on language technology.",{"paper_id":2079,"title":2080,"year":197,"month":855,"day":63,"doi":2081,"resource_url":2082,"first_page":63,"last_page":63,"pdf_url":2083,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2084,"paper_type":860,"authors":2085,"abstract":2092},"lrec2004-main-074","Development of a Corpus Workbench for the METU Turkish Corpus","10.63317\u002F2xy82pz88ter","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-074","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F103.pdf","ozge-say-2004-development",[2086,2089],{"paper_id":2079,"author_seq":247,"given_name":2087,"surname":2088,"affiliation":63,"orcid":63},"Umut","Özge",{"paper_id":2079,"author_seq":232,"given_name":2090,"surname":2091,"affiliation":63,"orcid":63},"Bilge","Say","We will introduce a corpus workbench designed and implemented for the METU Turkish Corpus. 
The workbench design introduces a number of useful features and the workbench itself is basically usable with any TEI and XML compliant corpus, provided that it can be indexed in the desired way.",{"paper_id":2094,"title":2095,"year":197,"month":855,"day":63,"doi":2096,"resource_url":2097,"first_page":63,"last_page":63,"pdf_url":2098,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2099,"paper_type":860,"authors":2100,"abstract":2106},"lrec2004-main-075","Mercedes, a Term-in-Context Highlighter","10.63317\u002F3hubcje3icbd","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-075","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F104.pdf","araya-vivaldi-2004-mercedes",[2101,2104],{"paper_id":2094,"author_seq":247,"given_name":2102,"surname":2103,"affiliation":63,"orcid":63},"Raúl","Araya",{"paper_id":2094,"author_seq":232,"given_name":2105,"surname":2058,"affiliation":63,"orcid":63},"Jordi","It happens very often that researchers in Terminology need to know about the terms included in a given LSP corpus. One possibility is to run a term extractor but in this case such a tool provides just term candidates, but not valid terms. Therefore, it is mandatory a term validation process that is not always easy and affordable. A different option is to take profit of the public lexical resources (dictionaries and glossaries) that are increasingly freely available through the Internet. To accept only such a terms as the valid ones is usually enough for most of the researches. Mercedes, a term recogniser developed at the IULA, has been designed to fulfil this need. It integrates different public available lexical resources in order to provide researchers with a tool to show and highlight the valid terms (and its contexts) found in a certain text from a given domain. Mercedes consists of two main modules, the recogniser program and the dictionaries module. 
The recogniser can be run on specialised texts that have been previously tagged and lemmatised as part of the IULA's Corpus Tècnic. After running the recogniser, the user can then navigate the output with any web browser.",{"paper_id":2108,"title":2109,"year":197,"month":855,"day":63,"doi":2110,"resource_url":2111,"first_page":63,"last_page":63,"pdf_url":2112,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2113,"paper_type":860,"authors":2114,"abstract":2118},"lrec2004-main-076","The Bilingual Web Dictionary on Demand","10.63317\u002F552zjqbo4h3r","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-076","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F106.pdf","sorensen-2004-bilingual",[2115],{"paper_id":2108,"author_seq":247,"given_name":2116,"surname":2117,"affiliation":63,"orcid":63},"Henrik Selsøe","Sørensen","What do you do when you need to find terminology in a foreign language and available bilingual sources are of no help at all? With the fast-growing complexity in all fields of knowledge and the parallel creation of neologisms needed to distribute the new knowledge, dictionaries and term databases are lagging behind, in particular when you work with two less widely spoken languages. This paper investigates methods and strategies for solving the problem and proposes to lay the grounds for a Bilingual Web Dictionary on Demand. This virtual dictionary is conceived as a number of knowledge-based methodologies allowing users to get across the language barrier using standard search engines and the Web as corpus. The key methodology aims at identifying a target language text containing an equivalent to the source language term using a so-called Cluster Method. Once a candidate term in the target language is identified, it must be validated. 
Intended users are translators, lexicographers, terminologists who must be bilingual in order to be able to select and adjust clusters.",{"paper_id":2120,"title":2121,"year":197,"month":855,"day":63,"doi":2122,"resource_url":2123,"first_page":63,"last_page":63,"pdf_url":2124,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2125,"paper_type":860,"authors":2126,"abstract":2138},"lrec2004-main-077","Making an XML-based Japanese-Slovene Learners’ Dictionary","10.63317\u002F5ev3k2fnz7gb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-077","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F107.pdf","erjavec-etal-2004-making",[2127,2130,2133,2136],{"paper_id":2120,"author_seq":247,"given_name":2128,"surname":2129,"affiliation":63,"orcid":63},"Tomaž","Erjavec",{"paper_id":2120,"author_seq":232,"given_name":2131,"surname":2132,"affiliation":63,"orcid":63},"Kristina Hmeljak","Sangawa",{"paper_id":2120,"author_seq":218,"given_name":2134,"surname":2135,"affiliation":63,"orcid":63},"Irena","Srdanović",{"paper_id":2120,"author_seq":203,"given_name":1795,"surname":2137,"affiliation":63,"orcid":63},"ml. Vahčič","In this paper we present a hypertext dictionary of Japanese lexical units for Slovene students of Japanese at the Faculty of Arts of Ljubljana University. The dictionary is planned as a long-term project in which a simple dictionary is to be gradually enlarged and enhanced, taking into account the needs of the students. Initially, the dictionary was encoded in a tabular format, in a mixture of encodings, and subsequently rendered in HTML. The paper first discusses the conversion of the dictionary into XML, into an encoding that complies with the Text Encoding Initiative (TEI) Guidelines. The conversion into such an encoding validates, enriches, explicates and standardises the structure of the dictionary, thus making it more usable for further development and linguistically oriented research. 
We also present the current Web implementation of the dictionary, which offers full text search and a tool for practising inflected parts of speech. The paper gives an overview of related research, i.e. other XML oriented Web dictionaries of Slovene and East Asian languages and presents planned developments, i.e. the inclusion of the dictionary into the Reading Tutor program.",{"paper_id":2140,"title":2141,"year":197,"month":855,"day":63,"doi":2142,"resource_url":2143,"first_page":63,"last_page":63,"pdf_url":2144,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2145,"paper_type":860,"authors":2146,"abstract":2148},"lrec2004-main-078","MULTEXT-East Version 3: Multilingual Morphosyntactic Specifications, Lexicons and Corpora","10.63317\u002F3jcg5oyv4jwz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-078","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F109.pdf","erjavec-2004-multext",[2147],{"paper_id":2140,"author_seq":247,"given_name":2128,"surname":2129,"affiliation":63,"orcid":63},"The paper presents the third edition of the MULTEXT-East language resources, a multilingual dataset for language engineering research and development. This standardised and linked set of resources covers a large number of mainly Central and Eastern European languages and includes the EAGLES-based morphosyntactic specifications, defining the features that describe word-level syntactic annotations; medium scale morphosyntactic lexica; and annotated parallel, comparable, and speech corpora. The most important component is the linguistically annotated corpus consisting of Orwell's novel \"1984\" in the English original and translations. 
The resources are the results of several EU projects: MULTEXT-East (produced linked resources for Romanian, Slovene, Czech, Bulgarian, Estonian, Hungarian and English), TELRI (added resources for Lithuanian, Croatian, Serbian, and Russian; first release), and CONCEDE (validation, re-encoding; partial re-release). This paper presents the third release of the resources, which brings together the first two, makes them available in TEI P4 XML, and introduces further extensions, e.g. the specification for Resian, a dialect of Slovene. This dataset, unique in terms of languages and the wealth of encoding, is extensively documented, and freely available for research purposes. The paper presents the component resources, reviews some research undertaken on the basis of the first two editions, and discusses future plans.",{"paper_id":2150,"title":2151,"year":197,"month":855,"day":63,"doi":2152,"resource_url":2153,"first_page":63,"last_page":63,"pdf_url":2154,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2155,"paper_type":860,"authors":2156,"abstract":2171},"lrec2004-main-079","A Galician Textual Corpus for Morphosyntactic Tagging with Application to Text-to-Speech Synthesis","10.63317\u002F3j6vtq5qa7pg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-079","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F111.pdf","pereiro-etal-2004-galician",[2157,2160,2163,2166,2168],{"paper_id":2150,"author_seq":247,"given_name":2158,"surname":2159,"affiliation":63,"orcid":63},"Lorena Seijo","Pereiro",{"paper_id":2150,"author_seq":232,"given_name":2161,"surname":2162,"affiliation":63,"orcid":63},"Ana Martínez","Ínsua",{"paper_id":2150,"author_seq":218,"given_name":2164,"surname":2165,"affiliation":63,"orcid":63},"Francisco Méndez","Pazó",{"paper_id":2150,"author_seq":203,"given_name":2167,"surname":986,"affiliation":63,"orcid":63},"Francisco 
Campillo",{"paper_id":2150,"author_seq":188,"given_name":2169,"surname":2170,"affiliation":63,"orcid":63},"Eduardo Rodríguez","Banga","This paper will present the morphosyntactic tagger and the corpus of contemporary written Galician which are being employed in the development of the Galician version of our text-to-speech synthesizer. Their quality and accuracy make them useful for speech technology applications and turn them into possible references for further investigation and research projects about Galician language. In essence, the tagger assigns automatically the morphosyntactic categories and other additional labels to the words in the corpus by resorting to a combination of both a reduced (although highly reliable) set of rules, and a stochastic language model that employs class n-grams whose probabilities are trained using the corpus itself. A bootstrapping technique is employed for tagging the texts contained in the corpus: a small amount of text is initially tagged automatically making use of a reduced set of linguistic rules and then, gathering together the results obtained at this stage of the process (after the manual revision of the tagging), an initial statistical model is built. 
The tagging process may be said to consist essentially of a number of consecutive automatic-tagging stages that enclose: the use of the latest version of the statistical model, the manual revision, and the subsequent updating of the stochastic model with the correctly tagged text.",{"paper_id":2173,"title":2174,"year":197,"month":855,"day":63,"doi":2175,"resource_url":2176,"first_page":63,"last_page":63,"pdf_url":2177,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2178,"paper_type":860,"authors":2179,"abstract":2189},"lrec2004-main-080","The SPARTACUS-Database: a Spanish Sentence Database for Offline Handwriting Recognition","10.63317\u002F3im6oznjusva","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-080","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F112.pdf","espana-etal-2004-spartacus",[2180,2183,2186],{"paper_id":2173,"author_seq":247,"given_name":2181,"surname":2182,"affiliation":63,"orcid":63},"Salvador","España",{"paper_id":2173,"author_seq":232,"given_name":2184,"surname":2185,"affiliation":63,"orcid":63},"María José","Castro",{"paper_id":2173,"author_seq":218,"given_name":2187,"surname":2188,"affiliation":63,"orcid":63},"José Luis","Hidalgo","In this paper we describe a database that consists of offline handwritten Spanish sentences from four different subtasks. The database includes 1,500 forms produced by the same number of writers. A total of around 100,000 word instances out of a vocabulary of around 3,300 words occur in the collection. This database is intended to be used for offline handwriting recognition tasks. However, this database is expected to be specially useful for recognition systems that may take advantage of language models of restricted-semantic tasks. 
The database also includes a few image-processing procedures for extraction of handwritten text images from the forms and segmentation of the images into lines and words.",{"paper_id":2191,"title":2192,"year":197,"month":855,"day":63,"doi":2193,"resource_url":2194,"first_page":63,"last_page":63,"pdf_url":2195,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2196,"paper_type":860,"authors":2197,"abstract":2207},"lrec2004-main-081","Exploring Balkanet Shared Ontology for Multilingual Conceptual Indexing","10.63317\u002F3uro6avpkr7z","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-081","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F113.pdf","stamou-etal-2004-exploring",[2198,2201,2204],{"paper_id":2191,"author_seq":247,"given_name":2199,"surname":2200,"affiliation":63,"orcid":63},"Sofia","Stamou",{"paper_id":2191,"author_seq":232,"given_name":2202,"surname":2203,"affiliation":63,"orcid":63},"Goran","Nenadic",{"paper_id":2191,"author_seq":218,"given_name":2205,"surname":2206,"affiliation":63,"orcid":63},"Dimitris","Christodoulakis","As the size of the Web grows, it becomes an imperative to equip search engines with sophisticated indexing modules in order to enable a meaningful organization of the stored data. In this paper we present a structured multilingual conceptual repository that has been employed as the backbone of a conceptual indexing and retrieval system. Our conceptual warehouse originates from a multilingual semantic network (Balkanet) and its Inter-Lingual-Index, which was enriched with domain ontology information inherited from the SUMO ontology. We report on the ontology's design principles and provide a description of its structure. We argue that an important attribute of the Balkanet’s ILI is its flexibility in incorporating new concepts and\u002For languages by allowing the percolation of shared semantic attributes to all concepts represented within taxonomies. 
We further present our approach to conceptual indexing, and introduce an indexing algorithm that utilizes Balkanet’s classified conceptual taxonomies. Finally, we discuss how conceptual taxonomies can help retrieval algorithms in making links between terms used in search requests and semantically related terms that might be found in the indexed documents.",{"paper_id":2209,"title":2210,"year":197,"month":855,"day":63,"doi":2211,"resource_url":2212,"first_page":63,"last_page":63,"pdf_url":2213,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2214,"paper_type":860,"authors":2215,"abstract":2225},"lrec2004-main-082","Building a Paraphrase Corpus for Speech Translation","10.63317\u002F5hkbhrdm4ghu","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-082","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F114.pdf","shimohata-etal-2004-building",[2216,2219,2222],{"paper_id":2209,"author_seq":247,"given_name":2217,"surname":2218,"affiliation":63,"orcid":63},"Mitsuo","Shimohata",{"paper_id":2209,"author_seq":232,"given_name":2220,"surname":2221,"affiliation":63,"orcid":63},"Eiichiro","Sumita",{"paper_id":2209,"author_seq":218,"given_name":2223,"surname":2224,"affiliation":63,"orcid":63},"Yuji","Matsumoto","When a machine translation (MT) system receives input sentences of spoken language, the following two types of sentences are difficult to translate: (1) long sentences and (2) sentences having redundant expressions often seen in spoken language. To reduce these difficulties, we are developing methods to paraphrase input sentences into more translatable ones. In this paper, we report a preliminary Japanese paraphrase corpus. The corpus consists of original sentences derived from travel conversation and versions of them paraphrased by humans. We use three paraphrasing methods: plain, segment, and summary paraphrasing. 
Plain paraphrasing is applied to short sentences, where redundant expressions are replaced with plain ones. Segment and summary paraphrasing is applied to long sentences, where long sentences are converted into one or several short sentences. We also report a comparison of machine translation quality between the original sentences and the paraphrased sentences. We use two corpus-based machine translation systems in the experiment.",{"paper_id":2227,"title":2228,"year":197,"month":855,"day":63,"doi":2229,"resource_url":2230,"first_page":63,"last_page":63,"pdf_url":2231,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2232,"paper_type":860,"authors":2233,"abstract":2247},"lrec2004-main-083","Incremental Methods to Select Test Sentences for Evaluating Translation Ability","10.63317\u002F34wm7zvaa6ve","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-083","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F115.pdf","akiba-etal-2004-incremental",[2234,2237,2238,2241,2244],{"paper_id":2227,"author_seq":247,"given_name":2235,"surname":2236,"affiliation":63,"orcid":63},"Yasuhiro","Akiba",{"paper_id":2227,"author_seq":232,"given_name":2220,"surname":2221,"affiliation":63,"orcid":63},{"paper_id":2227,"author_seq":218,"given_name":2239,"surname":2240,"affiliation":63,"orcid":63},"Hiromi","Nakaiwa",{"paper_id":2227,"author_seq":203,"given_name":2242,"surname":2243,"affiliation":63,"orcid":63},"Seiichi","Yamamoto",{"paper_id":2227,"author_seq":188,"given_name":2245,"surname":2246,"affiliation":63,"orcid":63},"Hiroshi G.","Okuno","This paper addresses the problem of selecting test sentences for automatically evaluating language learners' translation ability within a smaller error. In this paper, the ability to translate is measured as a TOEIC score. 
The existing selection methods only check whether an individual test sentence contributes to the estimation of the ability to translate or that of more general academic abilities, although combinations of test sentences may be used to contribute to the estimation. This paper proposes two methods that solve the selection problem. The first method selects test sentences to minimize the estimation errors of learners' TOEIC scores. The second method selects test sentences to maximize the correlation coefficient between the number of correct translations and learners' estimated TOEIC scores. The optimization technique used in both of the proposed methods is the gradient technique in mathematical programming. The proposed methods proved to be more accurate than any of the existing methods we tested, and they estimated each TOEIC score within a permissible error of 69 points.",{"paper_id":2249,"title":2250,"year":197,"month":855,"day":63,"doi":2251,"resource_url":2252,"first_page":63,"last_page":63,"pdf_url":2253,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2254,"paper_type":860,"authors":2255,"abstract":2258},"lrec2004-main-084","Reusable Lexical Representations for Idioms","10.63317\u002F2bzm2d5342u2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-084","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F116.pdf","odijk-2004-reusable",[2256],{"paper_id":2249,"author_seq":247,"given_name":1078,"surname":2257,"affiliation":63,"orcid":63},"Odijk","In this paper I introduce (1) a technically simple and highly theory-independent way for lexically representing flexible idiomatic expressions, and (2) a procedure to incorporate these lexical representations in a wide variety of NLP systems. The method is based on Structural EQuivalence Classes for Idioms and therefore called the SEQCI method. I illustrate the approach using the Rosetta MT system as an example of an NLP system. 
I discuss the advantages and some possible objections to the method. I conclude that the method is a good candidate for a standard for the lexical representation of idioms. The method also has the potential to be used for multi-word expressions other than idioms.",{"paper_id":2260,"title":2261,"year":197,"month":855,"day":63,"doi":2262,"resource_url":2263,"first_page":63,"last_page":63,"pdf_url":2264,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2265,"paper_type":860,"authors":2266,"abstract":2273},"lrec2004-main-085","The Design of Czech Language Formal Listening Tests for the Evaluation of TTS Systems","10.63317\u002F2mzisujdmvjn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-085","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F119.pdf","tihelka-matousek-2004-design",[2267,2270],{"paper_id":2260,"author_seq":247,"given_name":2268,"surname":2269,"affiliation":63,"orcid":63},"Daniel","Tihelka",{"paper_id":2260,"author_seq":232,"given_name":2271,"surname":2272,"affiliation":63,"orcid":63},"Jindřich","Matoušek","This paper presents an attempt to design listening tests for the Czech synthesis speech evaluation. The design is based on standardized and widely used listening tests for English; therefore, we can benefit from the advantages provided by standards. Bearing the Czech language phenomena in mind, we filled the standard frameworks of several listening tests, especially the MRT (Modified Rhyme Test) and the SUS (Semantically Unpredictable Sentences) test; the Czech National Corpus was used for this purpose. Designed tests were instantly used for real tests in which 88 people took part, a procedure which proved correct. 
This was the first attempt to design Czech listening tests according to given standard frameworks and it was successful.",{"paper_id":2275,"title":2276,"year":197,"month":855,"day":63,"doi":2277,"resource_url":2278,"first_page":63,"last_page":63,"pdf_url":2279,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2280,"paper_type":860,"authors":2281,"abstract":63},"lrec2004-main-086","A Data-driven Adaptation of Prosody in a Multilingual TTS","10.63317\u002F4nws8aesp4fm","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-086","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F130.pdf","stergar-etal-2004-data",[2282,2284,2287,2288],{"paper_id":2275,"author_seq":247,"given_name":1784,"surname":2283,"affiliation":63,"orcid":63},"Stergar",{"paper_id":2275,"author_seq":232,"given_name":2285,"surname":2286,"affiliation":63,"orcid":63},"Caglayan","Erdem",{"paper_id":2275,"author_seq":218,"given_name":1612,"surname":1613,"affiliation":63,"orcid":63},{"paper_id":2275,"author_seq":203,"given_name":1584,"surname":1585,"affiliation":63,"orcid":63},{"paper_id":2290,"title":2291,"year":197,"month":855,"day":63,"doi":2292,"resource_url":2293,"first_page":63,"last_page":63,"pdf_url":2294,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2295,"paper_type":860,"authors":2296,"abstract":2315},"lrec2004-main-087","MiniCors and Cast3LB: Two Semantically Tagged Spanish 
Corpora","10.63317\u002F4ygkrqori6n5","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-087","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F121.pdf","taule-etal-2004-minicors",[2297,2300,2302,2305,2307,2310,2312],{"paper_id":2290,"author_seq":247,"given_name":2298,"surname":2299,"affiliation":63,"orcid":63},"M.","Taulé",{"paper_id":2290,"author_seq":232,"given_name":2298,"surname":2301,"affiliation":63,"orcid":63},"Civit",{"paper_id":2290,"author_seq":218,"given_name":2303,"surname":2304,"affiliation":63,"orcid":63},"N.","Artigas",{"paper_id":2290,"author_seq":203,"given_name":2298,"surname":2306,"affiliation":63,"orcid":63},"García",{"paper_id":2290,"author_seq":188,"given_name":2308,"surname":2309,"affiliation":63,"orcid":63},"L.","Màrquez",{"paper_id":2290,"author_seq":172,"given_name":2311,"surname":988,"affiliation":63,"orcid":63},"M.A.",{"paper_id":2290,"author_seq":155,"given_name":2313,"surname":2314,"affiliation":63,"orcid":63},"B.","Navarro","In this paper we present two Spanish corpora, MiniCors and Cast3LB, semantically tagged according to different annotation criteria and objectives. In order to guarantee the quality of the results, we have established a methodology for the development of these corpora. 
The resulting resources consist of a semantically tagged corpus according to the lexical sample task, and a semantically tagged corpus according to the all words task, both of them defined within the Senseval framework.",{"paper_id":2317,"title":2318,"year":197,"month":855,"day":63,"doi":2319,"resource_url":2320,"first_page":63,"last_page":63,"pdf_url":2321,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2322,"paper_type":860,"authors":2323,"abstract":2327},"lrec2004-main-088","Probabilistic Detection of Context-Sensitive Spelling Errors","10.63317\u002F4r35may9oxf2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-088","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F122.pdf","bigert-2004-probabilistic",[2324],{"paper_id":2317,"author_seq":247,"given_name":2325,"surname":2326,"affiliation":63,"orcid":63},"Johnny","Bigert","This article focuses on the evaluation of a novel algorithm for the detection of context-sensitive spelling errors. We present a fully automatic evaluation procedure with no requirements of manual work or resources annotated with spelling errors. 
The evaluation method is applicable to any language and tag set, and is easily adaptable to other NLP systems such as taggers and parsers.",{"paper_id":2329,"title":2330,"year":197,"month":855,"day":63,"doi":2331,"resource_url":2332,"first_page":63,"last_page":63,"pdf_url":2333,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2334,"paper_type":860,"authors":2335,"abstract":2355},"lrec2004-main-089","Acquisition and Annotation of Slovenian Broadcast News Database","10.63317\u002F252dprucwskf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-089","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F123.pdf","zgank-etal-2004-acquisition",[2336,2339,2341,2344,2345,2347,2350,2353,2354],{"paper_id":2329,"author_seq":247,"given_name":2337,"surname":2338,"affiliation":63,"orcid":63},"Andrej","Žgank",{"paper_id":2329,"author_seq":232,"given_name":2128,"surname":2340,"affiliation":63,"orcid":63},"Rotovnik",{"paper_id":2329,"author_seq":218,"given_name":2342,"surname":2343,"affiliation":63,"orcid":63},"Mirjam Sepesy","Maučec",{"paper_id":2329,"author_seq":203,"given_name":1578,"surname":1579,"affiliation":63,"orcid":63},{"paper_id":2329,"author_seq":188,"given_name":1784,"surname":2346,"affiliation":63,"orcid":63},"Kitak",{"paper_id":2329,"author_seq":172,"given_name":2348,"surname":2349,"affiliation":63,"orcid":63},"Damjan","Vlaj",{"paper_id":2329,"author_seq":155,"given_name":2351,"surname":2352,"affiliation":63,"orcid":63},"Vladimir","Hozjan",{"paper_id":2329,"author_seq":138,"given_name":1584,"surname":1585,"affiliation":63,"orcid":63},{"paper_id":2329,"author_seq":121,"given_name":1612,"surname":1613,"affiliation":63,"orcid":63},"This paper presents the Slovenian Broadcast News Database project that was started in year 2002 as cooperation between University of Maribor and Slovenian national broadcaster RTV Slovenia. 
The resulting database will be used for large vocabulary continuous speech recognition and multimedia database retrieval or archive indexation. First some organizational aspects that were needed in initial phase of the project are described. The raw audio and video material was acquired from the original Analog Beta SP Master tapes that are preserved in the RTV Slovenia's archive. Raw material was copied to DAT and DVD media. Also additional teletext material was collected. The manual annotation of speech material is performed with the Transcriber tool. The annotation rules were defined on the basis of general rules for Broadcast News databases, with some special language dependent sections. Also some statistics on a part of current material are given.",{"paper_id":2357,"title":2358,"year":197,"month":855,"day":63,"doi":2359,"resource_url":2360,"first_page":63,"last_page":63,"pdf_url":2361,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2362,"paper_type":860,"authors":2363,"abstract":2381},"lrec2004-main-090","The COST 278 MASPER Initiative - Crosslingual Speech Recognition with Large Telephone 
Databases","10.63317\u002F4u3f65hffhha","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-090","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F124.pdf","zgank-etal-2004-cost",[2364,2365,2366,2369,2372,2375,2378],{"paper_id":2357,"author_seq":247,"given_name":2337,"surname":2338,"affiliation":63,"orcid":63},{"paper_id":2357,"author_seq":232,"given_name":1584,"surname":1585,"affiliation":63,"orcid":63},{"paper_id":2357,"author_seq":218,"given_name":2367,"surname":2368,"affiliation":63,"orcid":63},"Frank","Diehl",{"paper_id":2357,"author_seq":203,"given_name":2370,"surname":2371,"affiliation":63,"orcid":63},"Klara","Vicsi",{"paper_id":2357,"author_seq":188,"given_name":2373,"surname":2374,"affiliation":63,"orcid":63},"Gyorgy","Szaszak",{"paper_id":2357,"author_seq":172,"given_name":2376,"surname":2377,"affiliation":63,"orcid":63},"Jozef","Juhar",{"paper_id":2357,"author_seq":155,"given_name":2379,"surname":2380,"affiliation":63,"orcid":63},"Slavomir","Lihan","This paper presents the work on crosslingual speech recognition carried out by the MASPER initiative that was formed as a part of the COST 278 Action. Two different approaches for transferring monolingual source acoustic models to a new language were compared. The first one was expert-driven, based on the IPA scheme. The second was data-driven, based on a crosslingual phoneme confusion matrix. German, Spanish, Hungarian and Slovak were used as source languages. Slovenian was selected to be the target language. All experiments were carried out on SpeechDat databases. 
The results' analysis showed that the expert-driven method outperforms the data-driven one, and that similarities between source and target language have a significant influence on the performance.",{"paper_id":2383,"title":2384,"year":197,"month":855,"day":63,"doi":2385,"resource_url":2386,"first_page":63,"last_page":63,"pdf_url":2387,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2388,"paper_type":860,"authors":2389,"abstract":2393},"lrec2004-main-091","Utilizing the One-Sense-per-Discourse Constraint for Fully Unsupervised Word Sense Induction and Disambiguation","10.63317\u002F3ae4nds3wmyi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-091","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F125.pdf","rapp-2004-utilizing",[2390],{"paper_id":2383,"author_seq":247,"given_name":2391,"surname":2392,"affiliation":63,"orcid":63},"Reinhard","Rapp","Recent advances in word sense induction rely on clustering related words. In this paper, instead of using a clustering algorithm, we suggest to perform a Singular Value Decomposition (SVD) which can be guaranteed to always find a global optimum. However, in order to apply this method to the problem of word sense induction, a semantic interpretation of the dimensions computed by the SVD is required. Our finding is that in our specific setting the first dimension relates to semantic similarities between words, and the second dimension distinguishes between the two main senses of an ambiguous word. 
Based on this result we present an algorithm for fully unsupervised word sense induction and disambiguation.",{"paper_id":2395,"title":2396,"year":197,"month":855,"day":63,"doi":2397,"resource_url":2398,"first_page":63,"last_page":63,"pdf_url":2399,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2400,"paper_type":860,"authors":2401,"abstract":2403},"lrec2004-main-092","A Freely Available Automatically Generated Thesaurus of Related Words","10.63317\u002F2g6of5nnznhw","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-092","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F127.pdf","rapp-2004-freely",[2402],{"paper_id":2395,"author_seq":247,"given_name":2391,"surname":2392,"affiliation":63,"orcid":63},"A freely available English thesaurus of related words is presented that has been automatically compiled by analyzing the distributional similarities of words in the British National Corpus. The quality of the results has been evaluated by comparison with human judgments as obtained from non-native and native speakers of English who were asked to provide rankings of word similarities. According to this measure, the results generated by our system are better than the judgments of the non-native speakers and come close to the native speakers’ performance. An advantage of our approach is that it does not require syntax parsing and therefore can be more easily adapted to other languages. 
As an example, a similar thesaurus for German has already been completed.",{"paper_id":2405,"title":2406,"year":197,"month":855,"day":63,"doi":2407,"resource_url":2408,"first_page":63,"last_page":63,"pdf_url":2409,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2410,"paper_type":860,"authors":2411,"abstract":2418},"lrec2004-main-093","Using a Parallel Transcript\u002FSubtitle Corpus for Sentence Compression","10.63317\u002F2zdefmrmcbbn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-093","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F128.pdf","vandeghinste-tjong-kim-sang-2004-using",[2412,2415],{"paper_id":2405,"author_seq":247,"given_name":2413,"surname":2414,"affiliation":63,"orcid":63},"Vincent","Vandeghinste",{"paper_id":2405,"author_seq":232,"given_name":2416,"surname":2417,"affiliation":63,"orcid":63},"Erik","Tjong Kim Sang","The paper describes the construction and usage of a parallel corpus consisting of transcripts of television programs on the one hand and subtitles of those television programs on the other hand. The subtitles were targeted at hearing-impaired people. They are in the same language as the television programs (Dutch). Our goal is to convert transcripts to subtitles. 
We will apply the corpus for learning how to perform sentence compression in much the same way as Jing (2001).",{"paper_id":2420,"title":2421,"year":197,"month":855,"day":63,"doi":2422,"resource_url":2423,"first_page":63,"last_page":63,"pdf_url":2424,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2425,"paper_type":860,"authors":2426,"abstract":2429},"lrec2004-main-094","Handling Subtle Sense Distinctions Through Wordnet Semantic Types","10.63317\u002F2ytrdnsxc347","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-094","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F133.pdf","stamou-christodoulakis-2004-handling",[2427,2428],{"paper_id":2420,"author_seq":247,"given_name":2199,"surname":2200,"affiliation":63,"orcid":63},{"paper_id":2420,"author_seq":232,"given_name":2205,"surname":2206,"affiliation":63,"orcid":63},"In this paper we challenge the question of whether there is value in having multiple layers of semantic information associated with corpus semantic annotation. In this context we introduce a semantic annotation experiment in which novice annotators were asked to assign sense tags to a set of polysemous corpus nouns, using Wordnet as their referential sense repository. Wordnet is a rich sense inventory that provides explicit information of the semantic types associated with every word sense. To measure the effect semantic types’ knowledge has on the sense assignment process, we carried out two annotation sessions. In the first session, annotators relied exclusively on Wordnet synsets to annotate corpus nouns, whereas in the second session the same pool of annotators examined Wordnet synsets in conjunction with their semantic types, prior to assigning a sense tag. 
Comparing annotators’ performance in both sessions shows that when consulting semantic types, annotators assigned more salient senses to highly polysemous nouns, whereas for the same set of terms, when relying exclusively on Wordnet synsets, annotators tended to assign narrower senses, which whatsoever were more error-prone. Results indicate that semantic types have a potential in dealing with subtle sense distinctions in the course of corpus annotation.",{"paper_id":2431,"title":2432,"year":197,"month":855,"day":63,"doi":2433,"resource_url":2434,"first_page":63,"last_page":63,"pdf_url":2435,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2436,"paper_type":860,"authors":2437,"abstract":2444},"lrec2004-main-095","Multi-lingual Evaluation of a Natural Language Generation System","10.63317\u002F4cav3xnsmeib","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-095","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F134.pdf","karasimos-isard-2004-multi",[2438,2441],{"paper_id":2431,"author_seq":247,"given_name":2439,"surname":2440,"affiliation":63,"orcid":63},"Athanasios","Karasimos",{"paper_id":2431,"author_seq":232,"given_name":2442,"surname":2443,"affiliation":63,"orcid":63},"Amy","Isard","This paper describes a user evaluation of the text output from the M-PIRO (Multilingual Personalised Information Objects) system, which dynamically generates descriptions of exhibits for a virtual museum. We show that subjects performed significantly better in a factual recall test when the descriptions included more sophisticated text structuring modules. 
The subjects also judged the structured texts to be more interesting and readable, and felt that they had learned more from them.",{"paper_id":2446,"title":2447,"year":197,"month":855,"day":63,"doi":2448,"resource_url":2449,"first_page":63,"last_page":63,"pdf_url":2450,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2451,"paper_type":860,"authors":2452,"abstract":2462},"lrec2004-main-096","The Tüba-D\u002FZ Treebank: Annotating German with a Context-Free Backbone","10.63317\u002F4zwasx6q5qah","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-096","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F135.pdf","telljohann-etal-2004-tuba",[2453,2456,2459],{"paper_id":2446,"author_seq":247,"given_name":2454,"surname":2455,"affiliation":63,"orcid":63},"Heike","Telljohann",{"paper_id":2446,"author_seq":232,"given_name":2457,"surname":2458,"affiliation":63,"orcid":63},"Erhard","Hinrichs",{"paper_id":2446,"author_seq":218,"given_name":2460,"surname":2461,"affiliation":63,"orcid":63},"Sandra","Kübler","The purpose of this paper is to describe the TüBa-D\u002FZ treebank of written German and to compare it to the independently developed TIGER treebank. Both treebanks, TIGER and TüBa-D\u002FZ, use an annotation framework that is based on phrase structure grammar and that is enhanced by a level of predicate-argument structure. 
The comparison between the annotation schemes of the two treebanks focuses on the different treatments of free word order and discontinuous constituents in German as well as on differences in phrase-internal annotation.",{"paper_id":2464,"title":2465,"year":197,"month":855,"day":63,"doi":2466,"resource_url":2467,"first_page":63,"last_page":63,"pdf_url":2468,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2469,"paper_type":860,"authors":2470,"abstract":2484},"lrec2004-main-097","The NIST Meeting Room Pilot Corpus","10.63317\u002F2rot3oygexpw","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-097","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F137.pdf","garofolo-etal-2004-nist",[2471,2472,2475,2478,2481],{"paper_id":2464,"author_seq":247,"given_name":1664,"surname":1665,"affiliation":63,"orcid":63},{"paper_id":2464,"author_seq":232,"given_name":2473,"surname":2474,"affiliation":63,"orcid":63},"Christophe D.","Laprun",{"paper_id":2464,"author_seq":218,"given_name":2476,"surname":2477,"affiliation":63,"orcid":63},"Martial","Michel",{"paper_id":2464,"author_seq":203,"given_name":2479,"surname":2480,"affiliation":63,"orcid":63},"Vincent M.","Stanford",{"paper_id":2464,"author_seq":188,"given_name":2482,"surname":2483,"affiliation":63,"orcid":63},"Elham","Tabassi","One of the next big challenges in Automatic Speech Recognition (ASR) is the transcription of speech in meetings. This task is particularly problematic for current recognition technologies because, in most realistic meeting scenarios, the vocabularies are unconstrained, the speech is spontaneous and often overlapping, and the microphones are inconspicuously placed. 
To support the development of meeting recognition technologies by both the speech recognition and video extraction research communities, NIST is providing a development and evaluation infrastructure including: a multi-media corpus of audio and video from meetings collected at NIST using a variety of microphones and video cameras, new evaluation protocols, metrics, software, rich transcription conventions, sponsoring evaluations and workshops, facilitating multi-site data pooling, and helping bring the community together to focus on the technical challenges. To date, NIST has collected a pilot corpus of 15 hours of meetings in its specially-instrumented Meeting Data Collection Laboratory. The corpus includes digital recordings from close-talking mics, lapel mics, distantly-placed mics, 5 digitally-recorded camera views, and full speaker\u002Fword-level transcripts. This data is being used in the development and evaluation of speech technologies and by the video extraction community under the auspices of the ARDA Video Analysis and Content Exploitation (VACE) program.",{"paper_id":2486,"title":2487,"year":197,"month":855,"day":63,"doi":2488,"resource_url":2489,"first_page":63,"last_page":63,"pdf_url":2490,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2491,"paper_type":860,"authors":2492,"abstract":2505},"lrec2004-main-098","Securing Interpretability: The Case of Ega Language 
Documentation","10.63317\u002F2anwtx949d29","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-098","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F138.pdf","gibbon-etal-2004-securing",[2493,2496,2499,2502],{"paper_id":2486,"author_seq":247,"given_name":2494,"surname":2495,"affiliation":63,"orcid":63},"Dafydd","Gibbon",{"paper_id":2486,"author_seq":232,"given_name":2497,"surname":2498,"affiliation":63,"orcid":63},"Catherine","Bow",{"paper_id":2486,"author_seq":218,"given_name":2500,"surname":2501,"affiliation":63,"orcid":63},"Steven","Bird",{"paper_id":2486,"author_seq":203,"given_name":2503,"surname":2504,"affiliation":63,"orcid":63},"Baden","Hughes","The prime consideration in designing sustainable language resources is to ensure that they remain interpretable for coming generations of users. In this paper we adopt a new perspective on resource creation - securing the interpretability of data, using a case study of Ega, an endangered African language for which a small amount of legacy data is available. Basic steps to securing interpretability are to transfer files to durable media, and where possible, to convert all legacy data into XML files with Unicode character encodings. In the absence of agreed `best practice' standards, we propose a methodology of `better practice' to assist in the transition process towards this goal. 
We discuss a number of issues involved in securing interpretability of the lexicon, character encodings, interlinear glossed text, annotated recordings and nomenclature in linguistic descriptions, and describe our solutions.",{"paper_id":2507,"title":2508,"year":197,"month":855,"day":63,"doi":2509,"resource_url":2510,"first_page":63,"last_page":63,"pdf_url":2511,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2512,"paper_type":860,"authors":2513,"abstract":2520},"lrec2004-main-099","A Comparative Study on Human Communication Behaviors and Linguistic Characteristics for Speech-to-Speech Translation","10.63317\u002F3pkj7kd7s6p2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-099","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F139.pdf","takezawa-kikui-2004-comparative",[2514,2517],{"paper_id":2507,"author_seq":247,"given_name":2515,"surname":2516,"affiliation":63,"orcid":63},"Toshiyuki","Takezawa",{"paper_id":2507,"author_seq":232,"given_name":2518,"surname":2519,"affiliation":63,"orcid":63},"Genichiro","Kikui","A large bilingual corpus of English and Japanese is being built at ATR Spoken Language Translation Research Laboratories in order to improve speech translation technology to the level where people can use a portable translation system for traveling abroad, dining and shopping, and hotel situations. As a part of these corpus construction activities, we have been collecting spoken dialogue data by using an experimental translation system between English and Japanese. In a previous study, we found that humans communicate as part of their daily social life, so they prefer using complex sentences and saying more than one sentence per utterance. However, corpus-based machine translation systems for conversational expressions tend to be limited to dealing with short simple sentences. 
To find a way to bridge the gap between human communication behaviors and system performance, we examined the relationship between instructions and linguistic expressions. The experimental results suggest that a state-of-the-art translation system may be useful for subjects who can make their utterance length short by following instructions.",{"paper_id":2522,"title":2523,"year":197,"month":855,"day":63,"doi":2524,"resource_url":2525,"first_page":63,"last_page":63,"pdf_url":2526,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2527,"paper_type":860,"authors":2528,"abstract":63},"lrec2004-main-100","Cost-effective Cross-lingual Document Classification","10.63317\u002F5o28xtyb558p","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-100","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F201.pdf","bel-etal-2004-cost",[2529,2532,2533],{"paper_id":2522,"author_seq":247,"given_name":2530,"surname":2531,"affiliation":63,"orcid":63},"Núria","Bel",{"paper_id":2522,"author_seq":232,"given_name":958,"surname":959,"affiliation":63,"orcid":63},{"paper_id":2522,"author_seq":218,"given_name":2534,"surname":2535,"affiliation":63,"orcid":63},"Marta","Villegas",{"paper_id":2537,"title":2538,"year":197,"month":855,"day":63,"doi":2539,"resource_url":2540,"first_page":63,"last_page":63,"pdf_url":2541,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2542,"paper_type":860,"authors":2543,"abstract":2549},"lrec2004-main-101","A Powerful and Versatile XML Format for Representing Role-semantic 
Annotation","10.63317\u002F4dqd789mhqek","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-101","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F202.pdf","erk-pado-2004-powerful",[2544,2547],{"paper_id":2537,"author_seq":247,"given_name":2545,"surname":2546,"affiliation":63,"orcid":63},"Katrin","Erk",{"paper_id":2537,"author_seq":232,"given_name":1075,"surname":2548,"affiliation":63,"orcid":63},"Padó","We present two XML formats for the description and encoding of semantic role information in corpora. The TIGER\u002FSALSA XML format provides a modular representation for semantic roles and syntactic structure. The Text-SALSA XML format is a lightweight version of TIGER\u002FSALSA XML designed for manual annotation with an XML editor rather than a special tool. Both formats can deal with underspecification, roles crossing the sentence boundary, compound splitting, and whole-sentence tags for meta-level comments.",{"paper_id":2551,"title":2552,"year":197,"month":855,"day":63,"doi":2553,"resource_url":2554,"first_page":63,"last_page":63,"pdf_url":2555,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2556,"paper_type":860,"authors":2557,"abstract":2582},"lrec2004-main-102","The MULI Project: Annotation and Analysis of Information Structure in German and 
English","10.63317\u002F5m6hv2qjdkze","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-102","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F204.pdf","baumann-etal-2004-muli",[2558,2560,2563,2566,2569,2572,2575,2578,2581],{"paper_id":2551,"author_seq":247,"given_name":961,"surname":2559,"affiliation":63,"orcid":63},"Baumann",{"paper_id":2551,"author_seq":232,"given_name":2561,"surname":2562,"affiliation":63,"orcid":63},"Caren","Brinckmann",{"paper_id":2551,"author_seq":218,"given_name":2564,"surname":2565,"affiliation":63,"orcid":63},"Silvia","Hansen-Schirra",{"paper_id":2551,"author_seq":203,"given_name":2567,"surname":2568,"affiliation":63,"orcid":63},"Geert-Jan","Kruijff",{"paper_id":2551,"author_seq":188,"given_name":2570,"surname":2571,"affiliation":63,"orcid":63},"Ivana","Kruijff-Korbayová",{"paper_id":2551,"author_seq":172,"given_name":2573,"surname":2574,"affiliation":63,"orcid":63},"Stella","Neumann",{"paper_id":2551,"author_seq":155,"given_name":2576,"surname":2577,"affiliation":63,"orcid":63},"Erich","Steiner",{"paper_id":2551,"author_seq":138,"given_name":2579,"surname":2580,"affiliation":63,"orcid":63},"Elke","Teich",{"paper_id":2551,"author_seq":121,"given_name":874,"surname":875,"affiliation":63,"orcid":63},"The goal of the MULI (MUltiLingual Information structure) project is to empirically analyse information structure in German and English newspaper texts. In contrast to other projects in which information structure is annotated and investigated (e.g. in the Prague Dependency Treebank, which mirrors the basic information about the topic-focus articulation of the sentence), we do not annotate theory-biased categories like topic-focus or theme-rheme. Trying to be as theory-independent as possible, we annotate those features which are relevant to information structure and on the basis of which typical patterns, co-occurrences or correlations can be determined. 
We distinguish between three annotation levels: syntax, discourse and prosody. The data is based on the TIGER Corpus for German and the Penn Treebank for English, since the existing information on part-of-speech and syntactic structure can be re-used for our purposes. The actual annotation of an English example sequence illustrates our choice of categories on each level. Their combination offers the possibility to investigate how information structure is realised and can be interpreted.",{"paper_id":2584,"title":2585,"year":197,"month":855,"day":63,"doi":2586,"resource_url":2587,"first_page":63,"last_page":63,"pdf_url":2588,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2589,"paper_type":860,"authors":2590,"abstract":2594},"lrec2004-main-103","Putting the Dutch PAROLE Corpus to Work","10.63317\u002F3ewh995pvtus","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-103","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F207.pdf","van-der-kamp-kruyt-2004-putting",[2591,2593],{"paper_id":2584,"author_seq":247,"given_name":2592,"surname":1919,"affiliation":63,"orcid":63},"P. H. J.",{"paper_id":2584,"author_seq":232,"given_name":1942,"surname":1943,"affiliation":63,"orcid":63},"We discuss the activities towards the development of the retrieval application of the Dutch PAROLE Corpus. Compared to the other corpora developed by INL, the PAROLE Corpus has been encoded with more extended types of metadata, conformant to the TEI standard for text encoding. A search engine and a web-based user interface, both newly developed by INL, provide the user with the functionality to explore the corpus, not only at the level of the text, but also at the level of the metadata or a combination of the two. 
In view of our experience with corpus retrieval, we did not follow the complete system development cycle, but used an alternative method instead.",{"paper_id":2596,"title":2597,"year":197,"month":855,"day":63,"doi":2598,"resource_url":2599,"first_page":63,"last_page":63,"pdf_url":2600,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2601,"paper_type":860,"authors":2602,"abstract":2609},"lrec2004-main-104","Acquiring Reusable Multilingual Phonotactic Resources","10.63317\u002F362bsfizhbmz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-104","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F208.pdf","carson-berndsen-kelly-2004-acquiring",[2603,2606],{"paper_id":2596,"author_seq":247,"given_name":2604,"surname":2605,"affiliation":63,"orcid":63},"Julie","Carson-Berndsen",{"paper_id":2596,"author_seq":232,"given_name":2607,"surname":2608,"affiliation":63,"orcid":63},"Robert","Kelly","This paper presents a fully automatic procedure for acquiring reusable phonotactic resources from syllable annotated data. The procedure makes use of a regular inference algorithm and the acquired resources are stored in a specialised XML representation. The technique is then extended to support acquisition from phoneme labelled data while providing a semi-automatic annotation system assisting user annotations of phoneme labelled data with syllable boundaries.",{"paper_id":2611,"title":2612,"year":197,"month":855,"day":63,"doi":2613,"resource_url":2614,"first_page":63,"last_page":63,"pdf_url":2615,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2616,"paper_type":860,"authors":2617,"abstract":2624},"lrec2004-main-105","Phonological Treebanks. 
Issues in Generation and Application","10.63317\u002F57jzgnu5b7b8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-105","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F209.pdf","neugebauer-wilson-2004-phonological",[2618,2621],{"paper_id":2611,"author_seq":247,"given_name":2619,"surname":2620,"affiliation":63,"orcid":63},"Moritz","Neugebauer",{"paper_id":2611,"author_seq":232,"given_name":2622,"surname":2623,"affiliation":63,"orcid":63},"Stephen","Wilson","The continuing popularity of XML as a data exchange format and the concurrent rise of treebanks as natural language resources within various domains of language processing have led us to extend their domain of application to phonological data. Typically, treebanks are a language resource that provides annotations of natural languages at various levels of structure and in this paper we present a tree-based format to capture phonological information at the syllable level, at the segment level and even including more fine-grained featural information. Two integrated modules in relation to phonological treebanks are described: the first uses a multilingual feature set to augment segment-annotated corpora in terms of a tree-based structure represented in XML. 
The second module allows these feature trees to be traversed and the data contained in it to be optimised in a purely data-driven manner.",{"paper_id":2626,"title":2627,"year":197,"month":855,"day":63,"doi":2628,"resource_url":2629,"first_page":63,"last_page":63,"pdf_url":2630,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2631,"paper_type":860,"authors":2632,"abstract":2644},"lrec2004-main-106","Methodology for Rapid Prototyping and Testing of ASR Based User Interfaces","10.63317\u002F2fovghd4ykce","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-106","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F210.pdf","cerezo-etal-2004-methodology",[2633,2636,2639,2642],{"paper_id":2626,"author_seq":247,"given_name":2634,"surname":2635,"affiliation":63,"orcid":63},"Pedro Concejero","Cerezo",{"paper_id":2626,"author_seq":232,"given_name":2637,"surname":2638,"affiliation":63,"orcid":63},"Juan José Rodríguez","Soler",{"paper_id":2626,"author_seq":218,"given_name":2640,"surname":2641,"affiliation":63,"orcid":63},"Daniel Tapias","Merino",{"paper_id":2626,"author_seq":203,"given_name":2643,"surname":2306,"affiliation":63,"orcid":63},"Alberto J. Sánchez","One of the greatest challenges in the design of speech recognition based interfaces is about the navigation through the service menu structure. On the one hand, the interactions based on human machine dialogues force a high level of hierarchical structuring of services, and on the other hand, it is necessary to wait for the last phases of the user interface development to obtain a global vision of the dialogue problems by means of user trials. 
To tackle these problems, when Telefónica Móviles España began developing its voice portal, great care was taken in establishing a methodology based on rapid prototyping of the user interfaces, so that designers could integrate the process of design of voice interfaces with emulations of the navigation through the flow charts. This was also the starting point for a specific software product (SIS PRUEBA) which addresses the needs of rapid prototyping of these user interfaces from the earliest stages of the design and analysis phases. SIS PRUEBA has been applied to this and to other types of voice and data services. For example, in complex services like \"CINES\" (to request information about cinemas, films, etc.) and \"FINANZAS\" (to consult financial information), the use of this methodology helped to review and to apply recommendations of the speech interfaces usability guide on concrete points of the flow charts and, consequently, the time and resources needed to redesign the critical parts of the user interface were reduced dramatically. 
In addition, SIS PRUEBA has also been successfully applied to prototype simpler services, like \"ALERTAS\" (which allows to program alert messages about football).",{"paper_id":2646,"title":2647,"year":197,"month":855,"day":63,"doi":2648,"resource_url":2649,"first_page":63,"last_page":63,"pdf_url":2650,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2651,"paper_type":860,"authors":2652,"abstract":2659},"lrec2004-main-107","Open Resources for Language Technology","10.63317\u002F3bju3hf92ruv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-107","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F211.pdf","degerstedt-jonsson-2004-open",[2653,2656],{"paper_id":2646,"author_seq":247,"given_name":2654,"surname":2655,"affiliation":63,"orcid":63},"Lars","Degerstedt",{"paper_id":2646,"author_seq":232,"given_name":2657,"surname":2658,"affiliation":63,"orcid":63},"Arne","Jönsson","Nlpfarm is an Open Source code repository for development and sharing of language technology resources. Nlpfarm hosts a number of projects covering various language technology needs, providing possibilities to develop more robust and well-formed applications. 
Nlpfarm has been in use for more than a year and our experience is that it has facilitated co-operation and sharing of resources but that there are still issues to consider.",{"paper_id":2661,"title":2662,"year":197,"month":855,"day":63,"doi":2663,"resource_url":2664,"first_page":63,"last_page":63,"pdf_url":2665,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2666,"paper_type":860,"authors":2667,"abstract":2674},"lrec2004-main-108","Unsupervised Text Mining for Ontology Extraction: An Evaluation of Statistical Measures","10.63317\u002F4opg2nmcvq62","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-108","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F212.pdf","reinberger-daelemans-2004-unsupervised",[2668,2671],{"paper_id":2661,"author_seq":247,"given_name":2669,"surname":2670,"affiliation":63,"orcid":63},"Marie-Laure","Reinberger",{"paper_id":2661,"author_seq":232,"given_name":2672,"surname":2673,"affiliation":63,"orcid":63},"Walter","Daelemans","We report on a comparative evaluation carried out in the field of unsupervised text mining. We have worked on a parsed medical corpus, on which we have used different statistical measures. Using those measures, we rate the verb-object dependencies and we select the most reliable ones according to each measure. We then apply pattern matching and clustering algorithms to the classes of dependencies in order to build sets of semantically related words and establish semantic links between them. 
Finally, we evaluate the impact of the statistical measures used for the initial selection of the dependencies on the quality of the results.",{"paper_id":2676,"title":2677,"year":197,"month":855,"day":63,"doi":2678,"resource_url":2679,"first_page":63,"last_page":63,"pdf_url":2680,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2681,"paper_type":860,"authors":2682,"abstract":2692},"lrec2004-main-109","A Multilingual Phonological Resource Toolkit for Ubiquitous Speech Technology","10.63317\u002F3ao5pcw4umzj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-109","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F213.pdf","aioanei-etal-2004-multilingual",[2683,2685,2686,2689,2690,2691],{"paper_id":2676,"author_seq":247,"given_name":2268,"surname":2684,"affiliation":63,"orcid":63},"Aioanei",{"paper_id":2676,"author_seq":232,"given_name":2604,"surname":2605,"affiliation":63,"orcid":63},{"paper_id":2676,"author_seq":218,"given_name":2687,"surname":2688,"affiliation":63,"orcid":63},"Anja","Geumann",{"paper_id":2676,"author_seq":203,"given_name":2607,"surname":2608,"affiliation":63,"orcid":63},{"paper_id":2676,"author_seq":188,"given_name":2619,"surname":2620,"affiliation":63,"orcid":63},{"paper_id":2676,"author_seq":172,"given_name":2622,"surname":2623,"affiliation":63,"orcid":63},"This paper outlines the generation process of a specific computational linguistic representation termed the Multilingual Time Map, conceptually a multi-tape finite state transducer encoding linguistic data at different levels of granularity. 
The first component acquires phonological data from syllable labeled speech data, the second component defines feature profiles, the third component generates feature hierarchies and augments the acquired data with the defined feature profiles, and the fourth component displays the Multilingual Time Map as a graph.",{"paper_id":2694,"title":2695,"year":197,"month":855,"day":63,"doi":2696,"resource_url":2697,"first_page":63,"last_page":63,"pdf_url":2698,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2699,"paper_type":860,"authors":2700,"abstract":63},"lrec2004-main-110","Benchmarking Ontology Tools. A Case Study for the WebODE Platform.","10.63317\u002F4zmk7jfcwb8o","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-110","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F217.pdf","corcho-etal-2004-benchmarking",[2701,2704,2706],{"paper_id":2694,"author_seq":247,"given_name":2702,"surname":2703,"affiliation":63,"orcid":63},"Oscar","Corcho",{"paper_id":2694,"author_seq":232,"given_name":2102,"surname":2705,"affiliation":63,"orcid":63},"García-Castro",{"paper_id":2694,"author_seq":218,"given_name":2707,"surname":2708,"affiliation":63,"orcid":63},"Asunción","Gómez-Pérez",{"paper_id":2710,"title":2711,"year":197,"month":855,"day":63,"doi":2712,"resource_url":2713,"first_page":63,"last_page":63,"pdf_url":2714,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2715,"paper_type":860,"authors":2716,"abstract":2723},"lrec2004-main-111","A Chatbot as a Novel Corpus Visualization Tool","10.63317\u002F59r7uzupnexz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-111","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F218.pdf","shawar-atwell-2004-chatbot",[2717,2720],{"paper_id":2710,"author_seq":247,"given_name":2718,"surname":2719,"affiliation":63,"orcid":63},"Bayan 
Abu","Shawar",{"paper_id":2710,"author_seq":232,"given_name":2721,"surname":2722,"affiliation":63,"orcid":63},"Eric","Atwell","The classical way to overview a large data-set is to use a visualization process, which maps the data from numerical or textual form to a visual representation that our mind can easily interpret, such as: using graphical diagrams, charts, and geometric representations. In this paper we introduce a new idea to visualize a dialogue corpus using a chatbot interface tool. We developed a java program to convert a readable text (corpus) to AIML format to retrain ALICE. We use specific domains of the BNC spoken files to retrain ALICE, and visualise the data contents of these domains via chatting. Effectiveness of this visualization of the corpus using a chatbot depends on the chatting time and the size of the corpus. Our main conclusion is that it is possible to use the chatbot tool as a visualization process of a dialogue corpus, and that we can use different training corpora to build different chatbot personalities.",{"paper_id":2725,"title":2726,"year":197,"month":855,"day":63,"doi":2727,"resource_url":2728,"first_page":63,"last_page":63,"pdf_url":2729,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2730,"paper_type":860,"authors":2731,"abstract":2741},"lrec2004-main-112","Evaluating Variants of the Lesk Approach for Disambiguating Words","10.63317\u002F5n9i64yqu75i","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-112","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F219.pdf","vasilescu-etal-2004-evaluating",[2732,2735,2738],{"paper_id":2725,"author_seq":247,"given_name":2733,"surname":2734,"affiliation":63,"orcid":63},"Florentina","Vasilescu",{"paper_id":2725,"author_seq":232,"given_name":2736,"surname":2737,"affiliation":63,"orcid":63},"Philippe","Langlais",{"paper_id":2725,"author_seq":218,"given_name":2739,"surname":2740,"affiliation":63,"orcid":63},"Guy","Lapalme","This 
paper presents a detailed analysis of the factors determining the performance of Lesk-based word sense disambiguation methods. We conducted a series of experiments on the original Lesk algorithm, adapted to WORDNET, and on some variants. These methods were evaluated on the test corpus from SENSEVAL2, English All Words, and on excerpts from SEMCOR. We designed a fine grain analysis of the answers provided by each variant in order to better understand the algorithms than by the mere precision and recall figures.",{"paper_id":2743,"title":2744,"year":197,"month":855,"day":63,"doi":2745,"resource_url":2746,"first_page":63,"last_page":63,"pdf_url":2747,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2748,"paper_type":860,"authors":2749,"abstract":2758},"lrec2004-main-113","The Rationale for Building an Ontology Expressly for NLP","10.63317\u002F5q9x3rtnegpe","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-113","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F223.pdf","nirenburg-etal-2004-rationale",[2750,2753,2756],{"paper_id":2743,"author_seq":247,"given_name":2751,"surname":2752,"affiliation":63,"orcid":63},"Sergei","Nirenburg",{"paper_id":2743,"author_seq":232,"given_name":2754,"surname":2755,"affiliation":63,"orcid":63},"Marjorie","McShane",{"paper_id":2743,"author_seq":218,"given_name":2622,"surname":2757,"affiliation":63,"orcid":63},"Beale","In this paper we argue for the need of NLP-specific resources to support truly high level, semantically oriented applications. We describe what, in our experience, constitutes useful knowledge for such applications and why most extant resources are not sufficient for this purpose, leading our Ontological Semantics group to build its own. We suggest that extensive time and energy are being spent on resources for NLP, though not on developing ones of higher utility but, rather, on trying to discover ways of using less than ideal ones. 
We believe that a more useful long-term approach to the problem of knowledge acquisition for NLP would be to acquire what is needed from the outset, since it is likely that in the end such work will prove necessary anyway.",{"paper_id":2760,"title":2761,"year":197,"month":855,"day":63,"doi":2762,"resource_url":2763,"first_page":63,"last_page":63,"pdf_url":2764,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2765,"paper_type":860,"authors":2766,"abstract":2770},"lrec2004-main-114","Some Meaning Procedures of Ontological Semantics","10.63317\u002F2wnixeac725a","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-114","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F224.pdf","mcshane-etal-2004-meaning",[2767,2768,2769],{"paper_id":2760,"author_seq":247,"given_name":2754,"surname":2755,"affiliation":63,"orcid":63},{"paper_id":2760,"author_seq":232,"given_name":2622,"surname":2757,"affiliation":63,"orcid":63},{"paper_id":2760,"author_seq":218,"given_name":2751,"surname":2752,"affiliation":63,"orcid":63},"This paper presents implemented algorithms for interpreting the meaning of certain context-dependent lexical items within the Ontological Semantic text processing environment. We discuss the form, function and rationale behind three meaning procedures, all of which are, in a certain sense, numerically oriented. 
We show that only a knowledge-rich processing system can fully interpret such entities, and that an integrated combination of static resources and processors provides sufficient foundation for high-quality text interpretation.",{"paper_id":2772,"title":2773,"year":197,"month":855,"day":63,"doi":2774,"resource_url":2775,"first_page":63,"last_page":63,"pdf_url":2776,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2777,"paper_type":860,"authors":2778,"abstract":2794},"lrec2004-main-115","Using the Penn Treebank to Evaluate Non-Treebank Parsers","10.63317\u002F2zzkf5nuy9e6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-115","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F226.pdf","ringger-etal-2004-using",[2779,2782,2785,2788,2791],{"paper_id":2772,"author_seq":247,"given_name":2780,"surname":2781,"affiliation":63,"orcid":63},"Eric K.","Ringger",{"paper_id":2772,"author_seq":232,"given_name":2783,"surname":2784,"affiliation":63,"orcid":63},"Robert C.","Moore",{"paper_id":2772,"author_seq":218,"given_name":2786,"surname":2787,"affiliation":63,"orcid":63},"Eugene","Charniak",{"paper_id":2772,"author_seq":203,"given_name":2789,"surname":2790,"affiliation":63,"orcid":63},"Lucy","Vanderwende",{"paper_id":2772,"author_seq":188,"given_name":2792,"surname":2793,"affiliation":63,"orcid":63},"Hisami","Suzuki","This paper describes a method for conducting evaluations of Treebank and non-Treebank parsers alike against the English language U. Penn Treebank (Marcus et al., 1993) using a metric that focuses on the accuracy of relatively non-controversial aspects of parse structure. Our conjecture is that if we focus on maximal projections of heads (MPH), we are likely to find much broader agreement than if we try to evaluate based on order of attachment. We hope that this method may find wider acceptance and be useful in establishing a generally applicable framework for evaluation in natural language parsing. 
We employ this method in an evaluation of NLPWin (Heidorn, 2000), a parser developed at Microsoft Research without reference to the Penn Treebank, and, for comparison, the well-known statistical Treebank parser of Charniak (2000).",{"paper_id":2796,"title":2797,"year":197,"month":855,"day":63,"doi":2798,"resource_url":2799,"first_page":63,"last_page":63,"pdf_url":2800,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2801,"paper_type":860,"authors":2802,"abstract":2809},"lrec2004-main-116","Comparison of Some Automatic and Manual Methods for Summary Evaluation Based on the Text Summarization Challenge 2","10.63317\u002F4uhkaa65ad5n","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-116","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F229.pdf","nanba-okumura-2004-comparison",[2803,2806],{"paper_id":2796,"author_seq":247,"given_name":2804,"surname":2805,"affiliation":63,"orcid":63},"Hidetsugu","Nanba",{"paper_id":2796,"author_seq":232,"given_name":2807,"surname":2808,"affiliation":63,"orcid":63},"Manabu","Okumura","In this paper, we compare some automatic and manual methods for summary evaluation. One of the essential points for evaluating a summary is how well the evaluation measure recognizes slight differences in the quality of the computer-produced summaries. In terms of this point, we examined 'evaluation by revision' using the data of the Text Summarization Challenge 2 (TSC2). Evaluation by revision is a manual method that was first used in TSC2, whose effectiveness has not been tested. First, we compared evaluation by revision with a ranking evaluation, which is a manual method used both in TSC1 and in TSC2, by checking the gaps of the edit distance from 0 to 1 at 0.1 intervals. To investigate the effectiveness of evaluation by revision, we also tested other automatic methods: content-based evaluation, BLEU and RED, and compare their results with that of evaluation by revision for reference. 
As a result, we found that evaluation by revision is effective for recognizing slight differences between computer-produced summaries. Second, we evaluated content-based evaluation, BLEU and RED by evaluation by revision, and compared the effectiveness of the three automatic methods. We found that RED is superior to the others in some examinations.",{"paper_id":2811,"title":2812,"year":197,"month":855,"day":63,"doi":2813,"resource_url":2814,"first_page":63,"last_page":63,"pdf_url":2815,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2816,"paper_type":860,"authors":2817,"abstract":2824},"lrec2004-main-117","The Lancaster Corpus of Mandarin Chinese: A Corpus for Monolingual and Contrastive Language Study","10.63317\u002F3rj7enx8i87i","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-117","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F231.pdf","mcenery-xiao-2004-lancaster",[2818,2821],{"paper_id":2811,"author_seq":247,"given_name":2819,"surname":2820,"affiliation":63,"orcid":63},"Anthony","McEnery",{"paper_id":2811,"author_seq":232,"given_name":2822,"surname":2823,"affiliation":63,"orcid":63},"Zhonghua","Xiao","This paper presents the newly released Lancaster Corpus of Mandarin Chinese (LCMC), a Chinese match for the FLOB and Frown corpora of British and American English. LCMC is a one-million-word balanced corpus of written Mandarin Chinese. The corpus contains five hundred 2,000-word samples of written Chinese texts sampled from fifteen text categories published in Mainland China around 1991, totalling one million words. LCMC is XML-compliant and conforms to CES, with each document containing a corpus header giving general information about the corpus and a body of text. The corpus is segmented and POS tagged with a tagging precision rate of over 98%. 
The corpus is a useful resource for research into modern Chinese as well as the cross-linguistic contrast between English and Chinese.",{"paper_id":2826,"title":2827,"year":197,"month":855,"day":63,"doi":2828,"resource_url":2829,"first_page":63,"last_page":63,"pdf_url":2830,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2831,"paper_type":860,"authors":2832,"abstract":2848},"lrec2004-main-118","Highlighting Latent Structure in Documents","10.63317\u002F4sjf6ibymo3i","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-118","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F232.pdf","folch-etal-2004-highlighting",[2833,2836,2838,2840,2842,2845],{"paper_id":2826,"author_seq":247,"given_name":2834,"surname":2835,"affiliation":63,"orcid":63},"H.","Folch",{"paper_id":2826,"author_seq":232,"given_name":2313,"surname":2837,"affiliation":63,"orcid":63},"Habert",{"paper_id":2826,"author_seq":218,"given_name":2298,"surname":2839,"affiliation":63,"orcid":63},"Jardino",{"paper_id":2826,"author_seq":203,"given_name":2303,"surname":2841,"affiliation":63,"orcid":63},"Pernelle",{"paper_id":2826,"author_seq":188,"given_name":2843,"surname":2844,"affiliation":63,"orcid":63},"M.C.","Rousset",{"paper_id":2826,"author_seq":172,"given_name":2846,"surname":2847,"affiliation":63,"orcid":63},"A.","Termier","Extensible Markup Language (XML) is playing an increasingly important role in the exchange of a wide variety of data on the Web and elsewhere. It is a simple, very flexible text format, used to annotate data by means of markup. XML documents can be checked for syntactic well-formedness and semantic coherence through DTD and schema validation which makes their processing easier. In particular, data with nested structure can be easily represented with embedded tags. This structured representation should be used in information retrieval models which take structure into account. 
As such, it is meta-data and therefore a contribution to the Semantic Web. However, nowadays, there exist huge quantities of raw texts and the issue is how to find an easy way to provide these texts with sensible XML structure. Here we present an automatic method to extract tree structure from raw texts. This work has been supported by the Paris XI University (BQR2002 project, Paris-XI University).",{"paper_id":2850,"title":2851,"year":197,"month":855,"day":63,"doi":2852,"resource_url":2853,"first_page":63,"last_page":63,"pdf_url":2854,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2855,"paper_type":860,"authors":2856,"abstract":2866},"lrec2004-main-119","Word Sense Disambiguation as a Wordnets’ Validation Method in Balkanet","10.63317\u002F4abt25cx4nhh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-119","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F234.pdf","tufis-etal-2004-word",[2857,2860,2863],{"paper_id":2850,"author_seq":247,"given_name":2858,"surname":2859,"affiliation":63,"orcid":63},"Dan","Tufis",{"paper_id":2850,"author_seq":232,"given_name":2861,"surname":2862,"affiliation":63,"orcid":63},"Radu","Ion",{"paper_id":2850,"author_seq":218,"given_name":2864,"surname":2865,"affiliation":63,"orcid":63},"Nancy","Ide","BalkaNet is a European project which aims at the development of monolingual wordnets for five languages in the Balkans area (Bulgarian, Greek, Romanian, Serbian, and Turkish) and at improvement of the Czech wordnet developed in the EuroWordNet project. The wordnets are aligned to the Princeton Wordnet, according to the principles established by the EuroWordNet consortium. One of the main concerns of this project is the interlingual validation of the wordnets alignment. 
To this end, we have developed a WSD system, based on parallel corpora, which exploits the common intuition according to which words that are reciprocal translations in parallel texts should be linked to the same (or closely related) interlingual concepts. An embedded word aligner provides the wordnet-based algorithm, described in the paper, with pairs of words which are reciprocal translations and which are subject to mutually disambiguate each other. With wordnets under construction, our WSD system is useful mainly for validation, pinpointing wrong interlingual alignments, incomplete or missing synsets in one or the other of the wordnets. With robust wordnets, the system is a proper word sense disambiguation tool for parallel corpora. The sense granularity at which the WSD is achieved is the one in the Princeton Wordnet. The challenge of this approach, besides its high accuracy and fine-grained disambiguation is that it may be used to automatically sense-tag corpora in not only one language, but rather several at once and by the same sense inventory. 
WSD is evaluated on a Romanian-English bitext, extracted from the multilingual parallel corpus \"1984\", against a hand sense-tagging used as a Gold-Standard.",{"paper_id":2868,"title":2869,"year":197,"month":855,"day":63,"doi":2870,"resource_url":2871,"first_page":63,"last_page":63,"pdf_url":2872,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2873,"paper_type":860,"authors":2874,"abstract":2876},"lrec2004-main-120","Term Translations in Parallel Corpora: Discovery and Consistency Check","10.63317\u002F2qf4jzysj3fj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-120","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F235.pdf","tufis-2004-term",[2875],{"paper_id":2868,"author_seq":247,"given_name":2858,"surname":2859,"affiliation":63,"orcid":63},"The paper describes a method for identifying term translations in parallel corpora, developed within the FF-POIROT European project. This project aims at building multilingual (Dutch, Italian, French and English) resources in the financial\u002Flegal domain that may be used in knowledge and information systems by investigative bodies, and law enforcement in order to detect, investigate or help prevent instances of actual or attempted financial fraud. The methodology builds on our word alignment procedure based on translation equivalents extracted from parallel corpora. When a validated list of multiword terms is available in one language, the procedure provides the translations in any of the languages present in the parallel corpus. Given that a term is usually semantically non-ambiguous, the found translations of different occurrences of the same term should be the same (modulo inflectional variations). If this is not the case, one might suspect a non-systematic translation of the original term. 
When a man-made term list is not available, the system tries to discover the term candidates extracting sequences of words that appear together more frequently than expected by chance. By the procedure mentioned before, the candidate terms occurrences in one language are linked to their translation equivalents in the other languages.",{"paper_id":2878,"title":2879,"year":197,"month":855,"day":63,"doi":2880,"resource_url":2881,"first_page":63,"last_page":63,"pdf_url":2882,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2883,"paper_type":860,"authors":2884,"abstract":2892},"lrec2004-main-121","The Corpógrafo – a Web-based Environment for Corpora Research","10.63317\u002F5q5nbf4y4jwi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-121","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F236.pdf","sarmento-etal-2004-corpografo",[2885,2888,2891],{"paper_id":2878,"author_seq":247,"given_name":2886,"surname":2887,"affiliation":63,"orcid":63},"Luís","Sarmento",{"paper_id":2878,"author_seq":232,"given_name":2889,"surname":2890,"affiliation":63,"orcid":63},"Belinda","Maia",{"paper_id":2878,"author_seq":218,"given_name":1060,"surname":1061,"affiliation":63,"orcid":63},"In this paper we present the Corpógrafo, an integrated web-based environment for corpora linguistics and knowledge engineering that is being developed at the Porto node of Linguateca. The Corpógrafo aims to provide an integrated corpora research environment by making freely available on the web a comprehensive set of text and language tools (http:\u002F\u002Fwww.linguateca.pt\u002Fcorpografo\u002F). 
We present the current stage of development of the Corpógrafo, discuss its current limitations and propose possible developments.",{"paper_id":2894,"title":2895,"year":197,"month":855,"day":63,"doi":2896,"resource_url":2897,"first_page":63,"last_page":63,"pdf_url":2898,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2899,"paper_type":860,"authors":2900,"abstract":63},"lrec2004-main-122","Automatic Classification of Geographic Named Entities","10.63317\u002F5fg23g4d4can","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-122","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F237.pdf","ferres-etal-2004-automatic",[2901,2903,2906,2909,2912],{"paper_id":2894,"author_seq":247,"given_name":2268,"surname":2902,"affiliation":63,"orcid":63},"Ferrés",{"paper_id":2894,"author_seq":232,"given_name":2904,"surname":2905,"affiliation":63,"orcid":63},"Marc","Massot",{"paper_id":2894,"author_seq":218,"given_name":2907,"surname":2908,"affiliation":63,"orcid":63},"Muntsa","Padró",{"paper_id":2894,"author_seq":203,"given_name":2910,"surname":2911,"affiliation":63,"orcid":63},"Horacio","Rodríguez",{"paper_id":2894,"author_seq":188,"given_name":2105,"surname":2913,"affiliation":63,"orcid":63},"Turmo",{"paper_id":2915,"title":2916,"year":197,"month":855,"day":63,"doi":2917,"resource_url":2918,"first_page":63,"last_page":63,"pdf_url":2919,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2920,"paper_type":860,"authors":2921,"abstract":2928},"lrec2004-main-123","Acquiring Bayesian Networks from 
Text","10.63317\u002F5dvtwdvxevd7","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-123","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F240.pdf","sanchez-graillet-poesio-2004-acquiring",[2922,2925],{"paper_id":2915,"author_seq":247,"given_name":2923,"surname":2924,"affiliation":63,"orcid":63},"Olivia","Sanchez-Graillet",{"paper_id":2915,"author_seq":232,"given_name":2926,"surname":2927,"affiliation":63,"orcid":63},"Massimo","Poesio","Causal inference is one of the most fundamental reasoning processes and one that is essential for question-answering as well as more general AI applications such as decision-making and diagnosis. Bayesian Networks are a popular formalism for encoding (probabilistic) causal knowledge that allows for inference. We developed a system for acquiring causal knowledge from text. Our system identifies sentences that specify causal relations and extracts from them causal patterns, taking into account connectives such as conjunction, disjunction and negation, and recognising causes and effects by analysing terms. The dependencies among the causes and effects found in text can be encoded as Bayesian networks. 
We evaluated our work by comparing the network structures obtained by our system with the ones created by a human evaluator.",{"paper_id":2930,"title":2931,"year":197,"month":855,"day":63,"doi":2932,"resource_url":2933,"first_page":63,"last_page":63,"pdf_url":2934,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2935,"paper_type":860,"authors":2936,"abstract":2946},"lrec2004-main-124","Developping Tools and Building Linguistic Resources for Vietnamese Morpho-syntactic Processing","10.63317\u002F57x2eodtjdhq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-124","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F241.pdf","nguyen-etal-2004-developping",[2937,2940,2942,2943],{"paper_id":2930,"author_seq":247,"given_name":2938,"surname":2939,"affiliation":63,"orcid":63},"Thanh Bon","Nguyen",{"paper_id":2930,"author_seq":232,"given_name":2941,"surname":2939,"affiliation":63,"orcid":63},"Thi Minh Huyen",{"paper_id":2930,"author_seq":218,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},{"paper_id":2930,"author_seq":203,"given_name":2944,"surname":2945,"affiliation":63,"orcid":63},"Xuan Luong","Vu","Vietnamese is spoken by about 80 million people around the world, yet very few concrete works on this language have been noticed in Natural Language Processing (NLP) until now. The fundamental problems in automatic analysis of Vietnamese, such as part-of-speech (POS) tagging, parsing, etc. are extremely difficult due to the lack of formal linguistic knowledge on one hand, and the specificities of isolating languages on the other hand. In this paper we present our efforts to develop a set of tools permitting the construction and management of language resources for Vietnamese in a normalized framework, whose aim is to be largely distributed and usable for research purposes in NLP. 
We first define a tagset by constructing Vietnamese morpho-syntactic descriptors that fit in a model compatible with MULTEXT (http:\u002F\u002Fwww.lpl.univ-aix.fr\u002Fprojects\u002Fmultext), so as to account for possible multilingual applications as well as the reusability of defined tagsets. We then implement a system undertaking the tasks of word segmentation and POS tagging (vnACCMS - http:\u002F\u002Fwww.loria.fr\u002Fequipes\u002Fled\u002Foutils.php). Our system ensures a representation format of linguistic resources that is currently considered in the framework of ISO TC37 SC4 (http:\u002F\u002Fwww.tc37sc4.org). Finally we attempt to construct a formal syntactic description of nominal groups using the Tree Adjoining Grammar (TAG) formalism.",{"paper_id":2948,"title":2949,"year":197,"month":855,"day":63,"doi":2950,"resource_url":2951,"first_page":63,"last_page":63,"pdf_url":2952,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2953,"paper_type":860,"authors":2954,"abstract":2961},"lrec2004-main-125","SpeechRecorder - a Universal Platform Independent Multi-Channel Audio Recording Software","10.63317\u002F28vq7953s8kb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-125","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F242.pdf","draxler-jansch-2004-speechrecorder",[2955,2958],{"paper_id":2948,"author_seq":247,"given_name":2956,"surname":2957,"affiliation":63,"orcid":63},"Christoph","Draxler",{"paper_id":2948,"author_seq":232,"given_name":2959,"surname":2960,"affiliation":63,"orcid":63},"Klaus","Jänsch","SpeechRecorder is a platform independent audio recording software for speech corpus recordings. It is implemented in Java in a clean object-oriented design and adheres to established technology standards and document interchange formats. 
SpeechRecorder allows Unicode text and multimedia prompts, it supports audio recordings via more than two channels, and it features multiple configurable screens. Recording sessions are defined by recording scripts written in XML. The recording scripts can be executed manually by the experimenter, or automatically for unsupervised recordings; progress through the script can be sequential or randomized. SpeechRecorder is based on URLs to access local and network resources and thus allows recordings via the WWW.",{"paper_id":2963,"title":2964,"year":197,"month":855,"day":63,"doi":2965,"resource_url":2966,"first_page":63,"last_page":63,"pdf_url":2967,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2968,"paper_type":860,"authors":2969,"abstract":2979},"lrec2004-main-126","An Evaluation Protocol for Text Mining Tools : ALCESTE, SAS Text Miner, SPAD-CRM and Temis Text Mining Solutions Testing","10.63317\u002F5j9mk26ckve8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-126","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F244.pdf","quatrain-etal-2004-evaluation",[2970,2973,2976],{"paper_id":2963,"author_seq":247,"given_name":2971,"surname":2972,"affiliation":63,"orcid":63},"Yasmina","Quatrain",{"paper_id":2963,"author_seq":232,"given_name":2974,"surname":2975,"affiliation":63,"orcid":63},"Sylvaine","Nugier",{"paper_id":2963,"author_seq":218,"given_name":2977,"surname":2978,"affiliation":63,"orcid":63},"Anne","Peradotto","Within the context of the opening of the electricity market, EDF needs to be able to analyse large volumes of text data to enable the company to have a better knowledge of its customers. With this in mind, several text mining tools intended for analysing this very diverse information in large quantities have been evaluated using three different corpora. It appeared essential to create a table to enable easy comparison of the software. 
Inspired by existing expertise in data mining tools, this was carried out while being careful not to favour statistical over linguistic results. This table has ten subjects varying from the editing company to the fields of application passing through data access and lexical table analysis. In addition to the carrying out of the evaluation and its results on four market tools, this article retraces the method for creating the test table, the choice of the tools evaluated and the criteria retained. Moreover, this experience supports the use of a detailed protocol permitting indispensable functions to be identified and evaluated according to the objectives and the profile of the software user and the nature of the corpus to be analysed.",{"paper_id":2981,"title":2982,"year":197,"month":855,"day":63,"doi":2983,"resource_url":2984,"first_page":63,"last_page":63,"pdf_url":2985,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2986,"paper_type":860,"authors":2987,"abstract":2995},"lrec2004-main-127","Using PiTagger for Lemmatization and PoS Tagging of a Spontaneous Speech Corpus: C-Oral-Rom Italian","10.63317\u002F35twf4sof7ma","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-127","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F246.pdf","panunzi-etal-2004-using",[2988,2990,2993],{"paper_id":2981,"author_seq":247,"given_name":1743,"surname":2989,"affiliation":63,"orcid":63},"Panunzi",{"paper_id":2981,"author_seq":232,"given_name":2991,"surname":2992,"affiliation":63,"orcid":63},"Eugenio","Picchi",{"paper_id":2981,"author_seq":218,"given_name":2926,"surname":2994,"affiliation":63,"orcid":63},"Moneglia","The automatic lemmatization and morpho-syntactic annotation of spoken language is a quite recent and complex task for Natural Language Processing. 
The state of the art on written corpora does not provide us with a satisfactory level of analysis regarding spontaneous spoken language (Uchimoto et al., 2002; Moreno & Guirao, 2003). The spontaneous speech corpus Italian C-ORAL-ROM has been tagged with Part of Speech (Pos) and morpho-syntactic information, using and adapting an already existing tool trained on Italian written resources (PiTagger, developed by Eugenio Picchi, ILC-CNR Pisa). The incidence of spoken domain on the performance is within a 10% of errors detected in the manual evaluation procedure. Some issues concerning spoken language emerged. The definition of significant contexts for PoS statistics is to be provided by utterance boundaries; moreover, the relevance of a series of phenomena related to the prosodic parsing has been highlighted: fragmentation phenomena, a relative lack of information for all words adjacent to utterance boundaries; under-specification of PoS for words in connection to secondary prosodic breaks and one word utterances.",{"paper_id":2997,"title":2998,"year":197,"month":855,"day":63,"doi":2999,"resource_url":3000,"first_page":63,"last_page":63,"pdf_url":3001,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3002,"paper_type":860,"authors":3003,"abstract":3022},"lrec2004-main-128","Introducing the La Repubblica Corpus: A Large, Annotated, TEI(XML)-compliant Corpus of Newspaper 
Italian","10.63317\u002F5ih5p32xxoy6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-128","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F247.pdf","baroni-etal-2004-introducing",[3004,3007,3009,3012,3015,3018,3020],{"paper_id":2997,"author_seq":247,"given_name":3005,"surname":3006,"affiliation":63,"orcid":63},"Marco","Baroni",{"paper_id":2997,"author_seq":232,"given_name":2564,"surname":3008,"affiliation":63,"orcid":63},"Bernardini",{"paper_id":2997,"author_seq":218,"given_name":3010,"surname":3011,"affiliation":63,"orcid":63},"Federica","Comastri",{"paper_id":2997,"author_seq":203,"given_name":3013,"surname":3014,"affiliation":63,"orcid":63},"Lorenzo","Piccioni",{"paper_id":2997,"author_seq":188,"given_name":3016,"surname":3017,"affiliation":63,"orcid":63},"Alessandra","Volpi",{"paper_id":2997,"author_seq":172,"given_name":2739,"surname":3019,"affiliation":63,"orcid":63},"Aston",{"paper_id":2997,"author_seq":155,"given_name":3005,"surname":3021,"affiliation":63,"orcid":63},"Mazzoleni","This paper describes the La Repubblica corpus, currently being developed at the SSLMIT of the University of Bologna. The corpus is a very large collection of newspaper text, currently amounting to 175 million words, but expected to grow to 400 million before the end of 2004. When completed, it will contain all the articles published between 1985 and 2000 by the national daily La Repubblica. The paper discusses the techniques used to extract the text, tokenize it and annotate it (basic TEI annotation, POS tagging, genre\u002Ftopic categorization), it presents examples of how it can be used, and gives details of the ways in which interested users can access it. 
The paper concludes with a discussion of current and future developments, and of weak and strong points of this resource.",{"paper_id":3024,"title":3025,"year":197,"month":855,"day":63,"doi":3026,"resource_url":3027,"first_page":63,"last_page":63,"pdf_url":3028,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3029,"paper_type":860,"authors":3030,"abstract":3034},"lrec2004-main-129","Exploiting Semantic Web Technologies for Intelligent Access to Historical Documents","10.63317\u002F4hnh5b6iaru5","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-129","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F248.pdf","ide-woolner-2004-exploiting",[3031,3032],{"paper_id":3024,"author_seq":247,"given_name":2864,"surname":2865,"affiliation":63,"orcid":63},{"paper_id":3024,"author_seq":232,"given_name":1790,"surname":3033,"affiliation":63,"orcid":63},"Woolner","The FDR\u002FPearl Harbor Project involves the enhancement of materials drawn from the Franklin D. Roosevelt Library and Digital Archives, which includes a range of image, sound, video and textual data. The project is undertaking the encoding, annotation, and multi-modal linkage of a portion of the collection, and enhancement of a web-based interface that enables exploitation of state-of-the-art methods for search and retrieval. We are currently developing a pilot project that includes government correspondence and documents produced in the six months prior to and including December 7, 1941, the date of the Japanese attack on Pearl Harbor, which has obvious historical, political, and general interest. 
The major activities in the project involve development of a model for historical documents and associated data and its instantiation using W3C standards, including XML, the Resource Description Framework (RDF and RDF schemas), and the Web Ontology Language (OWL); development of automated means, or enhancement of existing software, to identify and mark relevant elements within these data; and exploration of the potential to automatically extract ontological information so as to enable sophisticated search and retrieval via inferencing.",{"paper_id":3036,"title":3037,"year":197,"month":855,"day":63,"doi":3038,"resource_url":3039,"first_page":63,"last_page":63,"pdf_url":3040,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3041,"paper_type":860,"authors":3042,"abstract":3047},"lrec2004-main-130","Using Cooccurrence Statistics and the Web to Discover Synonyms in a Technical Language","10.63317\u002F48yvhipoi6dv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-130","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F249.pdf","baroni-bisi-2004-using",[3043,3044],{"paper_id":3036,"author_seq":247,"given_name":3005,"surname":3006,"affiliation":63,"orcid":63},{"paper_id":3036,"author_seq":232,"given_name":3045,"surname":3046,"affiliation":63,"orcid":63},"Sabrina","Bisi","Turney 2001 has shown that computing the mutual information of a pair of words by using cooccurrence counts obtained via queries to the AltaVista search engine performs very effectively in a synonym detection task. Since manual synonym detection is a challenging task for terminologists, we investigate whether the AltaVista-based Mutual Information (AVMI) method can be applied to the task of finding pairs of synonyms in the lexicon of a specialized sub-language. In particular, we experiment with synonyms in the field of nautical terminology. 
Our results indicate that AVMI is very good at spotting synonym couples among pairs of unrelated terms (with precision close to 90% at 62.5% recall) and that it outperforms more standard methods based on contextual cosine similarity. However, AVMI is not able to distinguish between synonyms and other semantically related terms. Thus, AVMI can be used for synonym mining only if it is combined with techniques to filter out other semantic relations.",{"paper_id":3049,"title":3050,"year":197,"month":855,"day":63,"doi":3051,"resource_url":3052,"first_page":63,"last_page":63,"pdf_url":3053,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3054,"paper_type":860,"authors":3055,"abstract":3062},"lrec2004-main-131","Semi-supervised Learning by Fuzzy Clustering and Ensemble Learning","10.63317\u002F3z4g63wphprq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-131","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F253.pdf","shinnou-sasaki-2004-semi",[3056,3059],{"paper_id":3049,"author_seq":247,"given_name":3057,"surname":3058,"affiliation":63,"orcid":63},"Hiroyuki","Shinnou",{"paper_id":3049,"author_seq":232,"given_name":3060,"surname":3061,"affiliation":63,"orcid":63},"Minoru","Sasaki","This paper proposes a semi-supervised learning method using Fuzzy clustering to solve word sense disambiguation problems. Furthermore, we reduce side effects of semi-supervised learning by ensemble learning. We set N classes for N labeled instances. The n-th labeled instance is used as the prototype of the n-th class. By using Fuzzy clustering for unlabeled instances, prototypes are moved to more suitable positions. We can classify a test instance by the k Nearest Neighbor (k-NN) with the moved prototypes. 
Moreover, to reduce side effects of semi-supervised learning, we use the ensemble learning combined the k-NN with initial labeled instances, which is initial prototype, and the k-NN with prototypes moved by Fuzzy clustering.",{"paper_id":3064,"title":3065,"year":197,"month":855,"day":63,"doi":3066,"resource_url":3067,"first_page":63,"last_page":63,"pdf_url":3068,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3069,"paper_type":860,"authors":3070,"abstract":63},"lrec2004-main-132","Speech & Expression; the Value of a Longitudinal Corpus","10.63317\u002F4jzht7dwhk5b","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-132","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F254.pdf","campbell-2004-speech",[3071],{"paper_id":3064,"author_seq":247,"given_name":907,"surname":908,"affiliation":63,"orcid":63},{"paper_id":3073,"title":3074,"year":197,"month":855,"day":63,"doi":3075,"resource_url":3076,"first_page":63,"last_page":63,"pdf_url":3077,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3078,"paper_type":860,"authors":3079,"abstract":3092},"lrec2004-main-133","A Complete Understanding Speech System Based on Semantic Concepts","10.63317\u002F38mjgckxbvi2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-133","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F255.pdf","jamoussi-etal-2004-complete",[3080,3083,3086,3089],{"paper_id":3073,"author_seq":247,"given_name":3081,"surname":3082,"affiliation":63,"orcid":63},"Salma","Jamoussi",{"paper_id":3073,"author_seq":232,"given_name":3084,"surname":3085,"affiliation":63,"orcid":63},"Kamel","Smaïli",{"paper_id":3073,"author_seq":218,"given_name":3087,"surname":3088,"affiliation":63,"orcid":63},"Dominique","Fohr",{"paper_id":3073,"author_seq":203,"given_name":3090,"surname":3091,"affiliation":63,"orcid":63},"Jean-Paul","Haton","In this work, we present a complete speech understanding system based on our 
speech recognizer: ESPERE. The input signal is processed and the best sentence is then proposed to the understanding module. In our case, the understanding problem is considered as a matching process between two different languages. At the entry, the request expressed in natural language and at the output the corresponding SQL form. The SQL request is obtained after an intermediate step in which the entry is expressed in terms of concepts. A concept represents a given meaning, it is defined by a set of words sharing the same semantic properties. In this paper, we propose a new Bayesian classifier to automatically extract the underlined concepts. We also propose a new approach for vector representation of words. Then, we describe the postprocessing step during which, we label our sentences and we generate the corresponding SQL queries. We conclude our paper by describing the integration step of our understanding module in a complete platform of human-machine oral interaction.",{"paper_id":3094,"title":3095,"year":197,"month":855,"day":63,"doi":3096,"resource_url":3097,"first_page":63,"last_page":63,"pdf_url":3098,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3099,"paper_type":860,"authors":3100,"abstract":3114},"lrec2004-main-134","The CLaRK System: XML-based Corpora Development System for Rapid 
Prototyping","10.63317\u002F34ct4i5896mw","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-134","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F258.pdf","simov-etal-2004-clark",[3101,3104,3105,3108,3111],{"paper_id":3094,"author_seq":247,"given_name":3102,"surname":3103,"affiliation":63,"orcid":63},"Kiril","Simov",{"paper_id":3094,"author_seq":232,"given_name":1081,"surname":3103,"affiliation":63,"orcid":63},{"paper_id":3094,"author_seq":218,"given_name":3106,"surname":3107,"affiliation":63,"orcid":63},"Hristo","Ganev",{"paper_id":3094,"author_seq":203,"given_name":3109,"surname":3110,"affiliation":63,"orcid":63},"Krasimira","Ivanova",{"paper_id":3094,"author_seq":188,"given_name":3112,"surname":3113,"affiliation":63,"orcid":63},"Ilko","Grigorov","The paper presents the CLaRK System as a tool for the creation of XML-based corpora and a platform for rapid prototyping. The system provides a set of basic tools for processing XML documents. These tools include: tokenizers, regular grammars, constraints; remove, insert, extract, sort, transformation operations. Additionally, the system is equipped with a macro language which allows the creation of tools sequences. The macro language includes a set of control operators for guiding the application of the tools in the macro. Usually, a tool or a macro works over a single document changing it or producing a new document. In some cases processing of more than one document is necessary --- in iterative statistics for treebank transformation, stand-off annotation, etc. 
For such processing the macro language allows a dynamic change of the processed documents.",{"paper_id":3116,"title":3117,"year":197,"month":855,"day":63,"doi":3118,"resource_url":3119,"first_page":63,"last_page":63,"pdf_url":3120,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3121,"paper_type":860,"authors":3122,"abstract":3131},"lrec2004-main-135","NLP-enhanced Error Checking for Catalan Unrestricted Text","10.63317\u002F4gk8xvfrbz7h","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-135","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F260.pdf","badia-etal-2004-nlp",[3123,3124,3127,3128],{"paper_id":3116,"author_seq":247,"given_name":982,"surname":983,"affiliation":63,"orcid":63},{"paper_id":3116,"author_seq":232,"given_name":3125,"surname":3126,"affiliation":63,"orcid":63},"Àngel","Gil",{"paper_id":3116,"author_seq":218,"given_name":988,"surname":989,"affiliation":63,"orcid":63},{"paper_id":3116,"author_seq":203,"given_name":3129,"surname":3130,"affiliation":63,"orcid":63},"Oriol","Valentín","We present here a general-purpose spell and grammar error detection architecture for Catalan unrestricted text. This architecture is based on a previous existing shallow morphosyntactic parser, which had to be adapted in order to successfully handle ill-formed input. The goal of this research is to obtain an architecture that can be used for developing morphosyntactic error checkers for both native and non-native speakers. We briefly present how we are currently customizing such an architecture in two different projects, as well as a means for annotating and exploiting error corpora (which ultimately condition the implementation of error checkers). 
We conclude with some remarks and future work.",{"paper_id":3133,"title":3134,"year":197,"month":855,"day":63,"doi":3135,"resource_url":3136,"first_page":63,"last_page":63,"pdf_url":3137,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3138,"paper_type":860,"authors":3139,"abstract":3141},"lrec2004-main-136","Open-source Tools for Creation, Maintenance, and Storage of Lexical Resources for Language Generation from Ontologies","10.63317\u002F4axiadc2anp3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-136","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F264.pdf","bontcheva-2004-open",[3140],{"paper_id":3133,"author_seq":247,"given_name":1431,"surname":1432,"affiliation":63,"orcid":63},"This paper describes reusable, open-source tools for creation, maintenance, storage, and access of Language Resources (LR) needed for generating natural language texts from ontologies. One advantage of these tools is that they provide a user-friendly interface for NLG LR manipulation. 
They also provide unified models for accessing NLG lexicons and mappings between lexicons and ontologies.",{"paper_id":3143,"title":3144,"year":197,"month":855,"day":63,"doi":3145,"resource_url":3146,"first_page":63,"last_page":63,"pdf_url":3147,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3148,"paper_type":860,"authors":3149,"abstract":3159},"lrec2004-main-137","User Query Analysis for the Specification and Evaluation of a Dialogue Processing and Retrieval System","10.63317\u002F5p9v7wabfhi4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-137","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F266.pdf","lisowska-etal-2004-user",[3150,3153,3156],{"paper_id":3143,"author_seq":247,"given_name":3151,"surname":3152,"affiliation":63,"orcid":63},"Agnes","Lisowska",{"paper_id":3143,"author_seq":232,"given_name":3154,"surname":3155,"affiliation":63,"orcid":63},"Andrei","Popescu-Belis",{"paper_id":3143,"author_seq":218,"given_name":3157,"surname":3158,"affiliation":63,"orcid":63},"Susan","Armstrong","This article describes an experiment in user query elicitation for the design of a multimodal meeting processing and retrieval system (MPR). In the experiment, participants are asked to choose between several scenarios of use of an MPR system, then formulate (on paper) queries to the system within the context of their chosen scenario. The analysis of the queries provides us with an initial set of requirements for the design of an MPR system, which will be used to confirm a priori design considerations, and suggest improvements to existing interfaces. 
This elicitation-design-evaluation process will be iterated, where the next phase will involve experiments using the Wizard-of-Oz methodology.",{"paper_id":3161,"title":3162,"year":197,"month":855,"day":63,"doi":3163,"resource_url":3164,"first_page":63,"last_page":63,"pdf_url":3165,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3166,"paper_type":860,"authors":3167,"abstract":3178},"lrec2004-main-138","Creation of Reusable Components and Language Resources for Named Entity Recognition in Russian","10.63317\u002F46yfx2ijf4z8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-138","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F267.pdf","popov-etal-2004-creation",[3168,3171,3174,3175],{"paper_id":3161,"author_seq":247,"given_name":3169,"surname":3170,"affiliation":63,"orcid":63},"Borislav","Popov",{"paper_id":3161,"author_seq":232,"given_name":3172,"surname":3173,"affiliation":63,"orcid":63},"Angel","Kirilov",{"paper_id":3161,"author_seq":218,"given_name":1060,"surname":1429,"affiliation":63,"orcid":63},{"paper_id":3161,"author_seq":203,"given_name":3176,"surname":3177,"affiliation":63,"orcid":63},"Dimitar","Manov","This paper describes the development of the RussIE system in which we experimented with the creation of reusable processing components and language resources for a Russian Information Extraction system. The work was done as part of a multilingual project to adapt existing tools and resources for HLT to new domains and languages. The system was developed within the GATE architecture for language processing, and aims to explore the boundaries of language resource reuse and adaptability across languages and language types, rather than to create a full-scale IE system at the very peak of performance. 
Nevertheless, the system achieves a very creditable 71% F-Measure on news texts, and there is much scope for future improvement of this score.",{"paper_id":3180,"title":3181,"year":197,"month":855,"day":63,"doi":3182,"resource_url":3183,"first_page":63,"last_page":63,"pdf_url":3184,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3185,"paper_type":860,"authors":3186,"abstract":63},"lrec2004-main-139","Abstracting a Dialog Act Tagset for Meeting Processing","10.63317\u002F4bmqrpfaffmp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-139","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F268.pdf","popescu-belis-2004-abstracting",[3187],{"paper_id":3180,"author_seq":247,"given_name":3154,"surname":3155,"affiliation":63,"orcid":63},{"paper_id":3189,"title":3190,"year":197,"month":855,"day":63,"doi":3191,"resource_url":3192,"first_page":63,"last_page":63,"pdf_url":3193,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3194,"paper_type":860,"authors":3195,"abstract":3202},"lrec2004-main-140","Online Evaluation of Coreference Resolution","10.63317\u002F2sa5vvbduo88","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-140","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F270.pdf","popescu-belis-etal-2004-online",[3196,3197,3200,3201],{"paper_id":3189,"author_seq":247,"given_name":3154,"surname":3155,"affiliation":63,"orcid":63},{"paper_id":3189,"author_seq":232,"given_name":3198,"surname":3199,"affiliation":63,"orcid":63},"Loïs","Rigouste",{"paper_id":3189,"author_seq":218,"given_name":1172,"surname":1173,"affiliation":63,"orcid":63},{"paper_id":3189,"author_seq":203,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},"This paper presents the design of an online evaluation service for coreference resolution in texts. 
We argue that coreference, as an equivalence relation between referring expressions (RE) in texts, should be properly distinguished from anaphora and has therefore to be evaluated separately. The annotation model for coreference is based on links between REs. The program presented in this article compares two such annotations, which may be the output of coreference resolution tools or of human judgement. In order to evaluate the agreement between the two annotations, the evaluator first converts the input annotation format into a pivot format, then abstracts equivalence classes from the links and provides five scores representing in different ways the similarity between the two partitions: MUC, B3, Kappa, Core-discourse-entity, and Mutual-information. Although we consider that the identification of REs (i.e. the elements of the partition) should not be part of coreference resolution properly speaking, we propose several solutions for the frequent case when the input files do not agree on the elements of the text to consider as REs.",{"paper_id":3204,"title":3205,"year":197,"month":855,"day":63,"doi":3206,"resource_url":3207,"first_page":63,"last_page":63,"pdf_url":3208,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3209,"paper_type":860,"authors":3210,"abstract":3220},"lrec2004-main-141","FreeLing: An Open-Source Suite of Language 
Analyzers","10.63317\u002F3n2frrga2mkz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-141","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F271.pdf","carreras-etal-2004-freeling",[3211,3214,3217,3219],{"paper_id":3204,"author_seq":247,"given_name":3212,"surname":3213,"affiliation":63,"orcid":63},"Xavier","Carreras",{"paper_id":3204,"author_seq":232,"given_name":3215,"surname":3216,"affiliation":63,"orcid":63},"Isaac","Chao",{"paper_id":3204,"author_seq":218,"given_name":3218,"surname":2908,"affiliation":63,"orcid":63},"Lluís",{"paper_id":3204,"author_seq":203,"given_name":2907,"surname":2908,"affiliation":63,"orcid":63},"Basic language processing such as tokenizing, morphological analyzers, lemmatizing, PoS tagging, chunking, etc. is a need for most NL applications such as Machine Translation, Summarization, Dialogue systems, etc. A large part of the effort required to develop such applications is devoted to the adaptation of existing software resources to the platform, programming language, format or API of the final system. In LREC'02, we presented the object architecture that we are currently using (Carreras &amp; Padró 02), which enables the quick and easy integration of basic language analyzers in any NLP application. Now we present a suite of analysis tools based on that architecture, which is distributed under Lesser General Public License (LGPL) (Free Software Foundation 99). 
The first release of the suite will include morphological analyzer and Part-of-Speech tagger for English, Spanish, and Catalan.",{"paper_id":3222,"title":3223,"year":197,"month":855,"day":63,"doi":3224,"resource_url":3225,"first_page":63,"last_page":63,"pdf_url":3226,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3227,"paper_type":860,"authors":3228,"abstract":3230},"lrec2004-main-142","Phrase-Based Dependency Evaluation of a Japanese Parser","10.63317\u002F3s7s7upyehyo","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-142","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F272.pdf","suzuki-2004-phrase",[3229],{"paper_id":3222,"author_seq":247,"given_name":2792,"surname":2793,"affiliation":63,"orcid":63},"Extraction of predicate-argument structure is an important task that requires evaluation for many applications, yet annotated resources of predicate-argument structure are currently scarce, especially for languages other than English. This paper presents an evaluation of a Japanese parser based on dependency relations as proposed by Lin (1995, 1998), but using phrase dependency instead of word dependency. Phrase-based dependency analysis has been the preferred form of Japanese syntactic analysis, yet the use of annotated resources in this format has so far been limited to training and evaluation of dependency analyzers. 
We will show that (1) evaluation based on phrase-dependency is particularly well-suited for Japanese, even for an evaluation of phrase-structure grammar, and that (2) in spite of shortcomings, the proposed evaluation method has the advantage of utilizing currently available surface-based annotations in a way that is relevant to predicate-argument structure.",{"paper_id":3232,"title":3233,"year":197,"month":855,"day":63,"doi":3234,"resource_url":3235,"first_page":63,"last_page":63,"pdf_url":3236,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3237,"paper_type":860,"authors":3238,"abstract":3242},"lrec2004-main-143","Functional Requirements for an Interlinear Text Editor","10.63317\u002F2wb6z66ygi4m","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-143","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F273.pdf","hughes-etal-2004-functional",[3239,3240,3241],{"paper_id":3232,"author_seq":247,"given_name":2503,"surname":2504,"affiliation":63,"orcid":63},{"paper_id":3232,"author_seq":232,"given_name":2497,"surname":2498,"affiliation":63,"orcid":63},{"paper_id":3232,"author_seq":218,"given_name":2500,"surname":2501,"affiliation":63,"orcid":63},"Interlinear text has long been considered a valuable format in the presentation of multilingual data, and a variety of software tools have facilitated the creation and processing of such texts by researchers. Despite the diversity of tools, a common core of editorial functionality is provided. Identifying these core functions has important implications for software engineers who seek to efficiently build tools that support interlinear text editing. While few applications are specifically designed for the creation or manipulation of interlinear text, a number of tools offer varying degrees of incidental support for this modality. In this paper we provide a comprehensive set of criteria upon which the derivation of functional criteria can be based. 
We describe the basis on which a group of tools was selected for investigation, along with the evaluation criteria. Finally we consolidate our findings into a functional specification for the development of software applications for the editing of interlinear text.",{"paper_id":3244,"title":3245,"year":197,"month":855,"day":63,"doi":3246,"resource_url":3247,"first_page":63,"last_page":63,"pdf_url":3248,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3249,"paper_type":860,"authors":3250,"abstract":3264},"lrec2004-main-144","Management of Metadata in Linguistic Fieldwork: Experience from the ACLA Project","10.63317\u002F43qiphbmskpx","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-144","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F274.pdf","hughes-etal-2004-management",[3251,3252,3254,3255,3256,3259,3261],{"paper_id":3244,"author_seq":247,"given_name":2503,"surname":2504,"affiliation":63,"orcid":63},{"paper_id":3244,"author_seq":232,"given_name":1790,"surname":3253,"affiliation":63,"orcid":63},"Penton",{"paper_id":3244,"author_seq":218,"given_name":2500,"surname":2501,"affiliation":63,"orcid":63},{"paper_id":3244,"author_seq":203,"given_name":2497,"surname":2498,"affiliation":63,"orcid":63},{"paper_id":3244,"author_seq":188,"given_name":3257,"surname":3258,"affiliation":63,"orcid":63},"Gillian","Wigglesworth",{"paper_id":3244,"author_seq":172,"given_name":1519,"surname":3260,"affiliation":63,"orcid":63},"McConvell",{"paper_id":3244,"author_seq":155,"given_name":3262,"surname":3263,"affiliation":63,"orcid":63},"Jane","Simpson","Many linguistic research projects collect large amounts of multimodal data in digital formats. Despite the plethora of data collection applications available, it is often difficult for researchers to identify and integrate applications which enable the management of collections of multimodal data in addition to facilitating the actual collection process itself. 
In research projects that involve substantial data analysis, data management becomes a critical issue. Whilst best practice recommendations in regard to data formats themselves are propagated through projects such as EMELD, HRELP and DOBES, there is little corresponding information available regarding best practice for field metadata management beyond the provision of standards by entities such as OLAC and IMDI. These general problems are further exacerbated in the context of multiple researchers in geographically-disparate or connectivity-challenged locations. We describe the design of a solution for a group of researchers collecting data on child language acquisition in Australian indigenous communities. We describe the context, identify pertinent issues, outline the mechanics of a solution, and finally report the implementation. In doing so, we provide an alternative model and an open source software application suite which aims to be sufficiently general that other research groups may consider adopting some or all of the infrastructure.",{"paper_id":3266,"title":3267,"year":197,"month":855,"day":63,"doi":3268,"resource_url":3269,"first_page":63,"last_page":63,"pdf_url":3270,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3271,"paper_type":860,"authors":3272,"abstract":3290},"lrec2004-main-145","A Search Tool for Corpora with Positional Tagsets and 
Ambiguities","10.63317\u002F3xoo9dkau536","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-145","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F275.pdf","przepiorkowski-etal-2004-search",[3273,3276,3279,3282,3285,3287],{"paper_id":3266,"author_seq":247,"given_name":3274,"surname":3275,"affiliation":63,"orcid":63},"Adam","Przepiórkowski",{"paper_id":3266,"author_seq":232,"given_name":3277,"surname":3278,"affiliation":63,"orcid":63},"Zygmunt","Krynicki",{"paper_id":3266,"author_seq":218,"given_name":3280,"surname":3281,"affiliation":63,"orcid":63},"Łukasz","Dębowski",{"paper_id":3266,"author_seq":203,"given_name":3283,"surname":3284,"affiliation":63,"orcid":63},"Marcin","Woliński",{"paper_id":3266,"author_seq":188,"given_name":2268,"surname":3286,"affiliation":63,"orcid":63},"Janus",{"paper_id":3266,"author_seq":172,"given_name":3288,"surname":3289,"affiliation":63,"orcid":63},"Piotr","Bański","This article describes POLIQARP, a corpus indexing and query tool, which understands positional tagsets and which does not assume that word forms are annotated with unique morphosyntactic tags. POLIQARP is designed to be applicable to a variety of languages and tagsets: it works with XML-encoded texts, uses the UTF-8 character set, and allows for an external specification of the tagset. 
Currently, POLIQARP is used for indexing and searching a morphosyntactically annotated corpus of Polish.",{"paper_id":3292,"title":3293,"year":197,"month":855,"day":63,"doi":3294,"resource_url":3295,"first_page":63,"last_page":63,"pdf_url":3296,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3297,"paper_type":860,"authors":3298,"abstract":3302},"lrec2004-main-146","The American English SALA-II Data Collection","10.63317\u002F3mugurb2fk4v","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-146","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F276.pdf","heeman-2004-american",[3299],{"paper_id":3292,"author_seq":247,"given_name":3300,"surname":3301,"affiliation":63,"orcid":63},"Peter A.","Heeman","We discuss the collection of the American English SALA-II speech corpus. We focus on how we designed the prompt sheets to ensure maximum variability and on our strategy for recruiting the required 4000 speakers. We also present results on the effectiveness of the phonetically rich sentence. 
This paper should benefit others who are interested in using this corpus, or who are planning to collect a speech corpus with a large number of speakers.",{"paper_id":3304,"title":3305,"year":197,"month":855,"day":63,"doi":3306,"resource_url":3307,"first_page":63,"last_page":63,"pdf_url":3308,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3309,"paper_type":860,"authors":3310,"abstract":3316},"lrec2004-main-147","How Does Automatic Machine Translation Evaluation Correlate with Human Scoring as the Number of Reference Translations Increases?","10.63317\u002F2agzmwdvyn96","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-147","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F277.pdf","finch-etal-2004-automatic",[3311,3314,3315],{"paper_id":3304,"author_seq":247,"given_name":3312,"surname":3313,"affiliation":63,"orcid":63},"Andrew","Finch",{"paper_id":3304,"author_seq":232,"given_name":2235,"surname":2236,"affiliation":63,"orcid":63},{"paper_id":3304,"author_seq":218,"given_name":2220,"surname":2221,"affiliation":63,"orcid":63},"Automatic machine translation evaluation is a very difficult task due to the wide diversity of valid output translations that may result from translating a single source sentence or textual segment. Recently a number of competing methods of automatic machine translation evaluation have been adopted by the research community, of these the some of the most utilized are BLEU, NIST, mWER and the F-measure. This work extends the work of others in the field looking at how closely these evaluation techniques match human performance at ranking the translation output. However, we focus on investigating how these systems scale up with increasing numbers of human-produced references. 
We measure the correlation of the automatic ranking of the output from nine different machine translation systems, with the ranking derived from the score assigned by nine human evaluators using up to sixteen references per sentence. Our results show that evaluation performance improves with increasing numbers of references for all of the scoring methods except NIST which only shows improvements with small numbers of references.",{"paper_id":3318,"title":3319,"year":197,"month":855,"day":63,"doi":3320,"resource_url":3321,"first_page":63,"last_page":63,"pdf_url":3322,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3323,"paper_type":860,"authors":3324,"abstract":3334},"lrec2004-main-148","Evaluating the FOKS Error Model","10.63317\u002F3xerem9yy23b","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-148","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F278.pdf","bilac-etal-2004-evaluating",[3325,3328,3331],{"paper_id":3318,"author_seq":247,"given_name":3326,"surname":3327,"affiliation":63,"orcid":63},"Slaven","Bilac",{"paper_id":3318,"author_seq":232,"given_name":3329,"surname":3330,"affiliation":63,"orcid":63},"Timothy","Baldwin",{"paper_id":3318,"author_seq":218,"given_name":3332,"surname":3333,"affiliation":63,"orcid":63},"Hozumi","Tanaka","Learners of Japanese face great difficulty when trying to lookup words containing kanji in a dictionary, due to the requirement of knowing the correct reading of the target word. We propose a system that imitates the cognitive process learners go through in generating readings for novel kanji strings, and provide direct access to the dictionary entries based on the generated readings. In doing so we remove the correct reading requirement. The system described here is implemented in a web-based environment and freely available for general use. 
In this paper we provide an analysis of query and error data collected by our server.",{"paper_id":3336,"title":3337,"year":197,"month":855,"day":63,"doi":3338,"resource_url":3339,"first_page":63,"last_page":63,"pdf_url":3340,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3341,"paper_type":860,"authors":3342,"abstract":3358},"lrec2004-main-149","Evaluation of a Speech Cuer: From Motion Capture to a Concatenative Text-to-cued Speech System","10.63317\u002F4usinux8tvv8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-149","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F282.pdf","gibert-etal-2004-evaluation",[3343,3346,3349,3352,3355],{"paper_id":3336,"author_seq":247,"given_name":3344,"surname":3345,"affiliation":63,"orcid":63},"Guillaume","Gibert",{"paper_id":3336,"author_seq":232,"given_name":3347,"surname":3348,"affiliation":63,"orcid":63},"Gérard","Bailly",{"paper_id":3336,"author_seq":218,"given_name":3350,"surname":3351,"affiliation":63,"orcid":63},"Frédéric","Eliséi",{"paper_id":3336,"author_seq":203,"given_name":3353,"surname":3354,"affiliation":63,"orcid":63},"Denis","Beautemps",{"paper_id":3336,"author_seq":188,"given_name":3356,"surname":3357,"affiliation":63,"orcid":63},"Rémi","Brun","We present here our efforts for characterizing the 3D movements of the right hand and the face of a French female during the production of manual cued speech. We analyzed the 3D trajectories of 50 hand and 63 facial fleshpoints during the production of 238 utterances carefully designed for covering all possible diphones of the French language. Linear and non linear statistical models of the hand and face deformations and postures have been developed using both separate and joint corpora. 
We implement a concatenative audiovisual text-to-cued speech synthesis system.",{"paper_id":3360,"title":3361,"year":197,"month":855,"day":63,"doi":3362,"resource_url":3363,"first_page":63,"last_page":63,"pdf_url":3364,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3365,"paper_type":860,"authors":3366,"abstract":63},"lrec2004-main-150","Beyond TREC’s Filtering Track","10.63317\u002F3rt3gocdeakn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-150","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F283.pdf","nanas-etal-2004-beyond",[3367,3370,3373,3375],{"paper_id":3360,"author_seq":247,"given_name":3368,"surname":3369,"affiliation":63,"orcid":63},"Nikolaos","Nanas",{"paper_id":3360,"author_seq":232,"given_name":3371,"surname":3372,"affiliation":63,"orcid":63},"Victoria","Uren",{"paper_id":3360,"author_seq":218,"given_name":2977,"surname":3374,"affiliation":63,"orcid":63},"de Roeck",{"paper_id":3360,"author_seq":203,"given_name":3376,"surname":3377,"affiliation":63,"orcid":63},"John","Domingue",{"paper_id":3379,"title":3380,"year":197,"month":855,"day":63,"doi":3381,"resource_url":3382,"first_page":63,"last_page":63,"pdf_url":3383,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3384,"paper_type":860,"authors":3385,"abstract":3389},"lrec2004-main-151","A Corpus-based Syntactic Lexicon for Adverbs","10.63317\u002F2vce5rdkokrc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-151","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F284.pdf","nimb-2004-corpus",[3386],{"paper_id":3379,"author_seq":247,"given_name":3387,"surname":3388,"affiliation":63,"orcid":63},"Sanni","Nimb","A word class often neglected in the field of NLP resources, namely adverbs, has lately been described in a computational lexicon produced at CST as one of the results of a Ph.D.-project. 
The adverb lexicon, which is integrated in the Danish STO lexicon, gives detailed syntactic information on the type of modification and position, as well as on other syntactic properties of approx 800 Danish adverbs. One of the aims of the lexicon has been to establish a clear distinction between syntactic and semantic information - where other lexicons often generalize over the syntactic behavior of semantic classes of adverbs, every adverb is described with respect to its proper syntactic behavior in a text corpus, revealing very individual syntactic properties. Syntactic information on adverbs is needed in NLP systems generating text to ensure correct placing in the phrase they modify. Also in systems analyzing text, this information is needed in order to attach the adverbs to the right node in the syntactic parse trees. Within the field of linguistic research, several results can be deduced from the lexicon, e.g. knowledge of syntactic classes of Danish adverbs.",{"paper_id":3391,"title":3392,"year":197,"month":855,"day":63,"doi":3393,"resource_url":3394,"first_page":63,"last_page":63,"pdf_url":3395,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3396,"paper_type":860,"authors":3397,"abstract":3410},"lrec2004-main-152","The Future of Evaluation for Cross-Language Information Retrieval 
Systems","10.63317\u002F2p2g4zsfge7x","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-152","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F285.pdf","peters-etal-2004-future",[3398,3400,3402,3405,3408],{"paper_id":3391,"author_seq":247,"given_name":3399,"surname":1388,"affiliation":63,"orcid":63},"Carol",{"paper_id":3391,"author_seq":232,"given_name":1087,"surname":3401,"affiliation":63,"orcid":63},"Braschler",{"paper_id":3391,"author_seq":218,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},"Khalid","Choukri",{"paper_id":3391,"author_seq":203,"given_name":3406,"surname":3407,"affiliation":63,"orcid":63},"Julio","Gonzalo",{"paper_id":3391,"author_seq":188,"given_name":1357,"surname":3409,"affiliation":63,"orcid":63},"Kluck","The objective of the Cross-Language Evaluation Forum (CLEF) is to promote research in the multilingual information access domain. In this short paper, we list the achievements of CLEF during its first four years of activity and describe how the range of tasks has been considerably expanded during this period. 
The aim of the paper is to demonstrate the importance of evaluation initiatives with respect to system research and development and to show how essential it is for such initiatives to keep abreast of and even anticipate the emerging needs of both system developers and application communities if they are to have a future.",{"paper_id":3412,"title":3413,"year":197,"month":855,"day":63,"doi":3414,"resource_url":3415,"first_page":63,"last_page":63,"pdf_url":3416,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3417,"paper_type":860,"authors":3418,"abstract":3434},"lrec2004-main-153","SALA II Across the Finish Line: A Large Collection of Mobile Telephone Speech Databases from North and Latin America completed","10.63317\u002F5dvp6ckmy2kh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-153","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F288.pdf","van-den-heuvel-etal-2004-sala",[3419,3422,3425,3426,3428,3431],{"paper_id":3412,"author_seq":247,"given_name":3420,"surname":3421,"affiliation":63,"orcid":63},"Henk","van den Heuvel",{"paper_id":3412,"author_seq":232,"given_name":3423,"surname":3424,"affiliation":63,"orcid":63},"Phil","Hall",{"paper_id":3412,"author_seq":218,"given_name":1994,"surname":1995,"affiliation":63,"orcid":63},{"paper_id":3412,"author_seq":203,"given_name":2707,"surname":3427,"affiliation":63,"orcid":63},"Moreno",{"paper_id":3412,"author_seq":188,"given_name":3429,"surname":3430,"affiliation":63,"orcid":63},"Antonio","Rincon",{"paper_id":3412,"author_seq":172,"given_name":3432,"surname":3433,"affiliation":63,"orcid":63},"Francesco","Senia","The SALA II project comprises mobile telephone recordings according to the SpeechDat (II) paradigm for several languages in North and Latin America. Each database contains the recordings of 1000 speakers, with the exception of US Spanish (2000 speakers) and US English (4000 speakers). 
A quarter of the recordings of each database are made respectively in a quiet environment (home\u002Foffice), in the street, in a public place, and in a moving vehicle. This paper presents an evaluation of the project. The paper details on experiences with respect to the implementation of design specifications, speaker recruitment, data recordings (on site), data processing, orthographic transcription and lexicon generation. Furthermore, the validation procedure and its results are documented. Finally, the availability and distribution of the databases are addressed.",{"paper_id":3436,"title":3437,"year":197,"month":855,"day":63,"doi":3438,"resource_url":3439,"first_page":63,"last_page":63,"pdf_url":3440,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3441,"paper_type":860,"authors":3442,"abstract":3448},"lrec2004-main-154","Parallel Corpora for the Galician Language: Building and Processing of the CLUVI (Linguistic Corpus of the University of Vigo)","10.63317\u002F2jt9awnaxmpi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-154","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F290.pdf","gomez-guinovart-fontenla-2004-parallel",[3443,3445],{"paper_id":3436,"author_seq":247,"given_name":3212,"surname":3444,"affiliation":63,"orcid":63},"Gómez-Guinovart",{"paper_id":3436,"author_seq":232,"given_name":3446,"surname":3447,"affiliation":63,"orcid":63},"Elena Sacau","Fontenla","In this paper, we present the methodology developed by the SLI (Computational Linguistics Group of the University of Vigo) for the building and processing of the CLUVI Corpus, showing the TMX-based XML specification designed to encode both morphosyntactic features and translation alignments in parallel corpora, and the solutions adopted for making the CLUVI parallel corpora freely available over the WWW 
(http:\u002F\u002Fsli.uvigo.es\u002FCLUVI\u002F).",{"paper_id":3450,"title":3451,"year":197,"month":855,"day":63,"doi":3452,"resource_url":3453,"first_page":63,"last_page":63,"pdf_url":3454,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3455,"paper_type":860,"authors":3456,"abstract":3466},"lrec2004-main-155","PBIE: A Data Preparation Toolkit Toward Developing a Parsing-Based Information Extraction System","10.63317\u002F4hqq966p6c2w","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-155","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F292.pdf","hosaka-etal-2004-pbie",[3457,3460,3463],{"paper_id":3450,"author_seq":247,"given_name":3458,"surname":3459,"affiliation":63,"orcid":63},"Junko","Hosaka",{"paper_id":3450,"author_seq":232,"given_name":3461,"surname":3462,"affiliation":63,"orcid":63},"Igor V.","Kurochkin",{"paper_id":3450,"author_seq":218,"given_name":3464,"surname":3465,"affiliation":63,"orcid":63},"Akihiko","Konagaya","We have developed a toolkit in which an annotation tool, a syntactic tree editor, and an extraction rule editor interact dynamically. Its output can be stored in a database for further use. In the field of biomedicine, there is a critical need for automatic text processing. However, current language processing approaches suffer from insufficient basic data incorporating both human domain expertise and domain-specific language processing capabilities. With the annotation tool presented here, a set of “gold standards” can be collected, representing what should be extracted. At the same time, any change in annotation can be viewed on an associated syntactic tree. These facilities provide a clear picture of the relationship between the extraction target and the syntactic tree. Underlying sentences can be analyzed with a parser which can be plugged in, or a set of parsed sentences can be used to generate the tree. 
Extraction rules written with the integrated editor can be applied at once, and their validity can immediately be verified both on the syntactic tree and on the sentence string by coloring the corresponding segments. Thus our toolkit enables the user to efficiently construct parse-based extraction rules. PBIE2 works under Windows 2000\u002FXP and requires Microsoft Internet Explorer 6.0 or higher. The data can be stored in Microsoft Access.",{"paper_id":3468,"title":3469,"year":197,"month":855,"day":63,"doi":3470,"resource_url":3471,"first_page":63,"last_page":63,"pdf_url":3472,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3473,"paper_type":860,"authors":3474,"abstract":3480},"lrec2004-main-156","A Syntactically Annotated Corpus of Tibetan","10.63317\u002F4orq7quvmsnq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-156","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F293.pdf","wagner-zeisler-2004-syntactically",[3475,3477],{"paper_id":3468,"author_seq":247,"given_name":1647,"surname":3476,"affiliation":63,"orcid":63},"Wagner",{"paper_id":3468,"author_seq":232,"given_name":3478,"surname":3479,"affiliation":63,"orcid":63},"Bettina","Zeisler","This paper describes the creation of a syntactically annotated Tibetan corpus. This corpus forms a part of the TUSNELDA collection of corpora and databases for linguistic research. It will ultimately comprise spoken and written Tibetan texts originating from different regions and historical epochs. These texts are annotated with several kinds of linguistic information, in particular POS tags, phrases, argument structures of verbs, clauses and sentences, as well as several kinds of discourse units and textual segments. The annotation is done in XML. The primary research interest which guides the development of the corpus is the investigation of cross-clausal references, especially the relation between empty arguments (i.e. 
arguments not overtly realised in a clause) and their antecedents in previous clauses. For this purpose, such references are explicitly encoded so that they can be qualitatively and quantitatively evaluated with the help of standard XML techniques such as XPath search and XSLT transformations. Apart from this primary research interest, we expect that our corpus will be useful for other projects concerning Tibetan and related languages. Like other data in TUSNELDA, it will be made accessible via a WWW query interface.",{"paper_id":3482,"title":3483,"year":197,"month":855,"day":63,"doi":3484,"resource_url":3485,"first_page":63,"last_page":63,"pdf_url":3486,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3487,"paper_type":860,"authors":3488,"abstract":3493},"lrec2004-main-157","Lexical Entry Templates for Robust Deep Parsing","10.63317\u002F235iiond4ncb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-157","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F295.pdf","marimon-bel-2004-lexical",[3489,3492],{"paper_id":3482,"author_seq":247,"given_name":3490,"surname":3491,"affiliation":63,"orcid":63},"Montserrat","Marimon",{"paper_id":3482,"author_seq":232,"given_name":2530,"surname":2531,"affiliation":63,"orcid":63},"We report on the development and employment of lexical entry templates in a large-coverage unification-based grammar of Spanish. 
The aim of the work reported in this paper is to provide robust deep linguistic processing in order to make the grammar more adequate for industrial NLP applications.",{"paper_id":3495,"title":3496,"year":197,"month":855,"day":63,"doi":3497,"resource_url":3498,"first_page":63,"last_page":63,"pdf_url":3499,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3500,"paper_type":860,"authors":3501,"abstract":63},"lrec2004-main-158","Tiered Tagging Revisited","10.63317\u002F2679n9aurjvt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-158","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F296.pdf","tufis-dragomirescu-2004-tiered",[3502,3503],{"paper_id":3495,"author_seq":247,"given_name":2858,"surname":2859,"affiliation":63,"orcid":63},{"paper_id":3495,"author_seq":232,"given_name":3504,"surname":3505,"affiliation":63,"orcid":63},"Liviu","Dragomirescu",{"paper_id":3507,"title":3508,"year":197,"month":855,"day":63,"doi":3509,"resource_url":3510,"first_page":63,"last_page":63,"pdf_url":3511,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3512,"paper_type":860,"authors":3513,"abstract":3518},"lrec2004-main-159","A Methodology and Associated Tools for Building Interlingual Wordnets","10.63317\u002F24itz3shga32","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-159","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F298.pdf","tufis-barbu-2004-methodology",[3514,3515],{"paper_id":3507,"author_seq":247,"given_name":2858,"surname":2859,"affiliation":63,"orcid":63},{"paper_id":3507,"author_seq":232,"given_name":3516,"surname":3517,"affiliation":63,"orcid":63},"Eduard","Barbu","The paper describes the methodology and the tools we developed for the purpose of building a Romanian wordnet. 
The work is carried out within the BalkaNet European project and is concerned with wordnets for Bulgarian, Czech, Greek, Romanian, Serbian and Turkish all of them aligned via an interlingual index (ILI) to Princeton Wordnet. The wordnets structuring follows the principles adopted in EuroWordNet. In order to ensure maximal cross-lingual lexical coverage, the consortium decided to implement the same concepts, represented by a common set of ILI concepts. We describe the selection of concepts to be implemented in all the monolingual wordnets. The methodologies adopted by each partner were different and they depended on the language resources and personnel available. For the Romanian wordnet, we decided that it should be based on the reference lexicographic descriptions of Romanian which we had in electronic forms: EXPD, a heavily XML annotated explanatory dictionary (developed in the previous CONCEDE project and based on the standard Explanatory Dictionary of Romanian), SYND, a published dictionary of synonyms which we keyboarded, encoded and completed with more than 4000 new synonymy sets extracted from EXPD, EnRoD, a Romanian-English dictionary, most part of it being extracted automatically from parallel corpora and further hand validated and extended. Besides these monolingual resources, as all the other members of the consortium, we had at our disposal the interlingual mapping of the Princeton Wordnet. All the above mentioned resources have been incorporated into a user-friendly system, WnBuilder, which allows for cooperative work of a large number of lexicographers. When the distributed work is put together, the synsets are validated. Several errors show up, the most frequent and difficult to solve being the case of a literal with the same sense number appearing in different synsets. We discuss reasons for such conflicts as well as their correction, supported by another utility program called WnCorrector. 
The full paper presents WnBuilder and WnCorrector, as well as the status of the Romanian wordnet development.",{"paper_id":3520,"title":3521,"year":197,"month":855,"day":63,"doi":3522,"resource_url":3523,"first_page":63,"last_page":63,"pdf_url":3524,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3525,"paper_type":860,"authors":3526,"abstract":3535},"lrec2004-main-160","Construction of a Bilingual Arabic-Spanish Lexicon of Verbs Based on a Parallel Corpus","10.63317\u002F2329d3hyk3qn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-160","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F299.pdf","samy-etal-2004-construction",[3527,3530,3532],{"paper_id":3520,"author_seq":247,"given_name":3528,"surname":3529,"affiliation":63,"orcid":63},"Doaa","Samy",{"paper_id":3520,"author_seq":232,"given_name":3429,"surname":3531,"affiliation":63,"orcid":63},"Moreno-Sandoval",{"paper_id":3520,"author_seq":218,"given_name":3533,"surname":3534,"affiliation":63,"orcid":63},"José M.","Guirao","Parallel corpora are considered an important resource for the development of linguistic tools. In this paper our main goal is the development of a bilingual lexicon of verbs. The construction of this lexicon is possible using two main resources: I) a parallel corpus (through the alignment); II) the linguistic tools developed for Spanish (which serve as a starting point for developing tools for Arabic language). At the end, aligned equivalent verbs are detected automatically from a parallel corpus Spanish-Arabic. To achieve this goal, we had to pass through different preparatory stages concerning the assessment of the parallel corpus, the monolingual tokenization of each corpus, a preliminary sentence alignment and finally applying the model of automatic extraction of equivalent verbs. 
Our method is hybrid, since it combines both statistical and linguistic approaches.",{"paper_id":3537,"title":3538,"year":197,"month":855,"day":63,"doi":3539,"resource_url":3540,"first_page":63,"last_page":63,"pdf_url":3541,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3542,"paper_type":860,"authors":3543,"abstract":3561},"lrec2004-main-161","A XML-Based Term Extraction Tool for Basque","10.63317\u002F42fjf2g62hwk","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-161","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F301.pdf","alegria-etal-2004-xml",[3544,3547,3549,3552,3555,3558],{"paper_id":3537,"author_seq":247,"given_name":3545,"surname":3546,"affiliation":63,"orcid":63},"I.","Alegria",{"paper_id":3537,"author_seq":232,"given_name":2846,"surname":3548,"affiliation":63,"orcid":63},"Gurrutxaga",{"paper_id":3537,"author_seq":218,"given_name":3550,"surname":3551,"affiliation":63,"orcid":63},"P.","Lizaso",{"paper_id":3537,"author_seq":203,"given_name":3553,"surname":3554,"affiliation":63,"orcid":63},"X.","Saralegi",{"paper_id":3537,"author_seq":188,"given_name":3556,"surname":3557,"affiliation":63,"orcid":63},"S.","Ugartetxea",{"paper_id":3537,"author_seq":172,"given_name":3559,"surname":3560,"affiliation":63,"orcid":63},"R.","Urizar","This project combines linguistic and statistical information to develop a term extraction tool for Basque. Being Basque an agglutinative and highly inflected language, the treatment of morphosyntactic information is vital. In addition, due to late unification process of the language, texts present more elevated term dispersion than in a highly normalized language. 
The result is a semi-automatic terminology extraction tool based on XML, for its use in technical and scientific information managing.",{"paper_id":3563,"title":3564,"year":197,"month":855,"day":63,"doi":3565,"resource_url":3566,"first_page":63,"last_page":63,"pdf_url":3567,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3568,"paper_type":860,"authors":3569,"abstract":3576},"lrec2004-main-162","A Bayesian Model for Shallow Syntactic Parsing of Natural Language Texts","10.63317\u002F3wbq7p6hwzr6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-162","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F302.pdf","maragoudakis-etal-2004-bayesian",[3570,3573,3574],{"paper_id":3563,"author_seq":247,"given_name":3571,"surname":3572,"affiliation":63,"orcid":63},"Manolis","Maragoudakis",{"paper_id":3563,"author_seq":232,"given_name":1111,"surname":1112,"affiliation":63,"orcid":63},{"paper_id":3563,"author_seq":218,"given_name":1018,"surname":3575,"affiliation":63,"orcid":63},"Kokkinakis","For the present work, we introduce and evaluate a novel Bayesian syntactic shallow parser that is able to perform robust detection of pairs of subject-object and subject-direct object-indirect object for a given verb, in a natural language sentence. The shallow parser infers on the correct subject-object pairs based on knowledge provided by Bayesian network learning from annotated text corpora. The DELOS corpus, a collection of economic domain texts that has been automatically annotated using various morphological and syntactic tools was used as training material. Our shallow parser makes use of limited linguistic input. More specifically, we consider only part of speech tagging, the voice and the mood of the verb as well as the head word of a noun phrase. For the task of detecting the head word of a phrase we used a sentence boundary detector. Identifying the head word of a noun phrase, i.e. 
the word that holds the morphological information (case, number) of the whole phrase, also proves to be very helpful for our task as its morphological tag is all the information that is needed regarding the phrase. The evaluation of the proposed method was performed against three other machine learning techniques, namely naive Bayes, k-Nearest Neighbor and Support Vector Machines, methods that have been previously applied to natural language processing tasks with satisfactory results. The experimental outcomes portray a satisfactory performance of our proposed shallow parser, which reaches almost 92 per cent in terms of precision.",{"paper_id":3578,"title":3579,"year":197,"month":855,"day":63,"doi":3580,"resource_url":3581,"first_page":63,"last_page":63,"pdf_url":3582,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3583,"paper_type":860,"authors":3584,"abstract":3591},"lrec2004-main-163","Multifunctional Computational Lexicon of Contemporary Portuguese: An Available Resource for Multitype Applications","10.63317\u002F2825tapvpq6t","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-163","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F303.pdf","barreto-amaro-2004-multifunctional",[3585,3588],{"paper_id":3578,"author_seq":247,"given_name":3586,"surname":3587,"affiliation":63,"orcid":63},"Florbela","Barreto",{"paper_id":3578,"author_seq":232,"given_name":3589,"surname":3590,"affiliation":63,"orcid":63},"Raquel","Amaro","This paper presents some aspects of the first Portuguese frequency lexicon extracted from a corpus of large dimensions. The Multifunctional Computational Lexicon of Contemporary Portuguese (henceforth MCL) rised from the necessity of filling a gap existent in the studies of the contemporary Portuguese. 
Until recently, the frequency lexicons of Portuguese were of very small dimensions, such as Português Fundamental, which is constituted by 2.217 words extracted from a 700.000 word corpus and the Frequency Dictionary of Portuguese Words based on a literary corpus of 500.000 words. We describe here the main steps taken for collecting the lexical and frequency data and some of the major problems that arouse in the process. The resulting lexicon is a freely available reliable resource for several types of applications.",{"paper_id":3593,"title":3594,"year":197,"month":855,"day":63,"doi":3595,"resource_url":3596,"first_page":63,"last_page":63,"pdf_url":3597,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3598,"paper_type":860,"authors":3599,"abstract":3608},"lrec2004-main-164","Use and Evaluation of Prosodic Annotations in Dutch","10.63317\u002F56oh7wbdv9ec","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-164","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F304.pdf","duchateau-etal-2004-use",[3600,3603,3606],{"paper_id":3593,"author_seq":247,"given_name":3601,"surname":3602,"affiliation":63,"orcid":63},"Jacques","Duchateau",{"paper_id":3593,"author_seq":232,"given_name":3604,"surname":3605,"affiliation":63,"orcid":63},"Tim","Ceyssens",{"paper_id":3593,"author_seq":218,"given_name":1773,"surname":3607,"affiliation":63,"orcid":63},"Van hamme","In the development of annotations for a spoken database, an important issue is whether the annotations can be generated automatically with sufficient precision, or whether expensive manual annotations are needed. In this paper, the case of prosodic annotations is discussed, which was investigated on the CGN database (Spoken Dutch Corpus). The main conclusions of this work are as follows. First, it was found that the available amount of manual prosodic annotations is sufficient for the development of our (baseline, decision tree based) prosodic models. 
In other words, more manual annotations do not improve the models. Second, the developed prosodic models for prominence are insufficiently accurate to produce automatic prominence annotations that are as good as the manual ones. But on the other hand the consistency between manual and automatic break annotations is as high as the inter-transcriber consistency for breaks. So given the current amount of manual break annotations, annotations for the remainder of the CGN database can be generated automatically with the same quality as the manual annotations.",{"paper_id":3610,"title":3611,"year":197,"month":855,"day":63,"doi":3612,"resource_url":3613,"first_page":63,"last_page":63,"pdf_url":3614,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3615,"paper_type":860,"authors":3616,"abstract":3622},"lrec2004-main-165","Resources and Techniques for Multilingual Information Extraction","10.63317\u002F2rxmn7tf62ge","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-165","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F306.pdf","busemann-krieger-2004-resources",[3617,3619],{"paper_id":3610,"author_seq":247,"given_name":1560,"surname":3618,"affiliation":63,"orcid":63},"Busemann",{"paper_id":3610,"author_seq":232,"given_name":3620,"surname":3621,"affiliation":63,"orcid":63},"Hans-Ulrich","Krieger","Official travel warnings published regularly in the internet by the ministries for foreign affairs of France, Germany, and the UK provide a useful resource for assessing the risks associated with travelling to some countries. The shallow IE system SProUT has been extended to meet the specific needs of delivering a language-neutral output for English, French, or German input texts. 
A shared type hierarchy, a feature-enhanced gazetteer resource, and generic techniques of merging chunk analyses into larger results are major reusable results of this work.",{"paper_id":3624,"title":3625,"year":197,"month":855,"day":63,"doi":3626,"resource_url":3627,"first_page":63,"last_page":63,"pdf_url":3628,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3629,"paper_type":860,"authors":3630,"abstract":3643},"lrec2004-main-166","Evaluating Factors Impacting the Accuracy of Forced Alignments in a Multimodal Corpus","10.63317\u002F2tz5hcxupavq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-166","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F307.pdf","chen-etal-2004-evaluating",[3631,3633,3636,3639,3641],{"paper_id":3624,"author_seq":247,"given_name":3632,"surname":1246,"affiliation":63,"orcid":63},"Lei",{"paper_id":3624,"author_seq":232,"given_name":3634,"surname":3635,"affiliation":63,"orcid":63},"Yang","Liu",{"paper_id":3624,"author_seq":218,"given_name":3637,"surname":3638,"affiliation":63,"orcid":63},"Mary","Harper",{"paper_id":3624,"author_seq":203,"given_name":3640,"surname":2890,"affiliation":63,"orcid":63},"Eduardo",{"paper_id":3624,"author_seq":188,"given_name":3157,"surname":3642,"affiliation":63,"orcid":63},"McRoy","People, when processing human-to-human communication, utilize everything they can in order to understand that communication, including speech and information such as the time and location of an interlocutor's gesture and gaze. Speech and gesture are known to exhibit a synchronous relationship in human communication; however, the precise nature of that relationship requires further investigation. The construction of computer models of multimodal human communication would be enabled by the availability of multimodal communication corpora annotated with synchronized gesture and speech features. 
To investigate the temporal relationships of these knowledge sources, we have collected and are annotating several multimodal corpora with time-aligned features. Forced alignment between a speech file and its transcription is a crucial part of multimodal corpus production. This paper investigates a number of factors that may contribute to highly accurate forced alignments to support the rapid production of these multimodal corpora including the acoustic model, the match between the speech used for training the system and that to be force aligned, the amount of data used to train the ASR system, the availability of speaker adaptation, and the duration of alignment segments.",{"paper_id":3645,"title":3646,"year":197,"month":855,"day":63,"doi":3647,"resource_url":3648,"first_page":63,"last_page":63,"pdf_url":3649,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3650,"paper_type":860,"authors":3651,"abstract":3666},"lrec2004-main-167","Automatic Audio and Manual Transcripts Alignment, Time-code Transfer and Selection of Exact Transcripts","10.63317\u002F4xtq5qmm8ut4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-167","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F308.pdf","barras-etal-2004-automatic",[3652,3655,3658,3660,3661,3664],{"paper_id":3645,"author_seq":247,"given_name":3653,"surname":3654,"affiliation":63,"orcid":63},"C.","Barras",{"paper_id":3645,"author_seq":232,"given_name":3656,"surname":3657,"affiliation":63,"orcid":63},"G.","Adda",{"paper_id":3645,"author_seq":218,"given_name":2298,"surname":3659,"affiliation":63,"orcid":63},"Adda-Decker",{"paper_id":3645,"author_seq":203,"given_name":2313,"surname":2837,"affiliation":63,"orcid":63},{"paper_id":3645,"author_seq":188,"given_name":3662,"surname":3663,"affiliation":63,"orcid":63},"P. 
Boula","de Mareüil",{"paper_id":3645,"author_seq":172,"given_name":3550,"surname":3665,"affiliation":63,"orcid":63},"Paroubek","The present study focuses on automatic processing of sibling resources of audio and written documents, such as available in audio archives or for parliament debates: written texts are close but not exact audio transcripts. Such resources deserve attention for several reasons: they represent an interesting testbed for studying differences between written and spoken material and they yield low cost resources for acoustic model training. When automatically transcribing the audio data, regions of agreement between automatic transcripts and written sources allow to transfer time-codes to the written documents: this may be helpful in an audio archive or audio information retrieval environment. Regions of disagreement can be automatically selected for further correction by human transcribers. This study makes use of 10 hours of French radio interview archives with corresponding press-oriented transcripts. The audio corpus has then been transcribed using the LIMSI speech recognizer resulting in automatic transcripts, exhibiting an average word error rate of 12%. 80% of the text corpus (with word chunks of at least five words) can be exactly aligned with the automatic transcripts of the audio data. 
The residual word error rate on these 80% is less than 1%.",{"paper_id":3668,"title":3669,"year":197,"month":855,"day":63,"doi":3670,"resource_url":3671,"first_page":63,"last_page":63,"pdf_url":3672,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3673,"paper_type":860,"authors":3674,"abstract":3682},"lrec2004-main-168","Evaluation of a Spoken Phonetic Database in Basque Language","10.63317\u002F258csnemuguh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-168","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F309.pdf","guijarrubia-etal-2004-evaluation",[3675,3678,3680],{"paper_id":3668,"author_seq":247,"given_name":3676,"surname":3677,"affiliation":63,"orcid":63},"V.","Guijarrubia",{"paper_id":3668,"author_seq":232,"given_name":3545,"surname":3679,"affiliation":63,"orcid":63},"Torres",{"paper_id":3668,"author_seq":218,"given_name":3681,"surname":2911,"affiliation":63,"orcid":63},"L.J.","In this paper we present the evaluation of a spoken phonetic corpus designed to train acoustic models for Speech Recognition applications in Basque Language. A complete set of acoustic-phonetic decoding experiments was carried out over the proposed database. Context dependent and independent phoneme units were used in these experiments with two different approaches to acoustic modeling, namely discrete and continuous Hidden Markov Models (HMMs). A complete set of HMMs were trained and tested with the database. 
Experimental results reveal that the database is large and phonetically rich enough to get great acoustic models to be integrated in Continuous Speech Recognition Systems.",{"paper_id":3684,"title":3685,"year":197,"month":855,"day":63,"doi":3686,"resource_url":3687,"first_page":63,"last_page":63,"pdf_url":3688,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3689,"paper_type":860,"authors":3690,"abstract":3697},"lrec2004-main-169","Using Paradigm Tables to Generate New Utterances Similar to those Existing in Linguistic Resources","10.63317\u002F2mgxonmbmtx3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-169","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F314.pdf","lepage-peralta-2004-using",[3691,3694],{"paper_id":3684,"author_seq":247,"given_name":3692,"surname":3693,"affiliation":63,"orcid":63},"Yves","Lepage",{"paper_id":3684,"author_seq":232,"given_name":3695,"surname":3696,"affiliation":63,"orcid":63},"Guilhem","Peralta","We inspect the possibility of creating new linguistic utterances (small sentences) similar to those already present in an existing linguistic resource. Using paradigm tables ensures that the new generated sentences resemble previous data, while being of course different. 
We report an experiment in which 1,201 new correct sentences were generated starting from only 22 seed sentences.",{"paper_id":3699,"title":3700,"year":197,"month":855,"day":63,"doi":3701,"resource_url":3702,"first_page":63,"last_page":63,"pdf_url":3703,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3704,"paper_type":860,"authors":3705,"abstract":3712},"lrec2004-main-170","Collection and Evaluation of Broadcast News Data for Arabic","10.63317\u002F2pmqwv2r3dun","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-170","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F315.pdf","afify-emam-2004-collection",[3706,3709],{"paper_id":3699,"author_seq":247,"given_name":3707,"surname":3708,"affiliation":63,"orcid":63},"Mohamed","Afify",{"paper_id":3699,"author_seq":232,"given_name":3710,"surname":3711,"affiliation":63,"orcid":63},"Ossama","Emam","This paper focuses on presenting a general methodology for acquiring and automatically segmenting broadcast news data from the web. It was shown that it is possible starting from a relatively small corpus of about 10 hours to segment automatically about 30 hours of data. This step is important because manual segmentation of broadcast news data is generally very tedious and time consuming. In addition to the data collection proposal we show the development of an initial recognition system. We present an automatic procedure for creating vowelizations for Arabic words. This is again important because most available Arabic transcriptions lack vowelization, which is crucial for creating phonetic transcription. 
The performance of our system is initially 36% error rate.",{"paper_id":3714,"title":3715,"year":197,"month":855,"day":63,"doi":3716,"resource_url":3717,"first_page":63,"last_page":63,"pdf_url":3718,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3719,"paper_type":860,"authors":3720,"abstract":3733},"lrec2004-main-171","A Language Resources Infrastructure for Bulgarian","10.63317\u002F4dzph6jwh2h9","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-171","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F316.pdf","simov-etal-2004-language",[3721,3722,3725,3728,3731],{"paper_id":3714,"author_seq":247,"given_name":3102,"surname":3103,"affiliation":63,"orcid":63},{"paper_id":3714,"author_seq":232,"given_name":3723,"surname":3724,"affiliation":63,"orcid":63},"Petya","Osenova",{"paper_id":3714,"author_seq":218,"given_name":3726,"surname":3727,"affiliation":63,"orcid":63},"Sia","Kolkovska",{"paper_id":3714,"author_seq":203,"given_name":3729,"surname":3730,"affiliation":63,"orcid":63},"Elisaveta","Balabanova",{"paper_id":3714,"author_seq":188,"given_name":3176,"surname":3732,"affiliation":63,"orcid":63},"Doikoff","This paper describes the infrastructure of a basic language resources set for Bulgarian in the context of BLARK initiative requirements. We focus on the treebanking task as a trigger for basic language resources compilation. Two strategies have been applied in this respect: (1) implementing the main pre-processing modules before the treebank compilation and (2) creating more elaborate types of resources in parallel to the treebank compilation. The description of language resources within BulTreeBank project is divided into two parts: language technology, which includes tokenization, morphosyntactic analyzer, morphosyntactic disambiguation, partial grammars, and language data, which includes the layers of the BulTreeBank corpus and the variety of lexicons. 
The advantages of our approach to a less-spoken language (like Bulgarian) are as follows: it triggers the creation of the basic set of language resources which lack for certain languages and it raises the question about the ways of language resources creation.",{"paper_id":3735,"title":3736,"year":197,"month":855,"day":63,"doi":3737,"resource_url":3738,"first_page":63,"last_page":63,"pdf_url":3739,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3740,"paper_type":860,"authors":3741,"abstract":3757},"lrec2004-main-172","“You Stupid Tin Box” - Children Interacting with the AIBO Robot: A Cross-linguistic Emotional Speech Corpus","10.63317\u002F2g6tkyi6gw2r","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-172","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F317.pdf","batliner-etal-2004-stupid",[3742,3744,3746,3748,3751,3753,3755],{"paper_id":3735,"author_seq":247,"given_name":2846,"surname":3743,"affiliation":63,"orcid":63},"Batliner",{"paper_id":3735,"author_seq":232,"given_name":3653,"surname":3745,"affiliation":63,"orcid":63},"Hacker",{"paper_id":3735,"author_seq":218,"given_name":3556,"surname":3747,"affiliation":63,"orcid":63},"Steidl",{"paper_id":3735,"author_seq":203,"given_name":3749,"surname":3750,"affiliation":63,"orcid":63},"E.","Nöth",{"paper_id":3735,"author_seq":188,"given_name":3556,"surname":3752,"affiliation":63,"orcid":63},"D’Arcy",{"paper_id":3735,"author_seq":172,"given_name":2298,"surname":3754,"affiliation":63,"orcid":63},"Russell",{"paper_id":3735,"author_seq":155,"given_name":2298,"surname":3756,"affiliation":63,"orcid":63},"Wong","This paper deals with databases that combine different aspects: children's speech, emotional speech, human-robot communication, cross-linguistics, and read vs. spontaneous speech: in a Wizard-of-Oz scenario, German and English children had to instruct Sony's AIBO robot to fulfil specific tasks. 
In one experimental condition, strictly parallel for German and English, the AIBO behaved `disobedient' by following its own script irrespective of the child's commands. By that, reactions of different children to the same sequence of AIBO's actions could be obtained. In addition, both the German and the English children were recorded reading texts. The data are transliterated orthographically; emotional user states and some other phenomena will be annotated. We report preliminary word recognition rates and classification results.",{"paper_id":3759,"title":3760,"year":197,"month":855,"day":63,"doi":3761,"resource_url":3762,"first_page":63,"last_page":63,"pdf_url":3763,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3764,"paper_type":860,"authors":3765,"abstract":3780},"lrec2004-main-173","The Role of MultiWord Terminology in Knowledge Management","10.63317\u002F2k9jxtskmwgh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-173","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F319.pdf","dowdall-etal-2004-role",[3766,3769,3772,3775,3778],{"paper_id":3759,"author_seq":247,"given_name":3767,"surname":3768,"affiliation":63,"orcid":63},"James","Dowdall",{"paper_id":3759,"author_seq":232,"given_name":3770,"surname":3771,"affiliation":63,"orcid":63},"Will","Lowe",{"paper_id":3759,"author_seq":218,"given_name":3773,"surname":3774,"affiliation":63,"orcid":63},"Jeremy","Ellman",{"paper_id":3759,"author_seq":203,"given_name":3776,"surname":3777,"affiliation":63,"orcid":63},"Fabio","Rinaldi",{"paper_id":3759,"author_seq":188,"given_name":1357,"surname":3779,"affiliation":63,"orcid":63},"Hess","One of the major obstacles for knowledge management remains MultiWord Terminology (MWT). This paper explores the difficulties that arise and describes real world solutions implemented as part of the Parmenides project. 
Parmenides is being built as an integrated knowledge management package that combines information, MWT and ontology extraction methods in a semi-automated framework. The focus of this paper is on eliciting ontological fragments based on dedicated MWT processing.",{"paper_id":3782,"title":3783,"year":197,"month":855,"day":63,"doi":3784,"resource_url":3785,"first_page":63,"last_page":63,"pdf_url":3786,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3787,"paper_type":860,"authors":3788,"abstract":63},"lrec2004-main-174","The OPUS Corpus - Parallel and Free: http:\u002F\u002Flogos.uio.no\u002Fopus","10.63317\u002F2tqv48uoxykv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-174","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F320.pdf","tiedemann-nygaard-2004-opus",[3789,3792],{"paper_id":3782,"author_seq":247,"given_name":3790,"surname":3791,"affiliation":63,"orcid":63},"Jörg","Tiedemann",{"paper_id":3782,"author_seq":232,"given_name":2654,"surname":3793,"affiliation":63,"orcid":63},"Nygaard",{"paper_id":3795,"title":3796,"year":197,"month":855,"day":63,"doi":3797,"resource_url":3798,"first_page":63,"last_page":63,"pdf_url":3799,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3800,"paper_type":860,"authors":3801,"abstract":3805},"lrec2004-main-175","Selecting the Correct English Synset for a Spanish Sense","10.63317\u002F3emv2j7npi22","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-175","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F324.pdf","farreres-rodriguez-2004-selecting",[3802,3804],{"paper_id":3795,"author_seq":247,"given_name":1779,"surname":3803,"affiliation":63,"orcid":63},"Farreres",{"paper_id":3795,"author_seq":232,"given_name":2910,"surname":2911,"affiliation":63,"orcid":63},"This work tries to enrich the Spanish Wordnet using a Spanish taxonomy as a knowledge source. 
The Spanish taxonomy is composed by Spanish senses, while Spanish Wordnet is composed by synsets, mostly linked to English WordNet. A set of weighted associations between Spanish words and Wordnet synsets is used for inferring associations between both taxonomies.",{"paper_id":3807,"title":3808,"year":197,"month":855,"day":63,"doi":3809,"resource_url":3810,"first_page":63,"last_page":63,"pdf_url":3811,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3812,"paper_type":860,"authors":3813,"abstract":3823},"lrec2004-main-176","Collection of SLR in the Asian-Pacific Area","10.63317\u002F57indjas4v8o","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-176","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F325.pdf","moreno-etal-2004-collection",[3814,3815,3816,3817,3818,3819,3820],{"paper_id":3807,"author_seq":247,"given_name":2707,"surname":3427,"affiliation":63,"orcid":63},{"paper_id":3807,"author_seq":232,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},{"paper_id":3807,"author_seq":218,"given_name":3423,"surname":3424,"affiliation":63,"orcid":63},{"paper_id":3807,"author_seq":203,"given_name":3420,"surname":3421,"affiliation":63,"orcid":63},{"paper_id":3807,"author_seq":188,"given_name":2721,"surname":1679,"affiliation":63,"orcid":63},{"paper_id":3807,"author_seq":172,"given_name":3432,"surname":3433,"affiliation":63,"orcid":63},{"paper_id":3807,"author_seq":155,"given_name":3821,"surname":3822,"affiliation":63,"orcid":63},"Herbert","Tropf","The goal of this project (LILA) is the collection of a large number of spoken databases for training Automatic Speech Recognition Systems for telephone applications in the Asian Pacific area. Specifications follow those of SpeechDat-like databases. Utterances will be recorded directly from calls made either from fixed or cellular telephones and are composed by read text and answers to specific questions. 
The project is driven by a consortium composed by a large number of industrial companies. Each company is in charge of the production of two databases. The consortium shares the databases produced in the project. The goal of the project should be reached within the year 2005.",{"paper_id":3825,"title":3826,"year":197,"month":855,"day":63,"doi":3827,"resource_url":3828,"first_page":63,"last_page":63,"pdf_url":3829,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3830,"paper_type":860,"authors":3831,"abstract":3838},"lrec2004-main-177","Derivational Relations in Flectional Languages - Czech Case","10.63317\u002F4vajg7mksvun","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-177","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F326.pdf","hlavacova-klimova-2004-derivational",[3832,3835],{"paper_id":3825,"author_seq":247,"given_name":3833,"surname":3834,"affiliation":63,"orcid":63},"Jaroslava","Hlaváčová",{"paper_id":3825,"author_seq":232,"given_name":3836,"surname":3837,"affiliation":63,"orcid":63},"Jana","Klímová","When a text in any language is submitted to a morphological analysis, there always rest some unrecognized words. We can lower their number by adding new words into the dictionary used by the morphological analyzer but we can never gather the whole of the language. The system described in this paper (we call it \"derivation module\") deals with the unknown derived words. It aims not only at analyzing but also at synthesizing Czech derived words. 
Such a system is of particular value for automatic processing of languages where derivational morphology plays an important role in regular word formation.",{"paper_id":3840,"title":3841,"year":197,"month":855,"day":63,"doi":3842,"resource_url":3843,"first_page":63,"last_page":63,"pdf_url":3844,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3845,"paper_type":860,"authors":3846,"abstract":3857},"lrec2004-main-178","Standards for Language Codes: developing ISO 639","10.63317\u002F2n5pa8xswowa","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-178","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F327.pdf","dalby-etal-2004-standards",[3847,3849,3851,3854],{"paper_id":3840,"author_seq":247,"given_name":1790,"surname":3848,"affiliation":63,"orcid":63},"Dalby",{"paper_id":3840,"author_seq":232,"given_name":1045,"surname":3850,"affiliation":63,"orcid":63},"Gillam",{"paper_id":3840,"author_seq":218,"given_name":3852,"surname":3853,"affiliation":63,"orcid":63},"Christopher","Cox",{"paper_id":3840,"author_seq":203,"given_name":3855,"surname":3856,"affiliation":63,"orcid":63},"Debbie","Garside","The international community, including the International Organization for Standardization (ISO), is currently seeking more granular   systems of language identifiers than the widely used tags of ISO 639 parts 1 and 2. There is growing need for the more precise   identification and annotation of language-based resources. This paper presents a key response to this need, in which the Linguasphere   Register of the World's Languages and Speech Communities would provide the referential framework for a future part 6 of ISO 639.   
The paper discusses the proposed evolution of ISO 639-6 and its relationship to other parts of ISO 639, including its relevance to the   definition of meta-data categories in ISO 12620.",{"paper_id":3859,"title":3860,"year":197,"month":855,"day":63,"doi":3861,"resource_url":3862,"first_page":63,"last_page":63,"pdf_url":3863,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3864,"paper_type":860,"authors":3865,"abstract":3874},"lrec2004-main-179","SLR Validation: Current Trends and Developments","10.63317\u002F3dztox63a7o4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-179","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F328.pdf","van-den-heuvel-etal-2004-slr",[3866,3867,3870,3871],{"paper_id":3859,"author_seq":247,"given_name":3420,"surname":3421,"affiliation":63,"orcid":63},{"paper_id":3859,"author_seq":232,"given_name":3868,"surname":3869,"affiliation":63,"orcid":63},"Dorota","Iskra",{"paper_id":3859,"author_seq":218,"given_name":2721,"surname":1679,"affiliation":63,"orcid":63},{"paper_id":3859,"author_seq":203,"given_name":3872,"surname":3873,"affiliation":63,"orcid":63},"Folkert","de Vriend","This paper deals with the quality evaluation (validation) of Spoken Language Resources (SLR). The current situation in terms of relevant validation criteria and procedures is briefly presented. Next, a number of validation issues related to new data formats (XML-based annotations, UTF-16 encoding) are discussed. Further, new validation cycles that were introduced in a series of new projects like SpeeCon and OrienTel are addressed: prompt sheet validation, lexicon validation and pre-release validation. 
Finally, SPEX's current and future",{"paper_id":3876,"title":3877,"year":197,"month":855,"day":63,"doi":3878,"resource_url":3879,"first_page":63,"last_page":63,"pdf_url":3880,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3881,"paper_type":860,"authors":3882,"abstract":3885},"lrec2004-main-180","Identifying Definitions in Text Collections for Question Answering","10.63317\u002F2rwddspogdia","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-180","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F331.pdf","saggion-2004-identifying",[3883],{"paper_id":3876,"author_seq":247,"given_name":2910,"surname":3884,"affiliation":63,"orcid":63},"Saggion","One particular type of question which was made the focus of its own subtask within the TREC2003 QA track was the definition question (``What is X?'' or ``Who is X?''). One of the main problems with this type of question is how to discriminate in vast text collections between definitional and non-definitional text passages about a particular definiendum (i.e., the term to be defined). 
A method will be presented that uses definition patterns and terms that co-occur with the definiendum in on-line sources for both passage selection and definition extraction.",{"paper_id":3887,"title":3888,"year":197,"month":855,"day":63,"doi":3889,"resource_url":3890,"first_page":63,"last_page":63,"pdf_url":3891,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3892,"paper_type":860,"authors":3893,"abstract":63},"lrec2004-main-181","Multiple Sequence Alignment for Characterizing the Lineal Structure of Revision","10.63317\u002F59xzy9bwvvgf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-181","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F332.pdf","alonso-etal-2004-multiple",[3894,3897,3900,3902,3904],{"paper_id":3887,"author_seq":247,"given_name":3895,"surname":3896,"affiliation":63,"orcid":63},"Laura","Alonso",{"paper_id":3887,"author_seq":232,"given_name":3898,"surname":3899,"affiliation":63,"orcid":63},"Irene","Castellón",{"paper_id":3887,"author_seq":218,"given_name":2105,"surname":3901,"affiliation":63,"orcid":63},"Escribano",{"paper_id":3887,"author_seq":203,"given_name":3212,"surname":3903,"affiliation":63,"orcid":63},"Messeguer",{"paper_id":3887,"author_seq":188,"given_name":3218,"surname":2908,"affiliation":63,"orcid":63},{"paper_id":3906,"title":3907,"year":197,"month":855,"day":63,"doi":3908,"resource_url":3909,"first_page":63,"last_page":63,"pdf_url":3910,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3911,"paper_type":860,"authors":3912,"abstract":3916},"lrec2004-main-182","Mining the Web for Discourse Markers","10.63317\u002F3b5c7yah6g4r","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-182","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F333.pdf","hutchinson-2004-mining",[3913],{"paper_id":3906,"author_seq":247,"given_name":3914,"surname":3915,"affiliation":63,"orcid":63},"Ben","Hutchinson","This paper proposes a 
methodology for obtaining sentences containing discourse markers from the World Wide Web. The proposed methodology is particularly suitable for collecting large numbers of discourse marker tokens. It relies on the automatic identification of discourse markers, and we show that this can be done with an accuracy within 9% of that of human performance. We also show that the distribution of discourse markers on the web correlates highly with those in a conventional balanced corpus.",{"paper_id":3918,"title":3919,"year":197,"month":855,"day":63,"doi":3920,"resource_url":3921,"first_page":63,"last_page":63,"pdf_url":3922,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3923,"paper_type":860,"authors":3924,"abstract":3929},"lrec2004-main-183","A Pattern Extraction Workbench Combining Multiple Linguistic Levels","10.63317\u002F5ovtjjn8vp6b","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-183","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F336.pdf","merkel-lange-2004-pattern",[3925,3927],{"paper_id":3918,"author_seq":247,"given_name":1596,"surname":3926,"affiliation":63,"orcid":63},"Merkel",{"paper_id":3918,"author_seq":232,"given_name":1647,"surname":3928,"affiliation":63,"orcid":63},"Lange","In this paper an interactive pattern extraction workbench, I*Pex, is presented. The workbench comes in a graphical environment and is designed to be used in an incremental and interactive fashion with the user. Patterns can be constructed to work in combination involving specifications on several linguistic levels simultaneously, from the character level using regular expressions, parts of speech and dependency relations to semantic roles. 
The input text format is based on XCES XML format.",{"paper_id":3931,"title":3932,"year":197,"month":855,"day":63,"doi":3933,"resource_url":3934,"first_page":63,"last_page":63,"pdf_url":3935,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3936,"paper_type":860,"authors":3937,"abstract":3947},"lrec2004-main-184","Exploiting Coreference Annotations for Text-to-Hypertext Conversion","10.63317\u002F3rfpizmpke3v","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-184","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F337.pdf","holler-etal-2004-exploiting",[3938,3941,3944],{"paper_id":3931,"author_seq":247,"given_name":3939,"surname":3940,"affiliation":63,"orcid":63},"Anke","Holler",{"paper_id":3931,"author_seq":232,"given_name":3942,"surname":3943,"affiliation":63,"orcid":63},"Jan Frederik","Maas",{"paper_id":3931,"author_seq":218,"given_name":3945,"surname":3946,"affiliation":63,"orcid":63},"Angelika","Storrer","The paper describes an annotation scheme for coreference developed within the application context of text-to-hypertext conversion. In this context coreference is used (1) for generating document-internal and cross-document hyperlinks, and (2) for resolving anaphoric expressions in order to achieve cohesive closedness in hypertext nodes. We will argue that for the purpose of cross-document linking it is necessary to separate the annotation of coreference relations from the annotation of anaphoric relations. 
To account for this requirement, we developed a knowledge-based annotation scheme that relates referential expressions in the text to entities in a knowledge representation, which is modeled using XML Topic Maps.",{"paper_id":3949,"title":3950,"year":197,"month":855,"day":63,"doi":3951,"resource_url":3952,"first_page":63,"last_page":63,"pdf_url":3953,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3954,"paper_type":860,"authors":3955,"abstract":3958},"lrec2004-main-185","“Why do you Ignore me?” - Proof that not all Direct Speech is Bad","10.63317\u002F5hjwivybmh9e","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-185","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F338.pdf","hasler-2004-ignore",[3956],{"paper_id":3949,"author_seq":247,"given_name":3895,"surname":3957,"affiliation":63,"orcid":63},"Hasler","In the automatic summarisation of written texts, direct speech is usually deemed unsuitable for inclusion in important sentences. This is due to the fact that humans do not usually include such quotations when they create summaries. In this paper, we argue that despite generally negative attitudes, direct speech can be useful for summarisation and ignoring it can result in the omission of important and relevant information. We present an analysis of a corpus of annotated newswire texts in which a substantial amount of speech is marked by different annotators, and describe when and why direct speech can be included in summaries. 
In an attempt to make direct speech more appropriate for summaries, we also describe rules currently being developed to transform it into a more summary-acceptable format.",{"paper_id":3960,"title":3961,"year":197,"month":855,"day":63,"doi":3962,"resource_url":3963,"first_page":63,"last_page":63,"pdf_url":3964,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3965,"paper_type":860,"authors":3966,"abstract":3976},"lrec2004-main-186","“Human Language Technology Elements in a Knowledge Organisation System - The VID Project”","10.63317\u002F33dk9wxyipbi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-186","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F339.pdf","navarretta-etal-2004-human",[3967,3970,3973],{"paper_id":3960,"author_seq":247,"given_name":3968,"surname":3969,"affiliation":63,"orcid":63},"Costanza","Navarretta",{"paper_id":3960,"author_seq":232,"given_name":3971,"surname":3972,"affiliation":63,"orcid":63},"Bolette Sandford","Pedersen",{"paper_id":3960,"author_seq":218,"given_name":3974,"surname":3975,"affiliation":63,"orcid":63},"Dorte Haltrup","Hansen","This paper describes how Human Language Technologies and linguistic resources are used to support the construction of components of a knowledge organisation system. 
In particular we focus on methodologies and resources for building a corpus-based domain ontology and extracting relevant metadata information for text chunks from domain-specific corpora.",{"paper_id":3978,"title":3979,"year":197,"month":855,"day":63,"doi":3980,"resource_url":3981,"first_page":63,"last_page":63,"pdf_url":3982,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3983,"paper_type":860,"authors":3984,"abstract":4005},"lrec2004-main-187","Generic Text Summarization Using WordNet","10.63317\u002F5dnot53ivcvt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-187","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F342.pdf","bellare-etal-2004-generic",[3985,3988,3991,3993,3996,3999,4002],{"paper_id":3978,"author_seq":247,"given_name":3986,"surname":3987,"affiliation":63,"orcid":63},"Kedar","Bellare",{"paper_id":3978,"author_seq":232,"given_name":3989,"surname":3990,"affiliation":63,"orcid":63},"Anish Das","Sarma",{"paper_id":3978,"author_seq":218,"given_name":3992,"surname":3990,"affiliation":63,"orcid":63},"Atish Das",{"paper_id":3978,"author_seq":203,"given_name":3994,"surname":3995,"affiliation":63,"orcid":63},"Navneet","Loiwal",{"paper_id":3978,"author_seq":188,"given_name":3997,"surname":3998,"affiliation":63,"orcid":63},"Vaibhav","Mehta",{"paper_id":3978,"author_seq":172,"given_name":4000,"surname":4001,"affiliation":63,"orcid":63},"Ganesh","Ramakrishnan",{"paper_id":3978,"author_seq":155,"given_name":4003,"surname":4004,"affiliation":63,"orcid":63},"Pushpak","Bhattacharyya","This paper presents a WordNet based approach to text summarization. The document to be summarized is used to extract a “relevant”   sub-graph from the WordNet graph. Weights are assigned to each node of this sub-graph using a strategy similar to the Google Pageranking   algorithm. These weights capture the relevance of the respective synsets with respect to the whole document. 
A matrix in which   each row represents a sentence and each column a node of the sub-graph (i.e., a synset) is created. Principal Component Analysis is   performed on this matrix to help extract the sentences for the summary. Our approach is generic unlike most previous approaches which   address specific genres of documents like news articles and biographies. Testing our system on the standard DUC2002 extracts shows   that our results are promising and comparable to existing summarizers.",{"paper_id":4007,"title":4008,"year":197,"month":855,"day":63,"doi":4009,"resource_url":4010,"first_page":63,"last_page":63,"pdf_url":4011,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4012,"paper_type":860,"authors":4013,"abstract":4020},"lrec2004-main-188","Development of Bilingual Domain-Specific Ontology for Automatic Conceptual Indexing","10.63317\u002F4ro3qoez2win","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-188","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F343.pdf","loukachevitch-dobrov-2004-development",[4014,4017],{"paper_id":4007,"author_seq":247,"given_name":4015,"surname":4016,"affiliation":63,"orcid":63},"Natalia V.","Loukachevitch",{"paper_id":4007,"author_seq":232,"given_name":4018,"surname":4019,"affiliation":63,"orcid":63},"Boris V.","Dobrov","In the paper we describe development, means of evaluation and applications of Russian-English Sociopolitical Thesaurus specially developed as a linguistic resource for automatic text processing applications. The Sociopolitical domain is not a domain of social research but a broad domain of social relations including economic, political, military, cultural, sports and other subdomains. 
The knowledge of this domain is necessary for automatic text processing of such important documents as official documents, legislative acts, newspaper articles.",{"paper_id":4022,"title":4023,"year":197,"month":855,"day":63,"doi":4024,"resource_url":4025,"first_page":63,"last_page":63,"pdf_url":4026,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4027,"paper_type":860,"authors":4028,"abstract":4031},"lrec2004-main-189","Development of Ontologies with Minimal Set of Conceptual Relations","10.63317\u002F3bw7xz7ihg35","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-189","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F345.pdf","loukachevitch-dobrov-2004-development-ontologies",[4029,4030],{"paper_id":4022,"author_seq":247,"given_name":4015,"surname":4016,"affiliation":63,"orcid":63},{"paper_id":4022,"author_seq":232,"given_name":4018,"surname":4019,"affiliation":63,"orcid":63},"In the paper we describe our approach to development of ontologies with small number of relation types. Non-taxonomic relations in our ontologies are based on ontological dependence conception described in the formal ontology. This minimal relations set does not depend on a domain or a task and makes possible to begin the ontology construction at once, as soon as a task is set and a domain is determined, to receive the first version of an ontology in short time. 
Such an initial ontology can be used for information-retrieval applications and can serve as a structural basis for further development of the ontology",{"paper_id":4033,"title":4034,"year":197,"month":855,"day":63,"doi":4035,"resource_url":4036,"first_page":63,"last_page":63,"pdf_url":4037,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4038,"paper_type":860,"authors":4039,"abstract":4049},"lrec2004-main-190","Providing On-line Access to Portuguese Language Resources: Corpora and Lexicons","10.63317\u002F3thkoq2egvjq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-190","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F346.pdf","do-nascimento-etal-2004-providing",[4040,4043,4046],{"paper_id":4033,"author_seq":247,"given_name":4041,"surname":4042,"affiliation":63,"orcid":63},"Maria Fernanda Bacelar","do Nascimento",{"paper_id":4033,"author_seq":232,"given_name":4044,"surname":4045,"affiliation":63,"orcid":63},"Amália","Mendes",{"paper_id":4033,"author_seq":218,"given_name":4047,"surname":4048,"affiliation":63,"orcid":63},"Luísa","Pereira","Several Language Resources (LRs) for Portuguese, developed at the Center of Linguistics of the Lisbon University (CLUL), are available on-line at CLUL's webpage: www.clul.ul.pt\u002Fenglish\u002Fsectores\u002Fprojecto_rld.html. These LRs have been extracted from or developed based on the Reference Corpus of Contemporary Portuguese (CRPC), a monitor corpus containing, at the present, more than 300 million words, taken by sampling from several types of written text (literary, newspaper, technical, didactic, juridical, parlamentary, etc.) and spoken text (informal and formal), pertaining to national and regional varieties of Portuguese (including European, Brazilian, African and Asian Portuguese). 
The LRs available for on-line queries include: a) several subcorpora (written and spoken, tagged and untagged) compiled and extracted from CRPC for specific CLUL's projects and now available for on-line queries; b) a published sample of \"Português Fundamental\", a spoken CRPC subcorpus, available for texts download; c) a frequency lexicon extracted from a CRPC subcorpus available for both on-line queries and download. Other LRs available for Portuguese are also referred: C-ORAL-ROM - Integrated Reference Corpora for Spoken Romance Languages, a CD-ROM edition of a spoken corpus with text-to-sound alignment; the LE-PAROLE corpus; the LE-PAROLE Lexicon and the SIMPLE Lexicon.",{"paper_id":4051,"title":4052,"year":197,"month":855,"day":63,"doi":4053,"resource_url":4054,"first_page":63,"last_page":63,"pdf_url":4055,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4056,"paper_type":860,"authors":4057,"abstract":63},"lrec2004-main-191","Automatisation of the Activity of Term Collection in Different 
Languages","10.63317\u002F3jsziahjh8ff","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-191","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F294.pdf","cartoni-etal-2004-automatisation",[4058,4061,4064,4067],{"paper_id":4051,"author_seq":247,"given_name":4059,"surname":4060,"affiliation":63,"orcid":63},"Bruno","Cartoni",{"paper_id":4051,"author_seq":232,"given_name":4062,"surname":4063,"affiliation":63,"orcid":63},"Pierrette","Bouillon",{"paper_id":4051,"author_seq":218,"given_name":4065,"surname":4066,"affiliation":63,"orcid":63},"Yalina","Alphonse",{"paper_id":4051,"author_seq":203,"given_name":4068,"surname":4069,"affiliation":63,"orcid":63},"Sabine","Lehmann",{"paper_id":4071,"title":4072,"year":197,"month":855,"day":63,"doi":4073,"resource_url":4074,"first_page":63,"last_page":63,"pdf_url":4075,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4076,"paper_type":860,"authors":4077,"abstract":4080},"lrec2004-main-192","Automatically Selecting Domain Markers for Terminology Extraction","10.63317\u002F42tga72hqdz6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-192","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F348.pdf","vivaldi-rodriguez-2004-automatically",[4078,4079],{"paper_id":4071,"author_seq":247,"given_name":1006,"surname":2058,"affiliation":63,"orcid":63},{"paper_id":4071,"author_seq":232,"given_name":2910,"surname":2911,"affiliation":63,"orcid":63},"Some approaches to automatic terminology extraction from corpora imply the use of existing semantic resources for guiding the detection of terms. Most of these systems exploit specialised resources, like UMLS in the medical domain, while a few try to take profit from general-purpose semantic resources, like EuroWordNet (EWN). 
As the term extraction task is clearly domain depending, in the case a general-purpose resource without specific domain information is used, we need a way of attaching domain information to the units of the resource. For big resources it is desirable that this semantic enrichment could be carried out automatically. Given a specific domain, our proposal aims to detect in EWN those units that can be considered as domain markers (DM). We can define a DM as an EWN entry whose attached strings belong to the domain, as well as the variants of all its descendents through the hyponymy relation. The procedure we propose in this paper is fully automatic and, a priori, domain-independent. The only external knowledge it uses is a set of terms, which is an external vocabulary, which is considered to have at least one sense belonging to the domain.",{"paper_id":4082,"title":4083,"year":197,"month":855,"day":63,"doi":4084,"resource_url":4085,"first_page":63,"last_page":63,"pdf_url":4086,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4087,"paper_type":860,"authors":4088,"abstract":4105},"lrec2004-main-193","The Ongoing Evaluation Campaign of Syntactic Parsing of French: 
EASY","10.63317\u002F2iavasnmxzej","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-193","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F349.pdf","vilnat-etal-2004-ongoing",[4089,4091,4092,4094,4097,4100,4103],{"paper_id":4082,"author_seq":247,"given_name":2977,"surname":4090,"affiliation":63,"orcid":63},"Vilnat",{"paper_id":4082,"author_seq":232,"given_name":1519,"surname":3665,"affiliation":63,"orcid":63},{"paper_id":4082,"author_seq":218,"given_name":3895,"surname":4093,"affiliation":63,"orcid":63},"Monceaux",{"paper_id":4082,"author_seq":203,"given_name":4095,"surname":4096,"affiliation":63,"orcid":63},"Isabelle","Robba",{"paper_id":4082,"author_seq":188,"given_name":4098,"surname":4099,"affiliation":63,"orcid":63},"Véronique","Gendner",{"paper_id":4082,"author_seq":172,"given_name":4101,"surname":4102,"affiliation":63,"orcid":63},"Gabriel","Illouz",{"paper_id":4082,"author_seq":155,"given_name":4104,"surname":2839,"affiliation":63,"orcid":63},"Michèle","This paper presents EASY (Evaluation of Analyzers of SYntax), an ongoing evaluation campaign of syntactic parsing of French, a subproject of EVALDA in the French TECHNOLANGUE program. 
After presenting the elaboration of the annotation formalism, we describe the corpus building steps, the annotation tools, the evaluation measures and finally, plans to produce a validated large linguistic resource, syntactically annotated",{"paper_id":4107,"title":4108,"year":197,"month":855,"day":63,"doi":4109,"resource_url":4110,"first_page":63,"last_page":63,"pdf_url":4111,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4112,"paper_type":860,"authors":4113,"abstract":4122},"lrec2004-main-194","Annotators’ Agreement: The Case of Topic-Focus Articulation","10.63317\u002F538wq3xwoapp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-194","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F350.pdf","vesela-etal-2004-annotators",[4114,4117,4120],{"paper_id":4107,"author_seq":247,"given_name":4115,"surname":4116,"affiliation":63,"orcid":63},"Kateřina","Veselá",{"paper_id":4107,"author_seq":232,"given_name":4118,"surname":4119,"affiliation":63,"orcid":63},"Jiří","Havelka",{"paper_id":4107,"author_seq":218,"given_name":1289,"surname":4121,"affiliation":63,"orcid":63},"Hajičová","The annotation of the Prague Dependency Treebank (PDT) is conceived of as a multilayered scenario that comprises also dependency representations (tectogrammatical tree structures, TGTS's) of the underlying structure of the sentences. TGTS's capture three basic aspects of the underlying structure of sentences: (a) the dependency tree structure, (b) the kinds of dependency syntactic relations, and (c) the basic characteristics of the topic-focus articulation (TFA). Since the PDT is a large collection and the annotations on the deepest layer are to a large extent performed by several human annotators (based on an automatic preprocessing module), it is more than necessary to observe the consistence of annotators and the agreement among them. 
In the present paper, we summarize the results of the evaluation of parallel annotations of several samples taken from PDT and the measures accepted to improve the consistency of annotations.",{"paper_id":4124,"title":4125,"year":197,"month":855,"day":63,"doi":4126,"resource_url":4127,"first_page":63,"last_page":63,"pdf_url":4128,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4129,"paper_type":860,"authors":4130,"abstract":4140},"lrec2004-main-195","Evaluating Lexical Resources for a Semantic Tagger","10.63317\u002F44z7mxnuycxq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-195","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F351.pdf","piao-etal-2004-evaluating",[4131,4134,4136,4139],{"paper_id":4124,"author_seq":247,"given_name":4132,"surname":4133,"affiliation":63,"orcid":63},"Scott S. L.","Piao",{"paper_id":4124,"author_seq":232,"given_name":973,"surname":4135,"affiliation":63,"orcid":63},"Rayson",{"paper_id":4124,"author_seq":218,"given_name":4137,"surname":4138,"affiliation":63,"orcid":63},"Dawn","Archer",{"paper_id":4124,"author_seq":203,"given_name":1147,"surname":2820,"affiliation":63,"orcid":63},"Semantic lexical resources play an important part in both linguistic study and natural language engineering. In Lancaster, a large semantic lexical resource has been built over the past 14 years, which provides a knowledge base for the USAS semantic tagger. Capturing semantic lexicological theory and empirical lexical usage information extracted from corpora, the Lancaster semantic lexicon provides a valuable resource for the corpus research and NLP community. In this paper, we evaluate the lexical coverage of the semantic lexicon both in terms of genres and time periods. We conducted the evaluation on test corpora including the BNC sampler, the METER Corpus of law\u002Fcourt journalism reports and some corpora of Newsbooks, prose and fictional works published between 17th and 19th centuries. 
In the evaluation, the semantic lexicon achieved a lexical coverage of 98.49% on the BNC sampler, 95.38% on the METER Corpus and 92.76% -- 97.29% on the historical data. Our evaluation reveals that the Lancaster semantic lexicon has a remarkably high lexical coverage on modern English lexicon, but needs expansion with domain-specific terms and historical words. Our evaluation also shows that, in order to make claims about the lexical coverage of annotation systems as well as to render them ‘future proof’, we need to evaluate their potential both synchronically and diachronically across genres.",{"paper_id":4142,"title":4143,"year":197,"month":855,"day":63,"doi":4144,"resource_url":4145,"first_page":63,"last_page":63,"pdf_url":4146,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4147,"paper_type":860,"authors":4148,"abstract":4157},"lrec2004-main-196","Multimodal Meaning Representation for Generic Dialogue Systems Architectures","10.63317\u002F2sbyh449tf3q","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-196","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F352.pdf","landragin-etal-2004-multimodal",[4149,4151,4153,4156],{"paper_id":4142,"author_seq":247,"given_name":3350,"surname":4150,"affiliation":63,"orcid":63},"Landragin",{"paper_id":4142,"author_seq":232,"given_name":4152,"surname":3353,"affiliation":63,"orcid":63},"Alexandre",{"paper_id":4142,"author_seq":218,"given_name":4154,"surname":4155,"affiliation":63,"orcid":63},"Annalisa","Ricci",{"paper_id":4142,"author_seq":203,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},"An unified language for the communicative acts between agents is essential for the design of multi-agents architectures. 
Whatever the type of interaction (linguistic, multimodal, including particular aspects such as force feedback), whatever the type of application (command dialogue, request dialogue, database querying), the concepts are common and we need a generic meta-model. In order to tend towards task-independent systems, we need to clarify the modules parameterization procedures. In this paper, we focus on the characteristics of a meta-model designed to represent meaning in linguistic and multimodal applications. This meta-model is called MMIL for MultiModal Interface Language, and has first been specified in the framework of the IST MIAMM European project. What we want to test here is how relevant is MMIL for a completely different context (a different task, a different interaction type, a different linguistic domain). We detail the exploitation of MMIL in the framework of the IST OZONE European project, and we draw the conclusions on the role of MMIL in the parameterization of task-independent dialogue managers.",{"paper_id":4159,"title":4160,"year":197,"month":855,"day":63,"doi":4161,"resource_url":4162,"first_page":63,"last_page":63,"pdf_url":4163,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4164,"paper_type":860,"authors":4165,"abstract":4172},"lrec2004-main-197","STO: A Danish Lexicon Resource - Ready for Applications","10.63317\u002F5eib8itovhxg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-197","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F354.pdf","braasch-olsen-2004-sto",[4166,4169],{"paper_id":4159,"author_seq":247,"given_name":4167,"surname":4168,"affiliation":63,"orcid":63},"Anna","Braasch",{"paper_id":4159,"author_seq":232,"given_name":4170,"surname":4171,"affiliation":63,"orcid":63},"Sussi","Olsen","This paper deals with the STO lexicon, the most comprehensive computational lexicon of Danish developed for NLP\u002FHLT applications, which is now ready for use. 
Danish was one of the 12 EU-languages participating in the LE-PAROLE and SIMPLE projects; therefore it was obvious to continue this work building on our experience obtained from these projects. The material for Danish produced within these projects – further enriched with language-specific information - is incorporated into the STO lexicon. First, we describe the main characteristics of the lexical coverage and linguistic content of the STO lexicon; second, we present some recent uses and point to some prospective exploitations of the material. Finally, we outline an internet-based user interface, which allows for browsing through the complex information content of the STO lexical database and some other selected WRL’s for Danish.",{"paper_id":4174,"title":4175,"year":197,"month":855,"day":63,"doi":4176,"resource_url":4177,"first_page":63,"last_page":63,"pdf_url":4178,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4179,"paper_type":860,"authors":4180,"abstract":4186},"lrec2004-main-198","A Domain-Independent Approach to IE Rule Development","10.63317\u002F32buq2ek44dc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-198","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F355.pdf","zervanou-mcnaught-2004-domain",[4181,4184],{"paper_id":4174,"author_seq":247,"given_name":4182,"surname":4183,"affiliation":63,"orcid":63},"Kalliopi","Zervanou",{"paper_id":4174,"author_seq":232,"given_name":3376,"surname":4185,"affiliation":63,"orcid":63},"McNaught","A key element for the extraction of information in a natural language document is a set of shallow text analysis rules, which are typically based on pre-defined linguistic patterns. Current Information Extraction research aims at the automatic or semi-automatic acquisition of these rules. Within this research framework, we consider in this paper the potential for acquiring generic extraction patterns. 
Our research is based on the hypothesis that, terms (the linguistic representation of concepts in a specialised domain) and Named Entities (the names of persons, organisations and dates of importance in the text) can together be considered as the basic semantic entities of textual information and can therefore be used as a basis for the conceptual representation of domain specific texts and the definition of what constitutes an information extraction template in linguistic terms. The extraction patterns discovered by this approach involve significant associations of these semantic entities with verbs and they can subsequently be translated into the grammar formalism of choice.",{"paper_id":4188,"title":4189,"year":197,"month":855,"day":63,"doi":4190,"resource_url":4191,"first_page":63,"last_page":63,"pdf_url":4192,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4193,"paper_type":860,"authors":4194,"abstract":4234},"lrec2004-main-199","The French MEDIA\u002FEVALDA Project: the Evaluation of the Understanding Capability of Spoken Language Dialogue 
Systems","10.63317\u002F34b9vqpiahws","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-199","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F356.pdf","devillers-etal-2004-french",[4195,4198,4200,4203,4204,4207,4210,4211,4213,4216,4219,4221,4222,4225,4228,4231],{"paper_id":4188,"author_seq":247,"given_name":4196,"surname":4197,"affiliation":63,"orcid":63},"Laurence","Devillers",{"paper_id":4188,"author_seq":232,"given_name":4199,"surname":1429,"affiliation":63,"orcid":63},"Hélène",{"paper_id":4188,"author_seq":218,"given_name":4201,"surname":4202,"affiliation":63,"orcid":63},"Sophie","Rosset",{"paper_id":4188,"author_seq":203,"given_name":1519,"surname":3665,"affiliation":63,"orcid":63},{"paper_id":4188,"author_seq":188,"given_name":4205,"surname":4206,"affiliation":63,"orcid":63},"Kevin","McTait",{"paper_id":4188,"author_seq":172,"given_name":4208,"surname":4209,"affiliation":63,"orcid":63},"D.","Mostefa",{"paper_id":4188,"author_seq":155,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},{"paper_id":4188,"author_seq":138,"given_name":1175,"surname":4212,"affiliation":63,"orcid":63},"Charnay",{"paper_id":4188,"author_seq":121,"given_name":4214,"surname":4215,"affiliation":63,"orcid":63},"Caroline","Bousquet",{"paper_id":4188,"author_seq":104,"given_name":4217,"surname":4218,"affiliation":63,"orcid":63},"Nadine","Vigouroux",{"paper_id":4188,"author_seq":87,"given_name":3350,"surname":4220,"affiliation":63,"orcid":63},"Béchet",{"paper_id":4188,"author_seq":73,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},{"paper_id":4188,"author_seq":55,"given_name":4223,"surname":4224,"affiliation":63,"orcid":63},"Jean-Yves","Antoine",{"paper_id":4188,"author_seq":38,"given_name":4226,"surname":4227,"affiliation":63,"orcid":63},"J.","Villaneau",{"paper_id":4188,"author_seq":17,"given_name":4229,"surname":4230,"affiliation":63,"orcid":63},"Myriam","Vergnes",{"paper_id":4188,"author_seq":4232,"given_name":4226,"surname":423
3,"affiliation":63,"orcid":63},"16","Goulian","The aim of the MEDIA project is to design and test a methodology for the evaluation of context-dependent and independent spoken dialogue systems. We propose an evaluation paradigm based on the use of test suites from real-world corpora and a common semantic representation and common metrics. This paradigm should allow us to diagnose the context-sensitive understanding capability of dialogue systems. This paradigm will be used within an evaluation campaign involving several sites all of which will carry out the task of querying information from a database.",{"paper_id":4236,"title":4237,"year":197,"month":855,"day":63,"doi":4238,"resource_url":4239,"first_page":63,"last_page":63,"pdf_url":4240,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4241,"paper_type":860,"authors":4242,"abstract":4256},"lrec2004-main-200","The C-ORAL-ROM CORPUS. A Multilingual Resource of Spontaneous Speech for Romance Languages","10.63317\u002F3dfsqpk834of","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-200","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F357.pdf","cresti-etal-2004-c",[4243,4246,4248,4251,4254,4255],{"paper_id":4236,"author_seq":247,"given_name":4244,"surname":4245,"affiliation":63,"orcid":63},"Emanuela","Cresti",{"paper_id":4236,"author_seq":232,"given_name":4247,"surname":4042,"affiliation":63,"orcid":63},"Fernanda Bacelar",{"paper_id":4236,"author_seq":218,"given_name":4249,"surname":4250,"affiliation":63,"orcid":63},"Antonio Moreno","Sandoval",{"paper_id":4236,"author_seq":203,"given_name":4252,"surname":4253,"affiliation":63,"orcid":63},"Jean","Veronis",{"paper_id":4236,"author_seq":188,"given_name":2736,"surname":1087,"affiliation":63,"orcid":63},{"paper_id":4236,"author_seq":172,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},"The C-ORAL-ROM project has delivered a multilingual corpus of spontaneous speech for the main romance 
languages (Italian, French, Portuguese and Spanish). The collection aims to represent the variety of speech acts performed in everyday language and to enable the description of prosodic and syntactic structures in the four romance languages. Sampling criteria are defined in a corpus design scheme. C-ORAL-ROM adopts two different sampling strategies, one for the formal and one for the informal part: While a set of typical domains of application is selected to document the formal use of language, the informal part documents speech variation using parameters referring to the event’s structure (dialogue vs. monologue) and the sociological domain of use (family-private vs public). The four romance corpora are tagged with respect to terminal and non terminal prosodic breaks. Terminal breaks are assumed to be the more relevant cues for the identification of relevant linguistic domains in spontaneous speech (utterances). Relations with other concurrent criteria are discussed. The multimedia storage of the C-ORAL-ROM corpus is based on this principle; each textual string ending with a terminal break is aligned, through the Win Pitch speech software, to its acoustic counterpart, generating the data base of all utterances.",{"paper_id":4258,"title":4259,"year":197,"month":855,"day":63,"doi":4260,"resource_url":4261,"first_page":63,"last_page":63,"pdf_url":4262,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4263,"paper_type":860,"authors":4264,"abstract":4273},"lrec2004-main-201","Principles of a System for Terminological Concept Modelling","10.63317\u002F394ue9rnysny","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-201","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F358.pdf","madsen-etal-2004-principles",[4265,4268,4271],{"paper_id":4258,"author_seq":247,"given_name":4266,"surname":4267,"affiliation":63,"orcid":63},"Bodil 
Nistrup","Madsen",{"paper_id":4258,"author_seq":232,"given_name":4269,"surname":4270,"affiliation":63,"orcid":63},"Hanne Erdman","Thomsen",{"paper_id":4258,"author_seq":218,"given_name":1358,"surname":4272,"affiliation":63,"orcid":63},"Vikner","We are working on a project called CAOS - Computer-Aided Ontology Structuring - whose aim is to develop a computer system designed to enable semi-automatic construction of concept systems, or ontologies. The system is intended to be interactive and presupposes an end-user with a terminological background (terminologist or professional translator). CAOS supports terminological concept modelling. The backbone of this concept modelling is constituted by characteristics modelled by formal feature specifications, i.e. attribute-value pairs. Our use of feature specifications is subject to a number of principles and constraints. In this paper we want to demonstrate some of these principles and to show why they are necessary in order to permit the construction of an interactive tool for building terminological ontologies. 
We will also show how they contribute to determine the structuring of the ontologies in CAOS and to facilitate the work of the terminologist user.",{"paper_id":4275,"title":4276,"year":197,"month":855,"day":63,"doi":4277,"resource_url":4278,"first_page":63,"last_page":63,"pdf_url":4279,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4280,"paper_type":860,"authors":4281,"abstract":4289},"lrec2004-main-202","On the Usefulness of Large Spoken Language Corpora for Linguistic Research","10.63317\u002F2c3xof6gvsm8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-202","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F359.pdf","van-bael-etal-2004-usefulness",[4282,4285,4288],{"paper_id":4275,"author_seq":247,"given_name":4283,"surname":4284,"affiliation":63,"orcid":63},"Christophe","Van Bael",{"paper_id":4275,"author_seq":232,"given_name":4286,"surname":4287,"affiliation":63,"orcid":63},"Helmer","Strik",{"paper_id":4275,"author_seq":218,"given_name":3420,"surname":3421,"affiliation":63,"orcid":63},"In the past, fundamental linguistic research was typically conducted on small data sets that were handcrafted for the specific research at hand. However, from the eighties onwards, many large spoken language corpora have become available. This study investigates the usefulness of large multi-purpose spoken language corpora for fundamental linguistic research. A research task was designed in which we tried to capture the major pronunciation differences between three speech styles in context-sensitive re-write rules at the phone level. 
These re-write rules were extracted from the alignments of both a manual phonetic transcription and an automatic phonetic transcription with a canonical reference transcription of the same material.",{"paper_id":4291,"title":4292,"year":197,"month":855,"day":63,"doi":4293,"resource_url":4294,"first_page":63,"last_page":63,"pdf_url":4295,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4296,"paper_type":860,"authors":4297,"abstract":4311},"lrec2004-main-203","WALA: A Multilingual Resource Repository for West African Languages","10.63317\u002F3b8rr4iniau8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-203","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F360.pdf","gibbon-etal-2004-wala",[4298,4299,4302,4305,4308],{"paper_id":4291,"author_seq":247,"given_name":2494,"surname":2495,"affiliation":63,"orcid":63},{"paper_id":4291,"author_seq":232,"given_name":4300,"surname":4301,"affiliation":63,"orcid":63},"Firmin","Ahoua",{"paper_id":4291,"author_seq":218,"given_name":4303,"surname":4304,"affiliation":63,"orcid":63},"Eddi","Gbéry",{"paper_id":4291,"author_seq":203,"given_name":4306,"surname":4307,"affiliation":63,"orcid":63},"Eno-Abasi","Urua",{"paper_id":4291,"author_seq":188,"given_name":4309,"surname":4310,"affiliation":63,"orcid":63},"Moses","Ekpenyong","The West African Language Archive (WALA) initiative has emerged from a number of concurrent projects, and aims to encourage local scholars to create high quality decentralised repositories documenting West African languages, and to make these repositories available to language communities, language planners, educationalists and scientists via an internet metadata portal such as OLAC (Open Language Archive Community). A wide range of criteria has to be met in designing and implementing this kind of archive. 
We discuss these criteria with reference to experiences in documentation work in three very different ongoing language documentation projects, on designing an encyclopaedia, on documenting an endangered language, and on creating a speech synthesiser. We pay special attention to the provision of metadata, a formal variety of catalogue or housekeeping information, without which resources are doomed to remain inaccessible.",{"paper_id":4313,"title":4314,"year":197,"month":855,"day":63,"doi":4315,"resource_url":4316,"first_page":63,"last_page":63,"pdf_url":4317,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4318,"paper_type":860,"authors":4319,"abstract":4322},"lrec2004-main-204","Annotating a Corpus for Building a Domain-specific Knowledge Base","10.63317\u002F2ycr5npkb854","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-204","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F361.pdf","bartsch-2004-annotating",[4320],{"paper_id":4313,"author_seq":247,"given_name":4068,"surname":4321,"affiliation":63,"orcid":63},"Bartsch","The project described in this paper seeks to develop a knowledge base for the domain of data processing in construction - a sub-domain of mechanical engineering - based on a corpus of authentic natural language text. Central in this undertaking is the annotation of the relevant linguistic and conceptual units and structures which are to form the basis of the knowledge base. 
This paper describes the levels of annotation and the ontology on which the knowledge base is going to be modelled and sketches some of the linguistic relations which are used in building the knowledge base.",{"paper_id":4324,"title":4325,"year":197,"month":855,"day":63,"doi":4326,"resource_url":4327,"first_page":63,"last_page":63,"pdf_url":4328,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4329,"paper_type":860,"authors":4330,"abstract":4338},"lrec2004-main-205","A Comparison of Summarisation Methods Based on Term Specificity Estimation","10.63317\u002F56zpcs5jgxp4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-205","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F362.pdf","orasan-etal-2004-comparison",[4331,4334,4337],{"paper_id":4324,"author_seq":247,"given_name":4332,"surname":4333,"affiliation":63,"orcid":63},"Constantin","Orăsan",{"paper_id":4324,"author_seq":232,"given_name":4335,"surname":4336,"affiliation":63,"orcid":63},"Viktor","Pekar",{"paper_id":4324,"author_seq":218,"given_name":3895,"surname":3957,"affiliation":63,"orcid":63},"In automatic summarisation, knowledge poor methods do not necessarily perform worse than those which employ several knowledge sources to produce a summary. This paper presents a comprehensive comparison of several summarisation methods based on term specificity estimation in order to find out which one performs best. Parameters such as quality of the summary produced and the resources required to produce accurate results are considered in order to find out which of these methods is more appropriate for a real world application. 
Intrinsic and extrinsic evaluation indicates that TF*RIDF, a variant of the commonly used TF*IDF, is the best performing method.",{"paper_id":4340,"title":4341,"year":197,"month":855,"day":63,"doi":4342,"resource_url":4343,"first_page":63,"last_page":63,"pdf_url":4344,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4345,"paper_type":860,"authors":4346,"abstract":63},"lrec2004-main-206","Measurements of Spoken Language Variability in a Multilingual Corpus. Predictable Aspects","10.63317\u002F3j2g3f3r4sdg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-206","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F363.pdf","moneglia-2004-measurements",[4347],{"paper_id":4340,"author_seq":247,"given_name":2926,"surname":2994,"affiliation":63,"orcid":63},{"paper_id":4349,"title":4350,"year":197,"month":855,"day":63,"doi":4351,"resource_url":4352,"first_page":63,"last_page":63,"pdf_url":4353,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4354,"paper_type":860,"authors":4355,"abstract":4358},"lrec2004-main-207","Reliability of Lexical and Prosodic Cues in Two Real-life Spoken Dialog Corpora","10.63317\u002F3cfc3utmj4u6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-207","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F366.pdf","devillers-vasilescu-2004-reliability",[4356,4357],{"paper_id":4349,"author_seq":247,"given_name":2308,"surname":4197,"affiliation":63,"orcid":63},{"paper_id":4349,"author_seq":232,"given_name":3545,"surname":2734,"affiliation":63,"orcid":63},"The present research focuses on analyzing and detecting emotions in speech as revealed by task-dependent spoken dialogs corpora. Previously, we have conducted several experiments on a real-life corpus in order to develop a reliable annotation method and to detect lexical and prosodic cues correlated to the main emotion class. 
In this paper we evaluate both the robustness of the annotation scheme and of the lexical and prosodic cues by testing them on a new corpus. This work is carried out in the context of the Amities project in which spoken dialog systems for call center services are being developed.",{"paper_id":4360,"title":4361,"year":197,"month":855,"day":63,"doi":4362,"resource_url":4363,"first_page":63,"last_page":63,"pdf_url":4364,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4365,"paper_type":860,"authors":4366,"abstract":63},"lrec2004-main-208","WordNet Affect: an Affective Extension of WordNet","10.63317\u002F2yctm9q26ar4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-208","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F369.pdf","strapparava-valitutti-2004-wordnet",[4367,4370],{"paper_id":4360,"author_seq":247,"given_name":4368,"surname":4369,"affiliation":63,"orcid":63},"Carlo","Strapparava",{"paper_id":4360,"author_seq":232,"given_name":1743,"surname":4371,"affiliation":63,"orcid":63},"Valitutti",{"paper_id":4373,"title":4374,"year":197,"month":855,"day":63,"doi":4375,"resource_url":4376,"first_page":63,"last_page":63,"pdf_url":4377,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4378,"paper_type":860,"authors":4379,"abstract":4385},"lrec2004-main-209","The GENOMA-KB Platform: Queries over Integrated Linguistic Resources","10.63317\u002F2jycgyx64wgq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-209","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F370.pdf","hospedales-rodriguez-2004-genoma",[4380,4383],{"paper_id":4373,"author_seq":247,"given_name":4381,"surname":4382,"affiliation":63,"orcid":63},"Margarita","Hospedales",{"paper_id":4373,"author_seq":232,"given_name":4384,"surname":2911,"affiliation":63,"orcid":63},"Manel","The human genome knowledge base platform integrates a variety of linguistic resources - Textual corpus, 
Terminological bank, Bibliographic bank and Factographic bank- in a unified environment independent of the origin and nature of the data it retrieves thereby offering the visitor one sole access point for different heterogeneous sources of information. GENOMA-KB platform has been developed by SPOC in order to make reality the initiative presented by the Institute for Applied Linguistics (IULA). One of the requirements of the solution has been to minimize its impact on the normal working procedures of the linguist team. For this reason the data input processes, that are carried out using a number of applications that act over different types of databases in various types of servers with heterogeneous operating systems, have been left intact. GENOMA-KB's user interface, developed under the premise of ease of navigation and aesthetic quality, offers the visitor a unified search interface and transparent navigation between different information sources, achieved in an agile and simple fashion.",{"paper_id":4387,"title":4388,"year":197,"month":855,"day":63,"doi":4389,"resource_url":4390,"first_page":63,"last_page":63,"pdf_url":4391,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4392,"paper_type":860,"authors":4393,"abstract":4407},"lrec2004-main-210","Evaluation of Consensus on the Annotation of Prosodic Breaks in the Romance Corpus of Spontaneous Speech “C-ORAL-ROM”","10.63317\u002F3gczboyr6q4x","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-210","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F371.pdf","danieli-etal-2004-evaluation",[4394,4397,4400,4401,4403,4405],{"paper_id":4387,"author_seq":247,"given_name":4395,"surname":4396,"affiliation":63,"orcid":63},"Morena","Danieli",{"paper_id":4387,"author_seq":232,"given_name":4398,"surname":4399,"affiliation":63,"orcid":63},"Juan 
María","Garrido",{"paper_id":4387,"author_seq":218,"given_name":2926,"surname":2994,"affiliation":63,"orcid":63},{"paper_id":4387,"author_seq":203,"given_name":1461,"surname":4402,"affiliation":63,"orcid":63},"Panizza",{"paper_id":4387,"author_seq":188,"given_name":2564,"surname":4404,"affiliation":63,"orcid":63},"Quazza",{"paper_id":4387,"author_seq":172,"given_name":2904,"surname":4406,"affiliation":63,"orcid":63},"Swerts","C-ORAL-ROM, Integrated Reference Corpora For Spoken Romance Languages, is a multilingual corpus of spontaneous speech delivered within the IST Program. Corpora are tagged with respect to terminal and non terminal prosodic breaks. Terminal breaks are considered the most perceptively relevant cues to determine the utterance boundaries in spontaneous speech resources. The paper presents the evaluation of the inter-annotator agreement accomplished by an institution external to the consortium and shows the level of reliability of the tagging delivered and the annotation scheme adopted. The data show, at cross-linguistic level, a very high K coefficient (between 7.7 and 9.2, according to the language resource). A strong level of agreement specifically for terminal breaks has also been recorded. 
The data thus show that the annotation of the utterances identified in terms of their prosodic breaks is able to capture relevant perceptual facts, and it appears that the proposed coding scheme can be applied in a highly replicable way.",{"paper_id":4409,"title":4410,"year":197,"month":855,"day":63,"doi":4411,"resource_url":4412,"first_page":63,"last_page":63,"pdf_url":4413,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4414,"paper_type":860,"authors":4415,"abstract":4422},"lrec2004-main-211","Towards the Use of Word Stems and Suffixes for Statistical Machine Translation","10.63317\u002F3iite3qjxdu3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-211","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F372.pdf","popovic-ney-2004-towards",[4416,4419],{"paper_id":4409,"author_seq":247,"given_name":4417,"surname":4418,"affiliation":63,"orcid":63},"Maja","Popović",{"paper_id":4409,"author_seq":232,"given_name":4420,"surname":4421,"affiliation":63,"orcid":63},"Hermann","Ney","In this paper we present methods for improving the quality of translation from an inflected language into English by making use of part-of-speech tags and word stems and suffixes in the source language. Results for translations from Spanish and Catalan into English are presented on the LC-STAR trilingual corpus which consists of spontaneously spoken dialogues in the domain of travelling and appointment scheduling. Results for translation from Serbian into English are presented on the Assimil language course, the bilingual corpus from unrestricted domain. 
We achieve up to 5% relative reduction of error rates for Spanish and Catalan and about 8% for Serbian",{"paper_id":4424,"title":4425,"year":197,"month":855,"day":63,"doi":4426,"resource_url":4427,"first_page":63,"last_page":63,"pdf_url":4428,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4429,"paper_type":860,"authors":4430,"abstract":4439},"lrec2004-main-212","Language Model Adaptation for Statistical Machine Translation Based on Information Retrieval","10.63317\u002F3f86yi2tp94j","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-212","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F374.pdf","eck-etal-2004-language",[4431,4434,4436],{"paper_id":4424,"author_seq":247,"given_name":4432,"surname":4433,"affiliation":63,"orcid":63},"Matthias","Eck",{"paper_id":4424,"author_seq":232,"given_name":1560,"surname":4435,"affiliation":63,"orcid":63},"Vogel",{"paper_id":4424,"author_seq":218,"given_name":4437,"surname":4438,"affiliation":63,"orcid":63},"Alex","Waibel","Language modeling is an important part for both speech recognition and machine translation systems. Adaptation has been successfully applied to language models for speech recognition. In this paper we present experiments concerning language model adaptation for statistical machine translation. We develop a method to adapt language models using information retrieval methods. 
The adapted language models drastically reduce perplexity over a general language model and we can show that it is possible to improve the translation quality of a statistical machine translation using those adapted language models instead of a general language model.",{"paper_id":4441,"title":4442,"year":197,"month":855,"day":63,"doi":4443,"resource_url":4444,"first_page":63,"last_page":63,"pdf_url":4445,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4446,"paper_type":860,"authors":4447,"abstract":4451},"lrec2004-main-213","Evaluating Name-Matching for Coreference Resolution","10.63317\u002F4ewz8ao8chxi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-213","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F376.pdf","uryupina-2004-evaluating",[4448],{"paper_id":4441,"author_seq":247,"given_name":4449,"surname":4450,"affiliation":63,"orcid":63},"Olga","Uryupina","In this paper we describe experiments aimed at improving matching techniques for the Coreference Resolution task. Combining algorithms proposed in the literature with our own solutions, we run machine learning experiments and evaluate extensively different features and feature combinations to find the best settings. 
These settings do not only show good performance for English data, but should also be easily adjustable to cover other languages.",{"paper_id":4453,"title":4454,"year":197,"month":855,"day":63,"doi":4455,"resource_url":4456,"first_page":63,"last_page":63,"pdf_url":4457,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4458,"paper_type":860,"authors":4459,"abstract":4472},"lrec2004-main-214","Design and Implementation of a Semantic Search Engine for Portuguese","10.63317\u002F2p5p85q3tdf2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-214","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F378.pdf","amaral-etal-2004-design",[4460,4463,4464,4467,4469],{"paper_id":4453,"author_seq":247,"given_name":4461,"surname":4462,"affiliation":63,"orcid":63},"Carlos","Amaral",{"paper_id":4453,"author_seq":232,"given_name":3087,"surname":1175,"affiliation":63,"orcid":63},{"paper_id":4453,"author_seq":218,"given_name":4465,"surname":4466,"affiliation":63,"orcid":63},"André","Martins",{"paper_id":4453,"author_seq":203,"given_name":4468,"surname":4045,"affiliation":63,"orcid":63},"Afonso",{"paper_id":4453,"author_seq":188,"given_name":4470,"surname":4471,"affiliation":63,"orcid":63},"Cláudia","Pinto","We present the semantic multilingual question answering engine of the TRUST project, describing its overall architecture, its common multilingual resources, as well as the specific resources, tools and processing mechanisms implemented for the development of the Portuguese language module.",{"paper_id":4474,"title":4475,"year":197,"month":855,"day":63,"doi":4476,"resource_url":4477,"first_page":63,"last_page":63,"pdf_url":4478,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4479,"paper_type":860,"authors":4480,"abstract":4484},"lrec2004-main-215","Converting Treebank Annotations to Language Neutral 
Syntax","10.63317\u002F26xn66mybtwc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-215","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F380.pdf","campbell-ringger-2004-converting",[4481,4483],{"paper_id":4474,"author_seq":247,"given_name":4482,"surname":908,"affiliation":63,"orcid":63},"Richard",{"paper_id":4474,"author_seq":232,"given_name":2721,"surname":2781,"affiliation":63,"orcid":63},"We describe the automatic conversion of English Penn Treebank (PTB) annotations into Language Neutral Syntax (LNS) (Campbell and Suzuki, 2002a,b). In this paper, we describe LNS and why it is useful, describe the conversion algorithm, present an evaluation of the conversion, and discuss some uses of the converted annotations and the potential for extending the coverage to other languages. The work described here is in the spirit of other automatic re-annotations of PTB trees (e.g. Frank, 2000 and Meyers, 2001), but differs in the nature of the output.",{"paper_id":4486,"title":4487,"year":197,"month":855,"day":63,"doi":4488,"resource_url":4489,"first_page":63,"last_page":63,"pdf_url":4490,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4491,"paper_type":860,"authors":4492,"abstract":63},"lrec2004-main-216","Methodology For Building Thematic Indexes In Medicine For 
French","10.63317\u002F2pmba9thkzeh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-216","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F381.pdf","alphonse-bouillon-2004-methodology",[4493,4494],{"paper_id":4486,"author_seq":247,"given_name":4065,"surname":4066,"affiliation":63,"orcid":63},{"paper_id":4486,"author_seq":232,"given_name":4062,"surname":4063,"affiliation":63,"orcid":63},{"paper_id":4496,"title":4497,"year":197,"month":855,"day":63,"doi":4498,"resource_url":4499,"first_page":63,"last_page":63,"pdf_url":4500,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4501,"paper_type":860,"authors":4502,"abstract":4510},"lrec2004-main-217","Transcrigal: A Bilingual System for Automatic Indexing of Broadcast News","10.63317\u002F2juati3m66ou","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-217","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F382.pdf","garcia-mateo-etal-2004-transcrigal",[4503,4504,4506,4508],{"paper_id":4496,"author_seq":247,"given_name":1776,"surname":1777,"affiliation":63,"orcid":63},{"paper_id":4496,"author_seq":232,"given_name":1779,"surname":4505,"affiliation":63,"orcid":63},"Dieguez-Tirado",{"paper_id":4496,"author_seq":218,"given_name":3895,"surname":4507,"affiliation":63,"orcid":63},"Docio-Fernandez",{"paper_id":4496,"author_seq":203,"given_name":3429,"surname":4509,"affiliation":63,"orcid":63},"Cardenal-Lopez","This paper describes a Broadcast News (BN) database called Transcrigal-DB. The news shows are mainly in Galician language, although around 11% of data is in Spanish. This database has been constructed for automatic speech recognition (ASR) purposes. A BN-ASR reference system is also described and evaluated on the test partition of Transcrigal-DB. The reference system has been designed having in mind that both languages, Spanish and Galician, may be used. 
Performance of the reference is improved when language adaptation techniques are taken into consideration",{"paper_id":4512,"title":4513,"year":197,"month":855,"day":63,"doi":4514,"resource_url":4515,"first_page":63,"last_page":63,"pdf_url":4516,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4517,"paper_type":860,"authors":4518,"abstract":4528},"lrec2004-main-218","Abar-Hitz: An Annotation Tool for the Basque Dependency Treebank","10.63317\u002F49biyine3p9j","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-218","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F383.pdf","diaz-de-ilarraza-etal-2004-abar",[4519,4522,4525],{"paper_id":4512,"author_seq":247,"given_name":4520,"surname":4521,"affiliation":63,"orcid":63},"Arantza","Díaz de Ilarraza",{"paper_id":4512,"author_seq":232,"given_name":4523,"surname":4524,"affiliation":63,"orcid":63},"Aitzpea","Garmendia",{"paper_id":4512,"author_seq":218,"given_name":4526,"surname":4527,"affiliation":63,"orcid":63},"Maite","Oronoz","This paper presents the process followed to design and build a graphical and language independent tool, Abar-Hitz, for the creation and management of the Basque Dependency Treebank. Abar-Hitz makes the annotation process faster and avoids possible mistakes linguists can make. It is composed of three areas: the corpus area, the tagging area and the tree visualizer area. 
Three linguists used Abar-Hitz to tag 25.000 word-forms from the Eus3LB corpus, making clear, as the evaluation results show, its utility.",{"paper_id":4530,"title":4531,"year":197,"month":855,"day":63,"doi":4532,"resource_url":4533,"first_page":63,"last_page":63,"pdf_url":4534,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4535,"paper_type":860,"authors":4536,"abstract":4543},"lrec2004-main-219","Creating Multi-purpose Linguistic Resources for Modern Greek: a Deep Modern Greek Grammar","10.63317\u002F46g57vexrczh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-219","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F384.pdf","kordoni-neu-2004-creating",[4537,4540],{"paper_id":4530,"author_seq":247,"given_name":4538,"surname":4539,"affiliation":63,"orcid":63},"Valia","Kordoni",{"paper_id":4530,"author_seq":232,"given_name":4541,"surname":4542,"affiliation":63,"orcid":63},"Julia","Neu","Here we describe the development of a re-usable, multi-purpose linguistic resource for Modern Greek: a deep computational Modern Greek Grammar. 
The grammar is written in the HPSG formalism and is being developed in a multilingual context with MRS semantics, contributing to an open-source repository of software and linguistic resources with wide usage in education, research, and application building.",{"paper_id":4545,"title":4546,"year":197,"month":855,"day":63,"doi":4547,"resource_url":4548,"first_page":63,"last_page":63,"pdf_url":4549,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4550,"paper_type":860,"authors":4551,"abstract":4560},"lrec2004-main-220","Enriching the Spanish EuroWordNet by Collocations","10.63317\u002F4oxsaw4qimwh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-220","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F386.pdf","wanner-etal-2004-enriching",[4552,4555,4558],{"paper_id":4545,"author_seq":247,"given_name":4553,"surname":4554,"affiliation":63,"orcid":63},"Leo","Wanner",{"paper_id":4545,"author_seq":232,"given_name":4556,"surname":4557,"affiliation":63,"orcid":63},"Margarita Alonso","Ramos",{"paper_id":4545,"author_seq":218,"given_name":4559,"surname":988,"affiliation":63,"orcid":63},"Antonia","Collocations constitute an important type of syntagmatic information whose introduction into WordNets has not yet been addressed. The goal of our work is the integration of the collocational material for the field of emotion nouns encoded in the DIccionario de colocaciones del español (DICE) in terms of Lexical Functions into the Spanish part of the EuroWordNet (SpEWN). Two features of collocations are decisive in connection with their representation in SpEWN: (i) they are variant-specific rather than synset-specific and (ii) depending on the degree of their idiosyncrasy, they may be generalized to a certain degree. These features are accounted for by introducing new structures into the SpEWN. These new structures are compatible with the general design principles of the EWN. 
Given that SpEWN and DICE reveal a diverging distinction of the senses of lexical items, prior to the introduction of collocations from DICE into SpEWN, the senses of the elements of collocations in SpEWN and DICE are aligned.",{"paper_id":4562,"title":4563,"year":197,"month":855,"day":63,"doi":4564,"resource_url":4565,"first_page":63,"last_page":63,"pdf_url":4566,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4567,"paper_type":860,"authors":4568,"abstract":63},"lrec2004-main-221","FrameNet as a “Net”","10.63317\u002F38rny9bi5q27","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-221","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F388.pdf","fillmore-etal-2004-framenet",[4569,4572,4575],{"paper_id":4562,"author_seq":247,"given_name":4570,"surname":4571,"affiliation":63,"orcid":63},"Charles J.","Fillmore",{"paper_id":4562,"author_seq":232,"given_name":4573,"surname":4574,"affiliation":63,"orcid":63},"Collin F.","Baker",{"paper_id":4562,"author_seq":218,"given_name":4576,"surname":1335,"affiliation":63,"orcid":63},"Hiroaki",{"paper_id":4578,"title":4579,"year":197,"month":855,"day":63,"doi":4580,"resource_url":4581,"first_page":63,"last_page":63,"pdf_url":4582,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4583,"paper_type":860,"authors":4584,"abstract":4604},"lrec2004-main-222","AV@CAR: A Spanish Multichannel Multimodal Corpus for In-Vehicle Automatic Audio-Visual Speech 
Recognition","10.63317\u002F26hq9jzng47p","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-222","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F389.pdf","ortega-etal-2004-av",[4585,4588,4591,4593,4596,4598,4601],{"paper_id":4578,"author_seq":247,"given_name":4586,"surname":4587,"affiliation":63,"orcid":63},"Alfonso","Ortega",{"paper_id":4578,"author_seq":232,"given_name":4589,"surname":4590,"affiliation":63,"orcid":63},"Federico","Sukno",{"paper_id":4578,"author_seq":218,"given_name":3640,"surname":4592,"affiliation":63,"orcid":63},"LLeida",{"paper_id":4578,"author_seq":203,"given_name":4594,"surname":4595,"affiliation":63,"orcid":63},"Alejandro","Frangi",{"paper_id":4578,"author_seq":188,"given_name":3429,"surname":4597,"affiliation":63,"orcid":63},"Miguel",{"paper_id":4578,"author_seq":172,"given_name":4599,"surname":4600,"affiliation":63,"orcid":63},"Luis","Buera",{"paper_id":4578,"author_seq":155,"given_name":4602,"surname":4603,"affiliation":63,"orcid":63},"Ernesto","Zacur","This paper describes the acquisition of the multichannel multimodal database AV@CAR for automatic audio-visual speech recognition in cars. Automatic speech recognition (ASR) plays an important role inside vehicles to keep the driver away from distraction. It is also known that visual information (lip-reading) can improve accuracy in ASR under adverse conditions as those within a car. The corpus described here is intended to provide training and testing material for several classes of audiovisual speech recognizers including isolated word system, word-spotting systems, vocabulary independent systems, and speaker dependent or speaker independent systems for a wide range of applications. 
The audio database is composed of seven audio channels including, clean speech (captured using a close talk microphone), noisy speech from several microphones placed on the overhead of the cabin, noise only signal coming from the engine compartment and information about the speed of the car. For the video database, a small video camera sensible to the visible and the near infrared bands is placed on the windscreen and used to capture the face of the driver. This is done under different light conditions both during the day and at night. Additionally, the same individuals are recorded in laboratory, under controlled environment conditions to obtain noise free speech signals, 2D images and 3D + texture face models.",{"paper_id":4606,"title":4607,"year":197,"month":855,"day":63,"doi":4608,"resource_url":4609,"first_page":63,"last_page":63,"pdf_url":4610,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4611,"paper_type":860,"authors":4612,"abstract":4628},"lrec2004-main-223","Creation of a Doctor-Patient Dialogue Corpus Using Standardized Patients","10.63317\u002F3vxfai9xnfu7","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-223","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F391.pdf","melvin-etal-2004-creation",[4613,4616,4619,4622,4625],{"paper_id":4606,"author_seq":247,"given_name":4614,"surname":4615,"affiliation":63,"orcid":63},"Robert S.","Melvin",{"paper_id":4606,"author_seq":232,"given_name":4617,"surname":4618,"affiliation":63,"orcid":63},"Win","May",{"paper_id":4606,"author_seq":218,"given_name":4620,"surname":4621,"affiliation":63,"orcid":63},"Shrikanth","Narayanan",{"paper_id":4606,"author_seq":203,"given_name":4623,"surname":4624,"affiliation":63,"orcid":63},"Panayiotis","Georgiou",{"paper_id":4606,"author_seq":188,"given_name":4626,"surname":4627,"affiliation":63,"orcid":63},"Shadi","Ganjavi","In this paper we describe the development of a doctor-patient dialogue corpus to support a 
speech-to-speech machine translation effort for English-Persian medical dialogues. The corpus was developed by recording and transcribing English-to-English dialogues between medical students and standardized patients (actors who have been trained to portray illness or injury victims), and then translated into Persian. We discuss some of the benefits and drawbacks to creating a corpus in this way. Benefits include the ability to customize the corpus in a way that would be infeasible for actual doctor-patient data and avoidance of privacy and legal issues, while drawbacks include the fact that the Persian does not originate as speech, but as text translation of English speech. We address concerns such as the authenticity of the dialogues and the value of such data for system development.",{"paper_id":4630,"title":4631,"year":197,"month":855,"day":63,"doi":4632,"resource_url":4633,"first_page":63,"last_page":63,"pdf_url":4634,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4635,"paper_type":860,"authors":4636,"abstract":4644},"lrec2004-main-224","Talkbank: Building an Open Unified Multimodal Database of Communicative Interaction","10.63317\u002F3i3yzxu79o23","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-224","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F392.pdf","macwhinney-etal-2004-talkbank",[4637,4638,4639,4641],{"paper_id":4630,"author_seq":247,"given_name":896,"surname":897,"affiliation":63,"orcid":63},{"paper_id":4630,"author_seq":232,"given_name":2500,"surname":2501,"affiliation":63,"orcid":63},{"paper_id":4630,"author_seq":218,"given_name":3852,"surname":4640,"affiliation":63,"orcid":63},"Cieri",{"paper_id":4630,"author_seq":203,"given_name":4642,"surname":4643,"affiliation":63,"orcid":63},"Craig","Martell","The goal of the TalkBank project (http:\u002F\u002Ftalkbank.org) is to support data-sharing and direct, community-wide access to naturalistic recordings and transcripts of human and 
animal communication. Toward this end, we have constructed a web accessible database of transcripts linked to audio and video media within fields such as conversation analysis, classroom discourse, animal communication, gesture, meetings, second language acquisition, first language acquisition, bilingualism, tutoring, and legal oral argumentation. We discuss how we have taken discrepant databases from dozens of individual projects and merged them together into a well-structured uniform database in which transcripts can be opened online through browsers, allowing direct multimedia playback. To achieve translation across corpora, we have defined a general XML schema. The validity of this schema is checked by bidirectional conversion from alternative input formats to XML and back. The resultant transcripts are then linked to hinted media and XSLT is used to format web readable browsable multimedia transcripts playable through SMIL. A parallel pathway is used to support collaborative commentary and publication of PDF linked to media through special issues of journals in the relevant fields.",{"paper_id":4646,"title":4647,"year":197,"month":855,"day":63,"doi":4648,"resource_url":4649,"first_page":63,"last_page":63,"pdf_url":4650,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4651,"paper_type":860,"authors":4652,"abstract":4660},"lrec2004-main-225","A Fine-Grained Evaluation Method for Speech-to-Speech Machine Translation Using Concept 
Annotations","10.63317\u002F5jw8mej9en32","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-225","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F394.pdf","belvin-etal-2004-fine",[4653,4655,4657],{"paper_id":4646,"author_seq":247,"given_name":4614,"surname":4654,"affiliation":63,"orcid":63},"Belvin",{"paper_id":4646,"author_seq":232,"given_name":1172,"surname":4656,"affiliation":63,"orcid":63},"Riehemann",{"paper_id":4646,"author_seq":218,"given_name":4658,"surname":4659,"affiliation":63,"orcid":63},"Kristin","Precoda","In this paper we report on a method of evaluating spoken language translation systems that builds upon a task-based evaluation method developed by CMU, but rather than relying on a predefined database of Interchange Format representations of spoken utterances, instead relies on a set of explicitly defined conventions for creating these interlingual representations. Our method also departs from CMU's in its scoring conventions in using a finer-grained approach to scoring (especially scoring of predicates). 
We have attempted to validate the legitimacy of this approach to speech-to-speech MT evaluation by looking for a relationship between the scores generated by this method, and the scores generated by a series of experiments using naïve human judgements of the meaning and quality of MT systems' output.",{"paper_id":4662,"title":4663,"year":197,"month":855,"day":63,"doi":4664,"resource_url":4665,"first_page":63,"last_page":63,"pdf_url":4666,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4667,"paper_type":860,"authors":4668,"abstract":63},"lrec2004-main-226","Rethinking Reusable Resources","10.63317\u002F53f3rdjcv8kt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-226","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F395.pdf","de-matos-etal-2004-rethinking",[4669,4672,4675],{"paper_id":4662,"author_seq":247,"given_name":4670,"surname":4671,"affiliation":63,"orcid":63},"David M.","de Matos",{"paper_id":4662,"author_seq":232,"given_name":4673,"surname":4674,"affiliation":63,"orcid":63},"Ricardo","Ribeiro",{"paper_id":4662,"author_seq":218,"given_name":4676,"surname":4677,"affiliation":63,"orcid":63},"Nuno J.","Mamede",{"paper_id":4679,"title":4680,"year":197,"month":855,"day":63,"doi":4681,"resource_url":4682,"first_page":63,"last_page":63,"pdf_url":4683,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4684,"paper_type":860,"authors":4685,"abstract":4701},"lrec2004-main-227","The Cross-Breeding of 
Dictionaries","10.63317\u002F5fg5ct8436yp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-227","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F397.pdf","meyers-etal-2004-cross",[4686,4688,4691,4693,4696,4699],{"paper_id":4679,"author_seq":247,"given_name":3274,"surname":4687,"affiliation":63,"orcid":63},"Meyers",{"paper_id":4679,"author_seq":232,"given_name":4689,"surname":4690,"affiliation":63,"orcid":63},"Ruth","Reeves",{"paper_id":4679,"author_seq":218,"given_name":2497,"surname":4692,"affiliation":63,"orcid":63},"Macleod",{"paper_id":4679,"author_seq":203,"given_name":4694,"surname":4695,"affiliation":63,"orcid":63},"Rachel","Szekely",{"paper_id":4679,"author_seq":188,"given_name":4697,"surname":4698,"affiliation":63,"orcid":63},"Veronika","Zielinska",{"paper_id":4679,"author_seq":172,"given_name":896,"surname":4700,"affiliation":63,"orcid":63},"Young","Especially for English, the number of hand-coded electronic resources available to the Natural Language Processing Community keeps growing: annotated corpora, treebanks, lexicons, wordnets, etc. Unfortunately, initial funding for such projects is much easier to obtain than the additional funding needed to enlarge or improve upon such resources. Thus once one proves the usefulness of a resource, it is difficult to make that resource reach its full potential. We discuss techniques for combining dictionary resources and producing others by semi-automatic means. 
The resources we created using these techniques have become an integral part of our work on NomBank, a project with the goal of annotating noun arguments in the Penn Treebank II corpus (PTB).",{"paper_id":4703,"title":4704,"year":197,"month":855,"day":63,"doi":4705,"resource_url":4706,"first_page":63,"last_page":63,"pdf_url":4707,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4708,"paper_type":860,"authors":4709,"abstract":4718},"lrec2004-main-228","Annotating Noun Argument Structure for NomBank","10.63317\u002F4icwnfm7fhoy","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-228","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F398.pdf","meyers-etal-2004-annotating",[4710,4711,4712,4713,4714,4715,4716],{"paper_id":4703,"author_seq":247,"given_name":3274,"surname":4687,"affiliation":63,"orcid":63},{"paper_id":4703,"author_seq":232,"given_name":4689,"surname":4690,"affiliation":63,"orcid":63},{"paper_id":4703,"author_seq":218,"given_name":2497,"surname":4692,"affiliation":63,"orcid":63},{"paper_id":4703,"author_seq":203,"given_name":4694,"surname":4695,"affiliation":63,"orcid":63},{"paper_id":4703,"author_seq":188,"given_name":4697,"surname":4698,"affiliation":63,"orcid":63},{"paper_id":4703,"author_seq":172,"given_name":896,"surname":4700,"affiliation":63,"orcid":63},{"paper_id":4703,"author_seq":155,"given_name":1033,"surname":4717,"affiliation":63,"orcid":63},"Grishman","When complete, NomBank will provide annotation of noun arguments in Penn Treebank II (PTB). In PropBank, University of Pennsylvania annotators provide similar information for verbs. Given nominalization\u002Fverb mappings, the combination of NomBank and PropBank allows for generalization of arguments across parts of speech. 
This paper describes our annotation task including factors which make assigning role labels to noun arguments a challenging task.",{"paper_id":4720,"title":4721,"year":197,"month":855,"day":63,"doi":4722,"resource_url":4723,"first_page":63,"last_page":63,"pdf_url":4724,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4725,"paper_type":860,"authors":4726,"abstract":4735},"lrec2004-main-229","Concept-based Queries: Combining and Reusing Linguistic Corpus Formats and Query Languages","10.63317\u002F2apjgtz9d276","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-229","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F399.pdf","sasaki-etal-2004-concept",[4727,4729,4731,4732],{"paper_id":4720,"author_seq":247,"given_name":4728,"surname":3061,"affiliation":63,"orcid":63},"Felix",{"paper_id":4720,"author_seq":232,"given_name":1647,"surname":4730,"affiliation":63,"orcid":63},"Witt",{"paper_id":4720,"author_seq":218,"given_name":2494,"surname":2495,"affiliation":63,"orcid":63},{"paper_id":4720,"author_seq":203,"given_name":4733,"surname":4734,"affiliation":63,"orcid":63},"Thorsten","Trippel","This paper proposes a methodology for querying linguistic data represented in different corpus formats. Examples of the need for queries over such heterogeneous resources are the corpus-based analysis of multimodal phenomena like the interaction of gestures and prosodic features, or syntax-related phenomena like information structure which exceed the expressive power of a tree-centered corpus format. Query languages (QLs) currently under development are strongly connected to corpus formats, like the NITE Object Model (NOM) or the Meta-Annotation Infrastructure for ATLAS (MAIA). The parallel development of linguistic query languages and corpus formats is due to the fact that general purpose query languages like XQuery do not fulfill the changing needs of linguistically motivated queries, e.g. 
to give access to (non-)hierarchically organized, theory- and language-dependent annotations of multi modal signals and\u002For text. This leads to the problem that existing corpus formats and query languages are hard to reuse. They have to be re-developed and re-implemented time-consumingly and expensively for unforeseen tasks. This paper describes an approach overcoming these problems and a sample application.",{"paper_id":4737,"title":4738,"year":197,"month":855,"day":63,"doi":4739,"resource_url":4740,"first_page":63,"last_page":63,"pdf_url":4741,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4742,"paper_type":860,"authors":4743,"abstract":4746},"lrec2004-main-230","Co-reference in Japanese Task-oriented Dialogues: A Contribution to the Development of Language-specific and Language-general Annotation Schemes and Resources","10.63317\u002F55c6dja7cgn2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-230","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F400.pdf","sasaki-witt-2004-co",[4744,4745],{"paper_id":4737,"author_seq":247,"given_name":4728,"surname":3061,"affiliation":63,"orcid":63},{"paper_id":4737,"author_seq":232,"given_name":1647,"surname":4730,"affiliation":63,"orcid":63},"This paper describes a corpus of Japanese task-oriented dialogues, i.e. its data, annotations, analysis methodology and preliminary results for the modeling of co-referential phenomena. Current corpus-based approaches to co-reference concentrate on textual data from English or other European languages. Hence, the emerging language-general models of co-reference miss input from dialogue data of non-European languages. 
We aim to fill this gap and contribute to a model of co-reference on various language-specific and language-general levels",{"paper_id":4748,"title":4749,"year":197,"month":855,"day":63,"doi":4750,"resource_url":4751,"first_page":63,"last_page":63,"pdf_url":4752,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4753,"paper_type":860,"authors":4754,"abstract":4760},"lrec2004-main-231","Constructing Word-Sense Association Networks from Bilingual Dictionary and Comparable Corpora","10.63317\u002F22rhxmmy8i5m","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-231","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F401.pdf","kaji-imaichi-2004-constructing",[4755,4757],{"paper_id":4748,"author_seq":247,"given_name":3057,"surname":4756,"affiliation":63,"orcid":63},"Kaji",{"paper_id":4748,"author_seq":232,"given_name":4758,"surname":4759,"affiliation":63,"orcid":63},"Osamu","Imaichi","A novel thesaurus named a “word-sense association network” is proposed for the first time. It consists of nodes representing word senses, each of which is defined as a set consisting of a word and its translation equivalents, and edges connecting topically associated word senses. This word-sense association network is produced from a bilingual dictionary and comparable corpora by means of a newly developed fully automatic method. The feasibility and effectiveness of the method were demonstrated experimentally by using the EDR English-Japanese dictionary together with Wall Street Journal and Nihon Keizai Shimbun corpora. 
The word-sense association networks were applied to word-sense disambiguation as well as to a query interface for information retrieval.",{"paper_id":4762,"title":4763,"year":197,"month":855,"day":63,"doi":4764,"resource_url":4765,"first_page":63,"last_page":63,"pdf_url":4766,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4767,"paper_type":860,"authors":4768,"abstract":4775},"lrec2004-main-232","Utilization of Multiple Language Resources for Robust Grammar-Based Tense and Aspect Classification","10.63317\u002F2mr9byhgyv2q","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-232","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F402.pdf","palmer-etal-2004-utilization",[4769,4770,4772],{"paper_id":4762,"author_seq":247,"given_name":1021,"surname":1904,"affiliation":63,"orcid":63},{"paper_id":4762,"author_seq":232,"given_name":1867,"surname":4771,"affiliation":63,"orcid":63},"Kuhn",{"paper_id":4762,"author_seq":218,"given_name":4773,"surname":4774,"affiliation":63,"orcid":63},"Carlota","Smith","This paper reports on an ongoing project that uses varied language resources and advanced NLP tools for a linguistic classification task in discourse semantics. The system we present is designed to assign a \"situation entity\" class label to each predicator in English text. The project goal is to achieve the best-possible identification of situation entities in naturally-occurring written texts by implementing a robust system that will deal with real corpus material, rather than just with constructed textbook examples of discourse. In this paper we focus on the combination of multiple information sources, which we see as being vital for a robust classification system. We use a deep syntactic grammar of English to identify morphological, syntactic, and discourse clues, and we use various lexical databases for fine-grained semantic properties of the predicators. 
Experiments performed to date show that enhancing the output of the grammar with information from lexical resources improves recall but lowers precision in the situation entity classification task.",{"paper_id":4777,"title":4778,"year":197,"month":855,"day":63,"doi":4779,"resource_url":4780,"first_page":63,"last_page":63,"pdf_url":4781,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4782,"paper_type":860,"authors":4783,"abstract":4794},"lrec2004-main-233","Retrieving Annotated Corpora for Corpus Annotation","10.63317\u002F3uu2ehwnxfkj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-233","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F403.pdf","yoshida-etal-2004-retrieving",[4784,4787,4790,4793],{"paper_id":4777,"author_seq":247,"given_name":4785,"surname":4786,"affiliation":63,"orcid":63},"Kyôsuke","Yoshida",{"paper_id":4777,"author_seq":232,"given_name":4788,"surname":4789,"affiliation":63,"orcid":63},"Taiichi","Hashimoto",{"paper_id":4777,"author_seq":218,"given_name":4791,"surname":4792,"affiliation":63,"orcid":63},"Takenobu","Tokunaga",{"paper_id":4777,"author_seq":203,"given_name":3332,"surname":3333,"affiliation":63,"orcid":63},"This paper introduces a tool Bonsai which supports human in annotating corpora with morphosyntactic information, and in retrieving syntactic structures stored in the database. Integrating annotation and retrieval enables users to annotate a new instance while looking back at the already annotated sentences which share the similar morphosyntactic structure. We focus on the retrieval part of the system, and describe a method to decompose a large input query into smaller ones in order to gain retrieval efficiency. 
The proposed method is evaluated with the Penn Treebank corpus, showing significant improvements.",{"paper_id":4796,"title":4797,"year":197,"month":855,"day":63,"doi":4798,"resource_url":4799,"first_page":63,"last_page":63,"pdf_url":4800,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4801,"paper_type":860,"authors":4802,"abstract":4813},"lrec2004-main-234","Classification of Japanese Spatial Nouns","10.63317\u002F472cuv8ic3mn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-234","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F405.pdf","tokunaga-etal-2004-classification",[4803,4804,4807,4810],{"paper_id":4796,"author_seq":247,"given_name":4791,"surname":4792,"affiliation":63,"orcid":63},{"paper_id":4796,"author_seq":232,"given_name":4805,"surname":4806,"affiliation":63,"orcid":63},"Tomofumi","Koyama",{"paper_id":4796,"author_seq":218,"given_name":4808,"surname":4809,"affiliation":63,"orcid":63},"Suguru","Saito",{"paper_id":4796,"author_seq":203,"given_name":4811,"surname":4812,"affiliation":63,"orcid":63},"Masayuki","Nakajima","We have already proposed a framework to represent a location in terms of both symbolic and numeric aspects. In order to deal with vague linguistic expressions of a location, the representation adopts a potential function mapping a location to its plausibility. This paper proposes classification of Japanese spatial nouns and potential functions corresponding to each class. We focused on a common Japanese spatial expression “X no Y (Y of X)” where X is a reference object and Y is a spatial noun. For example, “tukue no migi (the right of the desk)” denotes a location with reference to the desk. This expression were collected from corpora, and spatial nouns appearing in the Y position were classified into two major classes; designating a part of the reference object and designating a location apart from the reference object. 
And the latter class were further classified into two subclasses; direction-oriented and distance-oriented. For each class, a potential function were designed for providing meaning of spatial nouns.",{"paper_id":4815,"title":4816,"year":197,"month":855,"day":63,"doi":4817,"resource_url":4818,"first_page":63,"last_page":63,"pdf_url":4819,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4820,"paper_type":860,"authors":4821,"abstract":63},"lrec2004-main-235","Meaningful Clusters","10.63317\u002F3d9ovpzmc7py","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-235","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F406.pdf","sanfilippo-etal-2004-meaningful",[4822,4824,4827,4830,4833],{"paper_id":4815,"author_seq":247,"given_name":3429,"surname":4823,"affiliation":63,"orcid":63},"Sanfilippo",{"paper_id":4815,"author_seq":232,"given_name":4825,"surname":4826,"affiliation":63,"orcid":63},"Gus","Calapristi",{"paper_id":4815,"author_seq":218,"given_name":4828,"surname":4829,"affiliation":63,"orcid":63},"Vernon","Crow",{"paper_id":4815,"author_seq":203,"given_name":4831,"surname":4832,"affiliation":63,"orcid":63},"Beth","Hetzler",{"paper_id":4815,"author_seq":188,"given_name":4834,"surname":4835,"affiliation":63,"orcid":63},"Alan","Turner",{"paper_id":4837,"title":4838,"year":197,"month":855,"day":63,"doi":4839,"resource_url":4840,"first_page":63,"last_page":63,"pdf_url":4841,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4842,"paper_type":860,"authors":4843,"abstract":4853},"lrec2004-main-236","Multi-Document Summarization Using Multiple-Sequence Alignment","10.63317\u002F42wuasiag998","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-236","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F408.pdf","lacatusu-etal-2004-multi",[4844,4847,4850],{"paper_id":4837,"author_seq":247,"given_name":4845,"surname":4846,"affiliation":63,"orcid":63},"V. 
Finley","Lacatusu",{"paper_id":4837,"author_seq":232,"given_name":4848,"surname":4849,"affiliation":63,"orcid":63},"Steven J.","Maiorano",{"paper_id":4837,"author_seq":218,"given_name":4851,"surname":4852,"affiliation":63,"orcid":63},"Sanda M.","Harabagiu","This paper describes a novel clustering-based text summarization system that uses Multiple Sequence Alignment to improve the alignment of sentences within topic clusters. While most current clustering-based summarization systems base their summaries only on the common information contained in a collection of highly-related sentences, our system constructs more informative summaries that incorporate both the redundant and unique contributions of the sentences in the cluster. When evaluated using ROUGE, the summaries produced by our system represent a substantial improvement over the baseline, which is at 63% of the human performance.",{"paper_id":4855,"title":4856,"year":197,"month":855,"day":63,"doi":4857,"resource_url":4858,"first_page":63,"last_page":63,"pdf_url":4859,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4860,"paper_type":860,"authors":4861,"abstract":4868},"lrec2004-main-237","RevisionBank: A Resource for Revision-based Multi-document Summarization and Evaluation","10.63317\u002F5fwwhvjxekqp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-237","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F409.pdf","otterbacher-radev-2004-revisionbank",[4862,4865],{"paper_id":4855,"author_seq":247,"given_name":4863,"surname":4864,"affiliation":63,"orcid":63},"Jahna","Otterbacher",{"paper_id":4855,"author_seq":232,"given_name":4866,"surname":4867,"affiliation":63,"orcid":63},"Dragomir","Radev","Multi-document summaries produced via sentence extraction often suffer from a number of cohesion problems, including dangling anaphora, sudden shifts in topic and incorrect or awkward chronological ordering. 
Therefore, the development of an automated revision process to correct such problems is a research area of current interest. We present the RevisionBank, a corpus of 240 extractive, multi-document summaries that have been manually revised to promote cohesion. The summaries were revised by six linguistic students using a constrained set of revision operations that we previously developed. In the current paper, we describe the process of developing a taxonomy of cohesion problems and corrective revision operators that address such problems, as well as an annotation schema for our corpus. Finally, we discuss how our taxonomy and corpus can be used for the study of revision-based multi-document summarization as well as for summary evaluation.",{"paper_id":4870,"title":4871,"year":197,"month":855,"day":63,"doi":4872,"resource_url":4873,"first_page":63,"last_page":63,"pdf_url":4874,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4875,"paper_type":860,"authors":4876,"abstract":4894},"lrec2004-main-238","The Lácio-Web: Corpora and Tools to Advance Brazilian Portuguese Language Investigations and Computational Linguistic Tools","10.63317\u002F2xnvjy7pqcww","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-238","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F410.pdf","aluisio-etal-2004-lacio",[4877,4879,4882,4885,4888,4891],{"paper_id":4870,"author_seq":247,"given_name":2460,"surname":4878,"affiliation":63,"orcid":63},"Aluisio",{"paper_id":4870,"author_seq":232,"given_name":4880,"surname":4881,"affiliation":63,"orcid":63},"Gisele Montilha","Pinheiro",{"paper_id":4870,"author_seq":218,"given_name":4883,"surname":4884,"affiliation":63,"orcid":63},"Aline M. P.","Manfrin",{"paper_id":4870,"author_seq":203,"given_name":4886,"surname":4887,"affiliation":63,"orcid":63},"Leandro H. 
M.","de Oliveira",{"paper_id":4870,"author_seq":188,"given_name":4889,"surname":4890,"affiliation":63,"orcid":63},"Luiz C.","Genoves, Jr.",{"paper_id":4870,"author_seq":172,"given_name":4892,"surname":4893,"affiliation":63,"orcid":63},"Stella E. O.","Tagnin","In this paper we discuss the five requirements for building large publicly available corpora which geared the construction of the Lácio-Web corpora and their environments: 1) a comprehensive text typology; 2) text copyright clearance, compilation and annotation scheme; 3) a friendly and didactic interface; 4) the need to serve as support for several types of research; 5) the need to offer an array of associated tools. Also, we present the features that make Lácio-Web corpora interesting and novel as well as the limitations of this project, such as corpora size and balance, and the non-inclusion of spoken texts in the project’s reference corpus.",{"paper_id":4896,"title":4897,"year":197,"month":855,"day":63,"doi":4898,"resource_url":4899,"first_page":63,"last_page":63,"pdf_url":4900,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4901,"paper_type":860,"authors":4902,"abstract":4908},"lrec2004-main-239","CST Bank: A Corpus for the Study of Cross-document Structural Relationships","10.63317\u002F2f5uxzz8gxrk","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-239","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F411.pdf","radev-etal-2004-cst",[4903,4904,4905],{"paper_id":4896,"author_seq":247,"given_name":4866,"surname":4867,"affiliation":63,"orcid":63},{"paper_id":4896,"author_seq":232,"given_name":4863,"surname":4864,"affiliation":63,"orcid":63},{"paper_id":4896,"author_seq":218,"given_name":4906,"surname":4907,"affiliation":63,"orcid":63},"Zhu","Zhang","Clusters of multiple news stories related to the same topic exhibit a number of interesting properties. 
For example, when documents have been published at various points in time or by different authors or news agencies, one finds many instances of paraphrasing, information overlap and even contradiction. The current paper presents the Cross-document Structure Theory (CST) Bank, a collection of multi-document clusters in which pairs of sentences from different documents have been annotated for cross-document structure theory relationships. We will describe how we built the corpus, including our method for reducing the number of sentence pairs to be annotated by our hired judges, using lexical similarity measures. Finally, we will describe how CST and the CST Bank can be applied to different research areas such as multi-document summarization.",{"paper_id":4910,"title":4911,"year":197,"month":855,"day":63,"doi":4912,"resource_url":4913,"first_page":63,"last_page":63,"pdf_url":4914,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4915,"paper_type":860,"authors":4916,"abstract":4921},"lrec2004-main-240","Applying Computational Linguistic Techniques in a Documentary Project for Q’anjob’al (Mayan, Guatemala)","10.63317\u002F3muk9a97x678","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-240","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F412.pdf","kuhn-mateo-toledo-2004-applying",[4917,4918],{"paper_id":4910,"author_seq":247,"given_name":1867,"surname":4771,"affiliation":63,"orcid":63},{"paper_id":4910,"author_seq":232,"given_name":4919,"surname":4920,"affiliation":63,"orcid":63},"B’alam","Mateo-Toledo","This paper reports on a number of experiments in which we applied standard techniques from NLP in the context of documentation of endangered languages. We concentrated on the use of existing, freely available toolkits. 
Specifically, we explore the use of Finite-State Morphological Analysis, Maximum Entropy Part-of-Speech Tagging, and N-Gram Language Modeling.",{"paper_id":4923,"title":4924,"year":197,"month":855,"day":63,"doi":4925,"resource_url":4926,"first_page":63,"last_page":63,"pdf_url":4927,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4928,"paper_type":860,"authors":4929,"abstract":4932},"lrec2004-main-241","Information Retrieval System Using Latent Contextual Relevance","10.63317\u002F2dopwvdhjhpg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-241","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F413.pdf","sasaki-shinnou-2004-information",[4930,4931],{"paper_id":4923,"author_seq":247,"given_name":3060,"surname":3061,"affiliation":63,"orcid":63},{"paper_id":4923,"author_seq":232,"given_name":3057,"surname":3058,"affiliation":63,"orcid":63},"When the relevance feedback, which is one of the most popular information retrieval model, is used in an information retrieval system, a related word is extracted based on the first retrieval result. Then these words are added into the original query, and retrieval is performed again using updated query. Generally, using such query expansion technique, retrieval performance using the query expansion falls in comparison with the performance using the original query. As the cause, there is a few synonyms in the thesaurus and although some synonyms are added to the query, the same documents are retrieved as a result. 
In this paper, to solve the problem over such related words, we propose latent context relevance in consideration of the relevance between query and each index words in the document set.",{"paper_id":4934,"title":4935,"year":197,"month":855,"day":63,"doi":4936,"resource_url":4937,"first_page":63,"last_page":63,"pdf_url":4938,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4939,"paper_type":860,"authors":4940,"abstract":4950},"lrec2004-main-242","Toward Text Understanding: Integrating Relevance-tagged Corpus and Automatically Constructed Case Frames","10.63317\u002F36ppt9fmk2sg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-242","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F414.pdf","kawahara-etal-2004-toward",[4941,4944,4947],{"paper_id":4934,"author_seq":247,"given_name":4942,"surname":4943,"affiliation":63,"orcid":63},"Daisuke","Kawahara",{"paper_id":4934,"author_seq":232,"given_name":4945,"surname":4946,"affiliation":63,"orcid":63},"Ryohei","Sasano",{"paper_id":4934,"author_seq":218,"given_name":4948,"surname":4949,"affiliation":63,"orcid":63},"Sadao","Kurohashi","This paper proposes a wide-range anaphora resolution system toward text understanding. This system resolves zero, direct and indirect anaphors in Japanese texts by integrating two sorts of linguistic resources: a hand-annotated corpus with various relations and automatically constructed case frames. The corpus has relevance tags which consist of predicate-argument relations, relations between nouns and coreferences, and is utilized for learning parameters of the system and testing it. The case frames are indispensable knowledge both for detecting zero\u002Findirect anaphors and estimating appropriate antecedents. 
Our preliminary experiments showed promising results.",{"paper_id":4952,"title":4953,"year":197,"month":855,"day":63,"doi":4954,"resource_url":4955,"first_page":63,"last_page":63,"pdf_url":4956,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4957,"paper_type":860,"authors":4958,"abstract":4964},"lrec2004-main-243","Lexical Analysis of Agglutinative Languages Using a Dictionary of Lemmas and Lexical Transducers","10.63317\u002F37x2rajks23j","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-243","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F417.pdf","bae-choi-2004-lexical",[4959,4962],{"paper_id":4952,"author_seq":247,"given_name":4960,"surname":4961,"affiliation":63,"orcid":63},"Sun-Mee","Bae",{"paper_id":4952,"author_seq":232,"given_name":4963,"surname":1727,"affiliation":63,"orcid":63},"Key-Sun","This paper presents a simple method for performing a lexical analysis of agglutinative languages like Korean, which have a heavy morphology. Especially, for nouns and adverbs with regular morphological modifications and\u002For high productivity, we do not need to artificially construct huge dictionaries of all inflected forms of lemmas. To construct a dictionary of lemmas and lexical transducers, first, we construct automatically a dictionary of all inflected forms from KAIST POS-Tagged Corpus. Secondly, we separate the party of lemmas and one of sequences of inflectional suffixes. Thirdly, we describe their lexical transducers (i.e., morphological rules) to recognize all inflected forms of lemmas for nouns and adverbs according to the combinatorial restrictions between lemmas and their inflectional suffixes. 
Finally, we evaluate the advantages of this method.",{"paper_id":4966,"title":4967,"year":197,"month":855,"day":63,"doi":4968,"resource_url":4969,"first_page":63,"last_page":63,"pdf_url":4970,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4971,"paper_type":860,"authors":4972,"abstract":4976},"lrec2004-main-244","Evaluation and Adaptation of a Specialised Language Checking Tool for Non-specialised Machine Translation and Non-expert MT Users for Multi-lingual Telecooperation","10.63317\u002F5dysu3k3spqo","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-244","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F418.pdf","nuebel-2004-evaluation",[4973],{"paper_id":4966,"author_seq":247,"given_name":4974,"surname":4975,"affiliation":63,"orcid":63},"Rita","Nüebel","Style guides or writing recommendations play an important role in the field of technical documentation production, e.g. in industrial contexts. Also, writing recommendations are used in technical contexts together with machine translation (MT) in order to circumvent the MT system's weaknesses. This paper describes the evaluation and adaptation of a language checker deployed in the project int.unity. In this project, both MT and a specialised language checker were adapted to the requirements of non-expert users and a non-technical domain. The language technology was integrated with the groupware platform BSCW to support the multi-lingual communication of geographically distributed teams concerned with trade union work. The users' languages were either German or English, i.e. the users were monolingual. We chose linguatec's server version of Personal Translator 2004 MT system for the German<->English translations. The language checker CLAT for German and English has been developed at IAI. It is used by technical authors to support the production of high-quality technical documentation. 
The CLAT core system was adapted and extended in order to match the new requirements imposed by both the user profile and the subsequent MT application. In this paper, the focus will be on the assessment and adaptation of style rules for German.",{"paper_id":4978,"title":4979,"year":197,"month":855,"day":63,"doi":4980,"resource_url":4981,"first_page":63,"last_page":63,"pdf_url":4982,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4983,"paper_type":860,"authors":4984,"abstract":63},"lrec2004-main-245","A Critical Survey of the Methodology for IE Evaluation","10.63317\u002F3vn6stkyykfd","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-245","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F416.pdf","lavelli-etal-2004-critical",[4985,4987,4990,4993,4995,4997,4999],{"paper_id":4978,"author_seq":247,"given_name":2846,"surname":4986,"affiliation":63,"orcid":63},"Lavelli",{"paper_id":4978,"author_seq":232,"given_name":4988,"surname":4989,"affiliation":63,"orcid":63},"M. 
E.","Califf",{"paper_id":4978,"author_seq":218,"given_name":4991,"surname":4992,"affiliation":63,"orcid":63},"F.","Ciravegna",{"paper_id":4978,"author_seq":203,"given_name":4208,"surname":4994,"affiliation":63,"orcid":63},"Freitag",{"paper_id":4978,"author_seq":188,"given_name":3653,"surname":4996,"affiliation":63,"orcid":63},"Giuliano",{"paper_id":4978,"author_seq":172,"given_name":2303,"surname":4998,"affiliation":63,"orcid":63},"Kushmerick",{"paper_id":4978,"author_seq":155,"given_name":2308,"surname":5000,"affiliation":63,"orcid":63},"Romano",{"paper_id":5002,"title":5003,"year":197,"month":855,"day":63,"doi":5004,"resource_url":5005,"first_page":63,"last_page":63,"pdf_url":5006,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5007,"paper_type":860,"authors":5008,"abstract":5012},"lrec2004-main-246","Enriching WordNet Via Generative Metonymy and Creative Polysemy","10.63317\u002F4zw9gtmpdyyi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-246","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F419.pdf","hayes-etal-2004-enriching",[5009,5010,5011],{"paper_id":5002,"author_seq":247,"given_name":1150,"surname":1151,"affiliation":63,"orcid":63},{"paper_id":5002,"author_seq":232,"given_name":1147,"surname":1148,"affiliation":63,"orcid":63},{"paper_id":5002,"author_seq":218,"given_name":1144,"surname":1145,"affiliation":63,"orcid":63},"Metonymy is a creative process that establishes relationships based on contiguity or semantic relatedness between concepts. We outline a mechanism for deriving new concepts from WordNet using metonymy. We argue that by exploiting polysemy in WordNet we can take advantage of the metonymic relations between concepts. The focus of our metonymy generation work has been the creation of noun-noun compounds that do not already exist in WordNet and which can be profitably added to WordNet. 
The mechanism of metonymy generation we outline takes a source compound and creates new compounds by exploiting the polysemy associated with hyponyms of the head of the source compound. We argue that metonymy generation is a sound basis for concept creation as the newly created compounds are semantically related to the source concept. We demonstrate that metonymy generation based on polysemy is superior to a method of metonymy generation that ignores polysemy. These new concepts can be used to augment WordNet.",{"paper_id":5014,"title":5015,"year":197,"month":855,"day":63,"doi":5016,"resource_url":5017,"first_page":63,"last_page":63,"pdf_url":5018,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5019,"paper_type":860,"authors":5020,"abstract":5031},"lrec2004-main-247","Evaluation and Adaptation of the Celex Dutch Morphological Database","10.63317\u002F2i3jev5mzmkf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-247","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F421.pdf","laureys-etal-2004-evaluation",[5021,5024,5026,5027,5028],{"paper_id":5014,"author_seq":247,"given_name":5022,"surname":5023,"affiliation":63,"orcid":63},"Tom","Laureys",{"paper_id":5014,"author_seq":232,"given_name":2739,"surname":5025,"affiliation":63,"orcid":63},"De Pauw",{"paper_id":5014,"author_seq":218,"given_name":1773,"surname":3607,"affiliation":63,"orcid":63},{"paper_id":5014,"author_seq":203,"given_name":2672,"surname":2673,"affiliation":63,"orcid":63},{"paper_id":5014,"author_seq":188,"given_name":5029,"surname":5030,"affiliation":63,"orcid":63},"Dirk","Van Compernolle","This paper describes some important modifications to the Celex morphological database in the context of the FLaVoR project. FLaVoR aims to develop a novel modular framework for speech recognition, enabling the integration of complex linguistic knowledge sources, such as a morphological model. 
Morphology is a fairly unexploited linguistic information source speech recognizers could benefit from. This is especially true for languages which allow for a rich set of morphological operations, such as our target language Dutch. In this paper we focus on the exploitation of the Celex Dutch morphological database as the information source underlying two different morphological analyzers being developed within the project. Although the Celex database provides a valuable source of morphological information for Dutch, many modifications were necessary before it could be practically applied. We identify major problems, discuss the implemented solutions and finally experimentally evaluate the effect of our modifications to the database.",{"paper_id":5033,"title":5034,"year":197,"month":855,"day":63,"doi":5035,"resource_url":5036,"first_page":63,"last_page":63,"pdf_url":5037,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5038,"paper_type":860,"authors":5039,"abstract":5050},"lrec2004-main-248","A Model of Semantic Representations Analysis for Chinese Sentences","10.63317\u002F5oqb8yzphzst","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-248","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F422.pdf","tang-etal-2004-model",[5040,5043,5046,5048],{"paper_id":5033,"author_seq":247,"given_name":5041,"surname":5042,"affiliation":63,"orcid":63},"Li","Tang",{"paper_id":5033,"author_seq":232,"given_name":5044,"surname":5045,"affiliation":63,"orcid":63},"Donghong","Ji",{"paper_id":5033,"author_seq":218,"given_name":5047,"surname":3634,"affiliation":63,"orcid":63},"Lingpeng",{"paper_id":5033,"author_seq":203,"given_name":1249,"surname":5049,"affiliation":63,"orcid":63},"Nie","Analyzing the semantic Representations of 5000 Chinese sentences and describing a new sentence analysis method that evaluates semantic preference knowledge, we create a model of semantic representation analysis based on the correspondence 
between lexical meanings and conceptual structures, and relations that underlie those lexical meanings. We also propose a semantical argument-head relation that combines ‘basic conceptual structure’ and ‘Head-Driven Principle’",{"paper_id":5052,"title":5053,"year":197,"month":855,"day":63,"doi":5054,"resource_url":5055,"first_page":63,"last_page":63,"pdf_url":5056,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5057,"paper_type":860,"authors":5058,"abstract":63},"lrec2004-main-249","A Comparison of Two Variant Corpora: The Same Content with Different Source","10.63317\u002F59oijyh3pt68","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-249","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F424.pdf","paik-etal-2004-comparison",[5059,5062,5065],{"paper_id":5052,"author_seq":247,"given_name":5060,"surname":5061,"affiliation":63,"orcid":63},"Kyonghee","Paik",{"paper_id":5052,"author_seq":232,"given_name":5063,"surname":5064,"affiliation":63,"orcid":63},"Kiyonori","Ohtake",{"paper_id":5052,"author_seq":218,"given_name":5066,"surname":2243,"affiliation":63,"orcid":63},"Kazuhide",{"paper_id":5068,"title":5069,"year":197,"month":855,"day":63,"doi":5070,"resource_url":5071,"first_page":63,"last_page":63,"pdf_url":5072,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5073,"paper_type":860,"authors":5074,"abstract":5078},"lrec2004-main-250","Training a Sentence-Level Machine Translation Confidence Measure","10.63317\u002F3dvfiipkcias","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-250","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F426.pdf","quirk-2004-training",[5075],{"paper_id":5068,"author_seq":247,"given_name":5076,"surname":5077,"affiliation":63,"orcid":63},"Christopher B.","Quirk","We present a supervised method for training a sentence level confidence measure on translation output using a human-annotated corpus. 
We evaluate a variety of machine learning methods. The resultant measure, while trained on a very small dataset, correlates well with human judgments, and proves to be effective on one task based evaluation. Although the experiments have only been run on one MT system, we believe the nature of the features gathered are general enough that the approach will also work well on other systems.",{"paper_id":5080,"title":5081,"year":197,"month":855,"day":63,"doi":5082,"resource_url":5083,"first_page":63,"last_page":63,"pdf_url":5084,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5085,"paper_type":860,"authors":5086,"abstract":5093},"lrec2004-main-251","Software Tools for Morphological Tagging of Zulu Corpora and Lexicon Development","10.63317\u002F2sxej9uov327","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-251","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F427.pdf","bosch-pretorius-2004-software",[5087,5090],{"paper_id":5080,"author_seq":247,"given_name":5088,"surname":5089,"affiliation":63,"orcid":63},"Sonja E.","Bosch",{"paper_id":5080,"author_seq":232,"given_name":5091,"surname":5092,"affiliation":63,"orcid":63},"Laurette","Pretorius","The aim of this paper is to discuss aspects of an on-going project on the development of grammatical and lexical resources for Zulu with sufficient coverage for unrestricted text. We explain how the basic software tools of computational morphology are used in linguistic processing, more specifically for automatic word form recognition and morphological tagging of the growing stock of electronic text corpora of a Bantu language such as Zulu. 
It is also shown how a machine-readable lexicon is in turn enhanced with the information acquired and extracted by means of such corpus analysis.",{"paper_id":5095,"title":5096,"year":197,"month":855,"day":63,"doi":5097,"resource_url":5098,"first_page":63,"last_page":63,"pdf_url":5099,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5100,"paper_type":860,"authors":5101,"abstract":5110},"lrec2004-main-252","Improving Collocation Extraction for High Frequency Words","10.63317\u002F4hqnby5tbsrw","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-252","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F429.pdf","wible-etal-2004-improving",[5102,5104,5107],{"paper_id":5095,"author_seq":247,"given_name":1790,"surname":5103,"affiliation":63,"orcid":63},"Wible",{"paper_id":5095,"author_seq":232,"given_name":5105,"surname":5106,"affiliation":63,"orcid":63},"Chin-Hwa","Kuo",{"paper_id":5095,"author_seq":218,"given_name":5108,"surname":5109,"affiliation":63,"orcid":63},"Nai-Lung","Tsao","The purpose of this paper is to introduce an alternative word association measure aimed at addressing the under-extraction collocations that contain high frequency words. While measures such as MI provide the important contribution of filtering out sheer high frequency of words in the detection of collocations in large corpora, one side effect of this filtering is that it becomes correspondingly difficult for such measures to detect true collocations involving high frequency words. As an alternative, we propose normalizing the MI measure by dividing the frequency of a candidate lexeme by the number of senses of that lexeme. We premise this alternative approach on the one sense per collocation assumption of Yarowsky (1992; 1995). Ten verb-noun collocations involving three high frequency verbs (make, take, run) are used to compare the extraction results of traditional MI and the proposed normalized MI. 
Results show the ranking of these high-frequency verbs as candidate collocates with the target focal nouns is raised by normalizing MI as proposed. Side effects of these improved rankings are discussed, such as increase in false positives resulting from higher recall. It is found that overall rank precision remains quite stable even with the increased recall of normalized MI.",{"paper_id":5112,"title":5113,"year":197,"month":855,"day":63,"doi":5114,"resource_url":5115,"first_page":63,"last_page":63,"pdf_url":5116,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5117,"paper_type":860,"authors":5118,"abstract":5128},"lrec2004-main-253","Annotation of Coreference Relations Among Linguistic Expressions and Images in Biological Articles","10.63317\u002F4jehsz9schbp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-253","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F431.pdf","kawazoe-etal-2004-annotation",[5119,5122,5125],{"paper_id":5112,"author_seq":247,"given_name":5120,"surname":5121,"affiliation":63,"orcid":63},"Ai","Kawazoe",{"paper_id":5112,"author_seq":232,"given_name":5123,"surname":5124,"affiliation":63,"orcid":63},"Asanobu","Kitamoto",{"paper_id":5112,"author_seq":218,"given_name":5126,"surname":5127,"affiliation":63,"orcid":63},"Nigel","Collier","In this paper, we propose an annotation scheme which can be used not only for annotating coreference relations between linguistic expressions, but also those among linguistic expressions and images, in scientific texts such as biomedical articles. Images in biomedical domain often contain important information for analyses and diagnoses, and we consider that linking images to textual descriptions of their semantic contents in terms of coreference relations is useful for multimodal access to the information. We present our annotation scheme and the concept of a \"coreference pool,\" which plays a central role in the scheme. 
We also introduce a support tool for text annotation named Open Ontology Forge which we have already developed, and additional functions for the software to cover image annotations (ImageOF) which is now being developed.",{"paper_id":5130,"title":5131,"year":197,"month":855,"day":63,"doi":5132,"resource_url":5133,"first_page":63,"last_page":63,"pdf_url":5134,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5135,"paper_type":860,"authors":5136,"abstract":5138},"lrec2004-main-254","Evaluation of Cross-Language Information Retrieval Using the Domain-Specific GIRT Data as Parallel German-English Corpus","10.63317\u002F47jrsxp6mc8q","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-254","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F432.pdf","kluck-2004-evaluation",[5137],{"paper_id":5130,"author_seq":247,"given_name":1357,"surname":3409,"affiliation":63,"orcid":63},"The development of the evaluation of domain-specific cross-language information retrieval (CLIR) is shown in the context of the Cross-Language Evaluation Forum (CLEF) campaigns from 2000 to 2003. The pre-conditions and the usable data and additionally available instruments are described. The main goals of this task of CLEF are to allow the evaluation of Cross-Language Information Retrieval (CLIR) systems in the context of structured data and in a domain-specific area (not in the more general context of floating, journalistic texts), and with the additional possibility to make use of thesauri which had been used for intellectual indexing of the documents and are provided with the data. 
The parallel German-English GIRT4 corpus is described and some of the results of the CLEF 2004 campaign are discussed.",{"paper_id":5140,"title":5141,"year":197,"month":855,"day":63,"doi":5142,"resource_url":5143,"first_page":63,"last_page":63,"pdf_url":5144,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5145,"paper_type":860,"authors":5146,"abstract":5149},"lrec2004-main-255","Generating Coreferential Descriptions from a Structured Model of the Context","10.63317\u002F32vfm3tyrvdg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-255","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F433.pdf","manuelian-2004-generating",[5147],{"paper_id":5140,"author_seq":247,"given_name":4199,"surname":5148,"affiliation":63,"orcid":63},"Manuélian","This paper shows on the basis of a corpus study how a model of the context should be structured for the generation of coreferring descriptions in French. We show that this way of structuring the context can help to generate more paraphrases and a particular kind of referring expressions used to add information about the referent.",{"paper_id":5151,"title":5152,"year":197,"month":855,"day":63,"doi":5153,"resource_url":5154,"first_page":63,"last_page":63,"pdf_url":5155,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5156,"paper_type":860,"authors":5157,"abstract":5173},"lrec2004-main-256","Open Collaborative Development of the Thai Language Resources for Natural Language 
Processing","10.63317\u002F2m3pxncrvkea","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-256","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F434.pdf","charoenporn-etal-2004-open",[5158,5161,5164,5167,5170],{"paper_id":5151,"author_seq":247,"given_name":5159,"surname":5160,"affiliation":63,"orcid":63},"Thatsanee","Charoenporn",{"paper_id":5151,"author_seq":232,"given_name":5162,"surname":5163,"affiliation":63,"orcid":63},"Virach","Sornlertlamvanich",{"paper_id":5151,"author_seq":218,"given_name":5165,"surname":5166,"affiliation":63,"orcid":63},"Sawit","Kasuriya",{"paper_id":5151,"author_seq":203,"given_name":5168,"surname":5169,"affiliation":63,"orcid":63},"Chatchawarn","Hansakunbuntheung",{"paper_id":5151,"author_seq":188,"given_name":5171,"surname":5172,"affiliation":63,"orcid":63},"Hitoshi","Isahara","Language Resources are recognized as an essential component in linguistic infrastructure and a starting point of Natural Language Processing systems and applications. In this paper, we describe the achievement of the development and the use of Thai Language Resources germinated with an open collaboration platform, under the collaboration between research institutes. The resources include either text or speech. Text resources are divided into lexicon database and annotated corpus. We started developing a corpus-based Thai-English lexicon database (LEXiTRON) since 1994. It was originated from a dictionary designed for using in developing a machine translation system. Since then the Thai POS was designed and evaluated in several applications (word segmentation, machine translation, grapheme-to-phoneme, etc.) Extending the lexicon database, POS tagged corpus (ORCHID), and speech corpora for both synthesis and recognition are developed and functioned as an important part of research and development on NLP or HLT. 
These language resources are available for academic experiment.",{"paper_id":5175,"title":5176,"year":197,"month":855,"day":63,"doi":5177,"resource_url":5178,"first_page":63,"last_page":63,"pdf_url":5179,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5180,"paper_type":860,"authors":5181,"abstract":5187},"lrec2004-main-257","Automatic Translation Memory Fuzzy Match Post-Editing: A Step Beyond Traditional TM\u002FMT Integration","10.63317\u002F3ka6b64jyd9b","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-257","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F435.pdf","kranias-samiotou-2004-automatic",[5182,5185],{"paper_id":5175,"author_seq":247,"given_name":5183,"surname":5184,"affiliation":63,"orcid":63},"Lambros","Kranias",{"paper_id":5175,"author_seq":232,"given_name":4167,"surname":5186,"affiliation":63,"orcid":63},"Samiotou","An innovative way of integrating Translation Memory (TM) and Machine Translation (MT) processing is presented which goes beyond the traditional \"cascade\" integration of Translation Memory and Machine Translation. The new method aims to automatically post-edit TM similar matches by the use of an MT module thus enhancing the TM fuzzy (similar) scores as well as enabling the utilisation of low-score TM fuzzy matches. This leads to substantial translation cost reduction. The suggested method, which can be classified as an Example-Based Machine Translation application, is analysed and examples are provided for clarification. It is evaluated through test results that involve human interaction. 
The method has been implemented within the ESTeam Translator (ET) Language Toolbox and is already in use in the various commercial installations of ET.",{"paper_id":5189,"title":5190,"year":197,"month":855,"day":63,"doi":5191,"resource_url":5192,"first_page":63,"last_page":63,"pdf_url":5193,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5194,"paper_type":860,"authors":5195,"abstract":63},"lrec2004-main-258","Linguistic Annotation of the Spoken Dutch Corpus: If We Had To Do It All Over Again","10.63317\u002F2bqmeiuwsgkq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-258","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F437.pdf","schuurman-etal-2004-linguistic",[5196,5199,5200,5203,5206,5208],{"paper_id":5189,"author_seq":247,"given_name":5197,"surname":5198,"affiliation":63,"orcid":63},"Ineke","Schuurman",{"paper_id":5189,"author_seq":232,"given_name":1387,"surname":1818,"affiliation":63,"orcid":63},{"paper_id":5189,"author_seq":218,"given_name":5201,"surname":5202,"affiliation":63,"orcid":63},"Heleen","Hoekstra",{"paper_id":5189,"author_seq":203,"given_name":5204,"surname":5205,"affiliation":63,"orcid":63},"Nelleke","Oostdijk",{"paper_id":5189,"author_seq":188,"given_name":4482,"surname":5207,"affiliation":63,"orcid":63},"Piepenbrock",{"paper_id":5189,"author_seq":172,"given_name":5209,"surname":5210,"affiliation":63,"orcid":63},"Machteld","Schouppe",{"paper_id":5212,"title":5213,"year":197,"month":855,"day":63,"doi":5214,"resource_url":5215,"first_page":63,"last_page":63,"pdf_url":5216,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5217,"paper_type":860,"authors":5218,"abstract":5227},"lrec2004-main-259","Combining Symbolic and Statistical Methods in Morphological Analysis and Unknown Word 
Guessing","10.63317\u002F3yzudscu9f3r","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-259","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F438.pdf","novak-etal-2004-combining",[5219,5222,5224],{"paper_id":5212,"author_seq":247,"given_name":5220,"surname":5221,"affiliation":63,"orcid":63},"Attila","Novák",{"paper_id":5212,"author_seq":232,"given_name":4335,"surname":5223,"affiliation":63,"orcid":63},"Nagy",{"paper_id":5212,"author_seq":218,"given_name":5225,"surname":5226,"affiliation":63,"orcid":63},"Csaba","Oravecz","Highly inflectional\u002Fagglutinative languages like Hungarian typically feature possible word forms in such a magnitude that automatic methods that provide morphosyntactic annotation on the basis of some training corpus often face the problem of data sparseness. A possible solution to this problem is to apply a comprehensive morphological analyser, which is able to analyse almost all wordforms alleviating the problem of unseen tokens. However, although in a smaller number, there will still remain forms which are unknown even to the morphological analyzer and should be handled by some guesser mechanism. The paper will describe a hybrid method which combines symbolic and statistical information to provide lemmatization and suffix analyses for unknown word forms. 
Evaluation is carried out with respect to the induction of possible analyses and their respective lexical probabilities for unknown word forms in a part-of-speech tagging system.",{"paper_id":5229,"title":5230,"year":197,"month":855,"day":63,"doi":5231,"resource_url":5232,"first_page":63,"last_page":63,"pdf_url":5233,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5234,"paper_type":860,"authors":5235,"abstract":5255},"lrec2004-main-260","A New Approach to the Corpus-based Statistical Investigation of Hungarian Multi-word Lexemes","10.63317\u002F26idhknrkpp2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-260","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F441.pdf","kis-etal-2004-new",[5236,5239,5242,5245,5248,5251,5253],{"paper_id":5229,"author_seq":247,"given_name":5237,"surname":5238,"affiliation":63,"orcid":63},"Balázs","Kis",{"paper_id":5229,"author_seq":232,"given_name":5240,"surname":5241,"affiliation":63,"orcid":63},"Begoña","Villada",{"paper_id":5229,"author_seq":218,"given_name":5243,"surname":5244,"affiliation":63,"orcid":63},"Gosse","Bouma",{"paper_id":5229,"author_seq":203,"given_name":5246,"surname":5247,"affiliation":63,"orcid":63},"Gábor","Ugray",{"paper_id":5229,"author_seq":188,"given_name":5249,"surname":5250,"affiliation":63,"orcid":63},"Tamás","Bíró",{"paper_id":5229,"author_seq":172,"given_name":5246,"surname":5252,"affiliation":63,"orcid":63},"Pohl",{"paper_id":5229,"author_seq":155,"given_name":3376,"surname":5254,"affiliation":63,"orcid":63},"Nerbonne","We apply statistical methods to perform automatic extraction of Hungarian collocations from corpora. Due to the complexity of Hungarian   morphology, a complex resource preparation tool chain has been developed. This tool chain implements a reusable and, in principle,   language independent framework. 
In the first part, the paper describes the tool chain itself, then, in the second part, an experiment using this framework. The experiment deals with the extraction of \u003Cverb+noun+casemark> patterns from the corpus as collocation candidates, in order to compare results to an experiment on Dutch V + PP patterns (Villada, 2004). Statistical processing on this dataset provided interesting observations, briefly explained in the evaluation section. We conclude by providing a summary of further steps required to improve the extraction process. This is not restricted to improvements in the resource preparation for statistical processing, but a proposal to use nonstatistical means as well, thus acquiring an efficient blend of different methods.",{"paper_id":5257,"title":5258,"year":197,"month":855,"day":63,"doi":5259,"resource_url":5260,"first_page":63,"last_page":63,"pdf_url":5261,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5262,"paper_type":860,"authors":5263,"abstract":5267},"lrec2004-main-261","Discarding Noise in an Automatically Acquired Lexicon of Support verb Constructions","10.63317\u002F4z68jm7szb9p","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-261","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F442.pdf","moiron-2004-discarding",[5264],{"paper_id":5257,"author_seq":247,"given_name":5265,"surname":5266,"affiliation":63,"orcid":63},"M. Begoña Villada","Moirón","We applied data-driven methods to carry out automatic acquisition of Dutch prepositional support verb constructions (SVCs) in corpora (e.g., iets in de gaten houden (``keep an eye on something'')). This paper addresses the question whether linguistic diagnostics help to discard noise from the nbest lists and how to (semi-)automatically apply such linguistic diagnostics to parsed corpora. 
We show that some of the linguistic diagnostics proposed in Hollebrandse (1993) effectively identify SVCs and contribute a modest error rate decrease.",{"paper_id":5269,"title":5270,"year":197,"month":855,"day":63,"doi":5271,"resource_url":5272,"first_page":63,"last_page":63,"pdf_url":5273,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5274,"paper_type":860,"authors":5275,"abstract":5284},"lrec2004-main-262","Translation Memories Enrichment by Statistical Bilingual Segmentation","10.63317\u002F2djjrhajoax3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-262","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F443.pdf","nevado-etal-2004-translation",[5276,5279,5281],{"paper_id":5269,"author_seq":247,"given_name":5277,"surname":5278,"affiliation":63,"orcid":63},"Francisco","Nevado",{"paper_id":5269,"author_seq":232,"given_name":5277,"surname":5280,"affiliation":63,"orcid":63},"Casacuberta",{"paper_id":5269,"author_seq":218,"given_name":5282,"surname":5283,"affiliation":63,"orcid":63},"Josu","Landa","A majority of Machine Aided Translation systems are based on comparisons between a source sentence and reference sentences stored in Translation Memories (TMs). The translation search is done by looking for sentences in a database which are similar to the source sentence. TMs have two basic limitations: the dependency on the repetition of complete sentences and the high cost of building a TM. As human translators do not only remember sentences from their preceding translations, but they also decompose the sentence to be translated and work with smaller units, it would be desirable to enrich the TM database with smaller translation units. This enrichment should also be automatic in order not to increase the cost of building a TM. 
We propose the application of two automatic bilingual segmentation techniques based on statistical translation methods in order to create new, shorter bilingual segments to be included in a TM database. An evaluation of the two techniques is carried out for a bilingual Basque-Spanish task.",{"paper_id":5286,"title":5287,"year":197,"month":855,"day":63,"doi":5288,"resource_url":5289,"first_page":63,"last_page":63,"pdf_url":5290,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5291,"paper_type":860,"authors":5292,"abstract":5302},"lrec2004-main-263","The African Speech Technology Project: An Assessment","10.63317\u002F3iwkovnz3vkg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-263","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F445.pdf","roux-etal-2004-african",[5293,5296,5299],{"paper_id":5286,"author_seq":247,"given_name":5294,"surname":5295,"affiliation":63,"orcid":63},"J. C.","Roux",{"paper_id":5286,"author_seq":232,"given_name":5297,"surname":5298,"affiliation":63,"orcid":63},"P. H.","Louw",{"paper_id":5286,"author_seq":218,"given_name":5300,"surname":5301,"affiliation":63,"orcid":63},"T. R.","Niesler","This paper reflects on the recently completed African Speech Technology (AST) Project. The AST Project successfully developed eleven annotated telephone speech databases for five languages spoken in South Africa i.e. Xhosa, Southern Sotho, Zulu, English and Afrikaans. These databases were used to train and test speech recognition systems applied in a multilingual telephone-based prototype hotel booking system. An overview is given of the database design and contents. The acquisition of the data is discussed with regards to the telephony interface, as well as speaker recruitment and briefing. Particular reference is given to some of the practical implications of acquiring appropriate data in under-developed communities. 
Database management processes such as transcription, quality control and validation are explained. This is followed by information on the development of the prototype. Results of usability tests are discussed followed by an assessment of the Project as a whole.",{"paper_id":5304,"title":5305,"year":197,"month":855,"day":63,"doi":5306,"resource_url":5307,"first_page":63,"last_page":63,"pdf_url":5308,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5309,"paper_type":860,"authors":5310,"abstract":5318},"lrec2004-main-264","Automatic Phonemic Labeling and Segmentation of Spoken Dutch","10.63317\u002F3fuqz2pn3ofr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-264","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F447.pdf","demuynck-etal-2004-automatic",[5311,5314,5315,5317],{"paper_id":5304,"author_seq":247,"given_name":5312,"surname":5313,"affiliation":63,"orcid":63},"Kris","Demuynck",{"paper_id":5304,"author_seq":232,"given_name":5022,"surname":5023,"affiliation":63,"orcid":63},{"paper_id":5304,"author_seq":218,"given_name":1519,"surname":5316,"affiliation":63,"orcid":63},"Wambacq",{"paper_id":5304,"author_seq":203,"given_name":5029,"surname":5030,"affiliation":63,"orcid":63},"The CGN corpus (Corpus Gesproken Nederlands\u002FCorpus Spoken Dutch) is a large speech corpus of contemporary Dutch as spoken in Belgium (3.3 million words) and in the Netherlands (5.6 million words). Due to its size, manual phonemic annotation was limited to 10% of the data and automatic systems were used to complement this data. This paper describes the automatic generation of the phonemic annotations and the corresponding segmentations. First, we detail the processes used to generate possible pronunciations for each sentence and to select the most likely one. Next, we identify the remaining difficulties when handling the CGN data and explain how we solved them. 
We conclude with an evaluation of the quality of the resulting transcriptions and segmentations.",{"paper_id":5320,"title":5321,"year":197,"month":855,"day":63,"doi":5322,"resource_url":5323,"first_page":63,"last_page":63,"pdf_url":5324,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5325,"paper_type":860,"authors":5326,"abstract":5331},"lrec2004-main-265","Using Large Multi-purpose Corpora for Specific Research Questions: Discourse Phenomena Related to Wh-questions in the Spoken Dutch Corpus","10.63317\u002F2zxnch9wkbfp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-265","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F449.pdf","oostdijk-boves-2004-using",[5327,5328],{"paper_id":5320,"author_seq":247,"given_name":5204,"surname":5205,"affiliation":63,"orcid":63},{"paper_id":5320,"author_seq":232,"given_name":5329,"surname":5330,"affiliation":63,"orcid":63},"Lou","Boves","In this paper, we investigate whether a dataset derived from a multi-purpose corpus such as the Spoken Dutch Corpus may be considered appropriate for developing a taxonomy of wh-questions, and a model of the way in which these questions are integrated in spoken discourse. We compare the results obtained from the Spoken Dutch Corpus with a similar analysis of a large random collection of FAQs from the internet. We find substantial differences between the questions in spoken discourse and FAQs. 
Therefore, it may not be trivial to use a general purpose corpus as a starting point for developing models for human-computer interaction.",{"paper_id":5333,"title":5334,"year":197,"month":855,"day":63,"doi":5335,"resource_url":5336,"first_page":63,"last_page":63,"pdf_url":5337,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5338,"paper_type":860,"authors":5339,"abstract":5344},"lrec2004-main-266","Methods of Digital Access for Legal Language Documentation","10.63317\u002F2twtmfury5ae","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-266","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F450.pdf","mariani-badii-2004-methods",[5340,5342],{"paper_id":5333,"author_seq":247,"given_name":1752,"surname":5341,"affiliation":63,"orcid":63},"Mariani",{"paper_id":5333,"author_seq":232,"given_name":3968,"surname":5343,"affiliation":63,"orcid":63},"Badii","For many years the Istituto di Teoria e Tecniche dell'Informazione Giuridica (ITTIG) of the Consiglio Nazionale delle Ricerche has studied the evolution of legal language, creating databases for documentation and digital retrieval of law texts. The ITTIG is attending to document legal language through information technology in order to provide as wide an access as possible to its findings. The Institute has recently created an on-line digital database that includes the full text of the most important Italian laws (Codes and Constitutions) from the 16th to the 20th century. 
The ITTIG is also in the process of preparing another database made up of contexts from the original 10th to the 20th century legal sources.",{"paper_id":5346,"title":5347,"year":197,"month":855,"day":63,"doi":5348,"resource_url":5349,"first_page":63,"last_page":63,"pdf_url":5350,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5351,"paper_type":860,"authors":5352,"abstract":5367},"lrec2004-main-267","Architecture for Distributed Language Resource Management and Archiving","10.63317\u002F2ves6qza26ri","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-267","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F451.pdf","wittenburg-etal-2004-architecture",[5353,5356,5359,5362,5365],{"paper_id":5346,"author_seq":247,"given_name":5354,"surname":5355,"affiliation":63,"orcid":63},"Peter","Wittenburg",{"paper_id":5346,"author_seq":232,"given_name":5357,"surname":5358,"affiliation":63,"orcid":63},"Heidi","Johnson",{"paper_id":5346,"author_seq":218,"given_name":5360,"surname":5361,"affiliation":63,"orcid":63},"Markus","Buchhorn",{"paper_id":5346,"author_seq":203,"given_name":5363,"surname":5364,"affiliation":63,"orcid":63},"Hennie","Brugman",{"paper_id":5346,"author_seq":188,"given_name":1811,"surname":5366,"affiliation":63,"orcid":63},"Broeder","An architecture is presented that provides an integrated framework for managing, archiving and accessing language resources. This architecture was discussed in the DELAMAN network – a world-wide network of archives holding material about endangered languages. Such a framework will be built upon a metadata infrastructure, a mechanism to resolve unique resource identifiers, user and access rights management components. These components are closely related and have to be based on redundant and distributed services. 
For all these components existing middleware seems to be available, however, it has to be checked how they can interact with each other.",{"paper_id":5369,"title":5370,"year":197,"month":855,"day":63,"doi":5371,"resource_url":5372,"first_page":63,"last_page":63,"pdf_url":5373,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5374,"paper_type":860,"authors":5375,"abstract":5394},"lrec2004-main-268","Creation and Validation of Large Lexica for Speech-to-Speech Translation Purposes","10.63317\u002F5kg45a89794r","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-268","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F452.pdf","fersoe-etal-2004-creation",[5376,5379,5382,5383,5386,5388,5391],{"paper_id":5369,"author_seq":247,"given_name":5377,"surname":5378,"affiliation":63,"orcid":63},"Hanne","Fersøe",{"paper_id":5369,"author_seq":232,"given_name":5380,"surname":5381,"affiliation":63,"orcid":63},"Elviira","Hartikainen",{"paper_id":5369,"author_seq":218,"given_name":3420,"surname":3421,"affiliation":63,"orcid":63},{"paper_id":5369,"author_seq":203,"given_name":5384,"surname":5385,"affiliation":63,"orcid":63},"Giulio","Maltese",{"paper_id":5369,"author_seq":188,"given_name":5387,"surname":3427,"affiliation":63,"orcid":63},"Asunción",{"paper_id":5369,"author_seq":172,"given_name":5389,"surname":5390,"affiliation":63,"orcid":63},"Shaunie","Shammass",{"paper_id":5369,"author_seq":155,"given_name":5392,"surname":5393,"affiliation":63,"orcid":63},"Ute","Ziegenhain","This paper presents specifications and requirements for creation and validation of large lexica that are needed in automatic Speech Recognition (ASR), Text-to-Speech (TTS) and statistical Speech-to-Speech Translation (SST) systems. The prepared language resources are created and validated within the scope of the EU-project LC-STAR (Lexica and Corpora for Speech-to-Speech Translation Components) during years 2002-2005. 
Large lexica consisting of phonetic, suprasegmental and morpho-syntactic content will be provided with well-documented specifications for 13 languages. A short summary of the LC-STAR project itself is presented. Overview about the specification for the corpora collection and word extraction as well as the specification and format of the lexica are presented. Particular attention is paid to the validation of the produced lexica and the lessons learnt during pre-validation. The created and validated language resources will be available via ELRA\u002FELDA.",{"paper_id":5396,"title":5397,"year":197,"month":855,"day":63,"doi":5398,"resource_url":5399,"first_page":63,"last_page":63,"pdf_url":5400,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5401,"paper_type":860,"authors":5402,"abstract":5409},"lrec2004-main-269","Enlarging the Croatian Morphological Lexicon by Automatic Lexical Acquisition from Raw Corpora","10.63317\u002F4t2nbcypfnnm","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-269","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F457.pdf","oliver-tadic-2004-enlarging",[5403,5406],{"paper_id":5396,"author_seq":247,"given_name":5404,"surname":5405,"affiliation":63,"orcid":63},"Antoni","Oliver",{"paper_id":5396,"author_seq":232,"given_name":5407,"surname":5408,"affiliation":63,"orcid":63},"Marko","Tadić","This paper presents experiments for enlarging the Croatian Morphological Lexicon by applying an automatic acquisition methodology. The basic sources of information for the system are a set of morphological rules and a raw corpus. The morphological rules have been automatically derived from the existing Croatian Morphological Lexicon and we have used in our experiments a subset of the Croatian National Corpus. The methodology has proved to be efficient for those languages that, like Croatian, present a rich and mainly concatenative morphology. 
This method can be applied for the creation of new resources, as well as in the enrichment of existing ones. We also present an extension of the system that uses automatic querying to Internet to acquire those entries for which we have not enough information in our corpus.",{"paper_id":5411,"title":5412,"year":197,"month":855,"day":63,"doi":5413,"resource_url":5414,"first_page":63,"last_page":63,"pdf_url":5415,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5416,"paper_type":860,"authors":5417,"abstract":5424},"lrec2004-main-270","Learning to Predict Pitch Accents Using Bayesian Belief Networks for Greek Language","10.63317\u002F3hw8mswo8smm","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-270","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F458.pdf","zervas-etal-2004-learning",[5418,5421,5422,5423],{"paper_id":5411,"author_seq":247,"given_name":5419,"surname":5420,"affiliation":63,"orcid":63},"Panagiotis","Zervas",{"paper_id":5411,"author_seq":232,"given_name":3571,"surname":3572,"affiliation":63,"orcid":63},{"paper_id":5411,"author_seq":218,"given_name":1111,"surname":1112,"affiliation":63,"orcid":63},{"paper_id":5411,"author_seq":203,"given_name":1018,"surname":3575,"affiliation":63,"orcid":63},"Any text-to-speech (TTS) system that aims at producing understandable and natural-sounding output needs to have a module for predicting prosody. In natural speech, some words are said to be stressed, or to bear Pitch Accents (PA). Errors at this level may impede the listener in the correct understanding of the spoken utterance. Regarding the performance of data driven methods, the scale and quality of the corpus are important. Since there is no suitable corpus available for modeling Modern Greek (MG) prosody, we created a corpus consisted of 5.500 words, distributed in 500 paragraphs. 
In describing pitch accent in particular and intonation features in general, we use Pierrehumbert's theory adopted for MG. In the present study, we try to predict four categories of PA H*, L*, L+H* and unaccented.",{"paper_id":5426,"title":5427,"year":197,"month":855,"day":63,"doi":5428,"resource_url":5429,"first_page":63,"last_page":63,"pdf_url":5430,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5431,"paper_type":860,"authors":5432,"abstract":5439},"lrec2004-main-271","A Grammar and Style Checker Based on Internet Searches","10.63317\u002F28qtzsns45us","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-271","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F461.pdf","more-etal-2004-grammar",[5433,5436,5438],{"paper_id":5426,"author_seq":247,"given_name":5434,"surname":5435,"affiliation":63,"orcid":63},"Joaquim","Moré",{"paper_id":5426,"author_seq":232,"given_name":2181,"surname":5437,"affiliation":63,"orcid":63},"Climent",{"paper_id":5426,"author_seq":218,"given_name":5404,"surname":5405,"affiliation":63,"orcid":63},"In this paper we present an English grammar and style checker for non-native English speakers. The main characteristic of this checker is the use of an Internet search engine. As the number of web pages written in English is immense, the system hypothesizes that a piece of text not found on the Web is probably badly written. The system also hypothesizes that the Web will provide examples of how the content of the text segment can be expressed in a gramatical and idiomatic way. So, after the checker warns the user about the odd character of a text segment, the Internet engine searches for contexts that will be helpful for the user to decide whether he\u002Fshe corrects the segment or not. By means of a search engine, the checker also suggests the writer to use expressions which are more frequent on theWeb other than the expression he\u002Fshe actually wrote. 
Although the system is currently being developed for teachers of the Open University of Catalonia, the checker can also be useful for second-language learners, translators, and post-editors.",{"paper_id":5441,"title":5442,"year":197,"month":855,"day":63,"doi":5443,"resource_url":5444,"first_page":63,"last_page":63,"pdf_url":5445,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5446,"paper_type":860,"authors":5447,"abstract":5456},"lrec2004-main-272","Cross-Disciplinary Integration of Metadata Descriptions","10.63317\u002F3m9qjecpdwz9","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-272","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F462.pdf","wittenburg-etal-2004-cross",[5448,5449,5452,5453],{"paper_id":5441,"author_seq":247,"given_name":5354,"surname":5355,"affiliation":63,"orcid":63},{"paper_id":5441,"author_seq":232,"given_name":5450,"surname":5451,"affiliation":63,"orcid":63},"Greg","Gulrajani",{"paper_id":5441,"author_seq":218,"given_name":1811,"surname":5366,"affiliation":63,"orcid":63},{"paper_id":5441,"author_seq":203,"given_name":5454,"surname":5455,"affiliation":63,"orcid":63},"Marcus","Uneson","Within the ECHO (European Cultural Heritage Online) project an integrated domain of metadata repositories was created covering data from 5 different humanities disciplines. The integration required intensive work on encoding, syntactical and especially the semantic level, since interoperability is still difficult to be achieved. An ontology was created and a search engine that makes use of the knowledge components. 
This work within ECHO is seen as one of the practical contributions on the way towards the Semantic Web.",{"paper_id":5458,"title":5459,"year":197,"month":855,"day":63,"doi":5460,"resource_url":5461,"first_page":63,"last_page":63,"pdf_url":5462,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5463,"paper_type":860,"authors":5464,"abstract":5468},"lrec2004-main-273","Representing Italian Complex Nominals: A Pilot Study","10.63317\u002F3j52iha6kzif","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-273","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F463.pdf","quochi-2004-representing",[5465],{"paper_id":5458,"author_seq":247,"given_name":5466,"surname":5467,"affiliation":63,"orcid":63},"Valeria","Quochi","A corpus-based investigation of Italian Complex Nominals (CNs), of the form N+PP, which aims at clarifying their syntactic and semantic constitution, is presented. The main goal is to find out useful parameters for their representation in a computational lexicon. As a reference model we have taken an implementation of Pustejovsky's Generative Lexicon Theory (1995), the SIMPLE Italian Lexicon, and in particular the Extended Qualia Structure. Italian CN formation mainly exploits post-modification; of particular interest here are CNs of the kind N+PP since this syntactic pattern is highly productive in Italian and such CNs very often translate compound nouns of other languages. One of the major problems posed by CNs for interpretation is the retrieval or identification of the semantic relation linking their components, which is (at least partially) implicit on the surface. Studying a small sample, we observed some interesting facts that could be useful when setting up a larger experiment to identify semantic relations and\u002For automatically learn the syntactic peculiarities of given semantic paradigms. 
Finally, a set of representational features exploiting the results from our corpus is proposed.",{"paper_id":5470,"title":5471,"year":197,"month":855,"day":63,"doi":5472,"resource_url":5473,"first_page":63,"last_page":63,"pdf_url":5474,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5475,"paper_type":860,"authors":5476,"abstract":5483},"lrec2004-main-274","Text Corpora, Local Grammars and Prediction","10.63317\u002F48oxcatxdgts","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-274","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F464.pdf","traboulsi-etal-2004-text",[5477,5480,5482],{"paper_id":5470,"author_seq":247,"given_name":5478,"surname":5479,"affiliation":63,"orcid":63},"Hayssam","Traboulsi",{"paper_id":5470,"author_seq":232,"given_name":1790,"surname":5481,"affiliation":63,"orcid":63},"Cheng",{"paper_id":5470,"author_seq":218,"given_name":1979,"surname":1980,"affiliation":63,"orcid":63},"A corpus-based method for identifying and learning patterns describing events in a specific domain by examining the manner in which: (a) a small number of keywords in the domain are distributed throughout the corpus; and, (b) a local grammar that is idiosyncratic of a class of events in the domain, governs the usage of the keywords. We used a 3.63 million words corpus, and the results are encouraging. 
More importantly, the method can be applied to any arbitrary domains.",{"paper_id":5485,"title":5486,"year":197,"month":855,"day":63,"doi":5487,"resource_url":5488,"first_page":63,"last_page":63,"pdf_url":5489,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5490,"paper_type":860,"authors":5491,"abstract":5500},"lrec2004-main-275","SMOR: A German Computational Morphology Covering Derivation, Composition and Inflection","10.63317\u002F5psp25oubt32","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-275","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F468.pdf","schmid-etal-2004-smor",[5492,5495,5497],{"paper_id":5485,"author_seq":247,"given_name":5493,"surname":5494,"affiliation":63,"orcid":63},"Helmut","Schmid",{"paper_id":5485,"author_seq":232,"given_name":2657,"surname":5496,"affiliation":63,"orcid":63},"Fitschen",{"paper_id":5485,"author_seq":218,"given_name":5498,"surname":5499,"affiliation":63,"orcid":63},"Ulrich","Heid","We present a morphological analyser for German inflection and word formation implemented in finite state technology. Unlike purely lexicon-based approaches, it can account for productive word formation like derivation and composition. The implementation is based on the Stuttgart Finite State Transducer Tools (SFST-Tools), a non-commercial FST platform. 
It is fast and achieves a high coverage.",{"paper_id":5502,"title":5503,"year":197,"month":855,"day":63,"doi":5504,"resource_url":5505,"first_page":63,"last_page":63,"pdf_url":5506,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5507,"paper_type":860,"authors":5508,"abstract":5516},"lrec2004-main-276","The Overview of the SST Speech Corpus of Japanese Learner English and Evaluation Through the Experiment on Automatic Detection of Learners’ Errors","10.63317\u002F4hqt3wpzq3hn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-276","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F470.pdf","izumi-etal-2004-overview",[5509,5512,5515],{"paper_id":5502,"author_seq":247,"given_name":5510,"surname":5511,"affiliation":63,"orcid":63},"Emi","Izumi",{"paper_id":5502,"author_seq":232,"given_name":5513,"surname":5514,"affiliation":63,"orcid":63},"Kiyotaka","Uchimoto",{"paper_id":5502,"author_seq":218,"given_name":5171,"surname":5172,"affiliation":63,"orcid":63},"This paper introduces an overview of the speech corpus of Japanese learner English compiled by National Institute of Information and Communications Technology by showing its data collection procedure and annotation schemes including error tagging. We have collected 1,200 interviews for three years. One of the most unique features of this corpus is that it contains rich information on learners' errors. We have performed error tagging for learners' grammatical and lexical errors with originally-designed error tagset. We also evaluated the corpus through the experiment on automatic detection of learners' errors by using error tag information in the corpus. We did this by using a machine learning model, Maximum Entropy (ME) model. Since we had obtained the limited amount of error-tagged data, we needed to make some efforts to enlarge training data. 
We added the correct sentences and artificially-made errors to the training data, and found that it improved accuracy. We are planning to make this corpus publicly available in the spring of 2004, so that teachers and researchers in many fields can use the data for their own interests, such as second language acquisition research, syllabus and material design, or the development of computerized pedagogical tools, by combining it with NLP technology.",{"paper_id":5518,"title":5519,"year":197,"month":855,"day":63,"doi":5520,"resource_url":5521,"first_page":63,"last_page":63,"pdf_url":5522,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5523,"paper_type":860,"authors":5524,"abstract":5529},"lrec2004-main-277","Dynamic Lexicographic Data Modelling. A Diachronic Dictionary Development Report","10.63317\u002F3jqus4g6q3qt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-277","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F471.pdf","gevaudan-wiebel-2004-dynamic",[5525,5527],{"paper_id":5518,"author_seq":247,"given_name":973,"surname":5526,"affiliation":63,"orcid":63},"Gévaudan",{"paper_id":5518,"author_seq":232,"given_name":5029,"surname":5528,"affiliation":63,"orcid":63},"Wiebel","Lexical units of several different language areas show noticeable similarities in their semantic structure, corresponding to their etymological development. This phenomenon of polygenetical evolution leads to the assumption that lexical innovations are strongly influenced by cognitive constants. These cognitive constants can be seen as a result of anthropological predispositions. In order to examine the relevant constants and the resulting polygenesis, a model of diachronic filiation has been developed. This model has the capacity to analyse highly complex cases of lexical evolution. An easily readable access and representation system is given by a linear notation method. 
This method is able to annotate all lexical innovations; however, it is not suitable for computer based analyses. We have therefore subsequently introduced an entity-relationship model representing the linguistic data model. Integrated into a powerful DBMS, this allows a dynamic representation of the underlying diachronic lexicon which has been compiled in our projects.",{"paper_id":5531,"title":5532,"year":197,"month":855,"day":63,"doi":5533,"resource_url":5534,"first_page":63,"last_page":63,"pdf_url":5535,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5536,"paper_type":860,"authors":5537,"abstract":5544},"lrec2004-main-278","Re-using High-quality Resources for Continued Evaluation of Automated Summarization Systems","10.63317\u002F4yz5d9naynh3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-278","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F472.pdf","alonso-etal-2004-using",[5538,5539,5542,5543],{"paper_id":5531,"author_seq":247,"given_name":3895,"surname":3896,"affiliation":63,"orcid":63},{"paper_id":5531,"author_seq":232,"given_name":5540,"surname":5541,"affiliation":63,"orcid":63},"Maria","Fuentes",{"paper_id":5531,"author_seq":218,"given_name":2904,"surname":2905,"affiliation":63,"orcid":63},{"paper_id":5531,"author_seq":203,"given_name":2910,"surname":2911,"affiliation":63,"orcid":63},"In this paper we present a method for re-using the human judgements on summary quality provided by the DUC contest. The score to be awarded to automatic summaries is calculated as a function of the scores assigned manually to the most similar summaries for the same document. This approach enhances the standard n-gram based evaluation of automatic summarization systems by establishing similarities between {\\it extractive} (vs. {\\it abstractive}) summaries and by taking advantage of the big quantity of evaluated summaries available from the DUC contest. 
The utility of this method is exemplified by the improvements achieved on a headline production system.",{"paper_id":5546,"title":5547,"year":197,"month":855,"day":63,"doi":5548,"resource_url":5549,"first_page":63,"last_page":63,"pdf_url":5550,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5551,"paper_type":860,"authors":5552,"abstract":63},"lrec2004-main-279","Corpus-based Learning of Lexical Resources for German Named Entity Recognition","10.63317\u002F37mmo6q9qvz4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-279","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F373.pdf","rossler-2004-corpus",[5553],{"paper_id":5546,"author_seq":247,"given_name":2904,"surname":5554,"affiliation":63,"orcid":63},"Rössler",{"paper_id":5556,"title":5557,"year":197,"month":855,"day":63,"doi":5558,"resource_url":5559,"first_page":63,"last_page":63,"pdf_url":5560,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5561,"paper_type":860,"authors":5562,"abstract":5570},"lrec2004-main-280","Collaborative Annotation of Sign Language Data with Peer-to-Peer Technology","10.63317\u002F3m89b6txpbi8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-280","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F473.pdf","brugman-etal-2004-collaborative",[5563,5564,5567],{"paper_id":5556,"author_seq":247,"given_name":5363,"surname":5364,"affiliation":63,"orcid":63},{"paper_id":5556,"author_seq":232,"given_name":5565,"surname":5566,"affiliation":63,"orcid":63},"Onno","Crasborn",{"paper_id":5556,"author_seq":218,"given_name":5568,"surname":5569,"affiliation":63,"orcid":63},"Albert","Russel","Collaboration on annotation projects is in practice mostly done by people sharing the same room. However, several models for online cooperative annotation over the internet are possible. 
This paper explores and evaluates these, and reports on the use of peer-to-peer technology to extend a multimedia annotation tool (ELAN) with functions that support collaborative annotation.",{"paper_id":5572,"title":5573,"year":197,"month":855,"day":63,"doi":5574,"resource_url":5575,"first_page":63,"last_page":63,"pdf_url":5576,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5577,"paper_type":860,"authors":5578,"abstract":5587},"lrec2004-main-281","Semantic Categorization of Spanish Se-constructions","10.63317\u002F4rivtrmbsqnb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-281","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F474.pdf","vazquez-etal-2004-semantic",[5579,5582,5585,5586],{"paper_id":5572,"author_seq":247,"given_name":5580,"surname":5581,"affiliation":63,"orcid":63},"Glòria","Vázquez",{"paper_id":5572,"author_seq":232,"given_name":5583,"surname":5584,"affiliation":63,"orcid":63},"Ana Fernández","Montraveta",{"paper_id":5572,"author_seq":218,"given_name":3898,"surname":3899,"affiliation":63,"orcid":63},{"paper_id":5572,"author_seq":203,"given_name":3895,"surname":3896,"affiliation":63,"orcid":63},"In this paper we present a tool to automatically determine the semantic interpretation of some ambiguous sentences in Spanish, the so-called ``\\seconsp''. These sentences are syntactically similar, but their argument structure is different. 
The performance of the disambiguation procedure has been evaluated against a manually annotated corpus, achieving 95\\% precision.",{"paper_id":5589,"title":5590,"year":197,"month":855,"day":63,"doi":5591,"resource_url":5592,"first_page":63,"last_page":63,"pdf_url":5593,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5594,"paper_type":860,"authors":5595,"abstract":5607},"lrec2004-main-282","Web Services Architecture for Language Resources","10.63317\u002F4u8zjpgspy2p","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-282","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F475.pdf","dalli-etal-2004-web",[5596,5599,5602,5603,5604,5605,5606],{"paper_id":5589,"author_seq":247,"given_name":5597,"surname":5598,"affiliation":63,"orcid":63},"Angelo","Dalli",{"paper_id":5589,"author_seq":232,"given_name":5600,"surname":5601,"affiliation":63,"orcid":63},"Valentin","Tablan",{"paper_id":5589,"author_seq":218,"given_name":1431,"surname":1432,"affiliation":63,"orcid":63},{"paper_id":5589,"author_seq":203,"given_name":1456,"surname":1457,"affiliation":63,"orcid":63},{"paper_id":5589,"author_seq":188,"given_name":1811,"surname":5366,"affiliation":63,"orcid":63},{"paper_id":5589,"author_seq":172,"given_name":5363,"surname":5364,"affiliation":63,"orcid":63},{"paper_id":5589,"author_seq":155,"given_name":5354,"surname":5355,"affiliation":63,"orcid":63},"A web services based architecture for Language Resources utilizing existing technology such as XML, SOAP, WSDL and UDDI is presented. The web services architecture creates a pervasive information infrastructure that enables straightforward access to two kinds of Language Resources: traditional information sources and language processing resources. 
Details about two practical implementations of this web services architecture are given.",{"paper_id":5609,"title":5610,"year":197,"month":855,"day":63,"doi":5611,"resource_url":5612,"first_page":63,"last_page":63,"pdf_url":5613,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5614,"paper_type":860,"authors":5615,"abstract":5626},"lrec2004-main-283","A Large Metadata Domain of Language Resources","10.63317\u002F3eekwezh76zd","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-283","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F478.pdf","broeder-etal-2004-large",[5616,5617,5620,5621,5622,5625],{"paper_id":5609,"author_seq":247,"given_name":1811,"surname":5366,"affiliation":63,"orcid":63},{"paper_id":5609,"author_seq":232,"given_name":5618,"surname":5619,"affiliation":63,"orcid":63},"Thierry","Declerck",{"paper_id":5609,"author_seq":218,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},{"paper_id":5609,"author_seq":203,"given_name":5360,"surname":5455,"affiliation":63,"orcid":63},{"paper_id":5609,"author_seq":188,"given_name":5623,"surname":5624,"affiliation":63,"orcid":63},"Sven","Strömqvist",{"paper_id":5609,"author_seq":172,"given_name":5354,"surname":5355,"affiliation":63,"orcid":63},"The INTERA and ECHO projects were partly intended to create a critical mass of open and linked metadata descriptions of language resources, helping researchers to understand the benefits of an increased visibility of language resources in the Internet and motivating them to participate. The work was based on the new IMDI version 3.0.3 which is a result of experiences with the earlier versions and new requirements coming from the involved partners. While in INTERA major data centers in Europe are participating, the ECHO project focuses on resources that can be seen as part of cultural heritage. 
Currently, 27 institutions and projects are active with the goal of having a large browsable and searchable domain by the summer of 2004. Experience shows that the creation of high quality metadata is not trivial and asks for a considerable amount of effort and skills, since manual work alone is too time consuming.",{"paper_id":5628,"title":5629,"year":197,"month":855,"day":63,"doi":5630,"resource_url":5631,"first_page":63,"last_page":63,"pdf_url":5632,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5633,"paper_type":860,"authors":5634,"abstract":5640},"lrec2004-main-284","MetaMorpho TM: A Rule-Based Translation Corpus","10.63317\u002F2prwfzx9zh7z","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-284","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F479.pdf","grobler-etal-2004-metamorpho",[5635,5637,5639],{"paper_id":5628,"author_seq":247,"given_name":5249,"surname":5636,"affiliation":63,"orcid":63},"Gröbler",{"paper_id":5628,"author_seq":232,"given_name":5246,"surname":5638,"affiliation":63,"orcid":63},"Hodász",{"paper_id":5628,"author_seq":218,"given_name":5237,"surname":5238,"affiliation":63,"orcid":63},"This paper discusses the aspects of bi-lingual resource processing within a rule-based translation memory (TM) system currently being   developed. Translation memories can be viewed as translation tools incorporating parallel corpora, mainly aligned at the sentence   level. Usually, these corpora have no linguistic annotation, as commercial TM systems perform queries at the character level, using   fuzzy matches.   The proposed translation memory system uses linguistic analysis (morphology and parsing) to determine similarity between two   source-language segments, and attempts to assemble a sensible translation using translations of source-language chunks if the entire   source segment was not found. This is achieved by integrating a rule-based machine translation (RBMT) engine. 
The drawback of this   approach is language-dependence; however, proper grammar acquisition methods are being developed to speed up grammar preparation   for further language pairs.   This paper addresses the problem of adding sufficient linguistic annotation to segment pairs – translation units (TU) – for new segment   pairs to integrate with the RBMT scheme. This should be fully automatic because adding a new translation unit to a translation memory   must be transparent, without requiring user reaction. The paper discusses a robust enough method to obtain as much linguistic annotation   as possible, while keeping the error rate low.",{"paper_id":5642,"title":5643,"year":197,"month":855,"day":63,"doi":5644,"resource_url":5645,"first_page":63,"last_page":63,"pdf_url":5646,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5647,"paper_type":860,"authors":5648,"abstract":63},"lrec2004-main-285","Annotating Multi-media\u002FMulti-modal Resources with ELAN","10.63317\u002F4orb5nuwpvji","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-285","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F480.pdf","brugman-russel-2004-annotating",[5649,5650],{"paper_id":5642,"author_seq":247,"given_name":5363,"surname":5364,"affiliation":63,"orcid":63},{"paper_id":5642,"author_seq":232,"given_name":5568,"surname":5569,"affiliation":63,"orcid":63},{"paper_id":5652,"title":5653,"year":197,"month":855,"day":63,"doi":5654,"resource_url":5655,"first_page":63,"last_page":63,"pdf_url":5656,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5657,"paper_type":860,"authors":5658,"abstract":5670},"lrec2004-main-286","Annotation of Anaphoric Expressions in an Aligned Bilingual 
Corpus","10.63317\u002F2oo5gcrpke82","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-286","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F482.pdf","tutin-etal-2004-annotation",[5659,5662,5665,5668],{"paper_id":5652,"author_seq":247,"given_name":5660,"surname":5661,"affiliation":63,"orcid":63},"Agnès","Tutin",{"paper_id":5652,"author_seq":232,"given_name":5663,"surname":5664,"affiliation":63,"orcid":63},"Meriam","Haddara",{"paper_id":5652,"author_seq":218,"given_name":5666,"surname":5667,"affiliation":63,"orcid":63},"Ruslan","Mitkov",{"paper_id":5652,"author_seq":203,"given_name":4332,"surname":5669,"affiliation":63,"orcid":63},"Orasan","This paper discusses a French-English corpus annotated and aligned at anaphoric level. It also presents an annotation scheme based on the study of a detailed corpus featuring different types of correspondences and mismatches. The scheme which is adapted from EAGLES recommendations, supports the alignment at anaphoric level and caters for the different kinds of mismatches.",{"paper_id":5672,"title":5673,"year":197,"month":855,"day":63,"doi":5674,"resource_url":5675,"first_page":63,"last_page":63,"pdf_url":5676,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5677,"paper_type":860,"authors":5678,"abstract":5683},"lrec2004-main-287","Unexpected Productions May Well be Errors","10.63317\u002F4jdvmp39s7oy","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-287","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F483.pdf","ule-simov-2004-unexpected",[5679,5682],{"paper_id":5672,"author_seq":247,"given_name":5680,"surname":5681,"affiliation":63,"orcid":63},"Tylman","Ule",{"paper_id":5672,"author_seq":232,"given_name":3102,"surname":3103,"affiliation":63,"orcid":63},"We present a method for detecting annotation errors in treebanks. It assumes that errors are unexpected small tree fragments. 
We generate statistics over configurations of these fragments using a standard statistical test. We use the test result and the characteristics of their distributions as features to classify unseen configurations as likely errors via machine learning. Evaluation shows that the resulting list of error candidates is reliable, independent of corpus size, annotation quality, and target language.",{"paper_id":5685,"title":5686,"year":197,"month":855,"day":63,"doi":5687,"resource_url":5688,"first_page":63,"last_page":63,"pdf_url":5689,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5690,"paper_type":860,"authors":5691,"abstract":5697},"lrec2004-main-288","A Framework for Evaluating the Suitability of Non-English Corpora for Language Engineering","10.63317\u002F47vg5kbx8dcn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-288","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F485.pdf","sarkar-de-roeck-2004-framework",[5692,5695],{"paper_id":5685,"author_seq":247,"given_name":5693,"surname":5694,"affiliation":63,"orcid":63},"Avik","Sarkar",{"paper_id":5685,"author_seq":232,"given_name":2977,"surname":5696,"affiliation":63,"orcid":63},"De Roeck","In this paper we develop a framework for fast profiling and quality verification of datasets for language engineering and information retrieval research. The profiling steps consist of an initial tokenization of the corpus to produce a frequency list from which some basic statistics are derived. Manual sampling is carried out to detect obvious discrepancies. Two diagnostic tests are performed to check for sparseness related measures. 
The behaviour of the function words is traced to gauge homogeneity of their distribution in documents.",{"paper_id":5699,"title":5700,"year":197,"month":855,"day":63,"doi":5701,"resource_url":5702,"first_page":63,"last_page":63,"pdf_url":5703,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5704,"paper_type":860,"authors":5705,"abstract":5710},"lrec2004-main-289","Intelligent Building of Language Resources for HLT Applications","10.63317\u002F4ozxjkzbv2ay","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-289","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F486.pdf","samiotou-etal-2004-intelligent",[5706,5707,5708],{"paper_id":5699,"author_seq":247,"given_name":4167,"surname":5186,"affiliation":63,"orcid":63},{"paper_id":5699,"author_seq":232,"given_name":5183,"surname":5184,"affiliation":63,"orcid":63},{"paper_id":5699,"author_seq":218,"given_name":5709,"surname":3575,"affiliation":63,"orcid":63},"Dimitrios","It is generally agreed by human language technology (HLT) practitioners (information scientists, computational linguists etc.) that the collection and processing of legacy data as well as the definition and development of the knowledge structure for the purpose of building language resources (LRs) for HLT applications, such as machine translation and knowledge management systems, is a crucial issue. This paper deals with a semi-automatic, data-driven scenario for building language resources for multilingual HLT applications. 
The scenario is language and domain independent to a great extend and conforms to international standards.",{"paper_id":5712,"title":5713,"year":197,"month":855,"day":63,"doi":5714,"resource_url":5715,"first_page":63,"last_page":63,"pdf_url":5716,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5717,"paper_type":860,"authors":5718,"abstract":5727},"lrec2004-main-290","Collecting Spontaneously Spoken Queries for Information Retrieval","10.63317\u002F29i6nsjrjbia","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-290","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F487.pdf","akiba-etal-2004-collecting",[5719,5721,5724],{"paper_id":5712,"author_seq":247,"given_name":5720,"surname":2236,"affiliation":63,"orcid":63},"Tomoyosi",{"paper_id":5712,"author_seq":232,"given_name":5722,"surname":5723,"affiliation":63,"orcid":63},"Atsushi","Fujii",{"paper_id":5712,"author_seq":218,"given_name":5725,"surname":5726,"affiliation":63,"orcid":63},"Katunobu","Itou","Motivated to realize the speech-driven information retrieval systems that accept spontaneously spoken queries, we developed a method to collect such speech data derived from the pre-defined search topics that had been systematically constructed for IR research. In order to evaluate both our method and the performance of the document retrieval by using the spontaneously spoken queries, we took place two experiments of collecting the speech data by our method using publicly available test collections of evaluating document retrieval. The first preliminary experiment took place with relatively small number of search topics selected from the NTCIR-3 Web retrieval collection, which had been constructed for the TREC-style evaluation workshop, in order to test our method. The second experiment took place with all of the search topics released from the NTCIR-4 Web task to participate the formal run of the evaluation. 
The information about the collected data and the result of the evaluation with respect to both the speech recognition accuracy and the precision of document retrieval by using the collected data are presented in this paper.",{"paper_id":5729,"title":5730,"year":197,"month":855,"day":63,"doi":5731,"resource_url":5732,"first_page":63,"last_page":63,"pdf_url":5733,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5734,"paper_type":860,"authors":5735,"abstract":5750},"lrec2004-main-291","Multilingual Pattern Libraries for Question Answering: a Case Study for Definition Questions","10.63317\u002F2dy7iigonksf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-291","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F488.pdf","tanev-etal-2004-multilingual",[5736,5738,5741,5744,5747],{"paper_id":5729,"author_seq":247,"given_name":3106,"surname":5737,"affiliation":63,"orcid":63},"Tanev",{"paper_id":5729,"author_seq":232,"given_name":5739,"surname":5740,"affiliation":63,"orcid":63},"Milen","Kouylekov",{"paper_id":5729,"author_seq":218,"given_name":5742,"surname":5743,"affiliation":63,"orcid":63},"Matteo","Negri",{"paper_id":5729,"author_seq":203,"given_name":5745,"surname":5746,"affiliation":63,"orcid":63},"Bonaventura","Coppola",{"paper_id":5729,"author_seq":188,"given_name":5748,"surname":5749,"affiliation":63,"orcid":63},"Bernardo","Magnini","In this paper we investigate the effectiveness of a novel resource for Multilingual Question Answering (QA). Such a resource consists of a set of multilingual pattern libraries for answer extraction and validation. 
In the spirit of the ongoing attempts to develop freely available resources for QA, we argue that the distribution and use of pattern libraries will contribute to make Multilingual QA a more feasible task.",{"paper_id":5752,"title":5753,"year":197,"month":855,"day":63,"doi":5754,"resource_url":5755,"first_page":63,"last_page":63,"pdf_url":5756,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5757,"paper_type":860,"authors":5758,"abstract":5766},"lrec2004-main-292","Automatic Transformation of Phrase Treebanks to Dependency Trees","10.63317\u002F584y3q4ooq42","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-292","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F489.pdf","daum-etal-2004-automatic",[5759,5761,5764],{"paper_id":5752,"author_seq":247,"given_name":1357,"surname":5760,"affiliation":63,"orcid":63},"Daum",{"paper_id":5752,"author_seq":232,"given_name":5762,"surname":5763,"affiliation":63,"orcid":63},"Kilian A.","Foth",{"paper_id":5752,"author_seq":218,"given_name":1531,"surname":5765,"affiliation":63,"orcid":63},"Menzel","Word-to-word dependency structures are useful for consistent representation and comparable evaluation of parsing results. However, most large-scale treebanks contain various variants of phrase structure trees, since automatic parsers usually produce constituent structures. 
We present a freely available extensible tool for converting phrase structure to dependencies automatically, and discuss its application to the NEGRA treebank of German.",{"paper_id":5768,"title":5769,"year":197,"month":855,"day":63,"doi":5770,"resource_url":5771,"first_page":63,"last_page":63,"pdf_url":5772,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5773,"paper_type":860,"authors":5774,"abstract":63},"lrec2004-main-293","Computational Lexicography and Carlo Emilio Gadda, Principe dell’Analisi e Duca della Buona Cognizione","10.63317\u002F3pxne6khoayn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-293","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F490.pdf","ceccotti-sassi-2004-computational",[5775,5778],{"paper_id":5768,"author_seq":247,"given_name":5776,"surname":5777,"affiliation":63,"orcid":63},"Maria Luigia","Ceccotti",{"paper_id":5768,"author_seq":232,"given_name":5779,"surname":5780,"affiliation":63,"orcid":63},"Manuela","Sassi",{"paper_id":5782,"title":5783,"year":197,"month":855,"day":63,"doi":5784,"resource_url":5785,"first_page":63,"last_page":63,"pdf_url":5786,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5787,"paper_type":860,"authors":5788,"abstract":5793},"lrec2004-main-294","An Annotation Scheme for a Rhetorical Analysis of Biology Articles","10.63317\u002F3622e2xb8ams","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-294","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F494.pdf","mizuta-collier-2004-annotation",[5789,5792],{"paper_id":5782,"author_seq":247,"given_name":5790,"surname":5791,"affiliation":63,"orcid":63},"Yoko","Mizuta",{"paper_id":5782,"author_seq":232,"given_name":5126,"surname":5127,"affiliation":63,"orcid":63},"In information extraction from scientific texts, it is crucially important to identify the unique contribution of the research. 
The task is complicated by the large number of statements made in each article that pertain to results, including reference to previous work and technical details. Simple keyword searches are helpful for a content-based analysis but fail to tell new results from other ones. We aim to approach the problem from a rhetorical perspective and give a 'zone analysis' (ZA) of texts in light of Teufel, Carletta & Moens (1999). We analyze a text into 'zones' with a shallow nesting based on the rhetorical status which each sequence of statements fits into and annotate the text correspondingly. Our current focus is on the molecular biology domain. In this paper, we propose an annotation scheme for ZA based on an empirical analysis of major online journals (EMBO, NAR, PNAS, and JCB), and illustrate how it works. Our scheme provides a way to differentiate the text in terms of the aspects of the author's own work (e.g. experimental procedure, findings, implications) and to identify a set of statements relating data and findings and therefore helps identify the author's new results and findings.",{"paper_id":5795,"title":5796,"year":197,"month":855,"day":63,"doi":5797,"resource_url":5798,"first_page":63,"last_page":63,"pdf_url":5799,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5800,"paper_type":860,"authors":5801,"abstract":5807},"lrec2004-main-295","Textual Distraction as a Basis for Evaluating Automatic Summarisers","10.63317\u002F2xso2r7nazfs","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-295","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F495.pdf","renouf-kehoe-2004-textual",[5802,5805],{"paper_id":5795,"author_seq":247,"given_name":5803,"surname":5804,"affiliation":63,"orcid":63},"Antoinette","Renouf",{"paper_id":5795,"author_seq":232,"given_name":3312,"surname":5806,"affiliation":63,"orcid":63},"Kehoe","Our summarisation tool, SEAGULL (Summary Extraction Algorithm Generated Using Lexical Links), is 
a sentence extractor which exploits the patterns of lexical repetition across a text and creates abridgements which express non-trivially the conceptual content and development of topic. In this paper, we report on a test devised to assess its performance against other summarisers. This involves the introduction of progressive batches of unrelated sentences into a source text. Targeted distraction reveals the relative degrees of robustness in summariser performance. The tests show that our system functions best.",{"paper_id":5809,"title":5810,"year":197,"month":855,"day":63,"doi":5811,"resource_url":5812,"first_page":63,"last_page":63,"pdf_url":5813,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5814,"paper_type":860,"authors":5815,"abstract":5819},"lrec2004-main-296","Verb Valency Descriptors for a Syntactic Treebank","10.63317\u002F2i4qwiypht6r","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-296","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F496.pdf","slavcheva-2004-verb",[5816],{"paper_id":5809,"author_seq":247,"given_name":5817,"surname":5818,"affiliation":63,"orcid":63},"Milena","Slavcheva","An essential component of Language Engineering (LE) tools are verb class descriptors that provide information about the relations of the predicates to their arguments. The production of computationally tractable language resources necessitates the assignment of types of predicate-argument relations to a great variety of verb-centered structures: it is necessary to define not only the initial, canonical valency frame of a great number of verb lexemes, but also the diathesis alternations, which reflect the real-life usage of verbs. This paper describes the implementation of descriptors of the valency properties of Bulgarian verbs used in the production of a syntactic treebank of Bulgarian. 
The descriptors are based on available LE resources for Bulgarian: a verb subcategorization model implemented in the lexical data base that is used; a chunk grammar that recognizes verb form patterns. Predictive models are built and applied in a grammar that annotates grammatical relations inferred from the combination of morphosyntactic and shallow syntactic processing cues. The real significance of this particular processing is the resolution, in relation to the valency properties of many verbs, of the discrepancy or the contradiction between the verb lexicon specifications and the verb syntagmatic realization.",{"paper_id":5821,"title":5822,"year":197,"month":855,"day":63,"doi":5823,"resource_url":5824,"first_page":63,"last_page":63,"pdf_url":5825,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5826,"paper_type":860,"authors":5827,"abstract":5836},"lrec2004-main-297","Integrated Language Technologies for Multilingual Information Services in the MEMPHIS Project","10.63317\u002F4b993x57yvjg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-297","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F498.pdf","kasper-etal-2004-integrated",[5828,5830,5831,5834],{"paper_id":5821,"author_seq":247,"given_name":2672,"surname":5829,"affiliation":63,"orcid":63},"Kasper",{"paper_id":5821,"author_seq":232,"given_name":3790,"surname":1650,"affiliation":63,"orcid":63},{"paper_id":5821,"author_seq":218,"given_name":5832,"surname":5833,"affiliation":63,"orcid":63},"Jakub","Piskorski",{"paper_id":5821,"author_seq":203,"given_name":973,"surname":5835,"affiliation":63,"orcid":63},"Buitelaar","The MEMPHIS project integrates a large set of NLP technologies. 
An overview of components, their underlying technologies and resources will be presented: language identification, document classification, linguistic analysis, summarization, information extraction, machine translation, knowledge management and crosslingual retrieval.",{"paper_id":5838,"title":5839,"year":197,"month":855,"day":63,"doi":5840,"resource_url":5841,"first_page":63,"last_page":63,"pdf_url":5842,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5843,"paper_type":860,"authors":5844,"abstract":5856},"lrec2004-main-298","Automatic Generation of Compound Word Lexicon for Hindi Speech Synthesis","10.63317\u002F4bj95jkoywmc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-298","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F501.pdf","deepa-etal-2004-automatic",[5845,5848,5851,5853],{"paper_id":5838,"author_seq":247,"given_name":5846,"surname":5847,"affiliation":63,"orcid":63},"S.R.","Deepa",{"paper_id":5838,"author_seq":232,"given_name":5849,"surname":5850,"affiliation":63,"orcid":63},"Kalika","Bali",{"paper_id":5838,"author_seq":218,"given_name":5852,"surname":4001,"affiliation":63,"orcid":63},"A.G.",{"paper_id":5838,"author_seq":203,"given_name":5854,"surname":5855,"affiliation":63,"orcid":63},"Partha Pratim","Talukdar","This paper addresses the problem of Hindi compound word splitting and its relevance to developing a good quality phonetizer for Hindi Speech Synthesis. The constituents of a Hindi compound word are not separated by space or hyphen. Hence, most of the existing compound splitting algorithms can not be applied to Hindi. We propose a new technique for automatic extraction of compound words from Hindi corpus. Preliminary tests conducted on the algorithm have shown a split rate of 92 to 96% of the input compound words. Of these splits, around 83 to 87% are correct splits. A few modifications have been suggested, which will improve the accuracy of the splits. 
Finally, we observe an improvement of 1.6% in Hindi Grapheme-to-Phoneme (G2P) conversion as a result of using a phonetized compound word lexicon, created by the above technique.",{"paper_id":5858,"title":5859,"year":197,"month":855,"day":63,"doi":5860,"resource_url":5861,"first_page":63,"last_page":63,"pdf_url":5862,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5863,"paper_type":860,"authors":5864,"abstract":63},"lrec2004-main-299","Summarization of Multimodal Information","10.63317\u002F5bfg43i5sq8h","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-299","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F502.pdf","ahmad-etal-2004-summarization",[5865,5867,5869],{"paper_id":5858,"author_seq":247,"given_name":5866,"surname":1980,"affiliation":63,"orcid":63},"Saif",{"paper_id":5858,"author_seq":232,"given_name":5868,"surname":4887,"affiliation":63,"orcid":63},"Paulo C. F.",{"paper_id":5858,"author_seq":218,"given_name":1979,"surname":1980,"affiliation":63,"orcid":63},{"paper_id":5871,"title":5872,"year":197,"month":855,"day":63,"doi":5873,"resource_url":5874,"first_page":63,"last_page":63,"pdf_url":5875,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5876,"paper_type":860,"authors":5877,"abstract":5884},"lrec2004-main-300","Design of an Interactive Web-based User Interface for Speech Database Query Formation","10.63317\u002F3twd724jsbdy","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-300","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F503.pdf","altosaar-karjalainen-2004-design",[5878,5881],{"paper_id":5871,"author_seq":247,"given_name":5879,"surname":5880,"affiliation":63,"orcid":63},"Toomas","Altosaar",{"paper_id":5871,"author_seq":232,"given_name":5882,"surname":5883,"affiliation":63,"orcid":63},"Matti","Karjalainen","This paper presents the design of an advanced experimental system currently under development that allows speech 
database queries to be defined in a high-level manner both intuitively and interactively. Queries are graphically specified by creating matching template units that are instances of classes, e.g., phones, phonemes, syllables, and words. Also, the features and properties of these instances are defined, e.g., voicing, part-of-speech tags, etc. Types, features, and properties of each unit can be made as specific or general as required, enabling different degrees of query selectivity. Relations between template units are used to specify query contexts and are indicated through links. Template units can be named symbolically, enabling the formation of reusable libraries of queries. Interaction is promoted since formed queries can be applied to object-oriented speech databases with immediate feedback. Databases can be located on a central server or on a local machine. Being html-compliant, the system is accessible from different web-browsers and platforms making it a portable tool for speech processing.",{"paper_id":5886,"title":5887,"year":197,"month":855,"day":63,"doi":5888,"resource_url":5889,"first_page":63,"last_page":63,"pdf_url":5890,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5891,"paper_type":860,"authors":5892,"abstract":5906},"lrec2004-main-301","Migrating Language Resources from SGML to XML: The Text Encoding Initiative 
Recommendations","10.63317\u002F5g9zvejwf2oq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-301","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F504.pdf","bauman-etal-2004-migrating",[5893,5896,5898,5900,5901,5904],{"paper_id":5886,"author_seq":247,"given_name":5894,"surname":5895,"affiliation":63,"orcid":63},"Syd","Bauman",{"paper_id":5886,"author_seq":232,"given_name":4594,"surname":5897,"affiliation":63,"orcid":63},"Bia",{"paper_id":5886,"author_seq":218,"given_name":5329,"surname":5899,"affiliation":63,"orcid":63},"Burnard",{"paper_id":5886,"author_seq":203,"given_name":2128,"surname":2129,"affiliation":63,"orcid":63},{"paper_id":5886,"author_seq":188,"given_name":5902,"surname":5903,"affiliation":63,"orcid":63},"Christine","Ruotolo",{"paper_id":5886,"author_seq":172,"given_name":3157,"surname":5905,"affiliation":63,"orcid":63},"Schreibman","The Text Encoding Initiative (TEI), established in 1987, has been the largest effort in the area of standardisation of computer encoding of language resources. TEI chose SGML (Standard Generalized Markup Language) as its underlying standard, and in the years before the inception of XML, a number of projects encoded their data according to some SGML DTD, TEI compliant, or otherwise. These projects could now benefit from migrating their data to XML. Apart from validation, the most compelling reason for migration is the scarcity of SGML-aware software and the abundance of XML-based tools and related recommendations. However, despite the fact that XML is a subset of SGML, migration is not a trivial process, especially in the case of large holdings of legacy language resources. This is why in 2002 the TEI Consortium established a Task Force on SGML to XML migration. The TF has now produced a number of reports that simplify and make explicit the conversion of SGML TEI (version P3) to XML TEI (version P4) documents. 
The reports are also relevant for a general audience of SGML users that are considering migrating their language resources to XML. This paper presents the recommendations made by the TF, concentrating on strategic considerations, the practical guide, and one case study, the conversion of the British National Corpus.",{"paper_id":5908,"title":5909,"year":197,"month":855,"day":63,"doi":5910,"resource_url":5911,"first_page":63,"last_page":63,"pdf_url":5912,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5913,"paper_type":860,"authors":5914,"abstract":5922},"lrec2004-main-302","Evaluating Conversation with Hans Christian Andersen","10.63317\u002F4yj3io554f6y","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-302","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F505.pdf","bernsen-etal-2004-evaluating",[5915,5918,5919],{"paper_id":5908,"author_seq":247,"given_name":5916,"surname":5917,"affiliation":63,"orcid":63},"Niels Ole","Bernsen",{"paper_id":5908,"author_seq":232,"given_name":1956,"surname":1954,"affiliation":63,"orcid":63},{"paper_id":5908,"author_seq":218,"given_name":5920,"surname":5921,"affiliation":63,"orcid":63},"Svend","Kiilerich","This paper presents an analysis of data from a large-scale in-field Wizard of Oz simulation of a conversational domain-oriented edutainment system. 
The basic turn-level data are described and the 10 longest conversations are analysed in order to evaluate the theory of domain-oriented conversation underlying the design specification of the system.",{"paper_id":5924,"title":5925,"year":197,"month":855,"day":63,"doi":5926,"resource_url":5927,"first_page":63,"last_page":63,"pdf_url":5928,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5929,"paper_type":860,"authors":5930,"abstract":5936},"lrec2004-main-303","The New Dutch-Flemish HLT Programme: a Concerted Effort to Stimulate the HLT Sector","10.63317\u002F434y9ovs2i9z","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-303","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F506.pdf","cucchiarini-dhalleweyn-2004-new",[5931,5934],{"paper_id":5924,"author_seq":247,"given_name":5932,"surname":5933,"affiliation":63,"orcid":63},"Catia","Cucchiarini",{"paper_id":5924,"author_seq":232,"given_name":1566,"surname":5935,"affiliation":63,"orcid":63},"D’Halleweyn","In 2004 a new, comprehensive Dutch-Flemish HLT programme will be launched by a number of ministries and organizations in the Netherlands and Flanders. To guarantee its Dutch-Flemish character, this large-scale programme will be carried out under the auspices of the Dutch Language Union (NTU). The aim of this new initiative, which is a continuation of the previous HLT Platform project, is to contribute to the further progress of HLT for the Dutch language. 
In trying to achieve this goal the project partners will make a concerted effort aimed at raising awareness of HLT results, promoting innovation oriented strategic research in HLT, stimulating the demand of HLT products, developing HLT resources that are essential and are known to be missing, organising the management, maintenance and distribution of HLT resources.",{"paper_id":5938,"title":5939,"year":197,"month":855,"day":63,"doi":5940,"resource_url":5941,"first_page":63,"last_page":63,"pdf_url":5942,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5943,"paper_type":860,"authors":5944,"abstract":5950},"lrec2004-main-304","Related Word-pairs Extraction Without Dictionaries","10.63317\u002F56xfmjapgr7s","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-304","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F507.pdf","yamamoto-umemura-2004-related",[5945,5947],{"paper_id":5938,"author_seq":247,"given_name":5946,"surname":2243,"affiliation":63,"orcid":63},"Eiko",{"paper_id":5938,"author_seq":232,"given_name":5948,"surname":5949,"affiliation":63,"orcid":63},"Kyoji","Umemura","Although related pairs of words are useful lexical semantic resources, it is sometimes expensive to create and maintain the pairs. We propose a method that extracts pairs of related Japanese words from a text corpus, without the use of language knowledge, such as a dictionary, in any of the steps. This is difficult with a Japanese text because there are no spaces between words. The pairs are related words with similar usages and can be useful for understanding texts including unknown words. These related word pairs are extracted based on judgments of whether two words are used in a similar way. 
We report the precisions of pair lists extracted from various kinds of corpora and analyze the tendencies of each list.",{"paper_id":5952,"title":5953,"year":197,"month":855,"day":63,"doi":5954,"resource_url":5955,"first_page":63,"last_page":63,"pdf_url":5956,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5957,"paper_type":860,"authors":5958,"abstract":5966},"lrec2004-main-305","What is my Style? Using Stylistic Features of Portuguese Web Texts to Classify Web Pages According to Users’ Needs","10.63317\u002F5fwy6axsx45i","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-305","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F508.pdf","aires-etal-2004-style",[5959,5961,5963,5965],{"paper_id":5952,"author_seq":247,"given_name":4694,"surname":5960,"affiliation":63,"orcid":63},"Aires",{"paper_id":5952,"author_seq":232,"given_name":5962,"surname":4884,"affiliation":63,"orcid":63},"Aline",{"paper_id":5952,"author_seq":218,"given_name":2460,"surname":5964,"affiliation":63,"orcid":63},"Aluísio",{"paper_id":5952,"author_seq":203,"given_name":1060,"surname":1061,"affiliation":63,"orcid":63},"In this paper we investigate the use of stylistic features of Web texts in Portuguese to classify web pages according to users’ needs, in order to improve Web Information Retrieval. We first describe a seven categories classification of users’ needs, which was the outcome of a qualitative analysis of two TodoBr logs (a major Brazilian search engine). We describe 46 shallow linguistic features, inspired by the works of Biber and Karlgren, and proceed describing the compilation of the corpus employed on the classifier training. Our aim is to obtain rules that can be applied on the classification of Web texts according to those seven users’ needs. 
Some experiments are reported, showing that it is possible, at least for some of the categories, to identify them reliably.",{"paper_id":5968,"title":5969,"year":197,"month":855,"day":63,"doi":5970,"resource_url":5971,"first_page":63,"last_page":63,"pdf_url":5972,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5973,"paper_type":860,"authors":5974,"abstract":5977},"lrec2004-main-306","BootCaT: Bootstrapping Corpora and Terms from the Web","10.63317\u002F2wo3g74qniup","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-306","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F509.pdf","baroni-bernardini-2004-bootcat",[5975,5976],{"paper_id":5968,"author_seq":247,"given_name":3005,"surname":3006,"affiliation":63,"orcid":63},{"paper_id":5968,"author_seq":232,"given_name":2564,"surname":3008,"affiliation":63,"orcid":63},"This paper introduces the BootCaT toolkit, a suite of perl programs implementing an iterative procedure to bootstrap specialized corpora and terms from the web. The procedure requires only a small set of seed terms as input. The seeds are used to build a corpus via automated Google queries, and more terms are extracted from this corpus. In turn, these new terms are used as seeds to build a larger corpus via automated queries, and so forth. The corpus and the unigram terms are then used to extract multi-word terms. We conducted an evaluation of the tools by applying them to the construction of English and Italian corpora and term lists from the domain of psychiatry. 
The results illustrate the potential usefulness of the tools.",{"paper_id":5979,"title":5980,"year":197,"month":855,"day":63,"doi":5981,"resource_url":5982,"first_page":63,"last_page":63,"pdf_url":5983,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5984,"paper_type":860,"authors":5985,"abstract":63},"lrec2004-main-307","N-Gram Language Modeling for Robust Multi-Lingual Document Classification","10.63317\u002F2rds2pr82kit","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-307","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F510.pdf","steffen-2004-n",[5986],{"paper_id":5979,"author_seq":247,"given_name":3790,"surname":1650,"affiliation":63,"orcid":63},{"paper_id":5988,"title":5989,"year":197,"month":855,"day":63,"doi":5990,"resource_url":5991,"first_page":63,"last_page":63,"pdf_url":5992,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5993,"paper_type":860,"authors":5994,"abstract":5997},"lrec2004-main-308","A Word Alignment System Based on a Translation Equivalence Extractor","10.63317\u002F22armnxyq664","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-308","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F512.pdf","barbu-2004-word",[5995],{"paper_id":5988,"author_seq":247,"given_name":5996,"surname":3517,"affiliation":63,"orcid":63},"Ana-Maria","The paper describes a word alignment system (TREQ-AL), which uses the lexicon extracted with a translation-equivalents extractor (TREQ) from a training corpus, including the text to be aligned. 
The improvement methods applied since the previous version of TREQ-AL are one of the paper's focus, as well as the recalled types of the system.",{"paper_id":5999,"title":6000,"year":197,"month":855,"day":63,"doi":6001,"resource_url":6002,"first_page":63,"last_page":63,"pdf_url":6003,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6004,"paper_type":860,"authors":6005,"abstract":63},"lrec2004-main-309","Using Profiles for IMDI Metadata Creation","10.63317\u002F2r6r87y2euyw","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-309","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F513.pdf","broeder-etal-2004-using",[6006,6007,6008],{"paper_id":5999,"author_seq":247,"given_name":1811,"surname":5366,"affiliation":63,"orcid":63},{"paper_id":5999,"author_seq":232,"given_name":5354,"surname":5355,"affiliation":63,"orcid":63},{"paper_id":5999,"author_seq":218,"given_name":5565,"surname":5566,"affiliation":63,"orcid":63},{"paper_id":6010,"title":6011,"year":197,"month":855,"day":63,"doi":6012,"resource_url":6013,"first_page":63,"last_page":63,"pdf_url":6014,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6015,"paper_type":860,"authors":6016,"abstract":6020},"lrec2004-main-310","Rethinking Readability of Digital Editions — The Case of the AAC’s “Digital Brenner”","10.63317\u002F3no8quqp4s37","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-310","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F515.pdf","morth-2004-rethinking",[6017],{"paper_id":6010,"author_seq":247,"given_name":6018,"surname":6019,"affiliation":63,"orcid":63},"Karlheinz","Mörth","Retrodigitization projects have become very common as demand for digital versions has increased considerably over the past few years. 
Many of these projects have been limited to the production of facsimiles, to a much lesser degree text has been integrated which is due to the costs that are involved in such undertakings. The AAC’s \"Brenner\" project was started as an experimental endeavour in humanities computing designed to fathom out the possibilities involved in the particular case of historical journals. The focus in this paper is on issues of readability of digital texts. The issue at hand is the exploration of features of digitized editions that may make up for the printed original and even go beyond what is offered by the printed original. The identification of useful display objects that can be easily implemented digitally was the main concern of the study. In describing the constituents of a planned web interface, we will try to figure out ways of improving the acceptability and quality of electronic texts.",{"paper_id":6022,"title":6023,"year":197,"month":855,"day":63,"doi":6024,"resource_url":6025,"first_page":63,"last_page":63,"pdf_url":6026,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6027,"paper_type":860,"authors":6028,"abstract":6034},"lrec2004-main-311","Automatic Building Gazetteers of Co-referring Named Entities","10.63317\u002F2ho6jmox3oxf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-311","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F516.pdf","ferres-etal-2004-automatic-building",[6029,6030,6031,6032,6033],{"paper_id":6022,"author_seq":247,"given_name":2268,"surname":2902,"affiliation":63,"orcid":63},{"paper_id":6022,"author_seq":232,"given_name":2904,"surname":2905,"affiliation":63,"orcid":63},{"paper_id":6022,"author_seq":218,"given_name":2907,"surname":2908,"affiliation":63,"orcid":63},{"paper_id":6022,"author_seq":203,"given_name":2910,"surname":2911,"affiliation":63,"orcid":63},{"paper_id":6022,"author_seq":188,"given_name":2105,"surname":2913,"affiliation":63,"orcid":63},"Noun phrase (NP) 
co-reference resolution is a problem involved in many Natural Language areas, such as Dialog, Information Extraction, Summarization and Question Answering, among others. Especially important issues regarding this problem are the detection of aliases and the detection and expansion of acronyms. In this sense, terminological and general gazetteers of Named Entities (NEs) being aliases and of pairs acronym-expansion can be helpful. This paper describes a methodology to acquire semi-automatically these gazetteers.",{"paper_id":6036,"title":6037,"year":197,"month":855,"day":63,"doi":6038,"resource_url":6039,"first_page":63,"last_page":63,"pdf_url":6040,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6041,"paper_type":860,"authors":6042,"abstract":6048},"lrec2004-main-312","Semi-Automatic Derivation of a French Lexicon from CLIPS","10.63317\u002F22vidbzkhek6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-312","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F517.pdf","ruimy-etal-2004-semi",[6043,6046,6047],{"paper_id":6036,"author_seq":247,"given_name":6044,"surname":6045,"affiliation":63,"orcid":63},"Nilda","Ruimy",{"paper_id":6036,"author_seq":232,"given_name":4062,"surname":4063,"affiliation":63,"orcid":63},{"paper_id":6036,"author_seq":218,"given_name":4059,"surname":4060,"affiliation":63,"orcid":63},"In this paper we describe the methodology developed in the framework of a feasibility study for the derivation of a semantically annotated French lexicon from a monolingual Italian lexical resource. Firstly, an outline of the source lexicon is provided. Then, the two different and complementary strategies that have been experimented for pairing off the relevant monolingual Italian entries and their translational equivalents are described. 
Finally, the results achieved through each of the illustrated methodologies are presented, their viability is evaluated and a general assessment of the experiment performed is provided.",{"paper_id":6050,"title":6051,"year":197,"month":855,"day":63,"doi":6052,"resource_url":6053,"first_page":63,"last_page":63,"pdf_url":6054,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6055,"paper_type":860,"authors":6056,"abstract":6061},"lrec2004-main-313","The American National Corpus First Release","10.63317\u002F3vavyxd22b5i","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-313","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F518.pdf","ide-suderman-2004-american",[6057,6058],{"paper_id":6050,"author_seq":247,"given_name":2864,"surname":2865,"affiliation":63,"orcid":63},{"paper_id":6050,"author_seq":232,"given_name":6059,"surname":6060,"affiliation":63,"orcid":63},"Keith","Suderman","The First Release of the American National Corpus (ANC) was made available in mid-fall, 2003. The data includes approximately 11 million words of American English, including written and spoken data and a variety of text types annotated for part of speech and lemma. The corpus is provided in XML format conformant to the XML Corpus Encoding Standard (XCES) (http:\u002F\u002Fwww.xml-ces.org), and is distributed in both a stand-off version (where annotation is in an XML document separate from the primary texts) and a merged version (where annotation is included in-line in the texts). 
The merged version includes annotation for part of speech and lemma produced by the Biber tagger; in stand-off annotation, in addition to the Biber tagging, morpho-syntactic annotations of the data are provided using the CLAWS 5 and 7 tagsets as well as several other tagsets.",{"paper_id":6063,"title":6064,"year":197,"month":855,"day":63,"doi":6065,"resource_url":6066,"first_page":63,"last_page":63,"pdf_url":6067,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6068,"paper_type":860,"authors":6069,"abstract":6076},"lrec2004-main-314","Identifying Morphosyntactic Preferences in Collocations","10.63317\u002F2xkmh97rqnms","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-314","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F519.pdf","evert-etal-2004-identifying",[6070,6072,6073],{"paper_id":6063,"author_seq":247,"given_name":961,"surname":6071,"affiliation":63,"orcid":63},"Evert",{"paper_id":6063,"author_seq":232,"given_name":5498,"surname":5499,"affiliation":63,"orcid":63},{"paper_id":6063,"author_seq":218,"given_name":6074,"surname":6075,"affiliation":63,"orcid":63},"Kristina","Spranger","In this paper, we describe research that aims to make evidence on the morphosyntactic preferences of collocations available to lexicographers. 
Our methods for the extraction of appropriate frequency data and its statistical analysis are applied to the number and case preferences of German adjective+noun combinations in a small case study.",{"paper_id":6078,"title":6079,"year":197,"month":855,"day":63,"doi":6080,"resource_url":6081,"first_page":63,"last_page":63,"pdf_url":6082,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6083,"paper_type":860,"authors":6084,"abstract":6088},"lrec2004-main-315","Towards General-Purpose Annotation Tools – How Far Are We Today?","10.63317\u002F2qww57dpvcmd","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-315","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F520.pdf","dybkjaer-bernse-2004-towards",[6085,6086],{"paper_id":6078,"author_seq":247,"given_name":1956,"surname":1954,"affiliation":63,"orcid":63},{"paper_id":6078,"author_seq":232,"given_name":5916,"surname":6087,"affiliation":63,"orcid":63},"Bernse","This paper discusses the notions of special-purpose natural interactivity and multimodality (NIMM) coding tools, limited-purpose tools, and general-purpose tools as defined in terms of a set of key requirements. 
In the light of these requirements the paper presents a detailed comparison of three special-purpose and six limited-purpose coding tools and discusses the challenges in building a first general-purpose NIMM annotation tool.",{"paper_id":6090,"title":6091,"year":197,"month":855,"day":63,"doi":6092,"resource_url":6093,"first_page":63,"last_page":63,"pdf_url":6094,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6095,"paper_type":860,"authors":6096,"abstract":6103},"lrec2004-main-316","Automated Morphological Segmentation and Evaluation","10.63317\u002F2au2may7cmt5","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-316","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F521.pdf","reichel-weilhammer-2004-automated",[6097,6100],{"paper_id":6090,"author_seq":247,"given_name":6098,"surname":6099,"affiliation":63,"orcid":63},"Uwe D.","Reichel",{"paper_id":6090,"author_seq":232,"given_name":6101,"surname":6102,"affiliation":63,"orcid":63},"Karl","Weilhammer","In this paper we introduce (i) a new method for morphological segmentation of German words and (ii) some measures related to the MDL principle for evaluation of morphological segmentations. Our segmentation method is based on general knowledge about inflection, derivation, and morphotactics, and part of speech information, all supplied by little effort. It includes the capabilities to generate allomorphs, to deal with hierarchical structure, and to retrieve morphemes not given in isolation in the input data. Manual evaluation of 1400 segmented types, counting omissions and false insertions of morpheme boundaries, gave 87 % recall and 98 % precision. 
In order to get automatic evaluation measures for morphological segmentations, we tested (i) vocabulary size and entropy measures (data size aspect of the MDL principle), (ii) model size represented as the number of states of reduced deterministic finite state automatons (DFSA) matching exactly the models' outputs, and (iii) a linear combination of (i) and (ii). These measures have been applied to segmentations of different qualities. As a result linear combination of vocabulary size and size of model-equivalent reduced DFSAs turned out to be an appropriate measure to rank segmentation models according to their quality.",{"paper_id":6105,"title":6106,"year":197,"month":855,"day":63,"doi":6107,"resource_url":6108,"first_page":63,"last_page":63,"pdf_url":6109,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6110,"paper_type":860,"authors":6111,"abstract":6114},"lrec2004-main-317","A Registry of Standard Data Categories for Linguistic Annotation","10.63317\u002F3zs9pxtv4kfo","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-317","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F522.pdf","ide-romary-2004-registry",[6112,6113],{"paper_id":6105,"author_seq":247,"given_name":2864,"surname":2865,"affiliation":63,"orcid":63},{"paper_id":6105,"author_seq":232,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},"In this paper we describe the most recent work within ISO TC37\u002FSC 4, and in particular the development of a Data Category Registry (DCR) component of the Linguistic Annotation Framework. The DCR will contain a formally defined set of linguistic categories in common use within the language engineering community for reference and use in linguistically annotated resources. 
We outline the first proposals for creation and management of the DCR, as a solicitation for input from the community.",{"paper_id":6116,"title":6117,"year":197,"month":855,"day":63,"doi":6118,"resource_url":6119,"first_page":63,"last_page":63,"pdf_url":6120,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6121,"paper_type":860,"authors":6122,"abstract":6128},"lrec2004-main-318","A Natural Language Approach to Information Management: Tracking Scientific Advances Through the Structure of Words","10.63317\u002F3qbgu23srh6c","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-318","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F523.pdf","hippisley-karavasili-2004-natural",[6123,6125],{"paper_id":6116,"author_seq":247,"given_name":3312,"surname":6124,"affiliation":63,"orcid":63},"Hippisley",{"paper_id":6116,"author_seq":232,"given_name":6126,"surname":6127,"affiliation":63,"orcid":63},"Chara","Karavasili","In scientific texts specialist words, or terms, express conceptual knowledge. We show that by looking at the use of a term and its family of derivatives over time we can have a tangible picture of how an underlying concept has evolved in scientific advances. This is because the structure of a word encases a core idea and how that idea has been extended in a particular direction. 
This paper is an outline of a research programme with some preliminary results from an analysis of the term nucleus in a nine million word corpus of nuclear physics articles written over thirty-six years, from 1969 to the present day, representing a body of specialist knowledge of recognized growth over time.",{"paper_id":6130,"title":6131,"year":197,"month":855,"day":63,"doi":6132,"resource_url":6133,"first_page":63,"last_page":63,"pdf_url":6134,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6135,"paper_type":860,"authors":6136,"abstract":6144},"lrec2004-main-319","Building a Maritime Domain Lexicon: a Few Considerations on the Database Structure and the Semantic Coding","10.63317\u002F2qifm6ovh4m2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-319","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F524.pdf","marinelli-etal-2004-building",[6137,6139,6142],{"paper_id":6130,"author_seq":247,"given_name":4974,"surname":6138,"affiliation":63,"orcid":63},"Marinelli",{"paper_id":6130,"author_seq":232,"given_name":6140,"surname":6141,"affiliation":63,"orcid":63},"Adriana","Roventini",{"paper_id":6130,"author_seq":218,"given_name":1743,"surname":6143,"affiliation":63,"orcid":63},"Enea","In this paper we refer about the building we are carrying out of a specialized lexicon belonging to the maritime domain, together with the coding performed according to the ItalWordNet semantic relations model. 
The main characteristics of the lexical semantic database and the specific features of the specialized language are taken into consideration and, in particular, we concentrate our attention on the following items: i) the main characteristics of this lexicon, its different levels of specificity and its distribution within different sub domains; ii) the verb class semantic coding; iii) the suitable concepts to outline a specific maritime domain ontology.",{"paper_id":6146,"title":6147,"year":197,"month":855,"day":63,"doi":6148,"resource_url":6149,"first_page":63,"last_page":63,"pdf_url":6150,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6151,"paper_type":860,"authors":6152,"abstract":6169},"lrec2004-main-320","Creating Open Language Resources for Hungarian","10.63317\u002F4od8htmmfbr3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-320","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F525.pdf","halacsy-etal-2004-creating",[6153,6156,6159,6162,6164,6167],{"paper_id":6146,"author_seq":247,"given_name":6154,"surname":6155,"affiliation":63,"orcid":63},"Péter","Halácsy",{"paper_id":6146,"author_seq":232,"given_name":6157,"surname":6158,"affiliation":63,"orcid":63},"András","Kornai",{"paper_id":6146,"author_seq":218,"given_name":6160,"surname":6161,"affiliation":63,"orcid":63},"László","Németh",{"paper_id":6146,"author_seq":203,"given_name":6157,"surname":6163,"affiliation":63,"orcid":63},"Rung",{"paper_id":6146,"author_seq":188,"given_name":6165,"surname":6166,"affiliation":63,"orcid":63},"István","Szakadát",{"paper_id":6146,"author_seq":172,"given_name":4335,"surname":6168,"affiliation":63,"orcid":63},"Trón","With Hungary's ascension to the EU, wider availability of Hungarian language resources (LRs) is becoming more critical. 
Various Hungarian LRs and language technology tools (LTs) exist, but are for the most part proprietary products: the companies and research labs developing them are often reluctant to make them available even for research, let alone commercial purposes. The SzoSzablya `WordSword' project at the Centre of Media Research and Education of Budapest University of Technology and Economics started in March 2003 with the express goal to offer a solution to this problem by developing a comprehensive set of LRs with an LT toolkit which are made publicly available under an unrestrictive LGPL-style license. This paper is a report of our progress. We describe the process of creating a gigaword corpus by crawling the Hungarian web and collecting 18 million webpages from the .hu domain. We discuss the methods used for cleaning the data and document the way an approximate frequency dictionary was compiled from the corpus.",{"paper_id":6171,"title":6172,"year":197,"month":855,"day":63,"doi":6173,"resource_url":6174,"first_page":63,"last_page":63,"pdf_url":6175,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6176,"paper_type":860,"authors":6177,"abstract":6185},"lrec2004-main-321","Test Collections for Patent-to-Patent Retrieval and Patent Map Generation in NTCIR-4 Workshop","10.63317\u002F2mfgv8f4c7zb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-321","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F526.pdf","fujii-etal-2004-test",[6178,6179,6182],{"paper_id":6171,"author_seq":247,"given_name":5722,"surname":5723,"affiliation":63,"orcid":63},{"paper_id":6171,"author_seq":232,"given_name":6180,"surname":6181,"affiliation":63,"orcid":63},"Makoto","Iwayama",{"paper_id":6171,"author_seq":218,"given_name":6183,"surname":6184,"affiliation":63,"orcid":63},"Noriko","Kando","This paper describes the Patent Retrieval Task in the Fourth NTCIR Workshop, and the test collections produced in this task. 
We perform the invalidity search task, in which each participant group searches a patent collection for the patents that can invalidate the demand in an existing claim. We also perform the automatic patent map generation task, in which the patents associated with a specific topic are organized in a multi-dimensional matrix.",{"paper_id":6187,"title":6188,"year":197,"month":855,"day":63,"doi":6189,"resource_url":6190,"first_page":63,"last_page":63,"pdf_url":6191,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6192,"paper_type":860,"authors":6193,"abstract":6199},"lrec2004-main-322","Part-of-Speech Annotation of Biology Research Abstracts","10.63317\u002F4kpxeameezza","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-322","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F528.pdf","tateisi-tsujii-2004-part",[6194,6197],{"paper_id":6187,"author_seq":247,"given_name":6195,"surname":6196,"affiliation":63,"orcid":63},"Yuka","Tateisi",{"paper_id":6187,"author_seq":232,"given_name":6198,"surname":930,"affiliation":63,"orcid":63},"Jun-ichi","A part-of-speech (POS) tagged corpus was built on research abstracts in biomedical domain with the Penn Treebank scheme. As consistent annotation was difficult without domain-specific knowledge we made use of the existing term annotation of the GENIA corpus. A list of frequent terms annotated in the GENIA corpus was compiled and the POS of each constituent of those terms were determined with assistance from domain specialists. The POS of the terms in the list are pre-assigned, then a tagger assigns POS to remaining words preserving the pre-assigned POS, whose results are corrected by human annotators. We also modified the PTB scheme slightly. An inter-annotator agreement tested on new 50 abstracts was 98.5%. A POS tagger trained with the annotated abstracts was tested against a gold-standard set made from the interannotator agreement. 
The untrained tagger had the accuracy of 83.0%. Trained with 2000 annotated abstracts the accuracy rose to 98.2%. The 2000 annotated abstracts are publicly available.",{"paper_id":6201,"title":6202,"year":197,"month":855,"day":63,"doi":6203,"resource_url":6204,"first_page":63,"last_page":63,"pdf_url":6205,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6206,"paper_type":860,"authors":6207,"abstract":6214},"lrec2004-main-323","Making Monolingual Corpora Comparable: a Case Study of Bulgarian and Croatian","10.63317\u002F4ftorx69ehby","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-323","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F529.pdf","bekavac-etal-2004-making",[6208,6211,6212,6213],{"paper_id":6201,"author_seq":247,"given_name":6209,"surname":6210,"affiliation":63,"orcid":63},"Božo","Bekavac",{"paper_id":6201,"author_seq":232,"given_name":3723,"surname":3724,"affiliation":63,"orcid":63},{"paper_id":6201,"author_seq":218,"given_name":3102,"surname":3103,"affiliation":63,"orcid":63},{"paper_id":6201,"author_seq":203,"given_name":5407,"surname":5408,"affiliation":63,"orcid":63},"This paper describes the first steps towards the creation of a Bulgarian-Croatian comparable corpus. Its base are two newspaper subcorpora from larger reference corpora of Bulgarian and Croatian. In the beginning we rely on more extralinguistically-oriented, but methodologically cleaner parameters of similarity like: specific topics, pre-defined time span and data size. The idea of `light' and `hard' comparable corpora is introduced. At this stage we aim at producing a `light' bilingual comparable corpus. 
The algorithm for identifying lexical similarity and aligning linguistic units is presented, and the initial experiments are outlined.",{"paper_id":6216,"title":6217,"year":197,"month":855,"day":63,"doi":6218,"resource_url":6219,"first_page":63,"last_page":63,"pdf_url":6220,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6221,"paper_type":860,"authors":6222,"abstract":63},"lrec2004-main-324","Corporate Voice, Tone of Voice and Controlled Language Techniques","10.63317\u002F5d8tdk4h4obf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-324","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F530.pdf","henriksen-etal-2004-corporate",[6223,6226,6229],{"paper_id":6216,"author_seq":247,"given_name":6224,"surname":6225,"affiliation":63,"orcid":63},"Lina","Henriksen",{"paper_id":6216,"author_seq":232,"given_name":6227,"surname":6228,"affiliation":63,"orcid":63},"Bart","Jongejan",{"paper_id":6216,"author_seq":218,"given_name":918,"surname":919,"affiliation":63,"orcid":63},{"paper_id":6231,"title":6232,"year":197,"month":855,"day":63,"doi":6233,"resource_url":6234,"first_page":63,"last_page":63,"pdf_url":6235,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6236,"paper_type":860,"authors":6237,"abstract":6239},"lrec2004-main-325","Cypriot Speech Database: Data Collection and Greek to Cypriot Dialect Adaptation","10.63317\u002F3crfwqfckh4z","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-325","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F531.pdf","fakotakis-2004-cypriot",[6238],{"paper_id":6231,"author_seq":247,"given_name":1111,"surname":1112,"affiliation":63,"orcid":63},"This paper describes the Cypriot Greek speech database collected in the framework of the European project OrienTel (IST-2000-28373) and the acoustic models adaptation techniques that were applied in order to perform dialect adaptation from Greek to Cypriot. 
Greek and Cypriot Greek share the same phoneme set. However, there are some differences in the way the same phonemes are pronounced. That is, Cypriot Greek may be considered as a variation of standard Greek. Utterances from 500 speakers are used (450 for training, that is, performing adaptation, and 50 as testing material). Two tools are available for training, adaptation and evaluation of the acoustic models. These are the Wire Communications Laboratory (WCL) recognition tool and the Hidden Markov Models toolkit (HTK). For both recognition engines Greek acoustic models were already available using the SpeechDat-II Greek telephone database. Two well-known techniques are applied for adapting the Greek acoustic models to the new data: Maximum Likelihood Linear Regression (MLLR) and Maximum A-Posteriori (MAP) adaptation. Pure Cypriot Greek models are also trained using only the Cypriot Greek database, to be compared with the adapted ones. Preliminary results show a small improvement in the performance of the adapted models over the pure Greek and Cypriot Greek models.",{"paper_id":6241,"title":6242,"year":197,"month":855,"day":63,"doi":6243,"resource_url":6244,"first_page":63,"last_page":63,"pdf_url":6245,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6246,"paper_type":860,"authors":6247,"abstract":6256},"lrec2004-main-326","Automatic Extraction of Syntactic Semantic Patterns for Multilingual 
Resources","10.63317\u002F5cp6k5tmqvff","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-326","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F532.pdf","navarro-etal-2004-automatic",[6248,6250,6253],{"paper_id":6241,"author_seq":247,"given_name":6249,"surname":2314,"affiliation":63,"orcid":63},"Borja",{"paper_id":6241,"author_seq":232,"given_name":6251,"surname":6252,"affiliation":63,"orcid":63},"Manuel","Palomar",{"paper_id":6241,"author_seq":218,"given_name":6254,"surname":6255,"affiliation":63,"orcid":63},"Patricio","Martínez-Barco","In this paper we present an automatic system for the extraction of syntactic semantic patterns applied to the development of multilingual processing tools. In order to achieve optimum methods for the automatic treatment of more than one language, we propose the use of syntactic semantic patterns. These patterns are formed by a verbal head and the main arguments, and they are aligned among languages. In this paper we present an automatic system for the extraction and alignment of syntactic semantic patterns from two manually annotated corpora, and evaluate the main linguistic problems that we must deal with in the alignment process.",{"paper_id":6258,"title":6259,"year":197,"month":855,"day":63,"doi":6260,"resource_url":6261,"first_page":63,"last_page":63,"pdf_url":6262,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6263,"paper_type":860,"authors":6264,"abstract":6272},"lrec2004-main-327","The Integral Dictionary: An Ontological Resource for the Semantic Web: Integration of EuroWordNet, Balkanet, TID, and 
SUMO","10.63317\u002F2tykx3e3f9zf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-327","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F533.pdf","dutoit-etal-2004-integral",[6265,6267,6270],{"paper_id":6258,"author_seq":247,"given_name":3087,"surname":6266,"affiliation":63,"orcid":63},"Dutoit",{"paper_id":6258,"author_seq":232,"given_name":6268,"surname":6269,"affiliation":63,"orcid":63},"Pierre","Nugues",{"paper_id":6258,"author_seq":218,"given_name":1519,"surname":6271,"affiliation":63,"orcid":63},"de Torcy","The semantic organization of the web is one of the major challenges for the future of the Internet. This important task may be based on the development of new approaches, taking the risk of reinventing the wheel, or may consider the previous efforts and successes, offering the opportunity to move research to market. This paper is a technical study that examines issues related to the latter possibility. We will first consider the structure of the Suggested Upper Merged Ontology (SUMO), which is a general proposal on the semantic web. We will then outline the challenges and possible strategies to integrate two existing ontologies, Wordnet for the English language and the Integral Dictionary for French (TID), to SUMO. 
Then, we will discuss the motivation of the mappings.",{"paper_id":6274,"title":6275,"year":197,"month":855,"day":63,"doi":6276,"resource_url":6277,"first_page":63,"last_page":63,"pdf_url":6278,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6279,"paper_type":860,"authors":6280,"abstract":6285},"lrec2004-main-328","Categorizing Web Pages as a Preprocessing Step for Information Extraction","10.63317\u002F2uvbfc8ovrtu","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-328","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F534.pdf","pekar-etal-2004-categorizing",[6281,6282,6284],{"paper_id":6274,"author_seq":247,"given_name":4335,"surname":4336,"affiliation":63,"orcid":63},{"paper_id":6274,"author_seq":232,"given_name":4482,"surname":6283,"affiliation":63,"orcid":63},"Evans",{"paper_id":6274,"author_seq":218,"given_name":5666,"surname":5667,"affiliation":63,"orcid":63},"At present, information systems combining crawling and information extraction (IE) technologies acquire a lot of research and industrial interest. 
In this paper, we present an algorithm that exploits techniques for unsupervised IE pattern acquisition in order to facilitate identification of web pages containing information relevant to the IE task.",{"paper_id":6287,"title":6288,"year":197,"month":855,"day":63,"doi":6289,"resource_url":6290,"first_page":63,"last_page":63,"pdf_url":6291,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6292,"paper_type":860,"authors":6293,"abstract":63},"lrec2004-main-329","A Framework for Data-driven Video-realistic Audio-visual Speech-synthesis","10.63317\u002F2kydi29r84ps","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-329","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F535.pdf","weiss-2004-framework",[6294],{"paper_id":6287,"author_seq":247,"given_name":2000,"surname":6295,"affiliation":63,"orcid":63},"Weiss",{"paper_id":6297,"title":6298,"year":197,"month":855,"day":63,"doi":6299,"resource_url":6300,"first_page":63,"last_page":63,"pdf_url":6301,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6302,"paper_type":860,"authors":6303,"abstract":6308},"lrec2004-main-330","Corpus Based Enrichment of GermaNet Verb Frames","10.63317\u002F4v8cc9vo3yes","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-330","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F536.pdf","kunze-rosner-2004-corpus",[6304,6306],{"paper_id":6297,"author_seq":247,"given_name":5779,"surname":6305,"affiliation":63,"orcid":63},"Kunze",{"paper_id":6297,"author_seq":232,"given_name":1108,"surname":6307,"affiliation":63,"orcid":63},"Rösner","Lexical semantic resources, like WordNet, are often used in real applications of natural language document processing. For example, we integrated GermaNet in our document suite XDOC. In addition to hypernymy and synonymy relations, we want to exploit GermaNet verb frames for our analysis. 
In this paper, we outline an approach for the domain related enrichment of GermaNet verb frames by corpus based syntactic and co-occurrence data analyses of real documents.",{"paper_id":6310,"title":6311,"year":197,"month":855,"day":63,"doi":6312,"resource_url":6313,"first_page":63,"last_page":63,"pdf_url":6314,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6315,"paper_type":860,"authors":6316,"abstract":6322},"lrec2004-main-331","Semi-automatic Acquisition of Command Grammar","10.63317\u002F4wetiasn63ry","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-331","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F538.pdf","poibeau-goujon-2004-semi",[6317,6319],{"paper_id":6310,"author_seq":247,"given_name":5618,"surname":6318,"affiliation":63,"orcid":63},"Poibeau",{"paper_id":6310,"author_seq":232,"given_name":6320,"surname":6321,"affiliation":63,"orcid":63},"Bénédicte","Goujon","This paper presents an original strategy for the production of command grammars for complex systems. 
We show that command grammars describe a sublanguage that can be processed by local syntactic rules and compositional semantic strategies.",{"paper_id":6324,"title":6325,"year":197,"month":855,"day":63,"doi":6326,"resource_url":6327,"first_page":63,"last_page":63,"pdf_url":6328,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6329,"paper_type":860,"authors":6330,"abstract":6338},"lrec2004-main-332","Towards a Language Infrastructure for the Semantic Web","10.63317\u002F334a89cpdx3i","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-332","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F541.pdf","declerck-etal-2004-towards",[6331,6332,6333,6336],{"paper_id":6324,"author_seq":247,"given_name":5618,"surname":5619,"affiliation":63,"orcid":63},{"paper_id":6324,"author_seq":232,"given_name":973,"surname":5835,"affiliation":63,"orcid":63},{"paper_id":6324,"author_seq":218,"given_name":6334,"surname":6335,"affiliation":63,"orcid":63},"Nicoletta","Calzolari",{"paper_id":6324,"author_seq":203,"given_name":1743,"surname":6337,"affiliation":63,"orcid":63},"Lenci","In recent years, the Internet evolved from a global medium for information exchange (directed mainly towards human users) into a \"global, virtual work environment\" (for both human users and machines). Building on the world-wide-web, developments such as grid technology, web services and the semantic web contributed to this transformation, the implications of which are now slowly but clearly being integrated into all areas of the new digital society (e-business, e-government, e-science, etc.). In this context the semantic web allows for increasingly intelligent and therefore autonomous processing. 
This development brings new challenges for Human Language Technology (HLT), which require not only some adaptation of processes within the state of the art processing chain of HLT, but also changes at the infrastructure level of HLT resources.",{"paper_id":6340,"title":6341,"year":197,"month":855,"day":63,"doi":6342,"resource_url":6343,"first_page":63,"last_page":63,"pdf_url":6344,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6345,"paper_type":860,"authors":6346,"abstract":6357},"lrec2004-main-333","Conversational Telephone Speech Corpus Collection for the NIST Speaker Recognition Evaluation 2004","10.63317\u002F2kfvmbfb7kk8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-333","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F542.pdf","martin-etal-2004-conversational",[6347,6349,6351,6352,6354],{"paper_id":6340,"author_seq":247,"given_name":6348,"surname":1087,"affiliation":63,"orcid":63},"Alvin",{"paper_id":6340,"author_seq":232,"given_name":1790,"surname":6350,"affiliation":63,"orcid":63},"Miller",{"paper_id":6340,"author_seq":218,"given_name":1024,"surname":1025,"affiliation":63,"orcid":63},{"paper_id":6340,"author_seq":203,"given_name":6353,"surname":908,"affiliation":63,"orcid":63},"Joseph",{"paper_id":6340,"author_seq":188,"given_name":6355,"surname":6356,"affiliation":63,"orcid":63},"Hirotaka","Nakasone","This paper discusses some of the factors that should be considered when designing a speech corpus collection to be used for text-independent speaker recognition evaluation. The factors include telephone handset type, telephone transmission type, language, and (non-telephone) microphone type. The paper describes the design of the new corpus collection being undertaken by the Linguistic Data Consortium (LDC) to support the 2004 and subsequent NIST speech recognition evaluations. 
Some preliminary information on the resulting 2004 evaluation test set is offered.",{"paper_id":6359,"title":6360,"year":197,"month":855,"day":63,"doi":6361,"resource_url":6362,"first_page":63,"last_page":63,"pdf_url":6363,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6364,"paper_type":860,"authors":6365,"abstract":6369},"lrec2004-main-334","Augmenting Manual Dictionaries for Statistical Machine Translation Systems","10.63317\u002F2oczjrkr4ia7","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-334","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F543.pdf","vogel-monson-2004-augmenting",[6366,6367],{"paper_id":6359,"author_seq":247,"given_name":1560,"surname":4435,"affiliation":63,"orcid":63},{"paper_id":6359,"author_seq":232,"given_name":2000,"surname":6368,"affiliation":63,"orcid":63},"Monson","We show that the usefulness of manually created dictionaries can be enhanced for a statistical machine translation system when new translations are automatically added which are simple morphological transformations (plural forms, different verb inflections) of the original. Further improvement is possible when assigning probabilities to the lexicon entries. We describe a method to do this on the basis of an automatically trained statistical lexicon. 
Experimental results are given for Chinese to English translation tasks and show a significant improvement in translation quality.",{"paper_id":6371,"title":6372,"year":197,"month":855,"day":63,"doi":6373,"resource_url":6374,"first_page":63,"last_page":63,"pdf_url":6375,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6376,"paper_type":860,"authors":6377,"abstract":63},"lrec2004-main-335","Linguistic Corpus Search","10.63317\u002F3pncxxezk6zn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-335","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F546.pdf","biemann-etal-2004-linguistic",[6378,6380,6383],{"paper_id":6371,"author_seq":247,"given_name":2000,"surname":6379,"affiliation":63,"orcid":63},"Biemann",{"paper_id":6371,"author_seq":232,"given_name":6381,"surname":6382,"affiliation":63,"orcid":63},"Uwe","Quasthoff",{"paper_id":6371,"author_seq":218,"given_name":2000,"surname":6384,"affiliation":63,"orcid":63},"Wolff",{"paper_id":6386,"title":6387,"year":197,"month":855,"day":63,"doi":6388,"resource_url":6389,"first_page":63,"last_page":63,"pdf_url":6390,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6391,"paper_type":860,"authors":6392,"abstract":6410},"lrec2004-main-336","ENABLER Thematic Network of National Projects: Technical, Strategic and Political Issues of 
LRs","10.63317\u002F4wrvs9jemmqg","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-336","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F545.pdf","calzolari-etal-2004-enabler",[6393,6394,6395,6397,6398,6399,6400,6401,6404,6407],{"paper_id":6386,"author_seq":247,"given_name":6334,"surname":6335,"affiliation":63,"orcid":63},{"paper_id":6386,"author_seq":232,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},{"paper_id":6386,"author_seq":218,"given_name":5540,"surname":6396,"affiliation":63,"orcid":63},"Gavrilidou",{"paper_id":6386,"author_seq":203,"given_name":918,"surname":919,"affiliation":63,"orcid":63},{"paper_id":6386,"author_seq":188,"given_name":1752,"surname":3006,"affiliation":63,"orcid":63},{"paper_id":6386,"author_seq":172,"given_name":5377,"surname":5378,"affiliation":63,"orcid":63},{"paper_id":6386,"author_seq":155,"given_name":1743,"surname":6337,"affiliation":63,"orcid":63},{"paper_id":6386,"author_seq":138,"given_name":6402,"surname":6403,"affiliation":63,"orcid":63},"Valérie","Mapelli",{"paper_id":6386,"author_seq":121,"given_name":6405,"surname":6406,"affiliation":63,"orcid":63},"Monica","Monachini",{"paper_id":6386,"author_seq":104,"given_name":6408,"surname":6409,"affiliation":63,"orcid":63},"Stelios","Piperidis","In this paper we present general strategies concerning Language Resources (LRs) - Written, Spoken and, recently, Multimodal - as developed within the ENABLER Thematic Network. LRs are a central component of the so-called \"linguistic infrastructure\" (the other key element being Evaluation), necessary for the development of any Human Language Technology (HLT) application. They play a critical role, as horizontal technology, in different emerging areas of FP6, and have been recognized as a priority within a number of national projects around Europe and world-wide. 
The availability of LRs is also a \"sensitive\" issue, touching directly the sphere of linguistic and cultural identity, but also with economical, societal and political implications. This is going to be even more true in the new Europe with 25 languages on a par.",{"paper_id":6412,"title":6413,"year":197,"month":855,"day":63,"doi":6414,"resource_url":6415,"first_page":63,"last_page":63,"pdf_url":6416,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6417,"paper_type":860,"authors":6418,"abstract":6427},"lrec2004-main-337","The Influence of the Labeller’s Regional Background on Phonetic Transcriptions: Implications for the Evaluation of Spoken Language Resources","10.63317\u002F52sxizrh55tv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-337","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F547.pdf","cousse-etal-2004-influence",[6419,6422,6424,6426],{"paper_id":6412,"author_seq":247,"given_name":6420,"surname":6421,"affiliation":63,"orcid":63},"Evie","Coussé",{"paper_id":6412,"author_seq":232,"given_name":2500,"surname":6423,"affiliation":63,"orcid":63},"Gillis",{"paper_id":6412,"author_seq":218,"given_name":5377,"surname":6425,"affiliation":63,"orcid":63},"Kloots",{"paper_id":6412,"author_seq":203,"given_name":2904,"surname":4406,"affiliation":63,"orcid":63},"Phonetic transcriptions of spoken language corpora are not an exact written reproduction of the speech signal. They are influenced by a variety of factors such as the transcriber’s native categorical perception. What remains unexplored is to what extent variation of perception within the same language exerts any influence on phonetic transcriptions. We report a case study of the labelling of vowel quality performed by native speakers of Dutch from either The Netherlands or Belgium. 
An analysis of the distribution of vowel quality labels reveals that labellers from The Netherlands have a different preference for certain vowel quality labels than labellers from Belgium. The inter-labeller agreement is higher between labellers from the same region than between labellers from different regions. From these results, the conclusion can be drawn that labellers from The Netherlands and Belgium have a different perception of vowel quality in Standard Dutch. Thus, the factor regional background of transcribers should be taken into account when evaluating phonetic transcriptions of spoken language resources.",{"paper_id":6429,"title":6430,"year":197,"month":855,"day":63,"doi":6431,"resource_url":6432,"first_page":63,"last_page":63,"pdf_url":6433,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6434,"paper_type":860,"authors":6435,"abstract":6452},"lrec2004-main-338","Evaluation Resources for Concept-based Cross-Lingual Information Retrieval in the Medical 
Domain","10.63317\u002F4npjd6qfb9kz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-338","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F548.pdf","buitelaar-etal-2004-evaluation",[6436,6437,6438,6440,6443,6446,6449,6451],{"paper_id":6429,"author_seq":247,"given_name":973,"surname":5835,"affiliation":63,"orcid":63},{"paper_id":6429,"author_seq":232,"given_name":1060,"surname":1650,"affiliation":63,"orcid":63},{"paper_id":6429,"author_seq":218,"given_name":1087,"surname":6439,"affiliation":63,"orcid":63},"Volk",{"paper_id":6429,"author_seq":203,"given_name":6441,"surname":6442,"affiliation":63,"orcid":63},"Dominic","Widdows",{"paper_id":6429,"author_seq":188,"given_name":6444,"surname":6445,"affiliation":63,"orcid":63},"Bogdan","Sacaleanu",{"paper_id":6429,"author_seq":172,"given_name":6447,"surname":6448,"affiliation":63,"orcid":63},"Špela","Vintar",{"paper_id":6429,"author_seq":155,"given_name":6450,"surname":1388,"affiliation":63,"orcid":63},"Stanley",{"paper_id":6429,"author_seq":138,"given_name":874,"surname":875,"affiliation":63,"orcid":63},"The paper describes evaluation resources for concept-based, cross-lingual information retrieval in the medical domain. All resources were constructed in the context of the MuchMore project and are freely available through the project website. 
Available resources include: a bilingual, parallel document collection of German and English medical scientific abstracts, a set of queries and corresponding relevance assessments, two manually disambiguated test sets for semantic annotation (sense disambiguation), two evaluation lists for German morphological decomposition of medical terms.",{"paper_id":6454,"title":6455,"year":197,"month":855,"day":63,"doi":6456,"resource_url":6457,"first_page":63,"last_page":63,"pdf_url":6458,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6459,"paper_type":860,"authors":6460,"abstract":6466},"lrec2004-main-339","Automatic Acquisition of Paradigmatic Relations Using Iterated Co-occurrences","10.63317\u002F4432om8pv665","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-339","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F549.pdf","biemann-etal-2004-automatic",[6461,6463,6465],{"paper_id":6454,"author_seq":247,"given_name":6462,"surname":6379,"affiliation":63,"orcid":63},"Chris",{"paper_id":6454,"author_seq":232,"given_name":961,"surname":6464,"affiliation":63,"orcid":63},"Bordag",{"paper_id":6454,"author_seq":218,"given_name":6381,"surname":6382,"affiliation":63,"orcid":63},"We introduce the notion of iterated co-occurrences, which can be obtained by performing the calculation of statistically significant co-occurrences not on sentence level, but on co-occurrence sets of previous calculations. The underlying mechanisms are explained in detail and we give reasons, why this iteration results in sets of semantically homogeneous words. These can be used for the automatic acquisition of paradigmatic relations in order to semi-automatically extend lexical-semantic word nets or thesauri, widening the acquisition bottleneck. 
A small evaluation for synset expansion for German language and some discussion conclude the work.",{"paper_id":6468,"title":6469,"year":197,"month":855,"day":63,"doi":6470,"resource_url":6471,"first_page":63,"last_page":63,"pdf_url":6472,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6473,"paper_type":860,"authors":6474,"abstract":6486},"lrec2004-main-340","Towards Ontology Engineering Based on Linguistic Analysis","10.63317\u002F26shqe7g5aj6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-340","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F550.pdf","buitelaar-etal-2004-towards",[6475,6476,6478,6481,6483,6484],{"paper_id":6468,"author_seq":247,"given_name":973,"surname":5835,"affiliation":63,"orcid":63},{"paper_id":6468,"author_seq":232,"given_name":2268,"surname":6477,"affiliation":63,"orcid":63},"Olejnik",{"paper_id":6468,"author_seq":218,"given_name":6479,"surname":6480,"affiliation":63,"orcid":63},"Mihaela","Hutanu",{"paper_id":6468,"author_seq":203,"given_name":1081,"surname":6482,"affiliation":63,"orcid":63},"Schutz",{"paper_id":6468,"author_seq":188,"given_name":5618,"surname":5619,"affiliation":63,"orcid":63},{"paper_id":6468,"author_seq":172,"given_name":1357,"surname":6485,"affiliation":63,"orcid":63},"Sintek","In this paper we describe OntoLT, a plug-in for the widely used Protégé ontology development tool that supports the interactive extraction and\u002For extension of ontologies from text. The OntoLT approach aims at providing an environment for the integration of linguistic analysis in ontology development. OntoLT enables the definition of mapping rules with which concepts and attributes can be extracted automatically from linguistically annotated text collections. Mapping rules are defined by use of a constraint language. Constraints are implemented as XPATH expressions over the XML-based linguistic annotation. 
If all constraints are satisfied, the mapping rule activates one or more operators that describe in which way the ontology should be extended if a candidate is found.",{"paper_id":6488,"title":6489,"year":197,"month":855,"day":63,"doi":6490,"resource_url":6491,"first_page":63,"last_page":63,"pdf_url":6492,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6493,"paper_type":860,"authors":6494,"abstract":6520},"lrec2004-main-341","OrienTel - Telephony Databases Across Northern Africa and the Middle East","10.63317\u002F5n3pkhmfqtxs","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-341","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F552.pdf","iskra-etal-2004-orientel",[6495,6496,6499,6502,6504,6505,6506,6509,6510,6513,6516,6519],{"paper_id":6488,"author_seq":247,"given_name":3868,"surname":3869,"affiliation":63,"orcid":63},{"paper_id":6488,"author_seq":232,"given_name":6497,"surname":6498,"affiliation":63,"orcid":63},"Rainer","Siemund",{"paper_id":6488,"author_seq":218,"given_name":6500,"surname":6501,"affiliation":63,"orcid":63},"Jamal","Borno",{"paper_id":6488,"author_seq":203,"given_name":6503,"surname":3427,"affiliation":63,"orcid":63},"Asuncion",{"paper_id":6488,"author_seq":188,"given_name":3710,"surname":3711,"affiliation":63,"orcid":63},{"paper_id":6488,"author_seq":172,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},{"paper_id":6488,"author_seq":155,"given_name":6507,"surname":6508,"affiliation":63,"orcid":63},"Oren","Gedge",{"paper_id":6488,"author_seq":138,"given_name":3821,"surname":3822,"affiliation":63,"orcid":63},{"paper_id":6488,"author_seq":121,"given_name":6511,"surname":6512,"affiliation":63,"orcid":63},"Albino","Nogueiras",{"paper_id":6488,"author_seq":104,"given_name":6514,"surname":6515,"affiliation":63,"orcid":63},"Imed","Zitouni",{"paper_id":6488,"author_seq":87,"given_name":6517,"surname":6518,"affiliation":63,"orcid":63},"Anastasios","Tsopanoglou",{"paper_id":6488,"
author_seq":73,"given_name":1111,"surname":1112,"affiliation":63,"orcid":63},"OrienTel is a project that over the past two-and-half years developed speech databases and phonetic standards across Northern Africa, the Middle East and the Arabian Gulf. The project is funded by the European Commission and is coordinated by ScanSoft (Germany and Belgium). Other partners are ELDA (France), IBM (Germany), NSC (Israel), Siemens (Germany), Lucent (UK), Knowledge, the University of Patras (both Greece) and UPC (Spain), plus SPEX (the Netherlands) as validation agency. Now that OrienTel has passed the finish line, the present paper gives an update of the design conventions, and an account of the project's achievements. The paper also illustrates some of the challenges that the consortium faced which are mostly related to the validation and the research subjects.",{"paper_id":6522,"title":6523,"year":197,"month":855,"day":63,"doi":6524,"resource_url":6525,"first_page":63,"last_page":63,"pdf_url":6526,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6527,"paper_type":860,"authors":6528,"abstract":6531},"lrec2004-main-342","ELRA Validation Methodology and Standard Promotion for Linguistic Resources","10.63317\u002F53xr76kxg9ku","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-342","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F553.pdf","fersoe-monachini-2004-elra",[6529,6530],{"paper_id":6522,"author_seq":247,"given_name":5377,"surname":5378,"affiliation":63,"orcid":63},{"paper_id":6522,"author_seq":232,"given_name":6405,"surname":6406,"affiliation":63,"orcid":63},"This paper describes the results of work made for ELRA during 2003-2004. It describes the methodology for validation of written language resources (WLRs), specifically lexica, which has been developed for ELRA and tested on a few resources in the ELRA catalogue. 
It discusses the importance of key issues in lexicon creation and validation such as the adoption of standards for the coding of linguistic content and the importance of documentation. It reports on the experience gained from applying the methodology to lexical resources in the ELRA catalogue arguing that the checks must be reasonable, informative, on a suitable level of detail, and generic. It proposes a set of basic elements to be included in future discussions on establishing standards for lexicon resources. In conclusion it sketches the work to be undertaken in 2004 to promote validation and the adoption of standards.",{"paper_id":6533,"title":6534,"year":197,"month":855,"day":63,"doi":6535,"resource_url":6536,"first_page":63,"last_page":63,"pdf_url":6537,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6538,"paper_type":860,"authors":6539,"abstract":6546},"lrec2004-main-343","The AAC [Austrian Academy Corpus] – An Enterprise to Develop Large Electronic Text Corpora","10.63317\u002F4ihare866yv6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-343","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F554.pdf","biber-breiteneder-2004-aac",[6540,6543],{"paper_id":6533,"author_seq":247,"given_name":6541,"surname":6542,"affiliation":63,"orcid":63},"Hanno","Biber",{"paper_id":6533,"author_seq":232,"given_name":6544,"surname":6545,"affiliation":63,"orcid":63},"Evelyn","Breiteneder","The AAC [Austrian Academy Corpus] is a corpus research institution based at the Austrian Academy of Sciences in Vienna. The AAC is a very large and complex electronic text collection. Its aims are to create an innovative text corpus and to conduct scholarly and scientific research in the field of electronic text corpora. In the first phase of the corpus build up the AAC is committed to have at least 100 million running words of carefully selected and scholarly annotated significant texts. 
The corpus approach of the AAC will allow a variety of investigations into the linguistic properties, the textual structures and the historical and literary significance of the selected texts. In the second phase of application development the size of the AAC will increase to around one billion running words. In this phase selected subcorpora will be annotated in greater detail following the AAC schemes for annotation and according to its editorial principles. The AAC working group is endeavouring to establish a corpus that meets the needs of textual studies and conveys essential information about the German language as well as about the history of the time in focus as a history of texts and of language.",{"paper_id":6548,"title":6549,"year":197,"month":855,"day":63,"doi":6550,"resource_url":6551,"first_page":63,"last_page":63,"pdf_url":6552,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6553,"paper_type":860,"authors":6554,"abstract":6560},"lrec2004-main-344","Improving Automatic Phonetic Transcription of Spontaneous Speech Through Variant-Based Pronunciation Variation Modelling","10.63317\u002F38qggq8heoym","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-344","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F558.pdf","binnenpoorte-etal-2004-improving",[6555,6557,6558,6559],{"paper_id":6548,"author_seq":247,"given_name":1060,"surname":6556,"affiliation":63,"orcid":63},"Binnenpoorte",{"paper_id":6548,"author_seq":232,"given_name":5932,"surname":5933,"affiliation":63,"orcid":63},{"paper_id":6548,"author_seq":218,"given_name":4286,"surname":4287,"affiliation":63,"orcid":63},{"paper_id":6548,"author_seq":203,"given_name":5329,"surname":5330,"affiliation":63,"orcid":63},"In this paper we present an experiment aimed at improving automatic phonetic transcription of Dutch spontaneous speech through a variant-based method of pronunciation variation modelling. 
For spontaneous speech, the literature does not always provide enough rules to describe its characteristic phonological processes. Therefore, other methods should be applied to model pronunciation variation for automatic phonetic transcription. We show that a large amount of manually transcribed phonetic data is an extremely useful source for collecting pronunciation variants and their prior probabilities. From the results we can conclude that the adopted method is indeed suitable for improving automatic transcription of spontaneous speech, and that further improvements can be obtained by combining this method with rule-based methods of pronunciation variation modelling.",{"paper_id":6562,"title":6563,"year":197,"month":855,"day":63,"doi":6564,"resource_url":6565,"first_page":63,"last_page":63,"pdf_url":6566,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6567,"paper_type":860,"authors":6568,"abstract":6573},"lrec2004-main-345","A General-Purpose, Off-the-shelf Anaphora Resolution Module: Implementation and Preliminary Evaluation","10.63317\u002F2hw2na6qsij8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-345","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F559.pdf","poesio-kabadjov-2004-general",[6569,6570],{"paper_id":6562,"author_seq":247,"given_name":2926,"surname":2927,"affiliation":63,"orcid":63},{"paper_id":6562,"author_seq":232,"given_name":6571,"surname":6572,"affiliation":63,"orcid":63},"Mijail A.","Kabadjov","GuiTAR is an anaphora resolution system designed to be modular and usable as an off-the-shelf component of a NL processing pipeline. 
We discuss the system’s design and a preliminary evaluation of the two algorithms implemented in the current version of the system – for definite descriptions and for pronoun resolution.",{"paper_id":6575,"title":6576,"year":197,"month":855,"day":63,"doi":6577,"resource_url":6578,"first_page":63,"last_page":63,"pdf_url":6579,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6580,"paper_type":860,"authors":6581,"abstract":63},"lrec2004-main-346","Building a Conceptual Graph Bank for Chinese Language","10.63317\u002F2tk4i2fkry94","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-346","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F561.pdf","ji-etal-2004-building",[6582,6583,6584],{"paper_id":6575,"author_seq":247,"given_name":5044,"surname":5045,"affiliation":63,"orcid":63},{"paper_id":6575,"author_seq":232,"given_name":5041,"surname":5042,"affiliation":63,"orcid":63},{"paper_id":6575,"author_seq":218,"given_name":5047,"surname":3634,"affiliation":63,"orcid":63},{"paper_id":6586,"title":6587,"year":197,"month":855,"day":63,"doi":6588,"resource_url":6589,"first_page":63,"last_page":63,"pdf_url":6590,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6591,"paper_type":860,"authors":6592,"abstract":63},"lrec2004-main-347","Enriching a French 
Treebank","10.63317\u002F2mnhhgo8b34o","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-347","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F562.pdf","abeille-barrier-2004-enriching",[6593,6595],{"paper_id":6586,"author_seq":247,"given_name":2977,"surname":6594,"affiliation":63,"orcid":63},"Abeillé",{"paper_id":6586,"author_seq":232,"given_name":6596,"surname":6597,"affiliation":63,"orcid":63},"Nicolas","Barrier",{"paper_id":6599,"title":6600,"year":197,"month":855,"day":63,"doi":6601,"resource_url":6602,"first_page":63,"last_page":63,"pdf_url":6603,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6604,"paper_type":860,"authors":6605,"abstract":6615},"lrec2004-main-348","French-English Multi-word Term Alignment Based on Lexical Context Analysis","10.63317\u002F3t6ywqrbctr3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-348","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F563.pdf","daille-etal-2004-french",[6606,6609,6612],{"paper_id":6599,"author_seq":247,"given_name":6607,"surname":6608,"affiliation":63,"orcid":63},"Béatrice","Daille",{"paper_id":6599,"author_seq":232,"given_name":6610,"surname":6611,"affiliation":63,"orcid":63},"Samuel","Dufour-Kowalski",{"paper_id":6599,"author_seq":218,"given_name":6613,"surname":6614,"affiliation":63,"orcid":63},"Emmanuel","Morin","This article presents a method of extracting bilingual lexica composed of single-word terms (SWTs) and multi-word terms (MWTs) from comparable corpora of a technical domain. First, this method extracts MWTs in each language, and then uses statistical methods to align single words and MWTs by exploiting the term contexts. After explaining the difficulties involved in aligning MWTs and specifying our approach, we show the adopted process for bilingual terminology extraction and the resources used in our experiments. 
Finally, we evaluate our approach and demonstrate its significance, particularly in relation to non-compositional MWT alignment.",{"paper_id":6617,"title":6618,"year":197,"month":855,"day":63,"doi":6619,"resource_url":6620,"first_page":63,"last_page":63,"pdf_url":6621,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6622,"paper_type":860,"authors":6623,"abstract":6633},"lrec2004-main-349","An Argumentative Annotation Schema for Meeting Discussions","10.63317\u002F43w6y4nfdpux","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-349","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F564.pdf","pallotta-etal-2004-argumentative",[6624,6625,6628,6630],{"paper_id":6617,"author_seq":247,"given_name":1093,"surname":1094,"affiliation":63,"orcid":63},{"paper_id":6617,"author_seq":232,"given_name":6626,"surname":6627,"affiliation":63,"orcid":63},"Hatem","Ghorbel",{"paper_id":6617,"author_seq":218,"given_name":1519,"surname":6629,"affiliation":63,"orcid":63},"Ruch",{"paper_id":6617,"author_seq":203,"given_name":6631,"surname":6632,"affiliation":63,"orcid":63},"Giovanni","Coray","In this article, we are interested in the annotation of transcriptions of human-human dialogue taken from meeting records. We first propose a meeting content model where conversational acts are interpreted with respect to their argumentative force and their role in building the argumentative structure of the meeting discussion. Argumentation in dialogue describes the way participants take part in the discussion and argue their standpoints. Then, we propose an annotation scheme based on such an argumentative dialogue model as well as the evaluation of its adequacy. 
The obtained higher-level semantic annotations are exploited in the conceptual indexing of the information contained in meeting discussions.",{"paper_id":6635,"title":6636,"year":197,"month":855,"day":63,"doi":6637,"resource_url":6638,"first_page":63,"last_page":63,"pdf_url":6639,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6640,"paper_type":860,"authors":6641,"abstract":63},"lrec2004-main-350","A morphological Analyzer for Standard Albanian","10.63317\u002F2u7h2ch2wfj5","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-350","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F566.pdf","trommer-kallulli-2004-morphological",[6642,6645],{"paper_id":6635,"author_seq":247,"given_name":6643,"surname":6644,"affiliation":63,"orcid":63},"Jochen","Trommer",{"paper_id":6635,"author_seq":232,"given_name":6646,"surname":6647,"affiliation":63,"orcid":63},"Dalina","Kallulli",{"paper_id":6649,"title":6650,"year":197,"month":855,"day":63,"doi":6651,"resource_url":6652,"first_page":63,"last_page":63,"pdf_url":6653,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6654,"paper_type":860,"authors":6655,"abstract":6661},"lrec2004-main-351","Generating an Arabic Full-form Lexicon for Bidirectional Morphology Lookup","10.63317\u002F49e5nfn7zp7o","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-351","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F567.pdf","soudi-eisele-2004-generating",[6656,6659],{"paper_id":6649,"author_seq":247,"given_name":6657,"surname":6658,"affiliation":63,"orcid":63},"Abdelhadi","Soudi",{"paper_id":6649,"author_seq":232,"given_name":1647,"surname":6660,"affiliation":63,"orcid":63},"Eisele","We describe the generation of an Arabic full-form lexicon and its conversion into a two-level Finite State Transducer (FST) for morphology analysis and generation. 
The implementation of morphological lookup is based on a representation of the relevant data in the form of a FST, for which generic implementations exist that facilitate the integration into larger software systems for natural language processing. We show the feasibility of our encoding and the analysis of both vowelled and unvowelled Arabic words.",{"paper_id":6663,"title":6664,"year":197,"month":855,"day":63,"doi":6665,"resource_url":6666,"first_page":63,"last_page":63,"pdf_url":6667,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6668,"paper_type":860,"authors":6669,"abstract":6674},"lrec2004-main-352","Orthographic and Phonetic Annotation of Very Large Czech Corpora with Quality Assessment","10.63317\u002F2y9gy79ughgj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-352","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F568.pdf","pollak-cernocky-2004-orthographic",[6670,6672],{"paper_id":6663,"author_seq":247,"given_name":1789,"surname":6671,"affiliation":63,"orcid":63},"Pollák",{"paper_id":6663,"author_seq":232,"given_name":1078,"surname":6673,"affiliation":63,"orcid":63},"Černocký","The annotation is generally indivisible part of speech database. In this paper we are presenting common orthographic and phonetic annotation of large Czech databases. Phonetic annotation may be very important and gives more information than pronunciation lexicon with possible pronunciation variants. Moreover, for Czech language phonetic annotation means just small additional effort to standard orthographic transcription. The tool FTP-Trascriber developed for this purpose is also presented. In the second part we are presenting procedure of quality assessment applied to the annotation of large speech corpora collected at our laboratories. 
We are presenting semi-automated quality checks based on using several fully automated pre-checks decreasing necessary additional manual effort.",{"paper_id":6676,"title":6677,"year":197,"month":855,"day":63,"doi":6678,"resource_url":6679,"first_page":63,"last_page":63,"pdf_url":6680,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6681,"paper_type":860,"authors":6682,"abstract":6695},"lrec2004-main-353","INQUER: A WordNet-based Question-Answering Application","10.63317\u002F4tv246ekt8jt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-353","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F571.pdf","ribeiro-etal-2004-inquer",[6683,6685,6686,6689,6692],{"paper_id":6676,"author_seq":247,"given_name":6684,"surname":4674,"affiliation":63,"orcid":63},"Catarina",{"paper_id":6676,"author_seq":232,"given_name":4673,"surname":1061,"affiliation":63,"orcid":63},{"paper_id":6676,"author_seq":218,"given_name":6687,"surname":6688,"affiliation":63,"orcid":63},"João","Correia",{"paper_id":6676,"author_seq":203,"given_name":6690,"surname":6691,"affiliation":63,"orcid":63},"Rui Pedro","Chaves",{"paper_id":6676,"author_seq":188,"given_name":6693,"surname":6694,"affiliation":63,"orcid":63},"Palmira","Marrafa","The growing use of computer technologies as well as the amount of available information has posed new challenges to human-machine interaction. Applying natural language processing techniques to information systems has brought advantages to this emerging field. Question-answering (QA) systems have been developed in order to ease the information access process. However, most of today’s QA systems retrieve a collection of documents whose contents may fulfill the answer to user’s queries. Such systems usually consider unstructured information sources, namely Web or textual documents. 
Adopting a different approach, INQUER, the QA system described in this paper, makes use of a structured knowledge base: WordNet.PT (Portuguese WordNet). This system provides direct natural language answers to user’s questions by applying inference and information extraction mechanisms that interact with WordNet.PT (Portuguese WordNet). Although INQUER deals specifically with Portuguese, the theoretical approach that underlies this application is language independent.",{"paper_id":6697,"title":6698,"year":197,"month":855,"day":63,"doi":6699,"resource_url":6700,"first_page":63,"last_page":63,"pdf_url":6701,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6702,"paper_type":860,"authors":6703,"abstract":6709},"lrec2004-main-354","Evaluating Solutions for the Rapid Development of State-of-the-Art POS Taggers for Portuguese","10.63317\u002F3dcds5yhtzo6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-354","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F572.pdf","branco-silva-2004-evaluating",[6704,6707],{"paper_id":6697,"author_seq":247,"given_name":6705,"surname":6706,"affiliation":63,"orcid":63},"António","Branco",{"paper_id":6697,"author_seq":232,"given_name":6687,"surname":6708,"affiliation":63,"orcid":63},"Silva","We report on solutions we adopted for the specific issues that arise when developing new automatic taggers for Portuguese, solutions whose design is general enough, we believe, to be further reused to develop other new taggers for this language, even when using different training data than those we used in our experiments. 
We report also on the evaluation of tools that make use of such solutions and show that the latter permit to develop POS taggers for Portuguese whose performance matches or surpasses state-of-the-art results obtained for other languages when using the same technology.",{"paper_id":6711,"title":6712,"year":197,"month":855,"day":63,"doi":6713,"resource_url":6714,"first_page":63,"last_page":63,"pdf_url":6715,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6716,"paper_type":860,"authors":6717,"abstract":6720},"lrec2004-main-355","A High Quality Partial Parser for Annotating German Text Corpora","10.63317\u002F2kg4h2dyg68z","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-355","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F575.pdf","klatt-2004-high",[6718],{"paper_id":6711,"author_seq":247,"given_name":961,"surname":6719,"affiliation":63,"orcid":63},"Klatt","In this paper, a two-stage partial parser for untagged German sentences is presented. In the first stage, the sentence is segmented into better parsable units according to the Topological Field Model. In the second stage, minimal phrases of NPs, DPs and PPs as well as nominal multiword units are identified in each of the recognized fields. In this paper, we discuss the results of the second stage. We evaluated 500 parsed sentences of a newspaper corpus. 
The achieved recall and precision rates are better than the ones of comparable systems as reported in literature so far.",{"paper_id":6722,"title":6723,"year":197,"month":855,"day":63,"doi":6724,"resource_url":6725,"first_page":63,"last_page":63,"pdf_url":6726,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6727,"paper_type":860,"authors":6728,"abstract":6731},"lrec2004-main-356","Bayesian Semantics Incorporation to Web Content for Natural Language Information Retrieval","10.63317\u002F3nhibi65xvmo","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-356","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F576.pdf","maragoudakis-fakotakis-2004-bayesian",[6729,6730],{"paper_id":6722,"author_seq":247,"given_name":3571,"surname":3572,"affiliation":63,"orcid":63},{"paper_id":6722,"author_seq":232,"given_name":1111,"surname":1112,"affiliation":63,"orcid":63},"For the present work, we endeavor with the important aspect of information retrieval of Web content using natural language queries. Currently, markup languages and formalisms do not fully provide mechanisms for effective and accurate analysis of Web content but rather provide means for describing the content in a more human-centric approach. As a result, natural language queries cannot be handled by the Internet search engines. Other approaches use grammar markup labels that attempt to fully match an unforeseen query. For the purposes of this paper, we introduce the theoretical and implementation issues of a novel, statistical framework that can cope with Web content analysis and information retrieval using natural language. The framework is based on Bayesian networks, a tool for knowledge representation and reasoning under conditions of uncertainty. The Web page designer provides the lexical items that contain useful information and labels the corresponding semantic interpretation, from a pre-defined set of domain categories. 
This knowledge is used for learning the structure and the parameters of a Bayesian network. At the time a user’s query is encountered, the network is used in order to return pages that contain the most related semantic content to the user’s query.",{"paper_id":6733,"title":6734,"year":197,"month":855,"day":63,"doi":6735,"resource_url":6736,"first_page":63,"last_page":63,"pdf_url":6737,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6738,"paper_type":860,"authors":6739,"abstract":6743},"lrec2004-main-357","Usability Evaluation of Spoken Dialogue Systems","10.63317\u002F3etbvzwprpmb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-357","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F578.pdf","larsen-2004-usability",[6740],{"paper_id":6733,"author_seq":247,"given_name":6741,"surname":6742,"affiliation":63,"orcid":63},"Lars Bo","Larsen","This paper concerns the methodologies currently applied to evaluation of spoken and multi modal dialogue systems. Usability is concerned with the effectiveness, efficiency and satisfaction by which users achieve their goals when using a system. The methods developed and applied by the speech community over the past decade to measure the usability of Spoken Dialogue Systems (SDS) are discussed and critically reviewed. The paper starts by giving a general review and discussion of the current issues and problems of establishing SDS usability. This is supported by the presentation and analysis of a case, where it is shown that e.g. learnability can be derived from simple performance measures, such as duration and turn-taking. 
Results of applying the PARADISE method are presented and discussed.",{"paper_id":6745,"title":6746,"year":197,"month":855,"day":63,"doi":6747,"resource_url":6748,"first_page":63,"last_page":63,"pdf_url":6749,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6750,"paper_type":860,"authors":6751,"abstract":63},"lrec2004-main-358","Enriching EWN with Syntagmatic Information by Means of WSD","10.63317\u002F3f4mf7uyv5a2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-358","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F579.pdf","nica-etal-2004-enriching",[6752,6755,6757,6760],{"paper_id":6745,"author_seq":247,"given_name":6753,"surname":6754,"affiliation":63,"orcid":63},"Iulia","Nica",{"paper_id":6745,"author_seq":232,"given_name":6756,"surname":988,"affiliation":63,"orcid":63},"Mª Antònia",{"paper_id":6745,"author_seq":218,"given_name":6758,"surname":6759,"affiliation":63,"orcid":63},"Andrés","Montoyo",{"paper_id":6745,"author_seq":203,"given_name":6761,"surname":5581,"affiliation":63,"orcid":63},"Sonia",{"paper_id":6763,"title":6764,"year":197,"month":855,"day":63,"doi":6765,"resource_url":6766,"first_page":63,"last_page":63,"pdf_url":6767,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6768,"paper_type":860,"authors":6769,"abstract":6771},"lrec2004-main-359","Proper Names and Polysemy: From a Lexicographic Experience","10.63317\u002F3sgk44jvortt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-359","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F580.pdf","marinelli-2004-proper",[6770],{"paper_id":6763,"author_seq":247,"given_name":4974,"surname":6138,"affiliation":63,"orcid":63},"In the framework of the SI-TAL (Integrated Systems for the Automatic Treatment of Language) project the lexical coverage of IWN has been extended by adding, besides two grammatical categories not encoded in EWN (i.e. 
adjectives and adverbs), a set of proper names which are taken into consideration in this paper. This decision was also due to the high degree of incidence of proper names observed in the corpus selected within SI-TAL for semantic annotation. In this paper we would refer more widely about the relations involving the pn in particular codifying the relation between the pn and the senses (literal, derived and extended). We consider the pn as the basis for many extensions of meaning. In fact, many types of derivates and sense extensions are generated, by means of lexical rules that operate as “generative factors”. Novel usages of a word form can be derived through productive application of a lexical rule; therefore we propose to represent these lexical rules codifying new semantic relations in the database. We want to give prominence to the polysemy of pn to confirm the linguistic manifestation(s) of the faculty for generative categorization and compositional thought ” (Pustejovsky, 2001).",{"paper_id":6773,"title":6774,"year":197,"month":855,"day":63,"doi":6775,"resource_url":6776,"first_page":63,"last_page":63,"pdf_url":6777,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6778,"paper_type":860,"authors":6779,"abstract":6789},"lrec2004-main-360","Tools for Upgrading Printed Dictionaries by Means of Corpus-based Lexical 
Acquisition","10.63317\u002F352oyf38mxgd","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-360","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F581.pdf","heid-etal-2004-tools",[6780,6781,6783,6786],{"paper_id":6773,"author_seq":247,"given_name":5498,"surname":5499,"affiliation":63,"orcid":63},{"paper_id":6773,"author_seq":232,"given_name":3478,"surname":6782,"affiliation":63,"orcid":63},"Säuberlich",{"paper_id":6773,"author_seq":218,"given_name":6784,"surname":6785,"affiliation":63,"orcid":63},"Esther","Debus-Gregor",{"paper_id":6773,"author_seq":203,"given_name":6787,"surname":6788,"affiliation":63,"orcid":63},"Werner","Scholze-Stubenrecht","We present the architecture and tools developed in the project TFB-32 for updating existing dictionaries by comparing their content with corpus data. We focus on an interactive graphical user interface for manual selection of the results of this comparison. The tools have been developed and used within a cooperation with lexicographers from two German publishing houses.",{"paper_id":6791,"title":6792,"year":197,"month":855,"day":63,"doi":6793,"resource_url":6794,"first_page":63,"last_page":63,"pdf_url":6795,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6796,"paper_type":860,"authors":6797,"abstract":6799},"lrec2004-main-361","Extraction of Polish Named-Entities","10.63317\u002F4a9ti2zm94z9","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-361","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F582.pdf","piskorski-2004-extraction",[6798],{"paper_id":6791,"author_seq":247,"given_name":5832,"surname":5833,"affiliation":63,"orcid":63},"In this paper, we present some attempts towards constructing a named-entity recognition system for Polish on top of SProUT, a novel multi-lingual NLP platform and by deploying standard machine learning 
techniques.",{"paper_id":6801,"title":6802,"year":197,"month":855,"day":63,"doi":6803,"resource_url":6804,"first_page":63,"last_page":63,"pdf_url":6805,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6806,"paper_type":860,"authors":6807,"abstract":6820},"lrec2004-main-362","Automatic Acquisition of Sense Examples Using ExRetriever","10.63317\u002F32r9i2tocx2t","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-362","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F583.pdf","fernandez-etal-2004-automatic",[6808,6811,6814,6817,6819],{"paper_id":6801,"author_seq":247,"given_name":6809,"surname":6810,"affiliation":63,"orcid":63},"Juan","Fernández",{"paper_id":6801,"author_seq":232,"given_name":6812,"surname":6813,"affiliation":63,"orcid":63},"Mauro","Castillo",{"paper_id":6801,"author_seq":218,"given_name":6815,"surname":6816,"affiliation":63,"orcid":63},"German","Rigau",{"paper_id":6801,"author_seq":203,"given_name":2105,"surname":6818,"affiliation":63,"orcid":63},"Atserias",{"paper_id":6801,"author_seq":188,"given_name":2105,"surname":2913,"affiliation":63,"orcid":63},"A current research line for word sense disambiguation (WSD) focuses on the use of supervised machine learning techniques. One of the drawbacks of using such techniques is that previously sense annotated data is required. This paper presents ExRetriever, a new software tool for automatically acquiring large sets of sense tagged examples from large collections of text and the Web. ExRetriever exploits the knowledge contained in large-scale knowledge bases (e.g., WordNet) to build complex queries, each of them characterising particular senses of a word. 
These examples can be used as training instances for supervised WSD algorithms.",{"paper_id":6822,"title":6823,"year":197,"month":855,"day":63,"doi":6824,"resource_url":6825,"first_page":63,"last_page":63,"pdf_url":6826,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6827,"paper_type":860,"authors":6828,"abstract":63},"lrec2004-main-363","Combining Heterogeneous Lexical Resources","10.63317\u002F3d4ofvepowic","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-363","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F584.pdf","krstev-etal-2004-combining",[6829,6832,6835,6838,6841],{"paper_id":6822,"author_seq":247,"given_name":6830,"surname":6831,"affiliation":63,"orcid":63},"Cvetana","Krstev",{"paper_id":6822,"author_seq":232,"given_name":6833,"surname":6834,"affiliation":63,"orcid":63},"Duško","Vitas",{"paper_id":6822,"author_seq":218,"given_name":6836,"surname":6837,"affiliation":63,"orcid":63},"Ranka","Stanković",{"paper_id":6822,"author_seq":203,"given_name":6839,"surname":6840,"affiliation":63,"orcid":63},"Ivan","Obradović",{"paper_id":6822,"author_seq":188,"given_name":6842,"surname":6843,"affiliation":63,"orcid":63},"Gordana","Pavlović-Lažetić",{"paper_id":6845,"title":6846,"year":197,"month":855,"day":63,"doi":6847,"resource_url":6848,"first_page":63,"last_page":63,"pdf_url":6849,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6850,"paper_type":860,"authors":6851,"abstract":6864},"lrec2004-main-364","Spoken and Written Language Resources for 
Vietnamese","10.63317\u002F5gxrin2m5tsj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-364","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F586.pdf","le-etal-2004-spoken",[6852,6854,6857,6859,6861],{"paper_id":6845,"author_seq":247,"given_name":6853,"surname":1671,"affiliation":63,"orcid":63},"Viet-Bac",{"paper_id":6845,"author_seq":232,"given_name":6855,"surname":6856,"affiliation":63,"orcid":63},"Do-Dat","Tran",{"paper_id":6845,"author_seq":218,"given_name":2721,"surname":6858,"affiliation":63,"orcid":63},"Castelli",{"paper_id":6845,"author_seq":203,"given_name":1175,"surname":6860,"affiliation":63,"orcid":63},"Besacier",{"paper_id":6845,"author_seq":188,"given_name":6862,"surname":6863,"affiliation":63,"orcid":63},"Jean-François","Serignat","This paper presents an overview of our activities for spoken and written language resources for Vietnamese implemented at CLIPS-IMAG Laboratory and International Research Center MICA. A new methodology for fast text corpora acquisition for minority languages which has been applied to Vietnamese is proposed. 
The first results of a process of building a large Vietnamese speech database (VNSpeechCorpus) and a phonetic dictionary, which is used for automatic alignment process, are also presented.",{"paper_id":6866,"title":6867,"year":197,"month":855,"day":63,"doi":6868,"resource_url":6869,"first_page":63,"last_page":63,"pdf_url":6870,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6871,"paper_type":860,"authors":6872,"abstract":63},"lrec2004-main-365","Building and Using a Corpus of Shallow Dialogue Annotated Meetings","10.63317\u002F297vv68h5j28","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-365","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F587.pdf","popescu-belis-etal-2004-building",[6873,6874,6876,6878],{"paper_id":6866,"author_seq":247,"given_name":3154,"surname":3155,"affiliation":63,"orcid":63},{"paper_id":6866,"author_seq":232,"given_name":5540,"surname":6875,"affiliation":63,"orcid":63},"Georgescul",{"paper_id":6866,"author_seq":218,"given_name":1081,"surname":6877,"affiliation":63,"orcid":63},"Clark",{"paper_id":6866,"author_seq":203,"given_name":3157,"surname":3158,"affiliation":63,"orcid":63},{"paper_id":6880,"title":6881,"year":197,"month":855,"day":63,"doi":6882,"resource_url":6883,"first_page":63,"last_page":63,"pdf_url":6884,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6885,"paper_type":860,"authors":6886,"abstract":6891},"lrec2004-main-366","XTERM: A Flexible Standard-Compliant XML-Based Termbase Management System","10.63317\u002F4wpsk5ei2dj6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-366","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F588.pdf","piccioni-zanchetta-2004-xterm",[6887,6888],{"paper_id":6880,"author_seq":247,"given_name":3013,"surname":3014,"affiliation":63,"orcid":63},{"paper_id":6880,"author_seq":232,"given_name":6889,"surname":6890,"affiliation":63,"orcid":63},"Eros","Zanchetta","This paper 
introduces XTerm, a Termbase management system (TBMS) currently under development at the Terminology Center of the School for Interpreters and Translators of the University of Bologna. The system is designed to be ISO and XML compliant and to provide a friendly environment for the insertion and visualization of terminological data. It is also open to the future evolution of international standards since it does not rely on a closed set of hard-coded data representation models. In this paper we will first introduce the project “Languages and Productive Activities”, then we will outline the main features of the XTerm TBMS: XTerm.NET, the graphical user interface (the main tool of the terminographer), XTerm.portal, the web application that provides online access to the termbase and two tools that provide innovative functionalities to the whole system: CARMA and COSY Generator.",{"paper_id":6893,"title":6894,"year":197,"month":855,"day":63,"doi":6895,"resource_url":6896,"first_page":63,"last_page":63,"pdf_url":6897,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6898,"paper_type":860,"authors":6899,"abstract":6903},"lrec2004-main-367","Word Sense Disambiguation Using Random Indexing","10.63317\u002F42ufjyshjx8f","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-367","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F589.pdf","mihaltz-2004-word",[6900],{"paper_id":6893,"author_seq":247,"given_name":6901,"surname":6902,"affiliation":63,"orcid":63},"Márton","Miháltz","This paper presents the results of an experiment to apply a novel semantic representational formalism called Random Indexing for the supervised word sense disambiguation of English words. Random Indexing uses high-dimensional sparse vectors with random patterns modeling neural activation patterns in the brain to represent linguistic information. 
The presented learning and disambiguating method was trained and tested using manually sense-tagged corpora available from Senseval. The results are evaluated and compared to previous works using the same corpora, and the possible lacks and weaknesses of Random Indexing are pointed out both in general and for the purpose of word sense disambiguation.",{"paper_id":6905,"title":6906,"year":197,"month":855,"day":63,"doi":6907,"resource_url":6908,"first_page":63,"last_page":63,"pdf_url":6909,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6910,"paper_type":860,"authors":6911,"abstract":6923},"lrec2004-main-368","Querying Both Time-aligned and Hierarchical Corpora with NXT Search","10.63317\u002F5i2zwk3bpqfk","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-368","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F590.pdf","heid-etal-2004-querying",[6912,6913,6916,6919,6921,6922],{"paper_id":6905,"author_seq":247,"given_name":5498,"surname":5499,"affiliation":63,"orcid":63},{"paper_id":6905,"author_seq":232,"given_name":6914,"surname":6915,"affiliation":63,"orcid":63},"Holger","Voormann",{"paper_id":6905,"author_seq":218,"given_name":6917,"surname":6918,"affiliation":63,"orcid":63},"Jan-Torsten","Milde",{"paper_id":6905,"author_seq":203,"given_name":1815,"surname":6920,"affiliation":63,"orcid":63},"Gut",{"paper_id":6905,"author_seq":188,"given_name":2545,"surname":2546,"affiliation":63,"orcid":63},{"paper_id":6905,"author_seq":172,"given_name":1075,"surname":2548,"affiliation":63,"orcid":63},"One problem of the (re-)usability and exchange of annotated corpora is in the lack of standards in corpus formats and corpus query tools. This paper reports on the NXT Search tool, which was used to query two corpora with very different annotation formats. 
It is shown that with automatic data format conversion both corpora can be accessed and searched with NXT Search.",{"paper_id":6925,"title":6926,"year":197,"month":855,"day":63,"doi":6927,"resource_url":6928,"first_page":63,"last_page":63,"pdf_url":6929,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6930,"paper_type":860,"authors":6931,"abstract":63},"lrec2004-main-369","Bypassing Greeklish!","10.63317\u002F4zt8vueknqdz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-369","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F593.pdf","chalamandaris-etal-2004-bypassing",[6932,6934,6936,6938,6940],{"paper_id":6925,"author_seq":247,"given_name":2846,"surname":6933,"affiliation":63,"orcid":63},"Chalamandaris",{"paper_id":6925,"author_seq":232,"given_name":3550,"surname":6935,"affiliation":63,"orcid":63},"Tsiakoulis",{"paper_id":6925,"author_seq":218,"given_name":3556,"surname":6937,"affiliation":63,"orcid":63},"Raptis",{"paper_id":6925,"author_seq":203,"given_name":3656,"surname":6939,"affiliation":63,"orcid":63},"Giannopoulos",{"paper_id":6925,"author_seq":188,"given_name":3656,"surname":6941,"affiliation":63,"orcid":63},"Carayannis",{"paper_id":6943,"title":6944,"year":197,"month":855,"day":63,"doi":6945,"resource_url":6946,"first_page":63,"last_page":63,"pdf_url":6947,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6948,"paper_type":860,"authors":6949,"abstract":6954},"lrec2004-main-370","Semi-Automatic UNL Dictionary Generation Using 
WordNet.PT","10.63317\u002F2u9tsnsfodqu","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-370","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F594.pdf","ribeiro-etal-2004-semi",[6950,6951,6952,6953],{"paper_id":6943,"author_seq":247,"given_name":6684,"surname":4674,"affiliation":63,"orcid":63},{"paper_id":6943,"author_seq":232,"given_name":4673,"surname":1061,"affiliation":63,"orcid":63},{"paper_id":6943,"author_seq":218,"given_name":6690,"surname":6691,"affiliation":63,"orcid":63},{"paper_id":6943,"author_seq":203,"given_name":6693,"surname":6694,"affiliation":63,"orcid":63},"The increase of Internet users all over the world and the subsequent growth of available multilingual information on the Web have brought new challenges to machine translation systems. The Universal Networking Language (UNL) is a meta-language developed for conveying linguistic expressions in order to encode websites information into a standard representation. In order to integrate Portuguese into this platform it is necessary to develop both a dictionary and a grammar. This paper focuses on the development of a PT-UNL dictionary using the WordNet.PT knowledge base. UNL makes use of lexical-semantic relations that have some correspondence in WordNet.PT. Since building the dictionary is a very time-consuming task, a semi-automatic process to generate it has been developed and is described here. 
The reuse of manually built lexical resources is of great relevance both for the economy of the project and for the reliability of the results.",{"paper_id":6956,"title":6957,"year":197,"month":855,"day":63,"doi":6958,"resource_url":6959,"first_page":63,"last_page":63,"pdf_url":6960,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6961,"paper_type":860,"authors":6962,"abstract":6965},"lrec2004-main-371","Bootstrapping a Database of German Multi-word Expressions","10.63317\u002F2tgbmuu47jbw","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-371","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F595.pdf","geyken-2004-bootstrapping",[6963],{"paper_id":6956,"author_seq":247,"given_name":1081,"surname":6964,"affiliation":63,"orcid":63},"Geyken","We pre-classified 32,000 entries from the Wörterbuch der deutschen Idiomatik (Schemann 1993) using an inductive description of POS sequences in conjunction with a Brill Tagger trained on manually tagged idiomatic entries. This process assigned categories to 86% of entries with 88% accuracy. Further manual classification resulted in a database of multi-word expressions where each entry is associated with a sequence of POS-tag\u002Ftoken pairs. The second phase of our project, currently underway, addresses the association of a sequence of POS-tag\u002Ftoken pairs with a corpus example. To this end, we generate a weighted finite state transducer from the sequences for each entry and apply a finite state filter to the corpus. 
The filter will extract those sequences in the corpus that correspond to the longest match of the multi-word expression.",{"paper_id":6967,"title":6968,"year":197,"month":855,"day":63,"doi":6969,"resource_url":6970,"first_page":63,"last_page":63,"pdf_url":6971,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6972,"paper_type":860,"authors":6973,"abstract":63},"lrec2004-main-372","A Practical Comparison of Different Filters Used in Automatic Term Extraction","10.63317\u002F4yuezd7y5xry","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-372","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F596.pdf","ha-2004-practical",[6974],{"paper_id":6967,"author_seq":247,"given_name":6975,"surname":6976,"affiliation":63,"orcid":63},"Le An","Ha",{"paper_id":6978,"title":6979,"year":197,"month":855,"day":63,"doi":6980,"resource_url":6981,"first_page":63,"last_page":63,"pdf_url":6982,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6983,"paper_type":860,"authors":6984,"abstract":6989},"lrec2004-main-373","SVMTool: A general POS Tagger Generator Based on Support Vector Machines","10.63317\u002F52pkqry5zu2h","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-373","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F597.pdf","gimenez-marquez-2004-svmtool",[6985,6988],{"paper_id":6978,"author_seq":247,"given_name":6986,"surname":6987,"affiliation":63,"orcid":63},"Jesús","Giménez",{"paper_id":6978,"author_seq":232,"given_name":3218,"surname":2309,"affiliation":63,"orcid":63},"This paper presents the SVMTool, a simple, flexible, effective and efficient part-of-speech tagger based on Support Vector Machines. The SVMTool offers a fairly good balance among these properties which make it really practical for current NLP applications. It is very easy to use and easily configurable so as to perfectly fit the needs of a number of different applications. 
Results are also very competitive, achieving an accuracy of 97.16% for English on the Wall Street Journal corpus. It has been also successfully applied to Spanish exhibiting a similar performance. A first release of the SVMTool Perl prototype is now freely available for public use. A most efficient C++ version is coming very soon.",{"paper_id":6991,"title":6992,"year":197,"month":855,"day":63,"doi":6993,"resource_url":6994,"first_page":63,"last_page":63,"pdf_url":6995,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6996,"paper_type":860,"authors":6997,"abstract":63},"lrec2004-main-374","A Multi-Modal Documentation System for Warao","10.63317\u002F3kr72fjdvtzh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-374","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F599.pdf","herrmann-etal-2004-multi",[6998,7001,7004],{"paper_id":6991,"author_seq":247,"given_name":6999,"surname":7000,"affiliation":63,"orcid":63},"Stefanie","Herrmann",{"paper_id":6991,"author_seq":232,"given_name":7002,"surname":7003,"affiliation":63,"orcid":63},"Hartmut","Keck",{"paper_id":6991,"author_seq":218,"given_name":1560,"surname":7005,"affiliation":63,"orcid":63},"Kepser",{"paper_id":7007,"title":7008,"year":197,"month":855,"day":63,"doi":7009,"resource_url":7010,"first_page":63,"last_page":63,"pdf_url":7011,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7012,"paper_type":860,"authors":7013,"abstract":7022},"lrec2004-main-375","The DeepThought Core Architecture 
Framework","10.63317\u002F27matdro5ycp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-375","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F603.pdf","callmeier-etal-2004-deepthought",[7014,7016,7017,7019],{"paper_id":7007,"author_seq":247,"given_name":5498,"surname":7015,"affiliation":63,"orcid":63},"Callmeier",{"paper_id":7007,"author_seq":232,"given_name":1647,"surname":6660,"affiliation":63,"orcid":63},{"paper_id":7007,"author_seq":218,"given_name":5498,"surname":7018,"affiliation":63,"orcid":63},"Schäfer",{"paper_id":7007,"author_seq":203,"given_name":7020,"surname":7021,"affiliation":63,"orcid":63},"Melanie","Siegel","The research performed in the DeepThought project aims at demonstrating the potential of deep linguistic processing if combined with shallow methods for robustness. Classical information retrieval is extended by high precision concept indexing and relation detection. On the basis of this approach, the feasibility of three ambitious applications will be demonstrated, namely: precise information extraction for business intelligence; email response management for customer relationship management; creativity support for document production and collective brainstorming. Common to these applications, and the basis for their development is the XML-based, RMRS-enabled core architecture framework that will be described in detail in this paper. The framework is not limited to the applications envisaged in the DeepThought project, but can also be employed e.g. 
to generate and make use of XML standoff annotation of documents and linguistic corpora, and in general for a wide range of NLP-based applications and research purposes.",{"paper_id":7024,"title":7025,"year":197,"month":855,"day":63,"doi":7026,"resource_url":7027,"first_page":63,"last_page":63,"pdf_url":7028,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7029,"paper_type":860,"authors":7030,"abstract":7034},"lrec2004-main-376","Towards the Meaning Top Ontology: Sources of Ontological Meaning","10.63317\u002F28y3n68azj6a","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-376","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F604.pdf","atserias-etal-2004-towards",[7031,7032,7033],{"paper_id":7024,"author_seq":247,"given_name":2105,"surname":6818,"affiliation":63,"orcid":63},{"paper_id":7024,"author_seq":232,"given_name":2181,"surname":5437,"affiliation":63,"orcid":63},{"paper_id":7024,"author_seq":218,"given_name":6815,"surname":6816,"affiliation":63,"orcid":63},"This paper describes the initial research steps towards the Top Ontology for the Multilingual Central Repository (MCR) built in the MEANING project. The current version of the MCR integrates five local wordnets plus four versions of Princeton's English WordNet, three ontologies and hundreds of thousands of new semantic relations and properties automatically acquired from corpora. In order to maintain compatibility among all these heterogeneous knowledge resources, it is fundamental to have a robust and advanced ontological support. 
This paper studies the mapping of main Sources of Ontological Meaning onto the wordnets and, in particular, the current work in mapping the EuroWordNet Top Concept Ontology.",{"paper_id":7036,"title":7037,"year":197,"month":855,"day":63,"doi":7038,"resource_url":7039,"first_page":63,"last_page":63,"pdf_url":7040,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7041,"paper_type":860,"authors":7042,"abstract":7045},"lrec2004-main-377","An Environment for Dialogue Corpora Collection (ENDIACC)","10.63317\u002F4o273c3wuzug","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-377","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F605.pdf","vetulani-2004-environment",[7043],{"paper_id":7036,"author_seq":247,"given_name":3277,"surname":7044,"affiliation":63,"orcid":63},"Vetulani","In this paper we present an environment for dialogue corpora collection (ENDIACC) being a part of our long term program consisting in development of methodology and tools to design systems with Emulated Language Competence (ELC systems). ELC systems are those able to communicate interactively with their human users in the human language. The key point of our research is development of a methodology for systematic studies of the human user interacting with a machine or with another human. The methods of acquisition of the initial linguistic knowledge, necessary at the early steps of the design of ELC systems, are being systematically implemented for Polish language at the Adam Mickiewicz University. The element we focus on in this presentation is an open experimental setting to generate empirical data about NL dialogues in form of dialogue corpora. The main problem with the corpus-based empirical approach consists in absence of easy and inexpensive way of collecting naturally generated dialogue recordings. The problem may partially be solved by designing experiments where natural dialogues could be registered. 
The novelty of the proposal presented in this paper consists in proposing a free, easily accessible, language independent software platform ENDIACC (ENvironment for DIAlogue Corpora Collection) to provide an experimental setting for text mode written (keyboard) dialogue corpora collection. This platform is particularly well adapted to the collection of corpora of chat-like dialogues in text mode combined with MMS-like technologies. The system requires a graphical operating system (e.g. Windows or Linux) with a Java interpreter. It will be free accessible for research purposes from http:\u002F\u002Fmain.amu.edu.pl\u002F~zlisi.",{"paper_id":7047,"title":7048,"year":197,"month":855,"day":63,"doi":7049,"resource_url":7050,"first_page":63,"last_page":63,"pdf_url":7051,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7052,"paper_type":860,"authors":7053,"abstract":7071},"lrec2004-main-378","Development of Resources for a Bilingual Automatic Index System of Broadcast News in Basque and Spanish","10.63317\u002F3r5jvz3doocp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-378","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F609.pdf","bordel-etal-2004-development",[7054,7056,7058,7061,7063,7065,7067,7069],{"paper_id":7047,"author_seq":247,"given_name":3656,"surname":7055,"affiliation":63,"orcid":63},"Bordel",{"paper_id":7047,"author_seq":232,"given_name":2846,"surname":7057,"affiliation":63,"orcid":63},"Ezeiza",{"paper_id":7047,"author_seq":218,"given_name":7059,"surname":7060,"affiliation":63,"orcid":63},"K.","Lopez de 
Ipina",{"paper_id":7047,"author_seq":203,"given_name":2298,"surname":7062,"affiliation":63,"orcid":63},"Méndez",{"paper_id":7047,"author_seq":188,"given_name":2298,"surname":7064,"affiliation":63,"orcid":63},"Peñagarikano",{"paper_id":7047,"author_seq":172,"given_name":7066,"surname":1004,"affiliation":63,"orcid":63},"T.",{"paper_id":7047,"author_seq":155,"given_name":3653,"surname":7068,"affiliation":63,"orcid":63},"Tovar",{"paper_id":7047,"author_seq":138,"given_name":3749,"surname":7070,"affiliation":63,"orcid":63},"Zulueta","The development of an automatic index system of broadcast news requires appropriate Video and Language Resources (LR) to design all the components of the system. Nowadays, large and well-defined resources can be found in most widely used languages, but there is a lot of work to do with respect to minority languages. The main goal of this work is the design of resources in Basque and Spanish for the transcription of broadcast news. These two languages have been chosen because they are both official in the Basque Autonomous Community and they are used in the Basque Public Radio and Television (EITB).",{"paper_id":7073,"title":7074,"year":197,"month":855,"day":63,"doi":7075,"resource_url":7076,"first_page":63,"last_page":63,"pdf_url":7077,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7078,"paper_type":860,"authors":7079,"abstract":7093},"lrec2004-main-379","An Acoustic Corpus Contemplating Regional Variation for Studies of European Portuguese 
Nasals","10.63317\u002F59t9km26goos","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-379","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F610.pdf","teixeira-etal-2004-acoustic",[7080,7082,7085,7088,7091],{"paper_id":7073,"author_seq":247,"given_name":6705,"surname":7081,"affiliation":63,"orcid":63},"Teixeira",{"paper_id":7073,"author_seq":232,"given_name":7083,"surname":7084,"affiliation":63,"orcid":63},"Liliana","Ferreira",{"paper_id":7073,"author_seq":218,"given_name":7086,"surname":7087,"affiliation":63,"orcid":63},"Lurdes","Moutinho",{"paper_id":7073,"author_seq":203,"given_name":7089,"surname":7090,"affiliation":63,"orcid":63},"Rosa Lídia","Coimbra",{"paper_id":7073,"author_seq":188,"given_name":3589,"surname":7092,"affiliation":63,"orcid":63},"Lisboa","Portuguese is one of the two standard Romance varieties having nasal vowels as independent phonemes. These are complex sounds that have a dynamic nature and present several problems for a complete description. In this paper we present a new corpus especially recorded to allow studies of European Portuguese nasal vowels. The main purpose of these studies is the improvement of knowledge about these sounds so that it can be applied to language teaching, speech therapy materials and the articulatory speech synthesizer that is being developed at the University of Aveiro. The corpus described is a valuable resource for such studies due to the regional and contextual coverage and the simultaneous availability of speech and EGG signal. 
Details about corpus definition, recording, annotation and availability are given.",{"paper_id":7095,"title":7096,"year":197,"month":855,"day":63,"doi":7097,"resource_url":7098,"first_page":63,"last_page":63,"pdf_url":7099,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7100,"paper_type":860,"authors":7101,"abstract":7108},"lrec2004-main-380","Experiments on Building Language Resources for Multi-Modal Dialogue Systems","10.63317\u002F4dbpuyrfcw88","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-380","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F611.pdf","romary-etal-2004-experiments",[7102,7103,7106],{"paper_id":7095,"author_seq":247,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},{"paper_id":7095,"author_seq":232,"given_name":7104,"surname":7105,"affiliation":63,"orcid":63},"Amalia","Todirascu",{"paper_id":7095,"author_seq":218,"given_name":1790,"surname":7107,"affiliation":63,"orcid":63},"Langlois","The paper presents the experiments made to adapt and to synchronise the linguistic resources of the French language processing modules integrated in the MIAMM prototype, designed to handle multi-modal human-machine interactions. These experiments allowed us to identify a methodology for adapting multilingual resources for a dialogue system. 
In the paper, we describe the iterative joint process used to build linguistic resources for the two cooperative modules: speech recognition for speech modality and syntactic\u002Fsemantic parsing.",{"paper_id":7110,"title":7111,"year":197,"month":855,"day":63,"doi":7112,"resource_url":7113,"first_page":63,"last_page":63,"pdf_url":7114,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7115,"paper_type":860,"authors":7116,"abstract":7128},"lrec2004-main-381","Callisto: A Configurable Annotation Workbench","10.63317\u002F2ggqce57s9td","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-381","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F612.pdf","day-etal-2004-callisto",[7117,7119,7122,7125],{"paper_id":7110,"author_seq":247,"given_name":1790,"surname":7118,"affiliation":63,"orcid":63},"Day",{"paper_id":7110,"author_seq":232,"given_name":7120,"surname":7121,"affiliation":63,"orcid":63},"Chad","McHenry",{"paper_id":7110,"author_seq":218,"given_name":7123,"surname":7124,"affiliation":63,"orcid":63},"Robyn","Kozierok",{"paper_id":7110,"author_seq":203,"given_name":7126,"surname":7127,"affiliation":63,"orcid":63},"Laurel","Riek","In order to support a range of textual annotation tasks, we have developed a new annotation tool called Callisto. To promote task-specific specialization of the interface and associated constraint checking, Callisto provides a facility for the independent development, compilation and installation of task module plug-ins (in the form of Java Archive jar files). The common Callisto backend provides a set of \"annotation services\" to which all separate GUI components can subscribe, enabling a common framework through which annotation updates are propagated to all components. A number of annotation task models have already been defined, and those that are of very general applicability have been made easily re-configurable for small changes in task definition. 
Callisto is implemented in Java to make use of Java's considerable support for Unicode-encoded multilingual data. Callisto is freely available for downloading and use.",{"paper_id":7130,"title":7131,"year":197,"month":855,"day":63,"doi":7132,"resource_url":7133,"first_page":63,"last_page":63,"pdf_url":7134,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7135,"paper_type":860,"authors":7136,"abstract":7151},"lrec2004-main-382","The Effect of Text Difficulty on Machine Translation Performance – A Pilot Study with ILR-Rated Texts in Spanish, Farsi, Arabic, Russian and Korean","10.63317\u002F5gwzcduuk297","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-382","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F613.pdf","clifford-etal-2004-effect",[7137,7140,7143,7146,7149],{"paper_id":7130,"author_seq":247,"given_name":7138,"surname":7139,"affiliation":63,"orcid":63},"Ray","Clifford",{"paper_id":7130,"author_seq":232,"given_name":7141,"surname":7142,"affiliation":63,"orcid":63},"Neil","Granoien",{"paper_id":7130,"author_seq":218,"given_name":7144,"surname":7145,"affiliation":63,"orcid":63},"Douglas","Jones",{"paper_id":7130,"author_seq":203,"given_name":7147,"surname":7148,"affiliation":63,"orcid":63},"Wade","Shen",{"paper_id":7130,"author_seq":188,"given_name":7139,"surname":7150,"affiliation":63,"orcid":63},"Weinstein","We report on initial experiments that examine the relationship between automated measures of machine translation performance (Doddington, 2003, and Papineni et al. 2001) and the Interagency Language Roundtable (ILR) scale of language proficiency\u002Fdifficulty that has been in standard use for U.S. government language training and assessment for the past several decades (Child, Clifford and Lowe 1993). 
The main question we ask is how technology-oriented measures of MT performance relate to the ILR difficulty levels, where we understand that a linguist with ILR proficiency level N is expected to be able to understand a document rated at level N, but to have increasing difficulty with documents at higher levels. In this paper, we find that some key aspects of MT performance track with ILR difficulty levels, primarily for MT output whose quality is good enough to be readable by human readers.",{"paper_id":7153,"title":7154,"year":197,"month":855,"day":63,"doi":7155,"resource_url":7156,"first_page":63,"last_page":63,"pdf_url":7157,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7158,"paper_type":860,"authors":7159,"abstract":7166},"lrec2004-main-383","An Annotated German-Language Medical Text Corpus as Language Resource","10.63317\u002F4k8adg8k358x","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-383","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F614.pdf","wermter-hahn-2004-annotated",[7160,7163],{"paper_id":7153,"author_seq":247,"given_name":7161,"surname":7162,"affiliation":63,"orcid":63},"Joachim","Wermter",{"paper_id":7153,"author_seq":232,"given_name":7164,"surname":7165,"affiliation":63,"orcid":63},"Udo","Hahn","We describe the structure of a German-language corpus which contains a variety of medical text genres. Clinical documents (discharge summaries, pathology, histology and surgery reports) are distinguished from non-clinical ones (textbook articles and consumer health care documents from a Web portal). 
After introducing a medical extension of the general-language STTS tagset which accounts for unique features of the medical sublanguage encountered in these documents, we discuss some of the quantitative properties of the annotations (e.g., distribution patterns of part-of-speech tags).",{"paper_id":7168,"title":7169,"year":197,"month":855,"day":63,"doi":7170,"resource_url":7171,"first_page":63,"last_page":63,"pdf_url":7172,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7173,"paper_type":860,"authors":7174,"abstract":7181},"lrec2004-main-384","Application of the BLEU Method for Evaluating Free-text Answers in an E-learning Environment","10.63317\u002F469ontwfq6bq","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-384","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F615.pdf","perez-etal-2004-application",[7175,7177,7179],{"paper_id":7168,"author_seq":247,"given_name":1060,"surname":7176,"affiliation":63,"orcid":63},"Pérez",{"paper_id":7168,"author_seq":232,"given_name":1000,"surname":7178,"affiliation":63,"orcid":63},"Alfonseca",{"paper_id":7168,"author_seq":218,"given_name":7180,"surname":2911,"affiliation":63,"orcid":63},"Pilar","We have applied BLEU (Papineni et al., 2001), a method originally designed to evaluate automatic Machine Translation systems, in assessing short essays written by students. We study how much BLEU scores correlate to human scorings and other keyword-based evaluation metrics. We conclude that, although it is only applicable to a restricted category of questions, BLEU attains better results than other keyword-based procedures. 
Its simplicity and language-independence makes it a good candidate to be combined with other well-studied computer assessment scoring procedures.",{"paper_id":7183,"title":7184,"year":197,"month":855,"day":63,"doi":7185,"resource_url":7186,"first_page":63,"last_page":63,"pdf_url":7187,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7188,"paper_type":860,"authors":7189,"abstract":7201},"lrec2004-main-385","Extraction of Hyperonymy of Adjectives from Large Corpora by Using the Neural Network Model","10.63317\u002F4nxsprrrf29v","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-385","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F616.pdf","kanzaki-etal-2004-extraction",[7190,7193,7196,7197,7200],{"paper_id":7183,"author_seq":247,"given_name":7191,"surname":7192,"affiliation":63,"orcid":63},"Kyoko","Kanzaki",{"paper_id":7183,"author_seq":232,"given_name":7194,"surname":7195,"affiliation":63,"orcid":63},"Qing","Ma",{"paper_id":7183,"author_seq":218,"given_name":5946,"surname":2243,"affiliation":63,"orcid":63},{"paper_id":7183,"author_seq":203,"given_name":7198,"surname":7199,"affiliation":63,"orcid":63},"Masaki","Murata",{"paper_id":7183,"author_seq":188,"given_name":5171,"surname":5172,"affiliation":63,"orcid":63},"In this research, we extract hierarchical abstract concepts of adjectives automatically from large corpora by using the Neural Network Model. We show the hierarchies on the Semantic Map and compare the hierarchies in the Semantic Map and a manually prepared thesaurus. We recognized five types of distributions on the map. By comparing the Semantic Map and a manual thesaurus, we found that the word that the abstract noun belongs to, whether a person, thing or event, is introduced as the standard of classification in the manual thesaurus. On the other hand, in the Semantic Map, we found that abstract nouns belonging to people or events are distributed together. 
We also found that the hierarchies of sokumen (side), imi (meaning), and kanten (viewpoint) are necessary for a category of adjectives.",{"paper_id":7203,"title":7204,"year":197,"month":855,"day":63,"doi":7205,"resource_url":7206,"first_page":63,"last_page":63,"pdf_url":7207,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7208,"paper_type":860,"authors":7209,"abstract":63},"lrec2004-main-386","The Penn Discourse Treebank","10.63317\u002F2rrr3zi7vbno","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-386","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F618.pdf","miltsakaki-etal-2004-penn",[7210,7213,7216,7219],{"paper_id":7203,"author_seq":247,"given_name":7211,"surname":7212,"affiliation":63,"orcid":63},"Eleni","Miltsakaki",{"paper_id":7203,"author_seq":232,"given_name":7214,"surname":7215,"affiliation":63,"orcid":63},"Rashmi","Prasad",{"paper_id":7203,"author_seq":218,"given_name":7217,"surname":7218,"affiliation":63,"orcid":63},"Aravind","Joshi",{"paper_id":7203,"author_seq":203,"given_name":7220,"surname":7221,"affiliation":63,"orcid":63},"Bonnie","Webber",{"paper_id":7223,"title":7224,"year":197,"month":855,"day":63,"doi":7225,"resource_url":7226,"first_page":63,"last_page":63,"pdf_url":7227,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7228,"paper_type":860,"authors":7229,"abstract":7238},"lrec2004-main-387","Using the Web as a Corpus for the Syntactic-Based Collocation 
Identification","10.63317\u002F3hjwepohqd4h","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-387","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F619.pdf","seretan-etal-2004-using",[7230,7233,7236],{"paper_id":7223,"author_seq":247,"given_name":7231,"surname":7232,"affiliation":63,"orcid":63},"Violeta","Seretan",{"paper_id":7223,"author_seq":232,"given_name":7234,"surname":7235,"affiliation":63,"orcid":63},"Luka","Nerima",{"paper_id":7223,"author_seq":218,"given_name":2721,"surname":7237,"affiliation":63,"orcid":63},"Wehrli","This paper presents an experiment that uses a Web search engine and a robust parser for the Web-based identification of collocations (statistically significant word associations representing “a conventional way of saying things” (Manning and Schütze, 1999)). We identify the possible collocates of a given word by parsing the text snippets returned by the search engine when querying that word. Then, we rank the list of syntactic co-occurrences retrieved according to the collocational strength of each pair by using different statistical measures.",{"paper_id":7240,"title":7241,"year":197,"month":855,"day":63,"doi":7242,"resource_url":7243,"first_page":63,"last_page":63,"pdf_url":7244,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7245,"paper_type":860,"authors":7246,"abstract":7250},"lrec2004-main-388","Automatic Methods to Supplement Broad-Coverage Subcategorization Lexicons","10.63317\u002F48p8eivtnhvx","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-388","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F620.pdf","schiehlen-spranger-2004-automatic",[7247,7249],{"paper_id":7240,"author_seq":247,"given_name":1357,"surname":7248,"affiliation":63,"orcid":63},"Schiehlen",{"paper_id":7240,"author_seq":232,"given_name":6074,"surname":6075,"affiliation":63,"orcid":63},"The paper describes a system for extracting subcategorization frames of verbs not 
found in existing broad-coverage valency lexicons. The system uses two parameters: the results of a finite-state parser and the predictions of a set of automatically learned rules which transfer subcategorization frames from cognate verbs. An in-depth evaluation quantified the contribution of the individual parameters.",{"paper_id":7252,"title":7253,"year":197,"month":855,"day":63,"doi":7254,"resource_url":7255,"first_page":63,"last_page":63,"pdf_url":7256,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7257,"paper_type":860,"authors":7258,"abstract":7274},"lrec2004-main-389","A Large-Scale Resource for Storing and Recognizing Technical Terminology","10.63317\u002F534iab5dnzib","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-389","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F621.pdf","harkema-etal-2004-large",[7259,7261,7263,7264,7266,7269,7272],{"paper_id":7252,"author_seq":247,"given_name":3420,"surname":7260,"affiliation":63,"orcid":63},"Harkema",{"paper_id":7252,"author_seq":232,"given_name":2607,"surname":7262,"affiliation":63,"orcid":63},"Gaizauskas",{"paper_id":7252,"author_seq":218,"given_name":1024,"surname":1464,"affiliation":63,"orcid":63},{"paper_id":7252,"author_seq":203,"given_name":7141,"surname":7265,"affiliation":63,"orcid":63},"Davis",{"paper_id":7252,"author_seq":188,"given_name":7267,"surname":7268,"affiliation":63,"orcid":63},"Yikun","Guo",{"paper_id":7252,"author_seq":172,"given_name":7270,"surname":7271,"affiliation":63,"orcid":63},"Angus","Roberts",{"paper_id":7252,"author_seq":155,"given_name":7273,"surname":7271,"affiliation":63,"orcid":63},"Ian","This paper discusses the design and implementation of Termino, a large-scale terminological resource for text processing. Dealing with terminology is a difficult but unavoidable task for natural language processing applications, such as information extraction in technical domains. 
Complex, heterogeneous information must be stored about large numbers of terms. At the same time term recognition must be performed in realistic time. Termino attempts to reconcile this tension by maintaining a flexible, extensible relational database for storing terminological information and compiling finite state machines from this database to do term recognition.",{"paper_id":7276,"title":7277,"year":197,"month":855,"day":63,"doi":7278,"resource_url":7279,"first_page":63,"last_page":63,"pdf_url":7280,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7281,"paper_type":860,"authors":7282,"abstract":7286},"lrec2004-main-390","Evaluation of a Multimodal Dialogue System for Small-screen Devices","10.63317\u002F2jnqa6uwf79n","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-390","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F622.pdf","hemsen-2004-evaluation",[7283],{"paper_id":7276,"author_seq":247,"given_name":7284,"surname":7285,"affiliation":63,"orcid":63},"Holmer","Hemsen","The small screen size of mobile phone devices introduces usability problems for the development of graphical user interfaces and applications beyond pure telephony applications. The question that arises is, in which way a multimodal application including speech interaction can circumvent these restrictions and usability problems. In this article problems during the development of the user interface for a prototype application in the real estate domain are described and design decisions taken are discussed. 
A first user test is described, which shows the acceptance of the system, but also usability problems that still needs to be solved.",{"paper_id":7288,"title":7289,"year":197,"month":855,"day":63,"doi":7290,"resource_url":7291,"first_page":63,"last_page":63,"pdf_url":7292,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7293,"paper_type":860,"authors":7294,"abstract":7299},"lrec2004-main-391","Web Services for Language Resources and Language Technology Applications","10.63317\u002F53jun6orkfd7","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-391","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F624.pdf","biemann-etal-2004-web",[7295,7296,7297,7298],{"paper_id":7288,"author_seq":247,"given_name":2000,"surname":6379,"affiliation":63,"orcid":63},{"paper_id":7288,"author_seq":232,"given_name":961,"surname":6464,"affiliation":63,"orcid":63},{"paper_id":7288,"author_seq":218,"given_name":6381,"surname":6382,"affiliation":63,"orcid":63},{"paper_id":7288,"author_seq":203,"given_name":2000,"surname":6384,"affiliation":63,"orcid":63},"In this paper we discuss the application of web service technology to the language technology (LT) and corpus processing domain. 
Motivated by a host of language technology tools which are widely available but which lack common technical standards for integrating them into language technology applications we discuss the implementation of a web service-based API for web-based processing and accessing of large linguistic data resources.",{"paper_id":7301,"title":7302,"year":197,"month":855,"day":63,"doi":7303,"resource_url":7304,"first_page":63,"last_page":63,"pdf_url":7305,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7306,"paper_type":860,"authors":7307,"abstract":7332},"lrec2004-main-392","Development of New Telephone Speech Databases for French: the NEOLOGOS Project","10.63317\u002F4axoeotppjiv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-392","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F625.pdf","pinto-etal-2004-development",[7308,7309,7312,7314,7316,7319,7320,7323,7325,7326,7329],{"paper_id":7301,"author_seq":247,"given_name":1566,"surname":4471,"affiliation":63,"orcid":63},{"paper_id":7301,"author_seq":232,"given_name":7310,"surname":7311,"affiliation":63,"orcid":63},"Delphine","Charlet",{"paper_id":7301,"author_seq":218,"given_name":4199,"surname":7313,"affiliation":63,"orcid":63},"François",{"paper_id":7301,"author_seq":203,"given_name":7315,"surname":4209,"affiliation":63,"orcid":63},"Djamel",{"paper_id":7301,"author_seq":188,"given_name":7317,"surname":7318,"affiliation":63,"orcid":63},"Olivier","Boëffard",{"paper_id":7301,"author_seq":172,"given_name":3087,"surname":3088,"affiliation":63,"orcid":63},{"paper_id":7301,"author_seq":155,"given_name":7321,"surname":7322,"affiliation":63,"orcid":63},"Odile","Mella",{"paper_id":7301,"author_seq":138,"given_name":3350,"surname":7324,"affiliation":63,"orcid":63},"Bimbot",{"paper_id":7301,"author_seq":121,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},{"paper_id":7301,"author_seq":104,"given_name":7327,"surname":7328,"affiliation":63,"orcid":63},"Yann",
"Philip",{"paper_id":7301,"author_seq":87,"given_name":7330,"surname":7331,"affiliation":63,"orcid":63},"Francis","Charpentier","The NEOLOGOS project is a speech databases creation project for the French language, resulting from a collaboration between French universities and industrial companies, and supported by the French Ministry for Research. The goal of NEOLOGOS is to create new kinds of speech databases: firstly, a 1000 speakers telephone database of children’s voices, called PAIDIALOGOS, following the SpeechDat guidelines with some adaptations to the context of children speakers; secondly, a 200 speakers telephone database of adult voices, called IDIOLOGOS, with a new special design to provide adequate data for very fast adaptation techniques and for ASR systems making use of speakers characteristics.",{"paper_id":7334,"title":7335,"year":197,"month":855,"day":63,"doi":7336,"resource_url":7337,"first_page":63,"last_page":63,"pdf_url":7338,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7339,"paper_type":860,"authors":7340,"abstract":7347},"lrec2004-main-393","Top Ontology as a Tool for Semantic Role Tagging","10.63317\u002F3ywf94kg4qby","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-393","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F626.pdf","pala-smrz-2004-top",[7341,7344],{"paper_id":7334,"author_seq":247,"given_name":7342,"surname":7343,"affiliation":63,"orcid":63},"Karel","Pala",{"paper_id":7334,"author_seq":232,"given_name":7345,"surname":7346,"affiliation":63,"orcid":63},"Pavel","Smrz","This paper deals with the semantic roles in verb valency frames. It examines ontologies, particularly Top Ontology from the EuroWordnet 1,2 project in relation to the tagging of semantic roles of verb complements. We are aiming at a consistent system of semantic role tags that would form a base for lexico-semantic constraints integrated into a natural language parser. 
A new notation that exploits semantic roles (deep cases) based on the EuroWordNet Top Ontology and the set of the Base Concepts is also presented as well as preliminary statistical results of our research effort.",{"paper_id":7349,"title":7350,"year":197,"month":855,"day":63,"doi":7351,"resource_url":7352,"first_page":63,"last_page":63,"pdf_url":7353,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7354,"paper_type":860,"authors":7355,"abstract":7365},"lrec2004-main-394","A Suite of Tools for Marking Up Textual Data for Temporal Text Mining Scenarios","10.63317\u002F2gpt5c2sdbwa","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-394","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F627.pdf","vasilakopoulos-etal-2004-suite",[7356,7359,7362],{"paper_id":7349,"author_seq":247,"given_name":7357,"surname":7358,"affiliation":63,"orcid":63},"Argyrios","Vasilakopoulos",{"paper_id":7349,"author_seq":232,"given_name":7360,"surname":7361,"affiliation":63,"orcid":63},"Michele","Bersani",{"paper_id":7349,"author_seq":218,"given_name":7363,"surname":7364,"affiliation":63,"orcid":63},"William J.","Black","Text Mining is a relatively new area of research, very interesting for both computational linguists and data miners. It involves collecting and analyzing quantities of textual data by domain experts, whose main task is the manual revision of markup. We describe a suite of tools used to simplify the process: the Parmenides System that consists of data warehouse, ontology, semi-automatic information extraction and data mining tools. 
Here we focus on the Annotation Editor which incorporates linguistic tools that initialize the markup automatically.",{"paper_id":7367,"title":7368,"year":197,"month":855,"day":63,"doi":7369,"resource_url":7370,"first_page":63,"last_page":63,"pdf_url":7371,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7372,"paper_type":860,"authors":7373,"abstract":7378},"lrec2004-main-395","Frequent Term Distribution Measures for Dataset Profiling","10.63317\u002F2oosiivr6pq2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-395","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F629.pdf","de-roeck-etal-2004-frequent",[7374,7375,7376],{"paper_id":7367,"author_seq":247,"given_name":2977,"surname":5696,"affiliation":63,"orcid":63},{"paper_id":7367,"author_seq":232,"given_name":5693,"surname":5694,"affiliation":63,"orcid":63},{"paper_id":7367,"author_seq":218,"given_name":973,"surname":7377,"affiliation":63,"orcid":63},"Garthwaite","We motivate the need for dataset profiling in the context of evaluation, and show that textual datasets differ in ways that challenge assumptions about the applicability of techniques. We set out some criteria for useful profiling measures. We argue that distribution patterns of frequent words are useful in profiling genre, and report on a series of experiments with ?2 based measures on the TIPSTER collection, and on textual intranet data. 
Findings show substantial differences in the distribution of very frequent terms across datasets.",{"paper_id":7380,"title":7381,"year":197,"month":855,"day":63,"doi":7382,"resource_url":7383,"first_page":63,"last_page":63,"pdf_url":7384,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7385,"paper_type":860,"authors":7386,"abstract":7403},"lrec2004-main-396","Issues in Annotation of the Czech Spontaneous Speech Corpus in the MALACH project","10.63317\u002F3uxjryb8mb8e","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-396","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F630.pdf","psutka-etal-2004-issues",[7387,7390,7392,7394,7397,7399,7401],{"paper_id":7380,"author_seq":247,"given_name":7388,"surname":7389,"affiliation":63,"orcid":63},"Josef","Psutka",{"paper_id":7380,"author_seq":232,"given_name":7345,"surname":7391,"affiliation":63,"orcid":63},"Ircing",{"paper_id":7380,"author_seq":218,"given_name":1078,"surname":7393,"affiliation":63,"orcid":63},"Hajič",{"paper_id":7380,"author_seq":203,"given_name":7395,"surname":7396,"affiliation":63,"orcid":63},"Vlasta","Radová",{"paper_id":7380,"author_seq":188,"given_name":7398,"surname":7389,"affiliation":63,"orcid":63},"Josef V.",{"paper_id":7380,"author_seq":172,"given_name":7363,"surname":7400,"affiliation":63,"orcid":63},"Byrne",{"paper_id":7380,"author_seq":155,"given_name":6610,"surname":7402,"affiliation":63,"orcid":63},"Gustman","The paper present the issues encountered in processing spontaneous Czech speech in the MALACH project. 
Specific problems connected with a frequent occurrence of colloquial words in spontaneous Czech are analyzed; a partial solution is proposed and experimentally evaluated.",{"paper_id":7405,"title":7406,"year":197,"month":855,"day":63,"doi":7407,"resource_url":7408,"first_page":63,"last_page":63,"pdf_url":7409,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7410,"paper_type":860,"authors":7411,"abstract":63},"lrec2004-main-397","Ontology Evaluation Functionalities of RDF(S),DAML+OIL, and OWL Parsers and Ontology Platforms","10.63317\u002F4ujmhwxuwzow","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-397","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F631.pdf","gomez-perez-suarez-figueroa-2004-ontology",[7412,7413],{"paper_id":7405,"author_seq":247,"given_name":2707,"surname":2708,"affiliation":63,"orcid":63},{"paper_id":7405,"author_seq":232,"given_name":7414,"surname":7415,"affiliation":63,"orcid":63},"M. Carmen","Suárez-Figueroa",{"paper_id":7417,"title":7418,"year":197,"month":855,"day":63,"doi":7419,"resource_url":7420,"first_page":63,"last_page":63,"pdf_url":7421,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7422,"paper_type":860,"authors":7423,"abstract":7427},"lrec2004-main-398","Word Association Norms as a Unique Supplement of Traditional Language Resources","10.63317\u002F4i5nnfuwkpkc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-398","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F632.pdf","sinopalnikova-smrz-2004-word",[7424,7426],{"paper_id":7417,"author_seq":247,"given_name":4167,"surname":7425,"affiliation":63,"orcid":63},"Sinopalnikova",{"paper_id":7417,"author_seq":232,"given_name":7345,"surname":7346,"affiliation":63,"orcid":63},"The paper deals with reuse and testing of psycholinguistic resources in linguistic studies. 
We report on the experimental work aiming at comparing and evaluation of linguistic information provided by Word Association Norms (WAN) with that derived from other language resources (LRs), namely corpora and wordnets. Results for 3 European languages: Russian, English and Czech are presented. Quantitative and qualitative outputs of the research show that WAN can be employed to enrich the traditional LRs and serve as a tool for their consistency checking.",{"paper_id":7429,"title":7430,"year":197,"month":855,"day":63,"doi":7431,"resource_url":7432,"first_page":63,"last_page":63,"pdf_url":7433,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7434,"paper_type":860,"authors":7435,"abstract":7438},"lrec2004-main-399","Towards a Dynamic Lexicon: Predicting the Syntactic Argument Structure of Complex Verbs","10.63317\u002F3fw5y3nagjkj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-399","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F633.pdf","aldinger-2004-towards",[7436],{"paper_id":7429,"author_seq":247,"given_name":4217,"surname":7437,"affiliation":63,"orcid":63},"Aldinger","The presentation summarizes the results of a recent (2002) corpus-based study on the syntactic argument structure of German separable verbs, in particular the question of its predictability from the argument structure of their bases. The study shows that the argument structure effects of verb formation with separable preverbs are astonishingly regular, even if the meaning of the resulting complex verb itself is opaque. Alternation patterns can be formulated for single preverbs or, in some cases, preverb classes as well. The results can be used in various ways in NLP and linguistic resource building: An implementation, e.g. 
in the form of LFG-style lexical rules, within a parser for German text will allow sensible guesses at argument structure frames for newly encountered complex verbs, even if the argument structure of the base verb is not known. Similarly, the patterns can be used for the acquisition of argument structure information for a (static) NLP lexicon. When integrated with a static dictionary, the patterns thus allow to deal with the productive formation of complex verbs and its impact on syntactic argument structure.",{"paper_id":7440,"title":7441,"year":197,"month":855,"day":63,"doi":7442,"resource_url":7443,"first_page":63,"last_page":63,"pdf_url":7444,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7445,"paper_type":860,"authors":7446,"abstract":7449},"lrec2004-main-400","Semantic Annotating of Czech Corpus via WSD","10.63317\u002F3yja7o5mv3kp","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-400","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F635.pdf","kral-2004-semantic",[7447],{"paper_id":7440,"author_seq":247,"given_name":2607,"surname":7448,"affiliation":63,"orcid":63},"Král","We would like to describe the relationship between word sense disambiguation (WSD) and language resources (LR) working with word senses. We discuss the problem of sense division and tagging. Exploiting specific features of the inflectional languages for WSD is encouraged. We present WSD methods for Czech ambiguous nouns. The advantage of these methods consists in reducing the manual work by using a synonym training set. 
They could be utilized for building a semantically annotated corpus or gaining glosses for Czech WordNet.",{"paper_id":7451,"title":7452,"year":197,"month":855,"day":63,"doi":7453,"resource_url":7454,"first_page":63,"last_page":63,"pdf_url":7455,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7456,"paper_type":860,"authors":7457,"abstract":7469},"lrec2004-main-401","Using the NITE XML Toolkit on the Switchboard Corpus to Study Syntactic Choice: a Case Study","10.63317\u002F234gmoiuwn5p","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-401","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F636.pdf","carletta-etal-2004-using",[7458,7460,7463,7466],{"paper_id":7451,"author_seq":247,"given_name":4252,"surname":7459,"affiliation":63,"orcid":63},"Carletta",{"paper_id":7451,"author_seq":232,"given_name":7461,"surname":7462,"affiliation":63,"orcid":63},"Shipra","Dingare",{"paper_id":7451,"author_seq":218,"given_name":7464,"surname":7465,"affiliation":63,"orcid":63},"Malvina","Nissim",{"paper_id":7451,"author_seq":203,"given_name":7467,"surname":7468,"affiliation":63,"orcid":63},"Tatiana","Nikitina","The NITE XML Toolkit (NXT) provides library support for working with multimodal language corpora. We describe our experiences in using it to study discourse effects on syntactic choice using the parsed Switchboard Corpus as a starting point, as a case study for others who may wish to adopt similar techniques using NXT or one of the other libraries that are beginning to emerge. 
We discuss conversion into the NXT data format; automatic annotation of markables and of constituent length; hand-annotation of markables for animacy information structure, and coreferential links; and data analysis.",{"paper_id":7471,"title":7472,"year":197,"month":855,"day":63,"doi":7473,"resource_url":7474,"first_page":63,"last_page":63,"pdf_url":7475,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7476,"paper_type":860,"authors":7477,"abstract":7483},"lrec2004-main-402","An Annotation Scheme for Information Status in Dialogue","10.63317\u002F5e8ipnwca99c","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-402","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F638.pdf","nissim-etal-2004-annotation",[7478,7479,7480,7481],{"paper_id":7471,"author_seq":247,"given_name":7464,"surname":7465,"affiliation":63,"orcid":63},{"paper_id":7471,"author_seq":232,"given_name":7461,"surname":7462,"affiliation":63,"orcid":63},{"paper_id":7471,"author_seq":218,"given_name":4252,"surname":7459,"affiliation":63,"orcid":63},{"paper_id":7471,"author_seq":203,"given_name":1024,"surname":7482,"affiliation":63,"orcid":63},"Steedman","We present an annotation scheme for information status (IS) in dialogue, and validate it on three Switchboard dialogues. We show that our scheme has good reproducibility, and compare it with previous attempts to code IS and related features. 
We eventually apply the scheme to 147 dialogues, thus producing a corpus that contains nearly 70,000 NPs annotated for IS and over 15,000 coreference links.",{"paper_id":7485,"title":7486,"year":197,"month":855,"day":63,"doi":7487,"resource_url":7488,"first_page":63,"last_page":63,"pdf_url":7489,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7490,"paper_type":860,"authors":7491,"abstract":63},"lrec2004-main-403","Speech Recognition Simulation and its Application for Wizard-of-Oz Experiments","10.63317\u002F4je5ufmtv4tr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-403","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F639.pdf","trutnev-etal-2004-speech",[7492,7494,7496],{"paper_id":7485,"author_seq":247,"given_name":4437,"surname":7493,"affiliation":63,"orcid":63},"Trutnev",{"paper_id":7485,"author_seq":232,"given_name":4224,"surname":7495,"affiliation":63,"orcid":63},"Rozenknop",{"paper_id":7485,"author_seq":218,"given_name":1087,"surname":1088,"affiliation":63,"orcid":63},{"paper_id":7498,"title":7499,"year":197,"month":855,"day":63,"doi":7500,"resource_url":7501,"first_page":63,"last_page":63,"pdf_url":7502,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7503,"paper_type":860,"authors":7504,"abstract":7511},"lrec2004-main-404","Language Modeling Using Dynamic Bayesian Networks","10.63317\u002F45b8hz5tzn26","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-404","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F640.pdf","deviren-etal-2004-language",[7505,7508,7510],{"paper_id":7498,"author_seq":247,"given_name":7506,"surname":7507,"affiliation":63,"orcid":63},"Murat","Deviren",{"paper_id":7498,"author_seq":232,"given_name":3403,"surname":7509,"affiliation":63,"orcid":63},"Daoudi",{"paper_id":7498,"author_seq":218,"given_name":3084,"surname":3085,"affiliation":63,"orcid":63},"In this paper we propose a new approach to language 
modeling based on dynamic Bayesian networks. The principal idea of our approach is to find the dependence relations between variables that represent different linguistic units (word, class, concept, ...) that constitute a language model. In the context of this paper the linguistic units that we consider are syntactic classes and words. Our approach should not be considered as a model combination technique. Rather, it is an original and coherent methodology that processes words and classes in the same model. We attempt to identify and model the dependence of words and classes on their linguistic context. Our ultimate goal is to devise an automatic mechanism that extracts the best dependence relations between a word and its context, i.e., lexical and syntactic. Preliminary results are very encouraging, in particular the model obtained with a Bayesian network where a word depends not only on previous word but also on syntactic classes of two previous words outperforms the bi-gram model.",{"paper_id":7513,"title":7514,"year":197,"month":855,"day":63,"doi":7515,"resource_url":7516,"first_page":63,"last_page":63,"pdf_url":7517,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7518,"paper_type":860,"authors":7519,"abstract":7522},"lrec2004-main-405","Pumping Documents Through a Domain and Genre Classification Pipeline","10.63317\u002F4jb4oz6xnd3d","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-405","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F641.pdf","hahn-wermter-2004-pumping",[7520,7521],{"paper_id":7513,"author_seq":247,"given_name":7164,"surname":7165,"affiliation":63,"orcid":63},{"paper_id":7513,"author_seq":232,"given_name":7161,"surname":7162,"affiliation":63,"orcid":63},"We propose a simple, yet effective, pipeline architecture for document classification. 
The task we intend to solve is to classify large and content-wise heterogeneous document streams on a layered nine-category system, which distinguishes medical from non-medical texts and sorts medical texts into various subgenres. While the document classification problem is often dealt with using computationally powerful and, hence, costly classifiers (e.g., Bayesian ones), we have gathered empirical evidence that a much simpler approach based on n-gram-statistics achieves a comparable level of classification performance.",{"paper_id":7524,"title":7525,"year":197,"month":855,"day":63,"doi":7526,"resource_url":7527,"first_page":63,"last_page":63,"pdf_url":7528,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7529,"paper_type":860,"authors":7530,"abstract":7533},"lrec2004-main-406","A Hybrid Strategy For Regular Grammar Parsing","10.63317\u002F3ogesniu2n5c","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-406","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F642.pdf","simov-osenova-2004-hybrid",[7531,7532],{"paper_id":7524,"author_seq":247,"given_name":3102,"surname":3103,"affiliation":63,"orcid":63},{"paper_id":7524,"author_seq":232,"given_name":3723,"surname":3724,"affiliation":63,"orcid":63},"The paper outlines a hybrid architecture for a partial parser based on regular grammars over XML documents. The parser is used to support the annotation process in the BulTreeBank project. Thus the parser annotates only the `sure' cases. To maximize the number of the analyzed phrases the parser applies a set of grammars in a dynamic fashion. Each grammar determines not only the constituent structure (plus some syntactic dependencies internal to the structure), but also a description of the local and global context of the recognized phrase. The grammars available to the parser are arranged in a network. 
The order of the grammars application depends on the initial ordering in the network and the descriptions associated with the grammars. Thus the traverse is not deterministic. Additionally, the application of the grammars can be interleaved with the applications of other XML tools like remove, insert and transform operations. This architecture provides a flexible means for guiding the linguistic analysis in order to utilize all the available linguistic knowledge and to produce a very accurate partial analysis.",{"paper_id":7535,"title":7536,"year":197,"month":855,"day":63,"doi":7537,"resource_url":7538,"first_page":63,"last_page":63,"pdf_url":7539,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7540,"paper_type":860,"authors":7541,"abstract":7559},"lrec2004-main-407","Cross-Language Acquisition of Semantic Models for Verbal Predicates","10.63317\u002F2tugszxzo8ga","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-407","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F643.pdf","atserias-etal-2004-cross",[7542,7543,7544,7547,7550,7553,7554,7556],{"paper_id":7535,"author_seq":247,"given_name":2105,"surname":6818,"affiliation":63,"orcid":63},{"paper_id":7535,"author_seq":232,"given_name":5748,"surname":5749,"affiliation":63,"orcid":63},{"paper_id":7535,"author_seq":218,"given_name":7545,"surname":7546,"affiliation":63,"orcid":63},"Octavian","Popescu",{"paper_id":7535,"author_seq":203,"given_name":7548,"surname":7549,"affiliation":63,"orcid":63},"Eneko","Agirre",{"paper_id":7535,"author_seq":188,"given_name":7551,"surname":7552,"affiliation":63,"orcid":63},"Aitziber","Atutxa",{"paper_id":7535,"author_seq":172,"given_name":6815,"surname":6816,"affiliation":63,"orcid":63},{"paper_id":7535,"author_seq":155,"given_name":3376,"surname":7555,"affiliation":63,"orcid":63},"Carroll",{"paper_id":7535,"author_seq":138,"given_name":7557,"surname":7558,"affiliation":63,"orcid":63},"Rob","Koeling","This paper presents a 
semantic-driven methodology for the automatic acquisition of verbal models. Our approach relies strongly on the semantic generalizations allowed by already existing resources (e.g. Domain labels, Named Entity categories, concepts in the SUMO ontology, etc). Several experiments have been carried out using comparable corpora in four languages (Italian, Spanish, Basque and English) and two domains (FINANCE and SPORT) showing that the semantic patterns acquired can be general enough to be ported from one language to the other language.",{"paper_id":7561,"title":7562,"year":197,"month":855,"day":63,"doi":7563,"resource_url":7564,"first_page":63,"last_page":63,"pdf_url":7565,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7566,"paper_type":860,"authors":7567,"abstract":7570},"lrec2004-main-408","MED-TYP: A Typological Database for Mediterranean Languages","10.63317\u002F4atxw2cw3yr6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-408","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F644.pdf","sanso-2004-med",[7568],{"paper_id":7561,"author_seq":247,"given_name":1461,"surname":7569,"affiliation":63,"orcid":63},"Sansò","Electronic databases are increasingly popular tools in typological research. Despite the advantages of such tools, there are problems connected both with their construction and with their standardization. For instance, there is generally a considerable gap between the information stored in typological databases and primary data: primary morphosyntactic data are much more difficult to handle computationally than typological generalizations. Moreover, in current typological studies the need for standardization has led to the development of glossing practices and guidelines for collecting data, but there are still too few initiatives to increase standardization in typological databases. 
The aim of this paper is to suggest a radically new approach to the storage of data for typological analysis. The Med-Typ Database, which is currently being developed at the University of Pavia, has been providing us with concrete experience of the problems that need to be addressed when creating typological databases. This database uses XML annotation and aims to be both a collection of data for future analyses of areal distribution of features within the Mediterranean area and a tool for systematic analysis of the range of variation found in various typological domains.",{"paper_id":7572,"title":7573,"year":197,"month":855,"day":63,"doi":7574,"resource_url":7575,"first_page":63,"last_page":63,"pdf_url":7576,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7577,"paper_type":860,"authors":7578,"abstract":7584},"lrec2004-main-409","A graphical Tool for Handling Rule Grammars in Java Speech Grammar Format","10.63317\u002F2kze83gireyr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-409","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F645.pdf","georgila-etal-2004-graphical",[7579,7582,7583],{"paper_id":7572,"author_seq":247,"given_name":7580,"surname":7581,"affiliation":63,"orcid":63},"Kallirroi","Georgila",{"paper_id":7572,"author_seq":232,"given_name":1111,"surname":1112,"affiliation":63,"orcid":63},{"paper_id":7572,"author_seq":218,"given_name":1018,"surname":3575,"affiliation":63,"orcid":63},"This paper describes a graphical tool used for generating and depicting rule grammars in the Java Speech Grammar Format (JSGF), which has been developed in the framework of the EC-funded research project GEMINI (Generic Environment for Multilingual Interactive Natural Interfaces, IST-2001-32343). A vocabulary builder component that produces the phonetic transcription of the words included in the grammar file is also incorporated into the tool. 
Currently, the tool supports embedded grapheme-to-phoneme conversion only for Greek in SAMPA format. However, a language-independent function is included that enables the user to write context-dependent rules for symbol conversions (both grapheme-to-phoneme and phoneme-to-grapheme). Manual vs. tool-based handling of grammars are compared and evaluated in terms of time required for grammar creation and efficiency.",{"paper_id":7586,"title":7587,"year":197,"month":855,"day":63,"doi":7588,"resource_url":7589,"first_page":63,"last_page":63,"pdf_url":7590,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7591,"paper_type":860,"authors":7592,"abstract":7596},"lrec2004-main-410","A Flexible Language Acquisition Tool Kit for Natural Language Processing","10.63317\u002F4cqqn422okgm","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-410","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F647.pdf","sheremetyeva-2004-flexible",[7593],{"paper_id":7586,"author_seq":247,"given_name":7594,"surname":7595,"affiliation":63,"orcid":63},"Svetlana","Sheremetyeva","The paper describes a Flexible Language Acquisition Tool (FLAT) kit, - a suite of multipurpose interactive tools for developing NLP systems and\u002For training computational linguists. The kit facilitates the use of linguistic expertise in developing NLP applications and allows for maintaining and improving a system output without extra programming effort. 
It can be used for any language based on ANSI character set without reengineering and has an element providing for its integration in any NLP application, thus dramatically reducing the complexity and costs of development process.",{"paper_id":7598,"title":7599,"year":197,"month":855,"day":63,"doi":7600,"resource_url":7601,"first_page":63,"last_page":63,"pdf_url":7602,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7603,"paper_type":860,"authors":7604,"abstract":7607},"lrec2004-main-411","The Effect of Bias on an Automatically-built Word Sense Corpus","10.63317\u002F494mepyka46h","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-411","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F648.pdf","martinez-agirre-2004-effect",[7605,7606],{"paper_id":7598,"author_seq":247,"given_name":1790,"surname":2056,"affiliation":63,"orcid":63},{"paper_id":7598,"author_seq":232,"given_name":7548,"surname":7549,"affiliation":63,"orcid":63},"The goal of this paper is to explore the large-scale automatic acquisition of sense-tagged examples to be used for Word Sense Disambiguation (WSD). We have applied the ``monosemous relatives'' method on the Web in order to build such a resource for all nouns in WordNet. The analysis of some parameters revealed that the distribution of the word senses (bias) in the training and test corpus is a determinant factor. 
Provided there is a method to approximate the bias for each word sense, the results we obtained for English are comparable to the use of hand-tagged data (Semcor), which is a very interesting perspective for lesser studied languages.",{"paper_id":7609,"title":7610,"year":197,"month":855,"day":63,"doi":7611,"resource_url":7612,"first_page":63,"last_page":63,"pdf_url":7613,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7614,"paper_type":860,"authors":7615,"abstract":7630},"lrec2004-main-412","Bilingual Connections for Trilingual Corpora: An XML Approach","10.63317\u002F5cvjyzt9j2wd","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-412","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F649.pdf","arranz-etal-2004-bilingual",[7616,7618,7620,7623,7624,7627],{"paper_id":7609,"author_seq":247,"given_name":3371,"surname":7617,"affiliation":63,"orcid":63},"Arranz",{"paper_id":7609,"author_seq":232,"given_name":2530,"surname":7619,"affiliation":63,"orcid":63},"Castell",{"paper_id":7609,"author_seq":218,"given_name":7621,"surname":7622,"affiliation":63,"orcid":63},"Josep Maria","Crego",{"paper_id":7609,"author_seq":203,"given_name":6986,"surname":6987,"affiliation":63,"orcid":63},{"paper_id":7609,"author_seq":188,"given_name":7625,"surname":7626,"affiliation":63,"orcid":63},"Adrià","de Gispert",{"paper_id":7609,"author_seq":172,"given_name":7628,"surname":7629,"affiliation":63,"orcid":63},"Patrik","Lambert","This paper describes the design and development of a trilingual spontaneous speech corpus for statistical speech-to-speech translation. The languages considered are Catalan, Spanish and US-English. This corpus has been built bearing in mind the strong need for multilingual collections of on-line data within the area of statistical translation, as well as the need for data that are parallel or aligned, that contain different types of linguistic information and that can be used by different translation systems. 
For that reason, our aim has been the creation of a linguistically-enriched resource with an XML-based DTD that allows a useful, transparent and flexible storage of the data. Moreover, these resources are also valuable for a wide range of Natural Language Processing applications, such as multilingual resource acquisition or word sense discrimination, among others.",{"paper_id":7632,"title":7633,"year":197,"month":855,"day":63,"doi":7634,"resource_url":7635,"first_page":63,"last_page":63,"pdf_url":7636,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7637,"paper_type":860,"authors":7638,"abstract":7650},"lrec2004-main-413","CoGesT: a Formal Transcription System for Conversational Gesture","10.63317\u002F47zr9ujpxry4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-413","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F650.pdf","trippel-etal-2004-cogest",[7639,7640,7641,7644,7645,7647,7649],{"paper_id":7632,"author_seq":247,"given_name":4733,"surname":4734,"affiliation":63,"orcid":63},{"paper_id":7632,"author_seq":232,"given_name":2494,"surname":2495,"affiliation":63,"orcid":63},{"paper_id":7632,"author_seq":218,"given_name":7642,"surname":7643,"affiliation":63,"orcid":63},"Alexandra","Thies",{"paper_id":7632,"author_seq":203,"given_name":6917,"surname":6918,"affiliation":63,"orcid":63},{"paper_id":7632,"author_seq":188,"given_name":1897,"surname":7646,"affiliation":63,"orcid":63},"Looks",{"paper_id":7632,"author_seq":172,"given_name":1900,"surname":7648,"affiliation":63,"orcid":63},"Hell",{"paper_id":7632,"author_seq":155,"given_name":1815,"surname":6920,"affiliation":63,"orcid":63},"In order to create reusable and sustainable multimodal resources a transcription model for hand and arm gestures in conversation is needed. We argue that transcription systems so far developed for sign language transcription and psychological analysis are not suitable for the linguistic analysis of conversational gesture. 
Such a model must adhere to a strict form-function distinction and be both computationally explicit and compatible with descriptive notations such as feature structures in other areas of computational and descriptive linguistics. We describe the development and evaluation of a suitable formal model using a feature-based transcription system, concentrating as a first step on arm gestures within the context of the development of an annotated video resource and gesture lexicon.",{"paper_id":7652,"title":7653,"year":197,"month":855,"day":63,"doi":7654,"resource_url":7655,"first_page":63,"last_page":63,"pdf_url":7656,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7657,"paper_type":860,"authors":7658,"abstract":63},"lrec2004-main-414","Memory-based Classification of Proper Names in Norwegian","10.63317\u002F2nm5k47fntwj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-414","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F652.pdf","noklestad-2004-memory",[7659],{"paper_id":7652,"author_seq":247,"given_name":7660,"surname":7661,"affiliation":63,"orcid":63},"Anders","Nøklestad",{"paper_id":7663,"title":7664,"year":197,"month":855,"day":63,"doi":7665,"resource_url":7666,"first_page":63,"last_page":63,"pdf_url":7667,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7668,"paper_type":860,"authors":7669,"abstract":7672},"lrec2004-main-415","Comparative Evaluations in the Domain of Automatic Speech Recognition","10.63317\u002F25yx8u833z9w","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-415","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F654.pdf","trutnev-rajman-2004-comparative",[7670,7671],{"paper_id":7663,"author_seq":247,"given_name":4437,"surname":7493,"affiliation":63,"orcid":63},{"paper_id":7663,"author_seq":232,"given_name":1087,"surname":1088,"affiliation":63,"orcid":63},"The goal of this contribution is to present the results of a 
comparative evaluation of different, academic and commercial, speech recognitions engines (SREs). The evaluation was carried out at EPFL. The same test data sets were used for all the systems and the comparison was made on the basis of the obtained Word Error Rate (WER) scores. Besides the production of WER scores for several SREs in identical conditions, one of the important objectives of this work was to study relative performances of HMM (Hidden Markov Model) and hybrid HMM-ANN (Hidden Markov Model-Artificial Neural Network) technologies, as used in state-of-the-art systems. A second important objective was to evaluate whether the possibility to have control on the complete recognition process (in particular, the possibility to modify the different - acoustic and language - models used by the SREs), as it is often the case for academic SREs, lead to an observable advantage over the commercial systems that most often are delivered with pre-defined, non modifiable models. The evaluated SREs were all speaker independent continuous speech recognition engines either academic systems widely used in the research community or commercial tools currently available on the market. In this work, we considered 3 academic systems (HTK, Sirroco, and Strut\u002FDRSpeech) and 2 commercial systems (SRE1 and SRE2). HTK and Sirroco are HMM-based systems, while Strut\u002FDRSpeech uses a hybrid approach where ANN (multi-layer perceptron) are used for the estimation of phonemes probabilities distributions. The SRE1 and SRE2 engines are provided with their own models.  Various types of data, in two different languages (French and German), were used for the training (acoustic model and language model) and the evaluation of the systems. More precisely, for the training, we used: 1. the Swiss French Polyphone (SFP) database (continuous read speech in French, ~3'000 utterances spoken by ~400 persons) to train the acoustic models of both HMM-based and ANN-based systems for French; 2. 
the German SpeechDat (SDGe) database (continuous read speech in German, ~5'000 sentences spoken by ~3'000 persons) to train the acoustic models of the HMM-based systems for German; 3. the transcriptions of spontaneous speech extracted either from dialogues recorded during the InfoVox project (yielding a corpus of ~20'000 word in French) or from the SDGe database (yielding a corpus of ~400'000 words in German) to train the French and German language models. For the evaluation itself: 1. continuous free telephone quality speech recordings, consisting of user utterances from human-machine dialogues with a telephone-based vocal information system about restaurants, and telephone quality digits recordings extracted from the SFP database were used for the evaluation in French; the recordings of user utterances were produced during the 2 field-tests of the InfoVox project and correspond to a dataset of ~900 spoken utterances; the recorded digits correspond to a dataset of ~400 spoken utterances; 2. continuous microphone quality read speech recordings, consisting of user utterances from human-machine dialogues with a Smart home system (Inspire project), and telephone quality digits recordings extracted from the SDGe database were used for the evaluation in German; the recordings of user utterances were produced during the Inspire project and correspond to a dataset of ~1'400 spoken utterances; the recorded digits correspond to a dataset of 500 spoken utterances; furthermore, different types of noise generation techniques corresponding to ~30 noise conditions were applied to the recorded user utterances, therefore yielding an augmented evaluation database. The following evaluation protocol was used:  1. training of the acoustic models: the SRE1 and SRE2 systems are provided with their own internal acoustic models; acoustic model training was therefore required only for the academic systems; 2. 
training the language models: the SRE1 systems are provided with specific tools to train language models in their proprietary formats. For other systems, the CMU Toolkit was used. For both the acoustic models and the language models, the above mentioned training data was systematically used, thus yielding comparable models and increasing the relevance of the comparative evaluation of the recognition performances. 3. the training of the various meta-parameters (such as the scaling factor for language model for HTK, or inter-model transition penalties for HTK, Sirroco and Strut\u002FDRSpeech) was performed for all systems on the basis of the same training data (~20% of InfoVox and Inspire utterances); 4. for the evaluation as such, the above mentioned test data (SFP and SDGe digits and ~80% of InfoVox and Inspire utterances) was used for all systems; recognition performance evaluation was therefore carried out for all 5 systems in French and in German using the MAPSSWE Test implemented in the 'sclite' tool; WER scores for each of the SREs and for each of the test data sets were produced and compared.  The main results obtained during the experiments were the following: 1. the evaluation of the French HMM-based acoustic model on SFP digits shows a WER of 22.1% vs a WER of 36.0% obtained with the ANN-based acoustic model. Furthermore, the evaluation of the French acoustic models of the commercial systems on the SFP digits shows a WER of 8.0% for SRE2 and of 10.1% for SRE1; 2. the evaluation of the HMM-based HTK system on InfoVox utterances shows a WER of 61.5% for the first field-test and 63.3% for the second vs 72.5% for the first field-test and 76.6% for the second for Strut\u002FDRSpeech system. SRE2 shows a WER of 65.5% for the first field-test and 65.0% for the second. The corresponding results for SRE1 are 67.9% and 66.3% respectively; 3. 
the evaluation of German acoustic models on SDGe digits shows a WER of 26.6% for the HMM-based acoustic model, 0.4% for the SRE1 acoustic model and 5.8% for the SRE2 acoustic model. Furthermore, HTK shows a WER of 81.0% for Inspire utterances vs 35.8% obtained with SRE1.  In conclusion, the obtained results show that HMM-based technology performs better than the hybrid approach. HMM performance remains also better than the one of commercial systems for continuous French speech. On the other hand, the commercial systems show better recognition accuracy for continuous German speech.",{"paper_id":7674,"title":7675,"year":197,"month":855,"day":63,"doi":7676,"resource_url":7677,"first_page":63,"last_page":63,"pdf_url":7678,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7679,"paper_type":860,"authors":7680,"abstract":7684},"lrec2004-main-416","Consistent Storage of Metadata in Inference Lexica: the MetaLex Approach","10.63317\u002F2t5nrjpwj9a6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-416","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F655.pdf","trippel-etal-2004-consistent",[7681,7682,7683],{"paper_id":7674,"author_seq":247,"given_name":4733,"surname":4734,"affiliation":63,"orcid":63},{"paper_id":7674,"author_seq":232,"given_name":4728,"surname":3061,"affiliation":63,"orcid":63},{"paper_id":7674,"author_seq":218,"given_name":2494,"surname":2495,"affiliation":63,"orcid":63},"With MetaLex we introduce a framework for metadata management where information can be inferred from different areas of metadata coding, such as metadata for catalogue descriptions, linguistic levels, or tiers. This is done for consistency and efficiency in metadata recording and applies the same inference techniques that are used for lexical inference. 
For this purpose we motivate the need for metadata descriptions on all document levels, describe the different structures of metadata, use existing metadata recommendations on different levels of annotations, and show a usecase of metadata inference.",{"paper_id":7686,"title":7687,"year":197,"month":855,"day":63,"doi":7688,"resource_url":7689,"first_page":63,"last_page":63,"pdf_url":7690,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7691,"paper_type":860,"authors":7692,"abstract":7698},"lrec2004-main-417","Applying a Part-of-Speech Tagger to Postal Address Detection on the Web","10.63317\u002F2y7qabdgm4ys","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-417","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F656.pdf","marques-goncalves-2004-applying",[7693,7696],{"paper_id":7686,"author_seq":247,"given_name":7694,"surname":7695,"affiliation":63,"orcid":63},"Nuno Cavalheiro","Marques",{"paper_id":7686,"author_seq":232,"given_name":1316,"surname":7697,"affiliation":63,"orcid":63},"Gonçalves","In this paper we show how a POS-tagger can be successfully adapted to a real world information retrieval system capable of extracting postal addresses from the Internet. We develop a particular tag-set for this system. Then we present and discuss the results acquired with the developed postal address tag-set. 
We conclude the paper by presenting a short description of the final IR system for Web postal address retrieval and drawing some conclusions.",{"paper_id":7700,"title":7701,"year":197,"month":855,"day":63,"doi":7702,"resource_url":7703,"first_page":63,"last_page":63,"pdf_url":7704,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7705,"paper_type":860,"authors":7706,"abstract":7717},"lrec2004-main-418","Unifying Lexicons in view of a Phonological and Morphological Lexical DB","10.63317\u002F2yscz6yzqft9","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-418","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F657.pdf","monachini-etal-2004-unifying",[7707,7708,7709,7711,7714],{"paper_id":7700,"author_seq":247,"given_name":6405,"surname":6406,"affiliation":63,"orcid":63},{"paper_id":7700,"author_seq":232,"given_name":4589,"surname":6335,"affiliation":63,"orcid":63},{"paper_id":7700,"author_seq":218,"given_name":7360,"surname":7710,"affiliation":63,"orcid":63},"Mammini",{"paper_id":7700,"author_seq":203,"given_name":7712,"surname":7713,"affiliation":63,"orcid":63},"Sergio","Rossi",{"paper_id":7700,"author_seq":188,"given_name":7715,"surname":7716,"affiliation":63,"orcid":63},"Marisa","Ulivieri","The present work falls in the line of activities promoted by the European Language Resources Association (ELRA) Production Committee (PCom) and raises issues in methods, procedures and tools for the reusability, creation, and management of Language Resources. A two-fold purpose lies behind this experiment. The first aim is to investigate the feasibility, define methods and procedures for combining two Italian lexical resources that have incompatible formats and complementary information into a Unified Lexicon (UL). The adopted strategy and the procedures appointed are described together with the driving criterion of the merging task, where a balance between human and computational efforts is pursued. 
The coverage of the UL has been maximized, by making use of simple and fast matching procedures. The second aim is to exploit this newly obtained resource for implementing the phonological and morphological layers of the CLIPS lexical database. Implementing these new layers and linking them with the already existing syntactic and semantic layers is not a trivial task. The constraints imposed by the model, the impact at the architectural level and the solution adopted in order to make the whole database 'speak' efficiently are presented. Advantages vs. disadvantages are discussed.",{"paper_id":7719,"title":7720,"year":197,"month":855,"day":63,"doi":7721,"resource_url":7722,"first_page":63,"last_page":63,"pdf_url":7723,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7724,"paper_type":860,"authors":7725,"abstract":7740},"lrec2004-main-419","Toward an Annotation Software for Video of Sign Language, Including Image Processing Tools and Signing Space Modelling","10.63317\u002F442zcqrzdvi9","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-419","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F660.pdf","braffort-etal-2004-toward",[7726,7728,7730,7732,7734,7736,7738],{"paper_id":7719,"author_seq":247,"given_name":2846,"surname":7727,"affiliation":63,"orcid":63},"Braffort",{"paper_id":7719,"author_seq":232,"given_name":2846,"surname":7729,"affiliation":63,"orcid":63},"Choisier",{"paper_id":7719,"author_seq":218,"given_name":3653,"surname":7731,"affiliation":63,"orcid":63},"Collet",{"paper_id":7719,"author_seq":203,"given_name":3550,"surname":7733,"affiliation":63,"orcid":63},"Dalle",{"paper_id":7719,"author_seq":188,"given_name":4991,"surname":7735,"affiliation":63,"orcid":63},"Gianni",{"paper_id":7719,"author_seq":172,"given_name":4991,"surname":7737,"affiliation":63,"orcid":63},"Lenseigne",{"paper_id":7719,"author_seq":155,"given_name":4226,"surname":7739,"affiliation":63,"orcid":63},"Segouat","Several French 
national projects have been completed in recent years, allowing linguists and computer scientists working on French Sign Language (FSL) to establish a permanent collaboration. During these projects, several video FSL corpora have been realised. One of the research orientations induced by these projects relates to multi-disciplinary annotation. From the computer scientist's side, the aim is to develop an annotation software integrating different kinds of tools based on image processing and 3D modelling that will be used to automatically annotate both lexical and syntactic information. This article presents several of these components. A first version of the annotation software integrating these components is under development.",{"paper_id":7742,"title":7743,"year":197,"month":855,"day":63,"doi":7744,"resource_url":7745,"first_page":63,"last_page":63,"pdf_url":7746,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7747,"paper_type":860,"authors":7748,"abstract":7751},"lrec2004-main-420","Building Distributed Language Resources By Grid Computing","10.63317\u002F34o5sanugpem","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-420","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F661.pdf","tamburini-2004-building",[7749],{"paper_id":7742,"author_seq":247,"given_name":3776,"surname":7750,"affiliation":63,"orcid":63},"Tamburini","The increasing demand for linguistic resources consisting of substantial amounts of data, such as large corpora, presents the challenge of building computational infrastructures capable of handling unprecedented amounts of information. One possible solution is the sharing of high-level, linguistically motivated and carefully balanced corpora for building one large language resource accessible worldwide. 
The most feasible way of integrating such widely distributed resources seems to be the construction of an infrastructure to connect various sites by interfacing local presentation formats, access methods and policies in a global network to automatically manage access procedures to widely distributed and diversified materials. Grid computing systems are designed to meet these requirements. This paper presents work in progress on an experiment for building a distributed corpus structure prototype. A small web portal was designed to perform global queries in the distributed corpus and collect the results of the same query applied to each local corpus forming part of the grid. Moreover, other computational services such as an online POS tagger and a morphological analyser\u002Fgenerator were inserted into the Grid to show the feasibility of such a scenario.",{"paper_id":7753,"title":7754,"year":197,"month":855,"day":63,"doi":7755,"resource_url":7756,"first_page":63,"last_page":63,"pdf_url":7757,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7758,"paper_type":860,"authors":7759,"abstract":7766},"lrec2004-main-421","Mapping Dependency Structures to Phrase Structures and the Automatic Acquisition of Mapping Rules","10.63317\u002F5khpnfer8b6q","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-421","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F663.pdf","bohnet-seniv-2004-mapping",[7760,7763],{"paper_id":7753,"author_seq":247,"given_name":7761,"surname":7762,"affiliation":63,"orcid":63},"Bernd","Bohnet",{"paper_id":7753,"author_seq":232,"given_name":7764,"surname":7765,"affiliation":63,"orcid":63},"Halyna","Seniv","This paper describes a simple graph grammar based formalism that is capable of translating dependency structures into phrase structures. A procedure is introduced for the automatic acquisition of mapping rules from corpora which are annotated with both phrase structures and dependency structures. 
The acquired rules are evaluated by applying them to a corpus annotated with dependency structures.",{"paper_id":7768,"title":7769,"year":197,"month":855,"day":63,"doi":7770,"resource_url":7771,"first_page":63,"last_page":63,"pdf_url":7772,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7773,"paper_type":860,"authors":7774,"abstract":7778},"lrec2004-main-422","A Framework for Temporal Resolution","10.63317\u002F5n9ya4v8fmpv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-422","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F664.pdf","puscasu-2004-framework",[7775],{"paper_id":7768,"author_seq":247,"given_name":7776,"surname":7777,"affiliation":63,"orcid":63},"Georgiana","Puşcaşu","The ability to identify and analyse temporal information is important for a variety of natural language applications, such as information extraction, question answering and multi-document summarisation. This paper discusses the automatic annotation of temporal information in newswire texts. The temporal tagger adheres to a framework initially designed for anaphora resolution and recently adapted for temporal resolution. 
It achieves an accuracy of 83.66% in identifying and labelling temporal expressions.",{"paper_id":7780,"title":7781,"year":197,"month":855,"day":63,"doi":7782,"resource_url":7783,"first_page":63,"last_page":63,"pdf_url":7784,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7785,"paper_type":860,"authors":7786,"abstract":7788},"lrec2004-main-423","EGRAM – A Grammar Development Environment and its Usage for Language Generation","10.63317\u002F3civr3gd9v4b","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-423","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F665.pdf","busemann-2004-egram",[7787],{"paper_id":7780,"author_seq":247,"given_name":1560,"surname":3618,"affiliation":63,"orcid":63},"The development of large grammars is inherently complex and can hardly be achieved using standard text editors. Although, e.g., emacs can be programmed to support this task to a certain extent, special-purpose functionalities are indispensable. Otherwise the increasing effort for the development and maintenance of large grammars may severely limit their applicability. To avoid this pitfall in the field of language generation, eGram has been developed, which provides a developer-friendly grammar format, syntactic verification of grammar knowledge, abbreviations through meta-rules, and integration with grammar testing. 
eGram is implemented in Java and available under research or commercial licences.",{"paper_id":7790,"title":7791,"year":197,"month":855,"day":63,"doi":7792,"resource_url":7793,"first_page":63,"last_page":63,"pdf_url":7794,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7795,"paper_type":860,"authors":7796,"abstract":7826},"lrec2004-main-424","Large Scale Experiments for Semantic Labeling of Noun Phrases in Raw Text","10.63317\u002F3a8zgv5osxtk","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-424","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F666.pdf","guthrie-etal-2004-large",[7797,7800,7802,7804,7805,7806,7807,7810,7812,7814,7816,7819,7821,7823],{"paper_id":7790,"author_seq":247,"given_name":7798,"surname":7799,"affiliation":63,"orcid":63},"Louise","Guthrie",{"paper_id":7790,"author_seq":232,"given_name":1746,"surname":7801,"affiliation":63,"orcid":63},"Basili",{"paper_id":7790,"author_seq":218,"given_name":3776,"surname":7803,"affiliation":63,"orcid":63},"Zanzotto",{"paper_id":7790,"author_seq":203,"given_name":1431,"surname":1432,"affiliation":63,"orcid":63},{"paper_id":7790,"author_seq":188,"given_name":1434,"surname":1435,"affiliation":63,"orcid":63},{"paper_id":7790,"author_seq":172,"given_name":1790,"surname":7799,"affiliation":63,"orcid":63},{"paper_id":7790,"author_seq":155,"given_name":7808,"surname":7809,"affiliation":63,"orcid":63},"Jia","Cui",{"paper_id":7790,"author_seq":138,"given_name":3005,"surname":7811,"affiliation":63,"orcid":63},"Cammisa",{"paper_id":7790,"author_seq":121,"given_name":7813,"surname":3635,"affiliation":63,"orcid":63},"Jerry Cheng-Chieh",{"paper_id":7790,"author_seq":104,"given_name":7815,"surname":1087,"affiliation":63,"orcid":63},"Cassia 
Farria",{"paper_id":7790,"author_seq":87,"given_name":7817,"surname":7818,"affiliation":63,"orcid":63},"Kristiyan","Haralambiev",{"paper_id":7790,"author_seq":73,"given_name":1087,"surname":7820,"affiliation":63,"orcid":63},"Holub",{"paper_id":7790,"author_seq":55,"given_name":2959,"surname":7822,"affiliation":63,"orcid":63},"Macherey",{"paper_id":7790,"author_seq":38,"given_name":7824,"surname":7825,"affiliation":63,"orcid":63},"Fredrick","Jelinek","This paper gives a brief overview of the results of our work during the Summer 2003 Workshop of the Center for Language and Speech Processing at the Johns Hopkins University in Baltimore Maryland. The goal of the project was to determine the feasibility of extending named entity recognition to common nouns and determine whether or not it is possible to assign automatically a predetermined set of semantic tags and approach human performance in the task.",{"paper_id":7828,"title":7829,"year":197,"month":855,"day":63,"doi":7830,"resource_url":7831,"first_page":63,"last_page":63,"pdf_url":7832,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7833,"paper_type":860,"authors":7834,"abstract":7843},"lrec2004-main-425","Exploring Portability of Syntactic Information from English to Basque","10.63317\u002F26bgf8g5cng2","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-425","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F667.pdf","agirre-etal-2004-exploring",[7835,7836,7837,7840],{"paper_id":7828,"author_seq":247,"given_name":7548,"surname":7549,"affiliation":63,"orcid":63},{"paper_id":7828,"author_seq":232,"given_name":7551,"surname":7552,"affiliation":63,"orcid":63},{"paper_id":7828,"author_seq":218,"given_name":7838,"surname":7839,"affiliation":63,"orcid":63},"Koldo","Gojenola",{"paper_id":7828,"author_seq":203,"given_name":7841,"surname":7842,"affiliation":63,"orcid":63},"Kepa","Sarasola","This paper explores a crosslingual approach to the PP attachment problem. 
We built a large dependency database for English based on an automatic parse of the BNC, and Reuters (sports and finances sections). The Basque attachment decisions are taken based on the occurrence frequency of the translations of the Basque (verb-noun) pairs in the English syntactic database. The results show that with this simple technique it is possible to transfer syntactic information from a language like English in order to make PP attachment decisions in another language, in this case Basque.",{"paper_id":7845,"title":7846,"year":197,"month":855,"day":63,"doi":7847,"resource_url":7848,"first_page":63,"last_page":63,"pdf_url":7849,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7850,"paper_type":860,"authors":7851,"abstract":7856},"lrec2004-main-426","Spanish WordNet 1.6: Porting the Spanish Wordnet Across Princeton Versions","10.63317\u002F34wqmajxciab","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-426","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F668.pdf","atserias-etal-2004-spanish",[7852,7853,7855],{"paper_id":7845,"author_seq":247,"given_name":2105,"surname":6818,"affiliation":63,"orcid":63},{"paper_id":7845,"author_seq":232,"given_name":2886,"surname":7854,"affiliation":63,"orcid":63},"Villarejo",{"paper_id":7845,"author_seq":218,"given_name":6815,"surname":6816,"affiliation":63,"orcid":63},"This paper describes the new Spanish Wordnet aligned to Princeton WordNet1.6 and the analysis of the transformation from the previous version aligned to Princeton WordNet1.5. 
Although a mapping technology exists, to our knowledge it is the first time a whole local wordnet has been ported to a newer release of the Princeton WordNet.",{"paper_id":7858,"title":7859,"year":197,"month":855,"day":63,"doi":7860,"resource_url":7861,"first_page":63,"last_page":63,"pdf_url":7862,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7863,"paper_type":860,"authors":7864,"abstract":7885},"lrec2004-main-427","An Annotated Corpus of Tutorial Dialogs on Mathematical Theorem Proving","10.63317\u002F3m99rgk8ht4a","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-427","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F669.pdf","wolska-etal-2004-annotated",[7865,7868,7871,7874,7875,7878,7880,7883],{"paper_id":7858,"author_seq":247,"given_name":7866,"surname":7867,"affiliation":63,"orcid":63},"Magdalena","Wolska",{"paper_id":7858,"author_seq":232,"given_name":7869,"surname":7870,"affiliation":63,"orcid":63},"Bao Quoc","Vo",{"paper_id":7858,"author_seq":218,"given_name":7872,"surname":7873,"affiliation":63,"orcid":63},"Dimitra","Tsovaltzi",{"paper_id":7858,"author_seq":203,"given_name":2570,"surname":2571,"affiliation":63,"orcid":63},{"paper_id":7858,"author_seq":188,"given_name":7876,"surname":7877,"affiliation":63,"orcid":63},"Elena","Karagjosova",{"paper_id":7858,"author_seq":172,"given_name":5493,"surname":7879,"affiliation":63,"orcid":63},"Horacek",{"paper_id":7858,"author_seq":155,"given_name":7881,"surname":7882,"affiliation":63,"orcid":63},"Armin","Fiedler",{"paper_id":7858,"author_seq":138,"given_name":2956,"surname":7884,"affiliation":63,"orcid":63},"Benzmüller","Our goal is to develop a flexible dialog system for tutoring mathematical problem solving. Empirical findings in the area of intelligent tutoring show that flexible natural language dialog supports active learning. Therefore, we focus on the development of solutions allowing flexible dialog. 
However, little is known about the use of natural language in dialog settings in formal domains, such as mathematics, due to the lack of empirical data. We designed and performed an experiment with a simulated tutorial dialog system for teaching proofs in naive set theory. To investigate the correlations between (i) domain-specific content and its linguistic realization, and (ii) the use, distribution, and linguistic realization of dialog moves, we are annotating the corpus with (i) dependency-based semantic relations that build up the linguistic meaning of the utterances and (ii) with dialog moves.",{"paper_id":7887,"title":7888,"year":197,"month":855,"day":63,"doi":7889,"resource_url":7890,"first_page":63,"last_page":63,"pdf_url":7891,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7892,"paper_type":860,"authors":7893,"abstract":63},"lrec2004-main-428","Automatic Keyword Extraction from Spoken Text. A Comparison of Two Lexical Resources: EDR and WordNet","10.63317\u002F3a75v4wm99c8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-428","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F670.pdf","van-der-plas-etal-2004-automatic",[7894,7897,7898,7899],{"paper_id":7887,"author_seq":247,"given_name":7895,"surname":7896,"affiliation":63,"orcid":63},"Lonneke","van der Plas",{"paper_id":7887,"author_seq":232,"given_name":1093,"surname":1094,"affiliation":63,"orcid":63},{"paper_id":7887,"author_seq":218,"given_name":1087,"surname":1088,"affiliation":63,"orcid":63},{"paper_id":7887,"author_seq":203,"given_name":6626,"surname":6627,"affiliation":63,"orcid":63},{"paper_id":7901,"title":7902,"year":197,"month":855,"day":63,"doi":7903,"resource_url":7904,"first_page":63,"last_page":63,"pdf_url":7905,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7906,"paper_type":860,"authors":7907,"abstract":7917},"lrec2004-main-429","Pronominal Anaphora Resolution for Unrestricted 
Text","10.63317\u002F3acaquiatmph","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-429","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F671.pdf","kupsc-etal-2004-pronominal",[7908,7910,7913,7915],{"paper_id":7901,"author_seq":247,"given_name":4167,"surname":7909,"affiliation":63,"orcid":63},"Kupść",{"paper_id":7901,"author_seq":232,"given_name":7911,"surname":7912,"affiliation":63,"orcid":63},"Teruko","Mitamura",{"paper_id":7901,"author_seq":218,"given_name":1900,"surname":7914,"affiliation":63,"orcid":63},"Van Durme",{"paper_id":7901,"author_seq":203,"given_name":2721,"surname":7916,"affiliation":63,"orcid":63},"Nyberg","The paper presents an anaphora resolution algorithm for unrestricted text. In particular, we examine portability of a knowledge-based approach of (Mitamura et al. 2002), proposed for a domain-specific task. We obtain up to 70% accuracy on unrestricted text, which is a significant improvement (almost 20%) over a baseline we set for general text. 
As the overall results leave much room for improvement, we provide a detailed error analysis and investigate possible enhancements.",{"paper_id":7919,"title":7920,"year":197,"month":855,"day":63,"doi":7921,"resource_url":7922,"first_page":63,"last_page":63,"pdf_url":7923,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7924,"paper_type":860,"authors":7925,"abstract":7937},"lrec2004-main-430","The ESTER Evaluation Campaign for the Rich Transcription of French Broadcast News","10.63317\u002F3ubdomqijkav","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-430","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F672.pdf","gravier-etal-2004-ester",[7926,7928,7931,7933,7935,7936],{"paper_id":7919,"author_seq":247,"given_name":3656,"surname":7927,"affiliation":63,"orcid":63},"Gravier",{"paper_id":7919,"author_seq":232,"given_name":7929,"surname":7930,"affiliation":63,"orcid":63},"J-F.","Bonastre",{"paper_id":7919,"author_seq":218,"given_name":3749,"surname":7932,"affiliation":63,"orcid":63},"Geoffrois",{"paper_id":7919,"author_seq":203,"given_name":3556,"surname":7934,"affiliation":63,"orcid":63},"Galliano",{"paper_id":7919,"author_seq":188,"given_name":7059,"surname":4206,"affiliation":63,"orcid":63},{"paper_id":7919,"author_seq":172,"given_name":7059,"surname":3404,"affiliation":63,"orcid":63},"This paper gives an overview of the ESTER evaluation campaign. The aim of this campaign is to evaluate automatic broadcast news transcription systems for the French language. The evaluation tasks are divided into three main categories: orthographic transcription, event detection and tracking (e.g. speech vs. music, speaker tracking), and information extraction (e.g. named entity detection, topic tracking). Each category is evaluated separately. 
This paper gives details about the tasks to be performed and the corpus, with particular emphasis on the manually transcribed reference transcription.",{"paper_id":7939,"title":7940,"year":197,"month":855,"day":63,"doi":7941,"resource_url":7942,"first_page":63,"last_page":63,"pdf_url":7943,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7944,"paper_type":860,"authors":7945,"abstract":7951},"lrec2004-main-431","Steps Towards Semantically Annotated Language Resources","10.63317\u002F4aji4fcf53o3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-431","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F673.pdf","klenner-etal-2004-steps",[7946,7949,7950],{"paper_id":7939,"author_seq":247,"given_name":7947,"surname":7948,"affiliation":63,"orcid":63},"Manfred","Klenner",{"paper_id":7939,"author_seq":232,"given_name":3776,"surname":3777,"affiliation":63,"orcid":63},{"paper_id":7939,"author_seq":218,"given_name":1357,"surname":3779,"affiliation":63,"orcid":63},"The use of textual resources such as text corpora, tree banks, large-scale lexica etc., has become a widely accepted commitment in the field of computational linguistics. However the scope of the annotations proposed has been unbalanced towards the 'surface' level. Only recently corpora with a deeper level of annotations have started to emerge. 
In this paper we describe a machine learning approach aimed at learning transformational rules that would allow the (partial) generation of semantic annotations starting from syntactic annotations generated by a parser (or created manually).",{"paper_id":7953,"title":7954,"year":197,"month":855,"day":63,"doi":7955,"resource_url":7956,"first_page":63,"last_page":63,"pdf_url":7957,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7958,"paper_type":860,"authors":7959,"abstract":7991},"lrec2004-main-432","Designing a Realistic Evaluation of an End-to-end Interactive Question Answering System","10.63317\u002F3k5b3a369uwc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-432","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F675.pdf","wacholder-etal-2004-designing",[7960,7963,7966,7969,7971,7973,7976,7978,7981,7984,7986,7988],{"paper_id":7953,"author_seq":247,"given_name":7961,"surname":7962,"affiliation":63,"orcid":63},"Nina","Wacholder",{"paper_id":7953,"author_seq":232,"given_name":7964,"surname":7965,"affiliation":63,"orcid":63},"Sharon","Small",{"paper_id":7953,"author_seq":218,"given_name":7967,"surname":7968,"affiliation":63,"orcid":63},"Bing","Bai",{"paper_id":7953,"author_seq":203,"given_name":7970,"surname":2608,"affiliation":63,"orcid":63},"Diane",{"paper_id":7953,"author_seq":188,"given_name":2607,"surname":7972,"affiliation":63,"orcid":63},"Rittman",{"paper_id":7953,"author_seq":172,"given_name":7974,"surname":7975,"affiliation":63,"orcid":63},"Sean","Ryan",{"paper_id":7953,"author_seq":155,"given_name":2607,"surname":7977,"affiliation":63,"orcid":63},"Salkin",{"paper_id":7953,"author_seq":138,"given_name":7979,"surname":7980,"affiliation":63,"orcid":63},"Peng","Song",{"paper_id":7953,"author_seq":121,"given_name":7982,"surname":7983,"affiliation":63,"orcid":63},"Ying","Sun",{"paper_id":7953,"author_seq":104,"given_name":7985,"surname":3635,"affiliation":63,"orcid":63},"Ting",{"paper_id":7953,"aut
hor_seq":87,"given_name":973,"surname":7987,"affiliation":63,"orcid":63},"Kantor",{"paper_id":7953,"author_seq":73,"given_name":7989,"surname":7990,"affiliation":63,"orcid":63},"Tomek","Strzalkowski","We report on the development of material for an evaluation exercise designed to assess the overall design and usability of HITIQA, an interactive question-answering system for preparing broad ranging reports on complex issues. The two basic objectives of the evaluation were (1) To perform a realistic assessment of the usefulness and usability of HITIQA as an end-to-end system, from the information seeker’s initial questions to completion of a draft report; and (2) To develop metrics to compare the answers obtained by different analysts and evaluate the quality of the support that HITIQA provides. We used qualitative and quantitative tools to obtain data about analyst’s comfort with the HITIQA system, especially its novel features such as the ability to answer complex questions and the interactive dialogue. 
Because of the impracticality of measuring the quality of HITIQA output with the standard metrics of precision and recall, we developed a new task – cross-evaluation – to indirectly measure the quality of the answers obtained using HITIQA; in this black-box assessment, analysts rate the quality of their own and their colleagues’ reports.",{"paper_id":7993,"title":7994,"year":197,"month":855,"day":63,"doi":7995,"resource_url":7996,"first_page":63,"last_page":63,"pdf_url":7997,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":7998,"paper_type":860,"authors":7999,"abstract":8002},"lrec2004-main-433","Semi-Automatic Construction of a Question Treebank","10.63317\u002F5n2gevqe2f5b","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-433","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F677.pdf","muller-2004-semi",[8000],{"paper_id":7993,"author_seq":247,"given_name":1897,"surname":8001,"affiliation":63,"orcid":63},"Müller","A method for the semi-automatic construction of a question treebank is presented. We exploit linguistic knowledge like grammatical functions, constituent structure and the relatively strict word order of English encoded in the Penn Treebank to semi-automatically generate questions. The outcome is a treebank of questions which might be useful for developing better tagging and parsing models for processing questions. We show that it is feasible to reuse the Penn Treebank. 
At the current stage our treebank comprises about 7000 questions, which can be easily extended.",{"paper_id":8004,"title":8005,"year":197,"month":855,"day":63,"doi":8006,"resource_url":8007,"first_page":63,"last_page":63,"pdf_url":8008,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8009,"paper_type":860,"authors":8010,"abstract":8017},"lrec2004-main-434","Calibrating Resource-light Automatic MT Evaluation: a Cheap Approach to Ranking MT Systems by the Usability of Their Output","10.63317\u002F3jfwmwj4dsqh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-434","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F678.pdf","babych-etal-2004-calibrating",[8011,8013,8015],{"paper_id":8004,"author_seq":247,"given_name":6444,"surname":8012,"affiliation":63,"orcid":63},"Babych",{"paper_id":8004,"author_seq":232,"given_name":3855,"surname":8014,"affiliation":63,"orcid":63},"Elliott",{"paper_id":8004,"author_seq":218,"given_name":2819,"surname":8016,"affiliation":63,"orcid":63},"Hartley","MT systems are traditionally evaluated with different criteria, such as adequacy and fluency. Automatic evaluation scores are designed to match these quality parameters. In this paper we introduce a novel parameter - usability (or utility) of output, which was found to integrate both fluency and adequacy. We confronted two automated metrics, BLEU and LTV, with new data for which human evaluation scores were also produced; we then measured the agreement between the automated and human evaluation scores. 
The resources produced in the experiment are available on the authors' website.",{"paper_id":8019,"title":8020,"year":197,"month":855,"day":63,"doi":8021,"resource_url":8022,"first_page":63,"last_page":63,"pdf_url":8023,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8024,"paper_type":860,"authors":8025,"abstract":8047},"lrec2004-main-435","Multimodal, Multilingual Resources in the Subtitling Process","10.63317\u002F27kmhtasbp6q","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-435","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F680.pdf","piperidis-etal-2004-multimodal",[8026,8027,8030,8033,8035,8037,8038,8041,8044],{"paper_id":8019,"author_seq":247,"given_name":6408,"surname":6409,"affiliation":63,"orcid":63},{"paper_id":8019,"author_seq":232,"given_name":8028,"surname":8029,"affiliation":63,"orcid":63},"Iason","Demiros",{"paper_id":8019,"author_seq":218,"given_name":8031,"surname":8032,"affiliation":63,"orcid":63},"Prokopis","Prokopidis",{"paper_id":8019,"author_seq":203,"given_name":5354,"surname":8034,"affiliation":63,"orcid":63},"Vanroose",{"paper_id":8019,"author_seq":188,"given_name":2687,"surname":8036,"affiliation":63,"orcid":63},"Hoethker",{"paper_id":8019,"author_seq":172,"given_name":2672,"surname":2673,"affiliation":63,"orcid":63},{"paper_id":8019,"author_seq":155,"given_name":8039,"surname":8040,"affiliation":63,"orcid":63},"Elsa","Sklavounou",{"paper_id":8019,"author_seq":138,"given_name":8042,"surname":8043,"affiliation":63,"orcid":63},"Manos","Konstantinou",{"paper_id":8019,"author_seq":121,"given_name":8045,"surname":8046,"affiliation":63,"orcid":63},"Yannis","Karavidas","In view of the expansion of digital television and the increasing demand to manipulate audiovisual content, tools producing subtitles in a multilingual setting become indispensable for the subtitling industry. 
Operating in this setting, the MUSA project aims at the development of a system which combines speech recognition, advanced text analysis, and machine translation to help generate multilingual subtitles; a system that converts audio streams into text transcriptions, condenses the content to meet the spatio-temporal constraints of the subtitling process and produces draft translations in two language pairs. Three European languages are supported: English as source and target as far as subtitling generation is concerned, French and Greek as subtitle translation target languages. In order to train and evaluate system components, an array of application specific resources are necessary. Primary audiovisual data consist in BBC TV documentaries and \"newsy\" current affairs programmes. For each programme, the following data are captured: the actual video - its transcript or script - English, Greek and French subtitles - and topically relevant newspaper or web-sourced extracts.",{"paper_id":8049,"title":8050,"year":197,"month":855,"day":63,"doi":8051,"resource_url":8052,"first_page":63,"last_page":63,"pdf_url":8053,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8054,"paper_type":860,"authors":8055,"abstract":8070},"lrec2004-main-436","Perceptual Evaluation of Quality Deterioration Owing to Prosody 
Modification","10.63317\u002F4taia8nkn6ou","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-436","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F681.pdf","adachi-etal-2004-perceptual",[8056,8059,8062,8065,8067],{"paper_id":8049,"author_seq":247,"given_name":8057,"surname":8058,"affiliation":63,"orcid":63},"Kazuki","Adachi",{"paper_id":8049,"author_seq":232,"given_name":8060,"surname":8061,"affiliation":63,"orcid":63},"Tomoki","Toda",{"paper_id":8049,"author_seq":218,"given_name":8063,"surname":8064,"affiliation":63,"orcid":63},"Hiromichi","Kawanami",{"paper_id":8049,"author_seq":203,"given_name":1328,"surname":8066,"affiliation":63,"orcid":63},"Saruwatari",{"paper_id":8049,"author_seq":188,"given_name":8068,"surname":8069,"affiliation":63,"orcid":63},"Kiyohiro","Shikano","Our research goal is to construct a Japanese TTS (Text-to-Speech) system that can output various kinds of prosody. Since such synthetic speech is useful for a practical use, many TTS systems have implemented global prosodic control processing. But fundamentally they're designed to output speech with standard pitch and speech rate. We discuss synthesis method for high quality speech with extreme prosody (very high, low, fast and slow) from a viewpoint of a speech database. As a speech synthesis method, we employ a unit selection-concatenation method. We also introduce an analysis-synthesis process to give precise target prosody to output speech. Much research has reported that speech quality gets worse in proportion to an amount of prosody modification by analysis-synthesis or PSOLA. Following the reports, we take an approach to reduce prosody modification of a speech segment. Nine Japanese speech databases with different characteristics in prosody are prepared. First we confirm relationship between speech quality deterioration and prosody modification, using synthetic speech, through objective and subjective tests. 
We also investigate relationship between a speech deterioration tendency and each speech database. The result indicates that the tendencies depend on prosodic features of original speech.",{"paper_id":8072,"title":8073,"year":197,"month":855,"day":63,"doi":8074,"resource_url":8075,"first_page":63,"last_page":63,"pdf_url":8076,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8077,"paper_type":860,"authors":8078,"abstract":8082},"lrec2004-main-437","Integration of Russian Language Resources","10.63317\u002F58m32jjveqi3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-437","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F682.pdf","yablonsky-2004-integration",[8079],{"paper_id":8072,"author_seq":247,"given_name":8080,"surname":8081,"affiliation":63,"orcid":63},"Serge A.","Yablonsky","In this paper we describe the creation of large scale linguistic resources for Russian language. Internet\u002Fintranet system architecture was developed to make a large volume of Russian language lexical information, corpora (texts) and knowledge base (Russian WordNet) available to the system at development and\u002For run time. 
There are four linguistic counterparts, corresponding to the major categories of lexical information developed in our system: lexicon, knowledge base, corpora and Russian language processing software.",{"paper_id":8084,"title":8085,"year":197,"month":855,"day":63,"doi":8086,"resource_url":8087,"first_page":63,"last_page":63,"pdf_url":8088,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8089,"paper_type":860,"authors":8090,"abstract":63},"lrec2004-main-438","A2Q: An Agent-based Architecure for Multilingual Q&A","10.63317\u002F52kyacqtn365","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-438","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F683.pdf","basili-etal-2004-a2q",[8091,8092,8095,8097],{"paper_id":8084,"author_seq":247,"given_name":1746,"surname":7801,"affiliation":63,"orcid":63},{"paper_id":8084,"author_seq":232,"given_name":8093,"surname":8094,"affiliation":63,"orcid":63},"Nicola","Lorusso",{"paper_id":8084,"author_seq":218,"given_name":1982,"surname":8096,"affiliation":63,"orcid":63},"Pazienza",{"paper_id":8084,"author_seq":203,"given_name":8098,"surname":7803,"affiliation":63,"orcid":63},"Fabio Massimo",{"paper_id":8100,"title":8101,"year":197,"month":855,"day":63,"doi":8102,"resource_url":8103,"first_page":63,"last_page":63,"pdf_url":8104,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8105,"paper_type":860,"authors":8106,"abstract":8114},"lrec2004-main-439","OntoTag’s Linguistic Ontologies: Enhancing Higher Level and Semantic Web Annotations","10.63317\u002F4j826bz72v9a","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-439","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F684.pdf","de-cea-etal-2004-ontotags",[8107,8110,8112],{"paper_id":8100,"author_seq":247,"given_name":8108,"surname":8109,"affiliation":63,"orcid":63},"Guadalupe Aguado","de 
Cea",{"paper_id":8100,"author_seq":232,"given_name":1301,"surname":8111,"affiliation":63,"orcid":63},"Álvarez-de-Mon",{"paper_id":8100,"author_seq":218,"given_name":3429,"surname":8113,"affiliation":63,"orcid":63},"Pareja-Lora","Following the road in-between purely linguistic annotation and solely ontology-based annotations for the Semantic Web, a hybrid (ontological and linguistic) model and platform, called OntoTag, has been created, aiming at better machine communication, interoperability and language understanding. These capabilities, which are the main topic of this paper, are derived from the incorporation into the platform of a set of linguistic ontologies, which are also the main referent for the generation of multi-levelled and standardized annotations of Semantic Web documents within OntoTag.",{"paper_id":8116,"title":8117,"year":197,"month":855,"day":63,"doi":8118,"resource_url":8119,"first_page":63,"last_page":63,"pdf_url":8120,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8121,"paper_type":860,"authors":8122,"abstract":8129},"lrec2004-main-440","Exploiting Language Resources for Semantic Web Annotations","10.63317\u002F3zt8bae6upic","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-440","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F686.pdf","kaljurand-etal-2004-exploiting",[8123,8126,8127,8128],{"paper_id":8116,"author_seq":247,"given_name":8124,"surname":8125,"affiliation":63,"orcid":63},"Kaarel","Kaljurand",{"paper_id":8116,"author_seq":232,"given_name":3776,"surname":3777,"affiliation":63,"orcid":63},{"paper_id":8116,"author_seq":218,"given_name":3767,"surname":3768,"affiliation":63,"orcid":63},{"paper_id":8116,"author_seq":203,"given_name":1357,"surname":3779,"affiliation":63,"orcid":63},"A large portion of the useful information on the web is in the form of unstructured natural language documents. Currently such documents are understandable to humans but not to software agents. 
One of the goals of the Semantic Web activity is to enrich a considerable number of web documents with annotations, which will then allow new generation search engines and novel web services to access those documents in a more intelligent fashion than currently possible. Currently the most reliable method of providing such semantic markup is via manual annotation, possibly based on predefined ontologies and with the support of specialized editors. In this paper we propose an approach for the automatic processing of textual documents to be published on the web, which can be used to automatically generate (some of) the semantic annotations. In particular, we focus on detecting the entities mentioned in the documents, their roles and relationships to other entities.",{"paper_id":8131,"title":8132,"year":197,"month":855,"day":63,"doi":8133,"resource_url":8134,"first_page":63,"last_page":63,"pdf_url":8135,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8136,"paper_type":860,"authors":8137,"abstract":8158},"lrec2004-main-441","Towards an International Standard on Feature Structure Representation","10.63317\u002F3u6z5qdsk9vx","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-441","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F687.pdf","lee-etal-2004-towards",[8138,8140,8141,8142,8144,8145,8146,8149,8152,8153,8156],{"paper_id":8131,"author_seq":247,"given_name":8139,"surname":1045,"affiliation":63,"orcid":63},"Kiyong",{"paper_id":8131,"author_seq":232,"given_name":5329,"surname":5899,"affiliation":63,"orcid":63},{"paper_id":8131,"author_seq":218,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},{"paper_id":8131,"author_seq":203,"given_name":2721,"surname":8143,"affiliation":63,"orcid":63},"de la 
Clergerie",{"paper_id":8131,"author_seq":188,"given_name":5618,"surname":5619,"affiliation":63,"orcid":63},{"paper_id":8131,"author_seq":172,"given_name":5894,"surname":5895,"affiliation":63,"orcid":63},{"paper_id":8131,"author_seq":155,"given_name":8147,"surname":8148,"affiliation":63,"orcid":63},"Harry","Bunt",{"paper_id":8131,"author_seq":138,"given_name":8150,"surname":8151,"affiliation":63,"orcid":63},"Lionel","Clément",{"paper_id":8131,"author_seq":121,"given_name":2128,"surname":2129,"affiliation":63,"orcid":63},{"paper_id":8131,"author_seq":104,"given_name":8154,"surname":8155,"affiliation":63,"orcid":63},"Azim","Roussanaly",{"paper_id":8131,"author_seq":87,"given_name":8157,"surname":5295,"affiliation":63,"orcid":63},"Claude","This paper describes some preliminary results from a joint initiative of the TEI (Text Encoding Initiative) Consortium and the ISO Committee TC 37\u002FSC 4 (Language Resource management), the goal of which is to define a standard for the representation and interchange of feature structures. 
The joint working group was established in December 2002, and its proposals are now progressing to Draft International Standard status.",{"paper_id":8160,"title":8161,"year":197,"month":855,"day":63,"doi":8162,"resource_url":8163,"first_page":63,"last_page":63,"pdf_url":8164,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8165,"paper_type":860,"authors":8166,"abstract":8173},"lrec2004-main-442","The Translation Correction Tool: English-Spanish User Studies","10.63317\u002F2za6hw44q2ha","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-442","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F688.pdf","font-llitjos-carbonell-2004-translation",[8167,8170],{"paper_id":8160,"author_seq":247,"given_name":8168,"surname":8169,"affiliation":63,"orcid":63},"Ariadna","Font Llitjós",{"paper_id":8160,"author_seq":232,"given_name":8171,"surname":8172,"affiliation":63,"orcid":63},"Jaime","Carbonell","Machine translation systems should improve with feedback from post-editors, but none do beyond the very localized benefit of adding the corrected translation to parallel training data (for statistical and example-base MTS) or a memory data base. Rule based systems to date improve only via manual debugging. In contrast, we introduce a largely automated method for capturing more information from the human post-editor, so that corrections may be performed automatically to translation grammar rules and lexical entries. 
This paper focuses on the information capture phase and reports on an experiment with English-Spanish translation.",{"paper_id":8175,"title":8176,"year":197,"month":855,"day":63,"doi":8177,"resource_url":8178,"first_page":63,"last_page":63,"pdf_url":8179,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8180,"paper_type":860,"authors":8181,"abstract":8184},"lrec2004-main-443","A Labelled Corpus for Prepositional Phrase Attachment","10.63317\u002F4z4uwgdjcu2q","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-443","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F690.pdf","mitchell-gaizauskas-2004-labelled",[8182,8183],{"paper_id":8175,"author_seq":247,"given_name":896,"surname":1022,"affiliation":63,"orcid":63},{"paper_id":8175,"author_seq":232,"given_name":2607,"surname":7262,"affiliation":63,"orcid":63},"This paper describes a labelled corpus intended for training learning algorithms to attach prepositional phrases (PPs). Taken from the PTB2, we believe it is the largest available resource for this purpose, especially as it contains many patterns in which PPs occur ambiguously (nearly all previous research has focused on just one pattern) and we present some results for the five most common patterns. 
Moreover, the corpus contains some features that, to our knowledge, have not been used before for attaching PPs.",{"paper_id":8186,"title":8187,"year":197,"month":855,"day":63,"doi":8188,"resource_url":8189,"first_page":63,"last_page":63,"pdf_url":8190,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8191,"paper_type":860,"authors":8192,"abstract":8198},"lrec2004-main-444","Comparing the Ambiguity Reduction Abilities of Probabilistic Context-Free Grammars","10.63317\u002F4simo8392msc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-444","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F692.pdf","infante-lopez-de-rijke-2004-comparing",[8193,8195],{"paper_id":8186,"author_seq":247,"given_name":4101,"surname":8194,"affiliation":63,"orcid":63},"Infante-Lopez",{"paper_id":8186,"author_seq":232,"given_name":8196,"surname":8197,"affiliation":63,"orcid":63},"Maarten","de Rijke","We present a measure for evaluating Probabilistic Context Free Grammars (PCFG) based on their ambiguity resolution capabilities. Probabilities in a PCFG can be seen as a filtering mechanism: For an ambiguous sentence, the trees bearing maximum probability are singled out, while all others are discarded. The level of ambiguity is related to the size of the singled out set of trees. Under our measure, a grammar is better than another if the first one has reduced the level of ambiguity to a higher degree. 
The measure we present is computed over a finite sample set of sentences because, as we show, it cannot be computed over the set of sentences accepted by the grammar.",{"paper_id":8200,"title":8201,"year":197,"month":855,"day":63,"doi":8202,"resource_url":8203,"first_page":63,"last_page":63,"pdf_url":8204,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8205,"paper_type":860,"authors":8206,"abstract":8211},"lrec2004-main-445","NameNet: a Self-Improving Resource for Name Classification","10.63317\u002F5o2opnxmjpdt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-445","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F693.pdf","morarescu-harabagiu-2004-namenet",[8207,8209],{"paper_id":8200,"author_seq":247,"given_name":973,"surname":8208,"affiliation":63,"orcid":63},"Morarescu",{"paper_id":8200,"author_seq":232,"given_name":8210,"surname":4852,"affiliation":63,"orcid":63},"Sanda","This paper presents a semantically structured resource of more than 1,600 Name Classes. This structure is based on the noun hypernymy hierarchies in WordNet, expanded and validated by corpus evidence collected from the World Wide Web. The set of seed examples provided by WordNet is bootstrapped and then used to automatically construct an annotated training corpus for each Name Class. 
The resulting Named Entity resource enables a supervised Named Entity Recognizer to identify all the encoded Name Classes with high accuracy and without any human intervention.",{"paper_id":8213,"title":8214,"year":197,"month":855,"day":63,"doi":8215,"resource_url":8216,"first_page":63,"last_page":63,"pdf_url":8217,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8218,"paper_type":860,"authors":8219,"abstract":8224},"lrec2004-main-446","Image-Language Multimodal Corpora: Needs, Lacunae and an AI Synergy for Annotation","10.63317\u002F2kibd8c5msha","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-446","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F694.pdf","pastra-wilks-2004-image",[8220,8223],{"paper_id":8213,"author_seq":247,"given_name":8221,"surname":8222,"affiliation":63,"orcid":63},"Katerina","Pastra",{"paper_id":8213,"author_seq":232,"given_name":1456,"surname":1457,"affiliation":63,"orcid":63},"The growing demand for intelligent multimedia systems has led to the development of various multimodal resources and corresponding annotation schemes and processing tools. In this paper, we argue that there is a striking lack of multimodal corpora capturing the association and interaction of visual and linguistic data. We relate this research lacuna to vision-language integration prototypes developed within Artificial Intelligence (AI) and show how the needs of the latter dictate the development of such resources for a wide variety of applications. 
We identify the annotation requirements imposed on image-language corpora by these needs and the nature of the modalities involved and suggest a semi-automatic way of meeting them.",{"paper_id":8226,"title":8227,"year":197,"month":855,"day":63,"doi":8228,"resource_url":8229,"first_page":63,"last_page":63,"pdf_url":8230,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8231,"paper_type":860,"authors":8232,"abstract":8241},"lrec2004-main-447","Detecting Errors in English Article Usage with a Maximum Entropy Classifier Trained on a Large, Diverse Corpus","10.63317\u002F47ks6kaj3eij","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-447","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F695.pdf","han-etal-2004-detecting",[8233,8236,8238],{"paper_id":8226,"author_seq":247,"given_name":8234,"surname":8235,"affiliation":63,"orcid":63},"Na-Rae","Han",{"paper_id":8226,"author_seq":232,"given_name":1087,"surname":8237,"affiliation":63,"orcid":63},"Chodorow",{"paper_id":8226,"author_seq":218,"given_name":8239,"surname":8240,"affiliation":63,"orcid":63},"Claudia","Leacock","One of the most difficult challenges faced by non-native speakers of English is mastering the system of English articles. We trained a maximum entropy classifier to select among a\u002Fan, the, or zero article for noun phrases, based on a set of features extracted from the local context of each. When the classifier was trained on 6 million noun phrases, its performance was correct about 88% of the time. We also used the classifier to detect article errors in the TOEFL essays of native speakers of Chinese, Japanese, and Russian. Agreement with human annotators was about 88% (kappa = 0.36). Many of the disagreements were due to the classifier's lack of discourse information. 
Performance rose to 94% agreement (kappa = 0.47) when the system accepted noun phrases as correct in cases where its own confidence was low.",{"paper_id":8243,"title":8244,"year":197,"month":855,"day":63,"doi":8245,"resource_url":8246,"first_page":63,"last_page":63,"pdf_url":8247,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8248,"paper_type":860,"authors":8249,"abstract":8253},"lrec2004-main-448","The Core of the Czech Derivational Dictionary","10.63317\u002F49nrskr6xg6b","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-448","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F696.pdf","sedlacek-2004-core",[8250],{"paper_id":8243,"author_seq":247,"given_name":8251,"surname":8252,"affiliation":63,"orcid":63},"Radek","Sedláček","Amongst all available language resources for the Czech language one can find a lot of useful dictionaries, databases and corpora. There are machine readable dictionaries of literary Czech, the dictionary of Czech synonyms and two encyclopaedia: Otto and Diderot. Moreover, Czech researchers have two morphological databases, the Prague Dependency Treebank, the Czech version of WordNet and the Czech National Corpus at their disposal. However, any kind of resources containing information about word-formation and the detailed morphemic structure of Czech words is still missing. 
In this paper we will introduce the core of such a resource: the Czech Derivational Dictionary (CDD) and the procedure of its creation.",{"paper_id":8255,"title":8256,"year":197,"month":855,"day":63,"doi":8257,"resource_url":8258,"first_page":63,"last_page":63,"pdf_url":8259,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8260,"paper_type":860,"authors":8261,"abstract":8266},"lrec2004-main-449","Automatic Sentence Simplification for Subtitling in Dutch and English","10.63317\u002F4sxtq3n5wwww","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-449","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F697.pdf","daelemans-etal-2004-automatic",[8262,8263,8265],{"paper_id":8255,"author_seq":247,"given_name":2672,"surname":2673,"affiliation":63,"orcid":63},{"paper_id":8255,"author_seq":232,"given_name":2687,"surname":8264,"affiliation":63,"orcid":63},"Höthker",{"paper_id":8255,"author_seq":218,"given_name":2416,"surname":2417,"affiliation":63,"orcid":63},"We describe ongoing work on sentence summarization in the European MUSA project and the Flemish ATraNoS project. Both projects aim at automatic generation of TV subtitles for hearing-impaired people. This involves speech recognition, a topic which is not covered in this paper, and summarizing sentences in such a way that they fit in the available space for subtitles. The target language is equal to the source language: Dutch in ATraNoS and English in MUSA. A separate part of MUSA deals with translating the English subtitles to French and Greek. 
We compare two methods for monolingual sentence length reduction: one based on learning sentence reduction from a parallel corpus and one based on hand-crafted deletion rules.",{"paper_id":8268,"title":8269,"year":197,"month":855,"day":63,"doi":8270,"resource_url":8271,"first_page":63,"last_page":63,"pdf_url":8272,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8273,"paper_type":860,"authors":8274,"abstract":8281},"lrec2004-main-450","Enriching a Thai Lexical Database with Selectional Preferences","10.63317\u002F2p2537km4b46","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-450","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F698.pdf","kruengkrai-etal-2004-enriching",[8275,8278,8279,8280],{"paper_id":8268,"author_seq":247,"given_name":8276,"surname":8277,"affiliation":63,"orcid":63},"Canasai","Kruengkrai",{"paper_id":8268,"author_seq":232,"given_name":5159,"surname":5160,"affiliation":63,"orcid":63},{"paper_id":8268,"author_seq":218,"given_name":5162,"surname":5163,"affiliation":63,"orcid":63},{"paper_id":8268,"author_seq":203,"given_name":5171,"surname":5172,"affiliation":63,"orcid":63},"A statistical corpus-based approach for acquiring selectional preferences of verbs is proposed. By parsing through text corpora, we obtain examples of context nouns that are considered to be the selectional preferences of a given verb. The approach is to generalize initial noun classes to the most appropriate levels on a semantic hierarchy. We present an iterative algorithm for generalization by combining an agglomerative merging and a model selection technique called the Bayesian Information Criterion (BIC). In our experiments, we consider the Web as the large corpora. We also propose approaches for extracting examples from the Web. 
Preliminary experimental results are given to show the feasibility and effectiveness of our approach.",{"paper_id":8283,"title":8284,"year":197,"month":855,"day":63,"doi":8285,"resource_url":8286,"first_page":63,"last_page":63,"pdf_url":8287,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8288,"paper_type":860,"authors":8289,"abstract":8292},"lrec2004-main-451","Results of the 2003 Topic Detection and Tracking Evaluation","10.63317\u002F4wer2u479phe","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-451","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F702.pdf","fiscus-2004-results",[8290],{"paper_id":8283,"author_seq":247,"given_name":8291,"surname":1668,"affiliation":63,"orcid":63},"Jonathan G.","The National Institute of Standards and Technology (NIST) administered the sixth open evaluation of Topic Detection and Tracking (TDT) technologies in November of 2003. The TDT project supports development of technologies that automatically organize event-related news stories. The program leverages expertise in core technologies, Automatic Speech Recognition (ASR), Document Retrieval (DR), and Machine Translation (MT) to build the TDT technologies. The participants in the 2003 TDT project built systems that organized broadcast news and newswire stories collected from Arabic, English and Mandarin Chinese sources. There were four evaluation tasks in the 2003 evaluation: new event detection, topic detection, topic tracking and link detection. The 2004 TDT Evaluation is scheduled for Fall 2004. The evaluation will focus on the core task of monolingual TDT tasks. The community is also experimenting with a new evaluation task, Hierarchical Topic Detection (HTD). 
HTD aims to overcome the   problems with the topic detection task – specifically topic granularity and multiple-topic stories.",{"paper_id":8294,"title":8295,"year":197,"month":855,"day":63,"doi":8296,"resource_url":8297,"first_page":63,"last_page":63,"pdf_url":8298,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8299,"paper_type":860,"authors":8300,"abstract":8304},"lrec2004-main-452","Parsing Ungrammatical Input: an Evaluation Procedure","10.63317\u002F3vjm8zsqfw8p","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-452","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F703.pdf","foster-2004-parsing",[8301],{"paper_id":8294,"author_seq":247,"given_name":8302,"surname":8303,"affiliation":63,"orcid":63},"Jennifer","Foster","This paper presents a procedure for evaluating a parser's ability to produce an accurate parse for an ungrammatical sentence. It is based on the existence of a corpus of ungrammatical sentences, and a parallel corpus containing corrected, and hence grammatical, versions of the sentences in the first corpus. 
This procedure is applied to a wide-coverage probabilistic parser (Charniak, 2002), and the performance of this parser with respect to ungrammatical input is analysed.",{"paper_id":8306,"title":8307,"year":197,"month":855,"day":63,"doi":8308,"resource_url":8309,"first_page":63,"last_page":63,"pdf_url":8310,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8311,"paper_type":860,"authors":8312,"abstract":8319},"lrec2004-main-453","An Automatic Method for Constructing Domain-Specific Ontology Resources","10.63317\u002F2sbrdj4ionau","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-453","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F705.pdf","degeratu-hatzivassiloglou-2004-automatic",[8313,8316],{"paper_id":8306,"author_seq":247,"given_name":8314,"surname":8315,"affiliation":63,"orcid":63},"Melania","Degeratu",{"paper_id":8306,"author_seq":232,"given_name":8317,"surname":8318,"affiliation":63,"orcid":63},"Vasileios","Hatzivassiloglou","Data flow across multiple independent applications and further natural language analysis both require the establishment of a common foundation of terms and relations. Such a foundation can provide in-depth understanding of term equivalence within a domain sublanguage, and serve as a model of concept relations and dependencies. In this paper we discuss a domain-independent, corpus-based method for dictionary-less automatic extraction of ontological knowledge from domain-specific unannotated documents. We present the architecture, algorithms, and results for OntoStruct - a system that uses machine learning and statistical techniques to analyze text sources, discover terms, link equivalent terms into concepts, and learn both hierarchical and non-hierarchical conceptual relations. 
We report on OntoStruct's results in constructing domain-specific ontological resources and empirical evaluation of their quality.",{"paper_id":8321,"title":8322,"year":197,"month":855,"day":63,"doi":8323,"resource_url":8324,"first_page":63,"last_page":63,"pdf_url":8325,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8326,"paper_type":860,"authors":8327,"abstract":8342},"lrec2004-main-454","A Lexicon Module for a Grammar Development Environment","10.63317\u002F4go6vsmmg4zj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-454","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F706.pdf","copestake-etal-2004-lexicon",[8328,8331,8334,8336,8338,8340],{"paper_id":8321,"author_seq":247,"given_name":8329,"surname":8330,"affiliation":63,"orcid":63},"Ann","Copestake",{"paper_id":8321,"author_seq":232,"given_name":8332,"surname":8333,"affiliation":63,"orcid":63},"Fabre","Lambeau",{"paper_id":8321,"author_seq":218,"given_name":1900,"surname":8335,"affiliation":63,"orcid":63},"Waldron",{"paper_id":8321,"author_seq":203,"given_name":7330,"surname":8337,"affiliation":63,"orcid":63},"Bond",{"paper_id":8321,"author_seq":188,"given_name":2858,"surname":8339,"affiliation":63,"orcid":63},"Flickinger",{"paper_id":8321,"author_seq":172,"given_name":1560,"surname":8341,"affiliation":63,"orcid":63},"Oepen","Past approaches to developing an effective lexicon component in a grammar development environment have suffered from a number of usability and efficiency issues. We present a lexical database module currently in use by a number of grammar development projects. 
The database module presented addresses issues which have caused problems in the past and the power of a database architecture provides a number of practical advantages as well as a solid framework for future extension.",{"paper_id":8344,"title":8345,"year":197,"month":855,"day":63,"doi":8346,"resource_url":8347,"first_page":63,"last_page":63,"pdf_url":8348,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8349,"paper_type":860,"authors":8350,"abstract":8353},"lrec2004-main-455","Modelling Legitimate Translation Variation for Automatic Evaluation of MT Quality","10.63317\u002F4277ctpdu2tn","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-455","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F707.pdf","babych-hartley-2004-modelling",[8351,8352],{"paper_id":8344,"author_seq":247,"given_name":6444,"surname":8012,"affiliation":63,"orcid":63},{"paper_id":8344,"author_seq":232,"given_name":2819,"surname":8016,"affiliation":63,"orcid":63},"Automatic methods for MT evaluation are often based on the assumption that MT quality is related to some kind of distance between the evaluated text and a professional human translation (e.g., an edit distance or the precision of matched N-grams). However, independently produced human translations are necessarily different, conveying the same content by dissimilar means. Such legitimate translation variation is a serious problem for distance-based evaluation methods, because mismatches do not necessarily mean degradation in MT quality. In this paper we explore the link between legitimate translation variation and statistical measures of a word's salience within a given document, such as tf.idf scores. We show that the use of such scores extends the N-gram distance measures in a way that allows us to accurately predict multiple quality parameters of the text, such as translation adequacy and fluency. 
However legitimate translation variation also reveals fundamental limits on the applicability of distance-based MT evaluation methods and on data-driven architectures for MT.",{"paper_id":8355,"title":8356,"year":197,"month":855,"day":63,"doi":8357,"resource_url":8358,"first_page":63,"last_page":63,"pdf_url":8359,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8360,"paper_type":860,"authors":8361,"abstract":8373},"lrec2004-main-456","Semantic Mark-up of Italian Legal Texts Through NLP-based Techniques","10.63317\u002F5kjswoxvhpdi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-456","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F709.pdf","bartolini-etal-2004-semantic",[8362,8364,8365,8368,8371],{"paper_id":8355,"author_seq":247,"given_name":1746,"surname":8363,"affiliation":63,"orcid":63},"Bartolini",{"paper_id":8355,"author_seq":232,"given_name":1743,"surname":6337,"affiliation":63,"orcid":63},{"paper_id":8355,"author_seq":218,"given_name":8366,"surname":8367,"affiliation":63,"orcid":63},"Simonetta","Montemagni",{"paper_id":8355,"author_seq":203,"given_name":8369,"surname":8370,"affiliation":63,"orcid":63},"Vito","Pirrelli",{"paper_id":8355,"author_seq":188,"given_name":8239,"surname":8372,"affiliation":63,"orcid":63},"Soria","In this paper we illustrate an approach to information extraction from legal texts using SALEM. SALEM is an NLP architecture for semantic annotation and indexing of Italian legislative texts, developed by ILC in close collaboration with ITTIG-CNR, Florence. 
Results of SALEM performance on a test sample of about 500 Italian law paragraphs are provided.",{"paper_id":8375,"title":8376,"year":197,"month":855,"day":63,"doi":8377,"resource_url":8378,"first_page":63,"last_page":63,"pdf_url":8379,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8380,"paper_type":860,"authors":8381,"abstract":8389},"lrec2004-main-457","Morphology Based Automatic Acquisition of Large-coverage Lexica","10.63317\u002F3g5qu9artced","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-457","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F711.pdf","clement-etal-2004-morphology",[8382,8383,8386],{"paper_id":8375,"author_seq":247,"given_name":8150,"surname":8151,"affiliation":63,"orcid":63},{"paper_id":8375,"author_seq":232,"given_name":8384,"surname":8385,"affiliation":63,"orcid":63},"Benoît","Sagot",{"paper_id":8375,"author_seq":218,"given_name":8387,"surname":8388,"affiliation":63,"orcid":63},"Bernard","Lang","In this article, we introduce a new technique for constructing wide-coverage morphological lexica from large corpora and morphological knowledge, with an application to French. Basically, it relies on the idea that the existence of a hypothetical lemma can be guessed if several different words found in the corpus are best interpreted as morphological variants of this lemma. We first validated our technique by extracting verbs and adjectives on a general French corpus of 25 million words. Compared with other lexical resources available for French, our results are very satisfying, since we cover many words, often derived words, that are not always present in other lexica. Application of our algorithm to the acquisition of domain-specific adjectives on a botanic corpus gave also very good results, thus demonstrating its usability to extract domain-specific lexica. Moreover, it is generalizable to any language with a substantial morphology. 
Part of the resulting lexicon (currently verbal forms) is already freely available on http:\u002F\u002Fwww.lefff.net\u002F.",{"paper_id":8391,"title":8392,"year":197,"month":855,"day":63,"doi":8393,"resource_url":8394,"first_page":63,"last_page":63,"pdf_url":8395,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8396,"paper_type":860,"authors":8397,"abstract":8400},"lrec2004-main-458","Towards Intelligent Written Cultural Heritage Processing - Lexical processing","10.63317\u002F2czarnbjvd3q","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-458","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F712.pdf","ribarov-2004-towards",[8398],{"paper_id":8391,"author_seq":247,"given_name":3102,"surname":8399,"affiliation":63,"orcid":63},"Ribarov","Through ACT (Annotated Corpora of Text) software package for lexical and corpus processing of European written cultural sources (currently used for processing of mediaeval Slavonic manuscripts) this work presents another step forward towards a contextual and intelligent heritage Information Technology framework. ACT is suitable for capturing characteristics of old written sources including rich language variability on word and sentential level. It is not the word-form, but its \"understandings\" that become central processing units, which can be assigned morphology distinctions, head-words (including recensional), translation equivalents, multi-word units, and correlation to other sources. The whole annotation process is automated, and individual sorting orders and morphology tags structures can be defined. 
ACT incorporates modules for: complex searches on one or more sources, creation of various ready-to-use documents, web text and image access, incorporation of lexical card-files into a corpus, and text-from-card-files reconstruction.",{"paper_id":8402,"title":8403,"year":197,"month":855,"day":63,"doi":8404,"resource_url":8405,"first_page":63,"last_page":63,"pdf_url":8406,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8407,"paper_type":860,"authors":8408,"abstract":8417},"lrec2004-main-459","Developing Language Resources for a Transnational Digital Government System","10.63317\u002F3vhmyz2ftpyh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-459","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F713.pdf","cavalli-sforza-etal-2004-developing",[8409,8412,8414],{"paper_id":8402,"author_seq":247,"given_name":8410,"surname":8411,"affiliation":63,"orcid":63},"Violetta","Cavalli-Sforza",{"paper_id":8402,"author_seq":232,"given_name":8413,"surname":8172,"affiliation":63,"orcid":63},"Jaime G.",{"paper_id":8402,"author_seq":218,"given_name":8415,"surname":8416,"affiliation":63,"orcid":63},"Peter J.","Jansen","We describe ongoing efforts towards developing language resources for a transnational digital government project aimed at applying information technology (IT) to a problem of international concern: detecting and monitoring activities related to the transnational movement of illicit drugs. The project seeks to support information sharing, coordination and collaboration among government agencies within a country and across national boundaries by combining a variety of technologies including a distributed query processor with form-based and conversational user interfaces, a language translation system, an event server for event filtering and notification, and an event-trigger-rule server. The prototype system is being developed by U.S. 
universities in collaboration with an international agency and with universities and government agencies in Belize and the Dominican Republic. This paper focuses on the linguistic resources and their use in Example-Based Machine Translation (EBMT). We are in the process of developing an English-Spanish parallel corpus, focused on the domain of information elicited and used at border crossings, to fuel the EBMT system. While significant parallel corpora are available for these two languages in the newswire domain, they were found to be of very limited use for the border crossings application, spurring the need to develop our own resources.",{"paper_id":8419,"title":8420,"year":197,"month":855,"day":63,"doi":8421,"resource_url":8422,"first_page":63,"last_page":63,"pdf_url":8423,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8424,"paper_type":860,"authors":8425,"abstract":8438},"lrec2004-main-460","Semi-automatic Syntactic and Semantic Corpus Annotation with a Deep Parser","10.63317\u002F3rbv9caf8p4o","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-460","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F714.pdf","swift-etal-2004-semi",[8426,8429,8432,8435],{"paper_id":8419,"author_seq":247,"given_name":8427,"surname":8428,"affiliation":63,"orcid":63},"Mary D.","Swift",{"paper_id":8419,"author_seq":232,"given_name":8430,"surname":8431,"affiliation":63,"orcid":63},"Myroslava O.","Dzikovska",{"paper_id":8419,"author_seq":218,"given_name":8433,"surname":8434,"affiliation":63,"orcid":63},"Joel R.","Tetreault",{"paper_id":8419,"author_seq":203,"given_name":8436,"surname":8437,"affiliation":63,"orcid":63},"James F.","Allen","We describe a semi-automatic method for linguistically rich corpus annotation using a broad-coverage deep parser to generate syntactic structure, semantic representation and discourse information for task-oriented dialogs. The parser-generated analyses are checked by trained annotators. 
Incomplete coverage and incorrect analyses are addressed through lexicon and grammar development, after which the dialogs undergo another cycle of parsing and checking. Currently we have 85% correct annotations in our emergency rescue task domain and 70% in our medication scheduling domain. This iterative process of corpus annotation allows us to create domain-specific gold-standard corpora for test suites and corpus-based experiments as part of general system development.",{"paper_id":8440,"title":8441,"year":197,"month":855,"day":63,"doi":8442,"resource_url":8443,"first_page":63,"last_page":63,"pdf_url":8444,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8445,"paper_type":860,"authors":8446,"abstract":8457},"lrec2004-main-461","Collecting and Sharing Bilingual Spontaneous Speech Corpora: the ChinFaDial Experiment","10.63317\u002F4fnbjsnjy7pz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-461","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F715.pdf","fafiotte-etal-2004-collecting",[8447,8450,8452,8454],{"paper_id":8440,"author_seq":247,"given_name":8448,"surname":8449,"affiliation":63,"orcid":63},"Georges","Fafiotte",{"paper_id":8440,"author_seq":232,"given_name":2000,"surname":8451,"affiliation":63,"orcid":63},"Boitet",{"paper_id":8440,"author_seq":218,"given_name":1024,"surname":8453,"affiliation":63,"orcid":63},"Seligman",{"paper_id":8440,"author_seq":203,"given_name":8455,"surname":8456,"affiliation":63,"orcid":63},"Chengqing","Zong","We describe here the three main platforms in the ERIM family of Web-based environments for human interpreting, two of them in more details – ERIM-Interp and ERIM-Collect –, then ERIM-Aid. Each platform supports an aspect of the collecting or study of spontaneous bilingual dialogues, translated by an interpreter. ERIM-Interp is the core environment, providing mediated communication between speakers and human interpreters over the network. 
Using ERIM-Collect, French-Chinese interpreting data have been collected within the three-year \"ChinFaDial\" project supported by LIAMA, a French-Chinese laboratory in Beijing. These \"raw\" speech data will be made available in the spring of 2004 on an open-access basis, using the DistribDial server, on a CLIPS-GETA website. Our goal is to extend such corpora, on a collaborative scheme, to allow other research groups to contribute to the site whatever annotations they may have created, and to share them under the same conditions (GPL). An ERIM-Aid variant is intended to provide focused machine aids to human interpreters working over the Web, or possibly to distant monolingual speakers conversing in different languages.",{"paper_id":8459,"title":8460,"year":197,"month":855,"day":63,"doi":8461,"resource_url":8462,"first_page":63,"last_page":63,"pdf_url":8463,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8464,"paper_type":860,"authors":8465,"abstract":8474},"lrec2004-main-462","Can Anaphoric Definite Descriptions be Replaced by Pronouns?","10.63317\u002F37zi6av5xohm","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-462","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F718.pdf","preiss-etal-2004-anaphoric",[8466,8469,8471],{"paper_id":8459,"author_seq":247,"given_name":8467,"surname":8468,"affiliation":63,"orcid":63},"Judita","Preiss",{"paper_id":8459,"author_seq":232,"given_name":4214,"surname":8470,"affiliation":63,"orcid":63},"Gasperin",{"paper_id":8459,"author_seq":218,"given_name":8472,"surname":8473,"affiliation":63,"orcid":63},"Ted","Briscoe","We investigate the hypothesis that a pronoun is used in discourse, when its antecedent is in the focus of the discourse. We create a corpus of `replaced anaphoric definite descriptions', where occurrences of definite descriptions are replaced with a corresponding pronoun. 
We use the (Lappin, 1994) anaphora resolution algorithm on the new corpus, and obtain a much lower performance than when the corpus only contains genuine pronouns, thus supporting the hypothesis.",{"paper_id":8476,"title":8477,"year":197,"month":855,"day":63,"doi":8478,"resource_url":8479,"first_page":63,"last_page":63,"pdf_url":8480,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8481,"paper_type":860,"authors":8482,"abstract":8487},"lrec2004-main-463","Hybrid Constraints for Robust Parsing: First Experiments and Evaluation","10.63317\u002F4tic9s5p5892","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-463","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F719.pdf","bartolini-etal-2004-hybrid",[8483,8484,8485,8486],{"paper_id":8476,"author_seq":247,"given_name":1746,"surname":8363,"affiliation":63,"orcid":63},{"paper_id":8476,"author_seq":232,"given_name":1743,"surname":6337,"affiliation":63,"orcid":63},{"paper_id":8476,"author_seq":218,"given_name":8366,"surname":8367,"affiliation":63,"orcid":63},{"paper_id":8476,"author_seq":203,"given_name":8369,"surname":8370,"affiliation":63,"orcid":63},"In this paper we present IDEAL+, a parsing architecture for Italian, which pursues the goal of pairing robustness with deep linguistic analysis by extending a shallow processing kernel with a pool of hybrid constraints for the incremental identification of grammatical relations. The parsing output takes the form of dependency structures representing the full range of instantiated functional relations (e.g. subject, object, modifier, complement, etc.). 
The paper focuses on nature and interaction of the battery of hybrid constraints and evaluates their joint impact against a gold standard of more than 700 manually annotated sentences.",{"paper_id":8489,"title":8490,"year":197,"month":855,"day":63,"doi":8491,"resource_url":8492,"first_page":63,"last_page":63,"pdf_url":8493,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8494,"paper_type":860,"authors":8495,"abstract":8502},"lrec2004-main-464","E-Wiz: a Trapper Protocol for Hunting the Expressive Speech Corpora in Lab","10.63317\u002F5jpj7oz3soo9","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-464","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F721.pdf","auberge-etal-2004-e",[8496,8498,8500],{"paper_id":8489,"author_seq":247,"given_name":4098,"surname":8497,"affiliation":63,"orcid":63},"Aubergé",{"paper_id":8489,"author_seq":232,"given_name":6596,"surname":8499,"affiliation":63,"orcid":63},"Audibert",{"paper_id":8489,"author_seq":218,"given_name":5568,"surname":8501,"affiliation":63,"orcid":63},"Rilliard","The affects are expressed in different levels of speech: meta-linguistic (expressiveness), linguistic (attitudes), both anchored in the “linguistic time”, and para-linguistic (emotions expressions) that is anchored in the timing of the events that cause the emotion. In an experimental approach, the corpora are the basis of analysis. Most of emotional corpus have been produced by acting\u002Felicitating speakers on one hand (with a possible strong control), and on the other hand they have been collected in “real-life”. This paper proposes both a Wizard of Oz method for emotional induction and some tools (the E-Wiz platform and a pseudo language-learning software, the Sound Teacher scenario) in order to control the production of authentic data at the paralinguistic level of affects. 
The proposed protocol has been applied up to now to trap 17 subjects, including actors who reproduced the same utterances with acting methods. The labeling of emotional states was performed by the subjects themselves and is being perceptually validated.",{"paper_id":8504,"title":8505,"year":197,"month":855,"day":63,"doi":8506,"resource_url":8507,"first_page":63,"last_page":63,"pdf_url":8508,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8509,"paper_type":860,"authors":8510,"abstract":8516},"lrec2004-main-465","Agreement in Human Factoid Annotation for Summarization Evaluation","10.63317\u002F3ksw7ia96mie","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-465","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F723.pdf","teufel-van-halteren-2004-agreement",[8511,8514],{"paper_id":8504,"author_seq":247,"given_name":8512,"surname":8513,"affiliation":63,"orcid":63},"Simone","Teufel",{"paper_id":8504,"author_seq":232,"given_name":874,"surname":8515,"affiliation":63,"orcid":63},"van Halteren","Factoid analysis was introduced by van Halteren and Teufel (2003) as an objective, yet semantics-oriented way of measuring overlap of information rather than surface strings in summaries. 
In this paper, we report on annotation experiments with two sets of summaries, and on a factoid-pairing program which finds correlations between factoids semi-automatically.",{"paper_id":8518,"title":8519,"year":197,"month":855,"day":63,"doi":8520,"resource_url":8521,"first_page":63,"last_page":63,"pdf_url":8522,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8523,"paper_type":860,"authors":8524,"abstract":8528},"lrec2004-main-466","Evaluating an Authentic Audio-Visual Expressive Speech Corpus","10.63317\u002F2u2k7ctata39","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-466","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F724.pdf","rilliard-etal-2004-evaluating",[8525,8526,8527],{"paper_id":8518,"author_seq":247,"given_name":5568,"surname":8501,"affiliation":63,"orcid":63},{"paper_id":8518,"author_seq":232,"given_name":4098,"surname":8497,"affiliation":63,"orcid":63},{"paper_id":8518,"author_seq":218,"given_name":6596,"surname":8499,"affiliation":63,"orcid":63},"This paper presents an evaluation of the acted part of an audio-visual corpus of emotional speech. This corpus is intended to collect both spontaneous and acted emotions, and then the perceptive efficiency of stimuli to carry emotional expression has to be rated. The evaluation of acted speech is presented here, and will give us a scale to measure the spontaneous expressions.",{"paper_id":8530,"title":8531,"year":197,"month":855,"day":63,"doi":8532,"resource_url":8533,"first_page":63,"last_page":63,"pdf_url":8534,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8535,"paper_type":860,"authors":8536,"abstract":8551},"lrec2004-main-467","The Italian NESPOLE! 
Corpus: a Multilingual Database with Interlingua Annotation in Tourism and Medical Domains","10.63317\u002F4ajk5g5nxuny","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-467","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F725.pdf","mana-etal-2004-italian",[8537,8540,8543,8546,8547,8549],{"paper_id":8530,"author_seq":247,"given_name":8538,"surname":8539,"affiliation":63,"orcid":63},"Nadia","Mana",{"paper_id":8530,"author_seq":232,"given_name":8541,"surname":8542,"affiliation":63,"orcid":63},"Roldano","Cattoni",{"paper_id":8530,"author_seq":218,"given_name":8544,"surname":8545,"affiliation":63,"orcid":63},"Emanuele","Pianta",{"paper_id":8530,"author_seq":203,"given_name":1215,"surname":7713,"affiliation":63,"orcid":63},{"paper_id":8530,"author_seq":188,"given_name":3776,"surname":8548,"affiliation":63,"orcid":63},"Pianesi",{"paper_id":8530,"author_seq":172,"given_name":1172,"surname":8550,"affiliation":63,"orcid":63},"Burger","This paper presents the Italian NESPOLE! Database. The database consists of three parts: The first two, called DB-1 and DB-2 concern the tourism domain, while the third part, DB-3, concentrates on the medical domain. The database includes audio files, transcriptions, Interlingua annotations in IF (Interchange Format) and translations into English, French and German. 
We describe how the database was built (data collection set-up, scenarios, recording procedure, data transcription and annotation) and statistically illustrate the corpus by providing a data analysis focused on language and spontaneous phenomena.",{"paper_id":8553,"title":8554,"year":197,"month":855,"day":63,"doi":8555,"resource_url":8556,"first_page":63,"last_page":63,"pdf_url":8557,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8558,"paper_type":860,"authors":8559,"abstract":8568},"lrec2004-main-468","Linguistic Miner: An Italian Linguistic Knowledge System","10.63317\u002F4ro3v86erjce","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-468","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F726.pdf","picchi-etal-2004-linguistic",[8560,8561,8562,8565,8566],{"paper_id":8553,"author_seq":247,"given_name":2991,"surname":2992,"affiliation":63,"orcid":63},{"paper_id":8553,"author_seq":232,"given_name":5776,"surname":5777,"affiliation":63,"orcid":63},{"paper_id":8553,"author_seq":218,"given_name":8563,"surname":8564,"affiliation":63,"orcid":63},"Sebastiana","Cucurullo",{"paper_id":8553,"author_seq":203,"given_name":5779,"surname":5780,"affiliation":63,"orcid":63},{"paper_id":8553,"author_seq":188,"given_name":1289,"surname":8567,"affiliation":63,"orcid":63},"Sassolini","Linguistic Miner is a project carried out at ILC whose objective is the development of an integrated system to build, organise and manage a corpus of Italian texts (of various origins and formats), and to design and constantly add new tools for the automatic extraction of tiered linguistic knowledge to be made available for many teaching, publishing, and other cultural purposes. 
The project is based on a notion that is preliminary to all the systems for corpus-based linguistic analysis: a language represented by the largest possible collection of heterogeneous texts is the best source of linguistic information at any level of analysis considered. The first goals of such a system are the semi-automated construction of an Italian data mine for the extraction of linguistic information, the validation of linguistic patterns, the installation of useful tools and resources for a range of different categories of Italian language users. The main feature of the project is its purpose of building large language reference corpora allowing for the creation and use of effective tools for the handling and processing, as well as the automatic linguistic synthesis, of such corpora.",{"paper_id":8570,"title":8571,"year":197,"month":855,"day":63,"doi":8572,"resource_url":8573,"first_page":63,"last_page":63,"pdf_url":8574,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8575,"paper_type":860,"authors":8576,"abstract":8583},"lrec2004-main-469","Metaphors in Wordnets: From Theory to Practice","10.63317\u002F4qrytm52dtkv","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-469","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F728.pdf","alonge-lonneker-2004-metaphors",[8577,8580],{"paper_id":8570,"author_seq":247,"given_name":8578,"surname":8579,"affiliation":63,"orcid":63},"Antonietta","Alonge",{"paper_id":8570,"author_seq":232,"given_name":8581,"surname":8582,"affiliation":63,"orcid":63},"Birte","Lönneker","WordNet-like resources enhanced with a more systematic, consistent and efficient way of encoding data related to metaphors should improve the performance of a number of applications. 
As for the methodology to be followed, we stress two basic points: 1) the necessity of adding corpora as data sources and 2) the necessity of adopting a well-established and generally accepted theoretical framework like the one proposed by Lakoff and Johnson. The main part of this paper discusses how the practical work of adding information on metaphors to wordnets should be carried out. We propose a detailed analysis of one conceptual metaphor and its actual and possible representation within EWN. We also provide examples of how the results obtained could enhance the performance of applications using wordnets containing such information.",{"paper_id":8585,"title":8586,"year":197,"month":855,"day":63,"doi":8587,"resource_url":8588,"first_page":63,"last_page":63,"pdf_url":8589,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8590,"paper_type":860,"authors":8591,"abstract":8594},"lrec2004-main-470","Standardization in Multimodal Content Representation: Some Methodological Issues","10.63317\u002F3emrqzinw45h","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-470","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F729.pdf","bunt-romary-2004-standardization",[8592,8593],{"paper_id":8585,"author_seq":247,"given_name":8147,"surname":8148,"affiliation":63,"orcid":63},{"paper_id":8585,"author_seq":232,"given_name":1175,"surname":1176,"affiliation":63,"orcid":63},"In this paper we discuss some basic methodological considerations of the activities undertaken in the ACL-SIGSEM Working Group on the Representation of Multimodal Semantic Information. 
This independent expert group was founded on the instigation of the International Organisation for Standardisation ISO for investigating the possibilities to develop well-founded guidelines for the representation and annotation of semantic information in multimodal contexts, with the aim to support the interoperability and reuse of multimodal and language resources.",{"paper_id":8596,"title":8597,"year":197,"month":855,"day":63,"doi":8598,"resource_url":8599,"first_page":63,"last_page":63,"pdf_url":8600,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8601,"paper_type":860,"authors":8602,"abstract":8606},"lrec2004-main-471","A Similarity Measure for Unsupervised Semantic Disambiguation","10.63317\u002F429yb25hzfks","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-471","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F732.pdf","basili-etal-2004-similarity",[8603,8604,8605],{"paper_id":8596,"author_seq":247,"given_name":1746,"surname":7801,"affiliation":63,"orcid":63},{"paper_id":8596,"author_seq":232,"given_name":3005,"surname":7811,"affiliation":63,"orcid":63},{"paper_id":8596,"author_seq":218,"given_name":8098,"surname":7803,"affiliation":63,"orcid":63},"In this paper we propose a similarity measure aimed to support an unsupervised approach to semantic tagging. This proposal represents a variant of the notion of \"Conceptual Density\" previously suggested as a tool for sense disambiguation. However, the major difference is the learning framework in which this measure applied to the Wordnet hierarchy enables a \"natural\" corpus-driven empirical estimation of lexical and contextual probabilities for probabilistic semantic tagging. Experimental results over an hand-annotated portion of the British National Corpus (about 5 M words) are also discussed. 
Although below the results obtained by a supervised method (Maximum Entropy trained over hand labelled data), the proposed unsupervised tagger confirms the effectiveness of the proposed metric as well as showing a promising research direction.",{"paper_id":8608,"title":8609,"year":197,"month":855,"day":63,"doi":8610,"resource_url":8611,"first_page":63,"last_page":63,"pdf_url":8612,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8613,"paper_type":860,"authors":8614,"abstract":8618},"lrec2004-main-472","Usability Evaluation of Multimodal and Domain-Oriented Spoken Language Dialogue Systems","10.63317\u002F54r3k6czp4em","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-472","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F733.pdf","dybkjaer-etal-2004-usability",[8615,8616,8617],{"paper_id":8608,"author_seq":247,"given_name":1956,"surname":1954,"affiliation":63,"orcid":63},{"paper_id":8608,"author_seq":232,"given_name":5916,"surname":5917,"affiliation":63,"orcid":63},{"paper_id":8608,"author_seq":218,"given_name":1531,"surname":1532,"affiliation":63,"orcid":63},"Considerable work has been done regarding usability evaluation of task-oriented unimodal spoken language dialogue systems (SLDSs). However, there are still important gaps in our knowledge even in this area. If we move to multimodal task-oriented SLDSs, there are more challenges ahead primarily due to the combination of different modalities. For non-task-oriented conversational SLDSs, a major challenge is the new evaluation issues brought up by the nature of conversation. 
This paper presents a state-of-the-art in usability evaluation of these new types of SLDSs and conclusions based on the experience we have today.",{"paper_id":8620,"title":8621,"year":197,"month":855,"day":63,"doi":8622,"resource_url":8623,"first_page":63,"last_page":63,"pdf_url":8624,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8625,"paper_type":860,"authors":8626,"abstract":8636},"lrec2004-main-473","Using WordNet to Measure Semantic Orientations of Adjectives","10.63317\u002F222z3zcs9izx","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-473","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F734.pdf","kamps-etal-2004-using",[8627,8630,8632,8635],{"paper_id":8620,"author_seq":247,"given_name":8628,"surname":8629,"affiliation":63,"orcid":63},"Jaap","Kamps",{"paper_id":8620,"author_seq":232,"given_name":8196,"surname":8631,"affiliation":63,"orcid":63},"Marx",{"paper_id":8620,"author_seq":218,"given_name":8633,"surname":8634,"affiliation":63,"orcid":63},"Robert J.","Mokken",{"paper_id":8620,"author_seq":203,"given_name":8196,"surname":8197,"affiliation":63,"orcid":63},"Current WordNet-based measures of distance or similarity focus almost exclusively on WordNet's taxonomic relations. This effectively restricts their applicability to the syntactic categories of noun and verb. We investigate a graph-theoretic model of WordNet's most important relation--synonymy--and propose measures that determine the semantic orientation of adjectives for three factors of subjective meaning. 
Evaluation against human judgments shows the effectiveness of the resulting measures.",{"paper_id":8638,"title":8639,"year":197,"month":855,"day":63,"doi":8640,"resource_url":8641,"first_page":63,"last_page":63,"pdf_url":8642,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8643,"paper_type":860,"authors":8644,"abstract":8656},"lrec2004-main-474","MT Goes Farming: Comparing Two Machine Translation Approaches on a New Domain","10.63317\u002F4cfdymduh9ka","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-474","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F735.pdf","weijnitz-etal-2004-mt",[8645,8648,8650,8653,8655],{"paper_id":8638,"author_seq":247,"given_name":8646,"surname":8647,"affiliation":63,"orcid":63},"Per","Weijnitz",{"paper_id":8638,"author_seq":232,"given_name":1289,"surname":8649,"affiliation":63,"orcid":63},"Forsbom",{"paper_id":8638,"author_seq":218,"given_name":8651,"surname":8652,"affiliation":63,"orcid":63},"Ebba","Gustavii",{"paper_id":8638,"author_seq":203,"given_name":1289,"surname":8654,"affiliation":63,"orcid":63},"Pettersson",{"paper_id":8638,"author_seq":188,"given_name":3790,"surname":3791,"affiliation":63,"orcid":63},"In the paper we present detailed analyses of two machine translation systems when applied to documents of a previously unseen domain: agricultural texts from the European Union. The two systems compared are a statistical machine translation (SMT) system using the freely available ISI ReWrite Decoder, and the rule-based machine translation system MATS. For the purpose of comparison we use a sentence-aligned Swedish-English corpus of approximately 75,000 words per language, where 90% are used for training and 10% are used for evaluation. 
In the paper we discuss the outcome of automatic evaluation and the results of our manual quality assessment.",{"paper_id":8658,"title":8659,"year":197,"month":855,"day":63,"doi":8660,"resource_url":8661,"first_page":63,"last_page":63,"pdf_url":8662,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8663,"paper_type":860,"authors":8664,"abstract":8671},"lrec2004-main-475","VOXMEX Speech Database: Design of a Phonetically Balanced Corpus","10.63317\u002F2v24ntfogy93","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-475","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F736.pdf","uraga-gamboa-2004-voxmex",[8665,8668],{"paper_id":8658,"author_seq":247,"given_name":8666,"surname":8667,"affiliation":63,"orcid":63},"Esmeralda","Uraga",{"paper_id":8658,"author_seq":232,"given_name":8669,"surname":8670,"affiliation":63,"orcid":63},"César","Gamboa","We present a method for designing a phonetically balanced speech corpus. In this method, we used a phonotactic approach to design the phonetic content of VOXMEX: a phonetically balanced corpus for Mexican Spanish. The transcriptions of VOXMEX contain a complete coverage of phonemes and allophones of Mexican Spanish in every possible context. This corpus is designed for doing phonetic research and acoustic modeling in the speech recognition area. We are recording the readings of the designed text corpus to obtain the speech data of VOXMEX. 
Our main goal in this project is to construct a phonologically representative speech corpus for Mexican Spanish.",{"paper_id":8673,"title":8674,"year":197,"month":855,"day":63,"doi":8675,"resource_url":8676,"first_page":63,"last_page":63,"pdf_url":8677,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8678,"paper_type":860,"authors":8679,"abstract":63},"lrec2004-main-476","Data Driven Ontology Evaluation","10.63317\u002F4f6s9jyb29f4","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-476","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F737.pdf","brewster-etal-2004-data",[8680,8682,8685,8688],{"paper_id":8673,"author_seq":247,"given_name":3852,"surname":8681,"affiliation":63,"orcid":63},"Brewster",{"paper_id":8673,"author_seq":232,"given_name":8683,"surname":8684,"affiliation":63,"orcid":63},"Harith","Alani",{"paper_id":8673,"author_seq":218,"given_name":8686,"surname":8687,"affiliation":63,"orcid":63},"Srinandan","Dasmahapatra",{"paper_id":8673,"author_seq":203,"given_name":1456,"surname":1457,"affiliation":63,"orcid":63},{"paper_id":8690,"title":8691,"year":197,"month":855,"day":63,"doi":8692,"resource_url":8693,"first_page":63,"last_page":63,"pdf_url":8694,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8695,"paper_type":860,"authors":8696,"abstract":8700},"lrec2004-main-477","Embedding IMDI Metadata into a Large Phonetic Corpus","10.63317\u002F34xvruewvh49","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-477","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F738.pdf","schonefeld-milde-2004-embedding",[8697,8699],{"paper_id":8690,"author_seq":247,"given_name":5405,"surname":8698,"affiliation":63,"orcid":63},"Schonefeld",{"paper_id":8690,"author_seq":232,"given_name":6917,"surname":6918,"affiliation":63,"orcid":63},"The paper shows the set up of a large phonetic corpus (the LeaP corpus), how its metadata is structured and transformed 
into an extended IMDI\u002FISLE metadata structure, how this structure has been transcoded into the TASX metadata format and finally has been integrated into the LeaP corpus.",{"paper_id":8702,"title":8703,"year":197,"month":855,"day":63,"doi":8704,"resource_url":8705,"first_page":63,"last_page":63,"pdf_url":8706,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8707,"paper_type":860,"authors":8708,"abstract":8711},"lrec2004-main-478","Using Semantic Language Resources to Support Textual Inference for Question Answering","10.63317\u002F5iwnwbec5ys8","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-478","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F741.pdf","bertagna-2004-using",[8709],{"paper_id":8702,"author_seq":247,"given_name":1749,"surname":8710,"affiliation":63,"orcid":63},"Bertagna","The paper presents an experiment aiming at verifying whether the rich connectivity of two Italian Language Resources, ItalWordNet and CLIPS, can be exploited to support inference for Question Answering. The methodology for the creation of primitive inferential rules from the ItalWordNet and CLIPS semantic relations is described. 
We also introduce some preliminary results about the usefulness of such inferential rules for QA.",{"paper_id":8713,"title":8714,"year":197,"month":855,"day":63,"doi":8715,"resource_url":8716,"first_page":63,"last_page":63,"pdf_url":8717,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8718,"paper_type":860,"authors":8719,"abstract":8728},"lrec2004-main-479","An Information Repository Model for Advanced Question Answering Systems","10.63317\u002F24b5tjmy8n93","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-479","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F742.pdf","pedro-etal-2004-information",[8720,8723,8726,8727],{"paper_id":8713,"author_seq":247,"given_name":8721,"surname":8722,"affiliation":63,"orcid":63},"Vasco Calais","Pedro",{"paper_id":8713,"author_seq":232,"given_name":8724,"surname":8725,"affiliation":63,"orcid":63},"Jeongwoo","Ko",{"paper_id":8713,"author_seq":218,"given_name":2721,"surname":7916,"affiliation":63,"orcid":63},{"paper_id":8713,"author_seq":203,"given_name":7911,"surname":7912,"affiliation":63,"orcid":63},"This paper presents the design and implementation of the information repository which is the central core of the JAVELIN open-domain question answering system. JAVELIN is comprised of several modules that perform a wide variety of question answering (QA) tasks, such as question analysis, document and passage retrieval, answer candidate extraction, answer selection, answer justification, and planning. The architecture is designed to support comparative component-level evaluation, so that different strategies for each module can be integrated and tested in a straightforward way. Each time a module uses a particular piece of information to produce an output, a dependency is created. To support answer justification and introspective learning, the system can use this long-term memory to trace the origin of each answer it produces for a particular question. 
The JAVELIN Repository implements a complete, consistent relational model for all of the information associated with a question answering scenario.",{"paper_id":8730,"title":8731,"year":197,"month":855,"day":63,"doi":8732,"resource_url":8733,"first_page":63,"last_page":63,"pdf_url":8734,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8735,"paper_type":860,"authors":8736,"abstract":8741},"lrec2004-main-480","Content Interoperability of Lexical Resources: Open Issues and “MILE” Perspectives","10.63317\u002F54uh65mufxad","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-480","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F743.pdf","bertagna-etal-2004-content",[8737,8738,8739,8740],{"paper_id":8730,"author_seq":247,"given_name":1749,"surname":8710,"affiliation":63,"orcid":63},{"paper_id":8730,"author_seq":232,"given_name":1743,"surname":6337,"affiliation":63,"orcid":63},{"paper_id":8730,"author_seq":218,"given_name":6405,"surname":6406,"affiliation":63,"orcid":63},{"paper_id":8730,"author_seq":203,"given_name":6334,"surname":6335,"affiliation":63,"orcid":63},"The paper tackles the issue of content interoperability among lexical resources, by presenting an experiment of mapping differently conceived lexicons, FrameNet and NOMLEX, onto MILE (Multilingual ISLE Lexical Entry), a meta-entry for the encoding of multilingual lexical information, acting as a general schema of shared and common lexical objects. The aim is to (i) raise problems and (ii) test the expressive potentialities of MILE as a standard environment for Computational Lexicons.",{"paper_id":8743,"title":8744,"year":197,"month":855,"day":63,"doi":8745,"resource_url":8746,"first_page":63,"last_page":63,"pdf_url":8747,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8748,"paper_type":860,"authors":8749,"abstract":8759},"lrec2004-main-481","Prague Czech-English Dependency Treebank. 
Syntactically Annotated Resources for Machine Translation","10.63317\u002F4q48htam6tur","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-481","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F745.pdf","cmejrek-etal-2004-prague",[8750,8752,8754,8755,8756],{"paper_id":8743,"author_seq":247,"given_name":1087,"surname":8751,"affiliation":63,"orcid":63},"Čmejrek",{"paper_id":8743,"author_seq":232,"given_name":1078,"surname":8753,"affiliation":63,"orcid":63},"Cuřín",{"paper_id":8743,"author_seq":218,"given_name":4118,"surname":4119,"affiliation":63,"orcid":63},{"paper_id":8743,"author_seq":203,"given_name":1078,"surname":7393,"affiliation":63,"orcid":63},{"paper_id":8743,"author_seq":188,"given_name":8757,"surname":8758,"affiliation":63,"orcid":63},"Vladislav","Kuboň","This paper introduces the Prague Czech-English Dependency Treebank (PCEDT), a new Czech-English parallel resource suitable for experiments in structural machine translation. We describe the process of building the core parts of the resources - a bilingual syntactically annotated corpus and translation dictionaries. A part of the Penn Treebank has been translated to Czech and its annotation transformed into dependency annotation scheme. The annotation of Czech was done automatically from plain text. A subset of corresponding Czech and English sentences has been annotated by humans. The resources being created at Charles University in Prague are scheduled for release as Linguistic Data Consortium data collection in 2004. 
First experiments in Czech-English machine translation using these data were already carried out.",{"paper_id":8761,"title":8762,"year":197,"month":855,"day":63,"doi":8763,"resource_url":8764,"first_page":63,"last_page":63,"pdf_url":8765,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8766,"paper_type":860,"authors":8767,"abstract":8790},"lrec2004-main-482","Data Collection and Analysis of Mapudungun Morphology for Spelling Correction","10.63317\u002F2uwfhrjn6nqa","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-482","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F747.pdf","monson-etal-2004-data",[8768,8769,8772,8775,8778,8780,8783,8784,8787],{"paper_id":8761,"author_seq":247,"given_name":2000,"surname":6368,"affiliation":63,"orcid":63},{"paper_id":8761,"author_seq":232,"given_name":8770,"surname":8771,"affiliation":63,"orcid":63},"Lori","Levin",{"paper_id":8761,"author_seq":218,"given_name":8773,"surname":8774,"affiliation":63,"orcid":63},"Rodolfo","Vega",{"paper_id":8761,"author_seq":203,"given_name":8776,"surname":8777,"affiliation":63,"orcid":63},"Ralf","Brown",{"paper_id":8761,"author_seq":188,"given_name":8168,"surname":8779,"affiliation":63,"orcid":63},"Font Llitjos",{"paper_id":8761,"author_seq":172,"given_name":8781,"surname":8782,"affiliation":63,"orcid":63},"Alon","Lavie",{"paper_id":8761,"author_seq":155,"given_name":8171,"surname":8172,"affiliation":63,"orcid":63},{"paper_id":8761,"author_seq":138,"given_name":8785,"surname":8786,"affiliation":63,"orcid":63},"Eliseo","Cañulef",{"paper_id":8761,"author_seq":121,"given_name":8788,"surname":8789,"affiliation":63,"orcid":63},"Rosendo","Huisca","This paper describes part of a three year collaboration between Carnegie Mellon University's Language Technologies Institute, the Programa de Educación Intercultural Bilingüe of the Chilean Ministry of Education, and Universidad de La Frontera (Temuco, Chile). 
We are currently constructing a spelling checker for Mapudungun, a polysynthetic language spoken by the Mapuche people in Chile and Argentina. The spelling checker will be built in MySpell, the spell checking system used by the open source office suite OpenOffice. This paper also describes the spoken language corpus that is used as a source of data for developing the spelling checker.",{"paper_id":8792,"title":8793,"year":197,"month":855,"day":63,"doi":8794,"resource_url":8795,"first_page":63,"last_page":63,"pdf_url":8796,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8797,"paper_type":860,"authors":8798,"abstract":8805},"lrec2004-main-483","An Efficient Word Confidence Measure Using Likelihood Ratio Scores","10.63317\u002F55d8pjq6fggt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-483","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F748.pdf","veiga-perdigao-2004-efficient",[8799,8802],{"paper_id":8792,"author_seq":247,"given_name":8800,"surname":8801,"affiliation":63,"orcid":63},"Arlindo O.","Veiga",{"paper_id":8792,"author_seq":232,"given_name":8803,"surname":8804,"affiliation":63,"orcid":63},"Fernando S.","Perdigão","This paper describes an efficient method to perform word confidence measures in an automatic speech recognition system. The confidence measure is computed during the decoding phase and is based on likelihood ratios between the top hypotheses that reach a word node. Experiments were carried out on a digit database with a connected-digit recognizer. The results show that this method outperforms word-graph confidence measure with a special grammar and is worse with a word loop grammar. 
Because the proposed confidence measure is evaluated with only one pass, it is very efficient and can be applied with advantage in small or medium vocabulary recognizers, with low computational resources.",{"paper_id":8807,"title":8808,"year":197,"month":855,"day":63,"doi":8809,"resource_url":8810,"first_page":63,"last_page":63,"pdf_url":8811,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8812,"paper_type":860,"authors":8813,"abstract":8819},"lrec2004-main-484","Adding Syntactic Annotations to Transcripts of Parent-Child Dialogs","10.63317\u002F4gbqq9vir6xj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-484","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F749.pdf","sagae-etal-2004-adding",[8814,8817,8818],{"paper_id":8807,"author_seq":247,"given_name":8815,"surname":8816,"affiliation":63,"orcid":63},"Kenji","Sagae",{"paper_id":8807,"author_seq":232,"given_name":896,"surname":897,"affiliation":63,"orcid":63},{"paper_id":8807,"author_seq":218,"given_name":8781,"surname":8782,"affiliation":63,"orcid":63},"We describe an annotation scheme for syntactic information in the CHILDES database (MacWhinney, 2000), which contains several megabytes of transcribed dialogs between parents and children. The annotation scheme is based on grammatical relations (GRs) that are composed of bilexical dependencies (between a head and a dependent) labeled with the name of the relation involving the two words (such as subject, object and adjunct). 
We also discuss automatic annotation using our syntactic annotation scheme.",{"paper_id":8821,"title":8822,"year":197,"month":855,"day":63,"doi":8823,"resource_url":8824,"first_page":63,"last_page":63,"pdf_url":8825,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8826,"paper_type":860,"authors":8827,"abstract":8834},"lrec2004-main-485","Distributional Consistency: As a General Method for Defining a Core Lexicon","10.63317\u002F3ab7x7z3yjue","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-485","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F750.pdf","zhang-etal-2004-distributional",[8828,8830,8832],{"paper_id":8821,"author_seq":247,"given_name":8829,"surname":4907,"affiliation":63,"orcid":63},"Huarui",{"paper_id":8821,"author_seq":232,"given_name":8831,"surname":1544,"affiliation":63,"orcid":63},"Churen",{"paper_id":8821,"author_seq":218,"given_name":8833,"surname":1249,"affiliation":63,"orcid":63},"Shiwen","We propose Distributional Consistency (DC) as a general method for defining a Core Lexicon. The property of DC is investigated theoretically and empirically, showing that it is clearly distinguishable from word frequency and range of distribution. DC is also shown to reflect intuitive interpretations, especially when its value is close to 1. Its immediate application in NLP would include defining a core lexicon in a language and identifying topical words in a document. We also categorize the existent measures of dispersion into 3 groups via ratio of norm or entropy, proposed a simplified measure and a combined kind of measure. 
These new measures can be used as virtual prototype or medium type for the study and comparison of existent measures in the future.",{"paper_id":8836,"title":8837,"year":197,"month":855,"day":63,"doi":8838,"resource_url":8839,"first_page":63,"last_page":63,"pdf_url":8840,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8841,"paper_type":860,"authors":8842,"abstract":8846},"lrec2004-main-486","Computing Reliability for Coreference Annotation","10.63317\u002F2n8owy9onfzo","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-486","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F752.pdf","passonneau-2004-computing",[8843],{"paper_id":8836,"author_seq":247,"given_name":8844,"surname":8845,"affiliation":63,"orcid":63},"Rebecca J.","Passonneau","Coreference annotation is annotation of language corpora to indicate which expressions have been used to co-specify the same discourse entity. When annotations of the same data are collected from two or more coders, the reliability of the data may need to be quantified. Two obstacles have stood in the way of applying reliability metrics: incommensurate units across annotations, and lack of a convenient representation of the coding values. Given N coders and M coding units, reliability is computed from an N-by-M matrix that records the value assigned to unit M_&lt;j&gt; by coder N_&lt;k&gt;. The solution I present accommodates a wide range of coding choices for the annotator, while preserving the same units across codings. As a consequence, it permits a straightforward application of reliability measurement. In addition, in coreference annotation, disagreements can be complete or partial, so I incorporate a distance metric to scale disagreements. 
This method has also been applied to a quite distinct coding task, namely semantic annotation of summaries.",{"paper_id":8848,"title":8849,"year":197,"month":855,"day":63,"doi":8850,"resource_url":8851,"first_page":63,"last_page":63,"pdf_url":8852,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8853,"paper_type":860,"authors":8854,"abstract":8859},"lrec2004-main-487","Publicly Available Topic Signatures for all WordNet Nominal Senses","10.63317\u002F38bec6uwmf4d","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-487","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F753.pdf","agirre-de-lacalle-2004-publicly",[8855,8856],{"paper_id":8848,"author_seq":247,"given_name":7548,"surname":7549,"affiliation":63,"orcid":63},{"paper_id":8848,"author_seq":232,"given_name":8857,"surname":8858,"affiliation":63,"orcid":63},"Oier Lopez","de Lacalle","Topic signatures are context vectors built for word senses and concepts. They can be automatically acquired from the web for any concept hierarchy using the “monosemous relative” method. Topic signatures have been shown to be useful in Word Sense Disambiguation, for modeling similarity between word senses, classifying new terms in hierarchies and also building hierarchical clusters of word senses for a given word. In this work we present a publicly available resource which comprises both automatically extracted examples for all WordNet 1.6 noun senses and topic signatures built based on those examples. We gathered around 700 sentences per each noun in WordNet. When the monosemous relatives are used to build a sense corpus for polysemous words, they comprise an average of around 3,500 sentences per word sense. 
The size of the topic signatures thus constructed is of around 4,500 words per word sense.",{"paper_id":8861,"title":8862,"year":197,"month":855,"day":63,"doi":8863,"resource_url":8864,"first_page":63,"last_page":63,"pdf_url":8865,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8866,"paper_type":860,"authors":8867,"abstract":8876},"lrec2004-main-488","Road-testing the English Resource Grammar Over the British National Corpus","10.63317\u002F49266tjgdk8s","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-488","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F754.pdf","baldwin-etal-2004-road",[8868,8869,8872,8873,8875],{"paper_id":8861,"author_seq":247,"given_name":3329,"surname":3330,"affiliation":63,"orcid":63},{"paper_id":8861,"author_seq":232,"given_name":8870,"surname":8871,"affiliation":63,"orcid":63},"Emily M.","Bender",{"paper_id":8861,"author_seq":218,"given_name":2858,"surname":8339,"affiliation":63,"orcid":63},{"paper_id":8861,"author_seq":203,"given_name":8874,"surname":1709,"affiliation":63,"orcid":63},"Ara",{"paper_id":8861,"author_seq":188,"given_name":1560,"surname":8341,"affiliation":63,"orcid":63},"This paper addresses two questions: (1) when a large deep processing resource developed for relatively closed domains is run over open text, what coverage does it have, and (2) what are the most effective and time-efficient ways of consolidating gaps in the coverage of such as resource?",{"paper_id":8878,"title":8879,"year":197,"month":855,"day":63,"doi":8880,"resource_url":8881,"first_page":63,"last_page":63,"pdf_url":8882,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8883,"paper_type":860,"authors":8884,"abstract":8888},"lrec2004-main-489","Interpreting BLEU\u002FNIST Scores: How Much Improvement do We Need to Have a Better 
System?","10.63317\u002F2qqhpv7hcigk","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-489","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F755.pdf","zhang-etal-2004-interpreting",[8885,8886,8887],{"paper_id":8878,"author_seq":247,"given_name":7982,"surname":4907,"affiliation":63,"orcid":63},{"paper_id":8878,"author_seq":232,"given_name":1560,"surname":4435,"affiliation":63,"orcid":63},{"paper_id":8878,"author_seq":218,"given_name":4437,"surname":4438,"affiliation":63,"orcid":63},"Automatic evaluation metrics for Machine Translation (MT) systems, such as BLEU and the related NIST metric, are becoming increasingly important in MT. Yet, their behaviors are not fully understood. In this paper, we analyze some flaws in the BLEU\u002FNIST metrics. With a better understanding of these problems, we can better interpret the reported BLEU\u002FNIST scores. In addition, this paper reports a novel method of calculating the confidence intervals for BLEU\u002FNIST scores using bootstrapping. With this method, we can determine whether two MT systems are significantly different from each other.",{"paper_id":8890,"title":8891,"year":197,"month":855,"day":63,"doi":8892,"resource_url":8893,"first_page":63,"last_page":63,"pdf_url":8894,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8895,"paper_type":860,"authors":8896,"abstract":8899},"lrec2004-main-490","Exploiting Anchor Text as a Lexical Resource","10.63317\u002F3xarkk3z29qh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-490","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F756.pdf","anick-2004-exploiting",[8897],{"paper_id":8890,"author_seq":247,"given_name":5354,"surname":8898,"affiliation":63,"orcid":63},"Anick","Anchor texts, the strings associated with hyperlinks on a web page, are currently employed to express millions of referrals to sites and topics on the world wide web. 
We consider how these strings might be exploited as a lexical resource, particularly when viewed from the perspective of their target documents rather than their sources. We find that for many target pages, incoming anchors form a miniature corpus of reference expressions whose properties with relation both to other target sites and to each other can be put to use for mining lexical information.",{"paper_id":8901,"title":8902,"year":197,"month":855,"day":63,"doi":8903,"resource_url":8904,"first_page":63,"last_page":63,"pdf_url":8905,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8906,"paper_type":860,"authors":8907,"abstract":8944},"lrec2004-main-491","MEAD - A Platform for Multidocument Multilingual Text Summarization","10.63317\u002F2nevc4eu3swt","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-491","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F757.pdf","radev-etal-2004-mead",[8908,8909,8911,8914,8916,8919,8922,8924,8927,8930,8932,8933,8936,8937,8938,8940,8942],{"paper_id":8901,"author_seq":247,"given_name":4866,"surname":4867,"affiliation":63,"orcid":63},{"paper_id":8901,"author_seq":232,"given_name":3329,"surname":8910,"affiliation":63,"orcid":63},"Allison",{"paper_id":8901,"author_seq":218,"given_name":8912,"surname":8913,"affiliation":63,"orcid":63},"Sasha","Blair-Goldensohn",{"paper_id":8901,"author_seq":203,"given_name":3376,"surname":8915,"affiliation":63,"orcid":63},"Blitzer",{"paper_id":8901,"author_seq":188,"given_name":8917,"surname":8918,"affiliation":63,"orcid":63},"Arda","Çelebi",{"paper_id":8901,"author_seq":172,"given_name":8920,"surname":8921,"affiliation":63,"orcid":63},"Stanko","Dimitrov",{"paper_id":8901,"author_seq":155,"given_name":8014,"surname":8923,"affiliation":63,"orcid":63},"Drabek",{"paper_id":8901,"author_seq":138,"given_name":8925,"surname":8926,"affiliation":63,"orcid":63},"Ali","Hakim",{"paper_id":8901,"author_seq":121,"given_name":8928,"surname":8929,"affiliatio
n":63,"orcid":63},"Wai","Lam",{"paper_id":8901,"author_seq":104,"given_name":8931,"surname":3635,"affiliation":63,"orcid":63},"Danyu",{"paper_id":8901,"author_seq":87,"given_name":4863,"surname":4864,"affiliation":63,"orcid":63},{"paper_id":8901,"author_seq":73,"given_name":8934,"surname":8935,"affiliation":63,"orcid":63},"Hong","Qi",{"paper_id":8901,"author_seq":55,"given_name":2910,"surname":3884,"affiliation":63,"orcid":63},{"paper_id":8901,"author_seq":38,"given_name":8512,"surname":8513,"affiliation":63,"orcid":63},{"paper_id":8901,"author_seq":17,"given_name":1357,"surname":8939,"affiliation":63,"orcid":63},"Topper",{"paper_id":8901,"author_seq":4232,"given_name":3274,"surname":8941,"affiliation":63,"orcid":63},"Winkel",{"paper_id":8901,"author_seq":8943,"given_name":4906,"surname":4907,"affiliation":63,"orcid":63},"17","This paper describes the functionality of MEAD, a comprehensive, public domain, open source, multidocument multilingual summarization environment that has been thus far downloaded by more than 500 organizations. 
MEAD has been used in a variety of summarization applications ranging from summarization for mobile devices to Web page summarization within a search engine and to novelty detection.",{"paper_id":8946,"title":8947,"year":197,"month":855,"day":63,"doi":8948,"resource_url":8949,"first_page":63,"last_page":63,"pdf_url":8950,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8951,"paper_type":860,"authors":8952,"abstract":8968},"lrec2004-main-492","Evaluation of Transcription and Annotation Tools for a Multi-modal, Multi-party Dialogue Corpus","10.63317\u002F2akqzyqfi537","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-492","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F758.pdf","garg-etal-2004-evaluation",[8953,8956,8959,8961,8963,8965],{"paper_id":8946,"author_seq":247,"given_name":8954,"surname":8955,"affiliation":63,"orcid":63},"Saurabh","Garg",{"paper_id":8946,"author_seq":232,"given_name":8957,"surname":8958,"affiliation":63,"orcid":63},"Bilyana","Martinovski",{"paper_id":8946,"author_seq":218,"given_name":3157,"surname":8960,"affiliation":63,"orcid":63},"Robinson",{"paper_id":8946,"author_seq":203,"given_name":8962,"surname":1560,"affiliation":63,"orcid":63},"Jens",{"paper_id":8946,"author_seq":188,"given_name":8964,"surname":8434,"affiliation":63,"orcid":63},"Joel",{"paper_id":8946,"author_seq":172,"given_name":8966,"surname":8967,"affiliation":63,"orcid":63},"David R.","Traum","This paper reviews nine available transcription and annotation tools, considering in particular the special difficulties arising from transcribing and annotating multi-party, multi-modal dialogue. 
Tools are evaluated as to the ability to support the user's annotation scheme, ability to visualize the form of the data, compatibility with other tools, flexibility of data representation, and general user-friendliness.",{"paper_id":8970,"title":8971,"year":197,"month":855,"day":63,"doi":8972,"resource_url":8973,"first_page":63,"last_page":63,"pdf_url":8974,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8975,"paper_type":860,"authors":8976,"abstract":8979},"lrec2004-main-493","Current Projects in Languages of Military Interest at the Defense Language Institute","10.63317\u002F3azxyqo2u7qi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-493","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F759.pdf","emonts-2004-current",[8977],{"paper_id":8970,"author_seq":247,"given_name":1357,"surname":8978,"affiliation":63,"orcid":63},"Emonts","Recent US military events have produced a desperate need for language data in languages which have previously been given very little attention. Data resources for languages of military interest are often unavailable or insufficient to adequately develop new technologies. Realizing this need, the Defense Language Institute Foreign Language Center, has created a language data distribution center, which focuses on collecting and distributing data in languages of military interest. 
This paper outlines current projects and resources available at the Defense Language Institute (DLI).",{"paper_id":8981,"title":8982,"year":197,"month":855,"day":63,"doi":8983,"resource_url":8984,"first_page":63,"last_page":63,"pdf_url":8985,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8986,"paper_type":860,"authors":8987,"abstract":63},"lrec2004-main-494","A Multilingual Database of Idioms","10.63317\u002F5m8v98ub86pc","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-494","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F760.pdf","villavicencio-etal-2004-multilingual",[8988,8990,8991],{"paper_id":8981,"author_seq":247,"given_name":5962,"surname":8989,"affiliation":63,"orcid":63},"Villavicencio",{"paper_id":8981,"author_seq":232,"given_name":3329,"surname":3330,"affiliation":63,"orcid":63},{"paper_id":8981,"author_seq":218,"given_name":1900,"surname":8335,"affiliation":63,"orcid":63},{"paper_id":8993,"title":8994,"year":197,"month":855,"day":63,"doi":8995,"resource_url":8996,"first_page":63,"last_page":63,"pdf_url":8997,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":8998,"paper_type":860,"authors":8999,"abstract":9004},"lrec2004-main-495","Annotation Tools for Large-Scale Corpus Development: Using AGTK at the Linguistic Data Consortium","10.63317\u002F4d5fxx3dpe7u","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-495","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F761.pdf","maeda-strassel-2004-annotation",[9000,9003],{"paper_id":8993,"author_seq":247,"given_name":9001,"surname":9002,"affiliation":63,"orcid":63},"Kazuaki","Maeda",{"paper_id":8993,"author_seq":232,"given_name":1030,"surname":1031,"affiliation":63,"orcid":63},"Large-scale corpus development demands substantial infrastructure. 
As part of this infrastructure, the Linguistic Data Consortium (LDC) has adopted the Annotation Graph Toolkit (AGTK) as a primary resource for annotation tool development. This paper reports on LDC's experiences using AGTK to develop and implement highly customized annotation tools for a variety of large-scale corpus creation efforts. We describe two primary tools that are currently in active use at LDC, one speech- and one text-based, as well as other new AGTK-based annotation tools. We also describe the use of AGTK to develop tools for comparing and adjudicating divergent annotations in order to produce gold standard evaluation data and to measure inter-annotator consistency. Finally, we discuss various issues in creating AGTK-based tools across a wide range of annotation tasks and divergent research areas.",{"paper_id":9006,"title":9007,"year":197,"month":855,"day":63,"doi":9008,"resource_url":9009,"first_page":63,"last_page":63,"pdf_url":9010,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9011,"paper_type":860,"authors":9012,"abstract":9014},"lrec2004-main-496","Linguistic Resources for Effective, Affordable, Reusable Speech-to-Text","10.63317\u002F5aggrbzknd6j","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-496","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F762.pdf","strassel-2004-linguistic",[9013],{"paper_id":9006,"author_seq":247,"given_name":1030,"surname":1031,"affiliation":63,"orcid":63},"This paper describes ongoing efforts at Linguistic Data Consortium to create shared evaluation resources for improved speech-to-text technology. The DARPA EARS Program (Effective, Affordable, Reusable Speech-to-Text) is focused on enabling core STT technology to produce rich, highly accurate output in a range of languages and speaking styles. The aggressive EARS program goals motivate new approaches to corpus creation and distribution. 
EARS research sites require multilingual broadcast news and telephone speech, transcripts and annotations at a much higher volume than for any previous technology program. In response to these demands, LDC has developed new corpora for training and evaluating speech-to-text systems in English, Arabic and Chinese and to support systems that distinguish speakers, identify and repair disfluencies and punctuate a text to improve readability.",{"paper_id":9016,"title":9017,"year":197,"month":855,"day":63,"doi":9018,"resource_url":9019,"first_page":63,"last_page":63,"pdf_url":9020,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9021,"paper_type":860,"authors":9022,"abstract":9025},"lrec2004-main-497","Building part-of-speech Corpora Through Histogram Hopping","10.63317\u002F2x54cw7qpvpz","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-497","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F763.pdf","vilain-2004-building",[9023],{"paper_id":9016,"author_seq":247,"given_name":2904,"surname":9024,"affiliation":63,"orcid":63},"Vilain","This paper is concerned with lowering the cost of producing training resources for part-of-speech taggers. We focus primarily on the resource needs of unsupervised taggers, as these can be trained with simpler resources than their supervised counterparts. We introduce histogram hopping, a new approach for developing the central training resources of unsupervised taggers, and describe a simple annotation prototype that implements the approach. We then discuss the applicability of histogram hopping to the development of resources for supervised taggers. 
Finally, we report on a preliminary pilot study for French that validates this work.",{"paper_id":9027,"title":9028,"year":197,"month":855,"day":63,"doi":9029,"resource_url":9030,"first_page":63,"last_page":63,"pdf_url":9031,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9032,"paper_type":860,"authors":9033,"abstract":9038},"lrec2004-main-498","An Emerging Transcontinental Collaborative Research and Education Agenda in Human Language Technologies","10.63317\u002F2id6wbaq39yh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-498","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F765.pdf","monaco-soudi-2004-emerging",[9034,9037],{"paper_id":9027,"author_seq":247,"given_name":9035,"surname":9036,"affiliation":63,"orcid":63},"Gregory Ernest","Monaco",{"paper_id":9027,"author_seq":232,"given_name":6657,"surname":6658,"affiliation":63,"orcid":63},"This paper describes an emerging interdisciplinary research and education collaboration, in the area of Human Language Technology (HLT), among multiple institutions in the Great Plains (US) and Morocco. In particular, this collaboration is focusing on the complex problem of meeting the language technology needs of societies with multiple dominant and indigenous languages, as in the United States and Morocco, by, among other things, developing language resources for the spoken languages (Berber, Moroccan Arabic and Native American languages) in the two countries. 
We present the goal of the project, the activities undertaken so far and the road ahead in this collaboration.",{"paper_id":9040,"title":9041,"year":197,"month":855,"day":63,"doi":9042,"resource_url":9043,"first_page":63,"last_page":63,"pdf_url":9044,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9045,"paper_type":860,"authors":9046,"abstract":63},"lrec2004-main-499","Issues in Corpus Development for Multi-party Multi-modal Task-oriented Dialogue","10.63317\u002F3knkipypfrxf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-499","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F766.pdf","robinson-etal-2004-issues",[9047,9048,9049,9050,9051],{"paper_id":9040,"author_seq":247,"given_name":3157,"surname":8960,"affiliation":63,"orcid":63},{"paper_id":9040,"author_seq":232,"given_name":8957,"surname":8958,"affiliation":63,"orcid":63},{"paper_id":9040,"author_seq":218,"given_name":8954,"surname":8955,"affiliation":63,"orcid":63},{"paper_id":9040,"author_seq":203,"given_name":8962,"surname":1560,"affiliation":63,"orcid":63},{"paper_id":9040,"author_seq":188,"given_name":1790,"surname":8967,"affiliation":63,"orcid":63},{"paper_id":9053,"title":9054,"year":197,"month":855,"day":63,"doi":9055,"resource_url":9056,"first_page":63,"last_page":63,"pdf_url":9057,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9058,"paper_type":860,"authors":9059,"abstract":9063},"lrec2004-main-500","The Fisher Corpus: a Resource for the Next Generations of 
Speech-to-Text","10.63317\u002F5q862povzthr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-500","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F767.pdf","cieri-etal-2004-fisher",[9060,9061,9062],{"paper_id":9053,"author_seq":247,"given_name":3852,"surname":4640,"affiliation":63,"orcid":63},{"paper_id":9053,"author_seq":232,"given_name":1790,"surname":6350,"affiliation":63,"orcid":63},{"paper_id":9053,"author_seq":218,"given_name":4205,"surname":864,"affiliation":63,"orcid":63},"This paper describes, within the context of the DARPA EARS program, the design and implementation of the Fisher protocol for collecting conversational telephone speech which has yielded more than 16,000 English conversations. It also discusses the Quick Transcription specification that allowed 2000 hours of Fisher audio to be transcribed in less than one year. Fisher data is already in use within the DARPA EARS programs and will be published via the Linguistic Data Consortium for general use beginning in 2004.",{"paper_id":9065,"title":9066,"year":197,"month":855,"day":63,"doi":9067,"resource_url":9068,"first_page":63,"last_page":63,"pdf_url":9069,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9070,"paper_type":860,"authors":9071,"abstract":9075},"lrec2004-main-501","Evaluation of Multi-party Virtual Reality Dialogue Interaction","10.63317\u002F2j826bc6h3s7","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-501","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F768.pdf","traum-etal-2004-evaluation",[9072,9073,9074],{"paper_id":9065,"author_seq":247,"given_name":8966,"surname":8967,"affiliation":63,"orcid":63},{"paper_id":9065,"author_seq":232,"given_name":3157,"surname":8960,"affiliation":63,"orcid":63},{"paper_id":9065,"author_seq":218,"given_name":8962,"surname":1560,"affiliation":63,"orcid":63},"We describe a dialogue evaluation plan for a multi-character virtual reality training 
simulation. A multi-component evaluation plan is presented, including user satisfaction, intended task completion, recognition rate, and a new annotation scheme for appropriateness. Preliminary results for formative tests are also presented.",{"paper_id":9077,"title":9078,"year":197,"month":855,"day":63,"doi":9079,"resource_url":9080,"first_page":63,"last_page":63,"pdf_url":9081,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9082,"paper_type":860,"authors":9083,"abstract":9090},"lrec2004-main-502","The Mixer Corpus of Multilingual, Multichannel Speaker Recognition Data","10.63317\u002F47cmqdbzxaz3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-502","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F771.pdf","cieri-etal-2004-mixer",[9084,9085,9087,9088,9089],{"paper_id":9077,"author_seq":247,"given_name":3852,"surname":4640,"affiliation":63,"orcid":63},{"paper_id":9077,"author_seq":232,"given_name":9086,"surname":908,"affiliation":63,"orcid":63},"Joseph P.",{"paper_id":9077,"author_seq":218,"given_name":6355,"surname":6356,"affiliation":63,"orcid":63},{"paper_id":9077,"author_seq":203,"given_name":1790,"surname":6350,"affiliation":63,"orcid":63},{"paper_id":9077,"author_seq":188,"given_name":4205,"surname":864,"affiliation":63,"orcid":63},"This paper describes efforts to create corpora to support and evaluate systems that perform speaker recognition where channel and language may vary. Beyond the ongoing evaluation of speaker recognition systems, these corpora are aimed at the bilingual and cross channel dimensions. We report on specific data collection efforts at the Linguistic Data Consortium and the research ongoing at the US Federal Bureau of Investigation and MIT Lincoln Laboratories. 
We cover the design and requirements, the collections and final properties of the corpus integrating discussions of the data preparation, research, technology development and evaluation on a grand scale.",{"paper_id":9092,"title":9093,"year":197,"month":855,"day":63,"doi":9094,"resource_url":9095,"first_page":63,"last_page":63,"pdf_url":9096,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9097,"paper_type":860,"authors":9098,"abstract":9103},"lrec2004-main-503","Building a Large Grammar for Italian","10.63317\u002F288hf579i9zr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-503","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F772.pdf","mazzei-lombardo-2004-building",[9099,9101],{"paper_id":9092,"author_seq":247,"given_name":1743,"surname":9100,"affiliation":63,"orcid":63},"Mazzei",{"paper_id":9092,"author_seq":232,"given_name":1093,"surname":9102,"affiliation":63,"orcid":63},"Lombardo","We describe the construction of a large lexicalized tree adjoining grammar for Italian, automatically extracted from an annotated corpus. 
We first introduce the TUT, a dependency style treebank for Italian, then we illustrate the algorithm that we have designed to extract the grammar, and finally we report two experiments about parsing complexity and coverage of the extracted grammar.",{"paper_id":9105,"title":9106,"year":197,"month":855,"day":63,"doi":9107,"resource_url":9108,"first_page":63,"last_page":63,"pdf_url":9109,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9110,"paper_type":860,"authors":9111,"abstract":63},"lrec2004-main-504","Japanese MULTEXT: a Prosodic Corpus","10.63317\u002F3xtbpcb2bkfr","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-504","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F774.pdf","kitazawa-etal-2004-japanese",[9112,9115,9118,9121],{"paper_id":9105,"author_seq":247,"given_name":9113,"surname":9114,"affiliation":63,"orcid":63},"Shigeyoshi","Kitazawa",{"paper_id":9105,"author_seq":232,"given_name":9116,"surname":9117,"affiliation":63,"orcid":63},"Shinya","Kiriyama",{"paper_id":9105,"author_seq":218,"given_name":9119,"surname":9120,"affiliation":63,"orcid":63},"Toshihiko","Itoh",{"paper_id":9105,"author_seq":203,"given_name":907,"surname":908,"affiliation":63,"orcid":63},{"paper_id":9123,"title":9124,"year":197,"month":855,"day":63,"doi":9125,"resource_url":9126,"first_page":63,"last_page":63,"pdf_url":9127,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9128,"paper_type":860,"authors":9129,"abstract":9136},"lrec2004-main-505","The OLISSIPO and LECTIO 
Projects","10.63317\u002F5fxcm2h3xeqo","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-505","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F776.pdf","cappelli-alberto-2004-olissipo",[9130,9133],{"paper_id":9123,"author_seq":247,"given_name":9131,"surname":9132,"affiliation":63,"orcid":63},"Giuseppe","Cappelli",{"paper_id":9123,"author_seq":232,"given_name":9134,"surname":9135,"affiliation":63,"orcid":63},"Paulo","Alberto","OLISSIPO (Omnis Latinitatis Instrumentum Secundum Scholarum Instructionis Propositum Ordinatum) is a prototype developed by the Centro de Estudos Clássicos of the University of Lisbon and the Istituto di Linguistica Computazionale of CNR in Pisa, thanks to a common research project in the framework of the scientific agreement between the Consiglio Nazionale delle Ricerche (CNR - Italy) and the Gabinete de Relações Internacionais da Ciência e Ensino Superior (GRICES - Portugal). OLISSIPO extracts lists of basic vocabulary from any Latin text and displays them together with linguistic and extra-linguistic information stored in a database. It contains other functionalities, such as statistical analysis, search of words in the text, displaying of the context, search of words in the database (which can be used as a dictionary). The LECTIO project has also been carried out by the Centro de Estudos Clássicos of the University of Lisbon and the Istituto di Linguistica Computazionale of CNR in Pisa. Designed to be a follow-up of OLISSIPO, it aims at producing a prototype for analysis of Latin texts with more functionalities. 
The final result will be an open tool to be used both in teaching\u002Flearning and in scientific research.",{"paper_id":9138,"title":9139,"year":197,"month":855,"day":63,"doi":9140,"resource_url":9141,"first_page":63,"last_page":63,"pdf_url":9142,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9143,"paper_type":860,"authors":9144,"abstract":9154},"lrec2004-main-506","A Public Reference Implementation of the RAP Anaphora Resolution Algorithm","10.63317\u002F4x8aozctr2rb","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-506","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F778.pdf","qiu-etal-2004-public",[9145,9148,9151],{"paper_id":9138,"author_seq":247,"given_name":9146,"surname":9147,"affiliation":63,"orcid":63},"Long","Qiu",{"paper_id":9138,"author_seq":232,"given_name":9149,"surname":9150,"affiliation":63,"orcid":63},"Min-Yen","Kan",{"paper_id":9138,"author_seq":218,"given_name":9152,"surname":9153,"affiliation":63,"orcid":63},"Tat-Seng","Chua","This paper describes a standalone, publicly-available implementation of the Resolution of Anaphora Procedure (RAP) given by Lappin and Leass (1994). The RAP algorithm resolves third person pronouns, lexical anaphors, and identifies pleonastic pronouns. Our implementation, JavaRAP, fills a current need in anaphora resolution research by providing a reference implementation that can be benchmarked against current algorithms. The implementation uses the standard, publicly available Charniak (2000) parser as input, and generates a list of anaphora-antecedent pairs as output. Alternately, an in-place annotation or substitution of the anaphors with their antecedents can be produced. 
Evaluation on the MUC-6 co-reference task shows that JavaRAP has an accuracy of 57.9%, similar to the performance given previously in the literature (e.g., Preiss 2002).",{"paper_id":9156,"title":9157,"year":197,"month":855,"day":63,"doi":9158,"resource_url":9159,"first_page":63,"last_page":63,"pdf_url":9160,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9161,"paper_type":860,"authors":9162,"abstract":9174},"lrec2004-main-507","NLP-enhanced Content Filtering Within the POESIA Project","10.63317\u002F3dwxj3issubh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-507","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F779.pdf","hepple-etal-2004-nlp",[9163,9164,9166,9169,9171,9172],{"paper_id":9156,"author_seq":247,"given_name":1024,"surname":1464,"affiliation":63,"orcid":63},{"paper_id":9156,"author_seq":232,"given_name":7141,"surname":9165,"affiliation":63,"orcid":63},"Ireson",{"paper_id":9156,"author_seq":218,"given_name":9167,"surname":9168,"affiliation":63,"orcid":63},"Paolo","Allegrini",{"paper_id":9156,"author_seq":203,"given_name":8512,"surname":9170,"affiliation":63,"orcid":63},"Marchi",{"paper_id":9156,"author_seq":188,"given_name":8366,"surname":8367,"affiliation":63,"orcid":63},{"paper_id":9156,"author_seq":172,"given_name":9173,"surname":2188,"affiliation":63,"orcid":63},"Jose Maria Gomez","This paper introduces the POESIA internet filtering system, which is open-source, and which combines standard filtering methods, such as positive\u002Fnegative URL lists, with more advanced techniques, such as image processing and NLP-enhanced text filtering. The description here focusses on components providing textual content filtering for three European languages (English, Italian and Spanish), employing NLP methods to enhance performance. 
We address also the acquisition of language data needed to develop these filters, and the evaluation of the system and its components.",{"paper_id":9176,"title":9177,"year":197,"month":855,"day":63,"doi":9178,"resource_url":9179,"first_page":63,"last_page":63,"pdf_url":9180,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9181,"paper_type":860,"authors":9182,"abstract":9184},"lrec2004-main-508","WinPitch Corpus, a Text to Speech Alignment Tool for Multimodal Corpora","10.63317\u002F3hvu5px82yia","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-508","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F780.pdf","martin-2004-winpitch",[9183],{"paper_id":9176,"author_seq":247,"given_name":2736,"surname":1087,"affiliation":63,"orcid":63},"WinPitch Corpus is an innovative software program for computer-aided alignment of large corpora. It provides a method for easy and precise selection of alignment units, ranging from syllable to whole sentences in a hierarchical storing system of aligned data. The method is based on the ability to link visually and select with a mouse click a text segment with the perception of the corresponding speech sound played back at slower speech. Clicking on a text segment generates bidirectional speech-text pointers defining the alignment. This method has the advantage on emerging automatic processes to be effective even for poor quality speech recordings, or in case of speakers’ voice overlap. 
A recent version of the software handles multimedia files and is capable to display the corresponding video streams at slower speed.",{"paper_id":9186,"title":9187,"year":197,"month":855,"day":63,"doi":9188,"resource_url":9189,"first_page":63,"last_page":63,"pdf_url":9190,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9191,"paper_type":860,"authors":9192,"abstract":9194},"lrec2004-main-509","The Statistical Analysis of Morphosyntactic Distributions","10.63317\u002F2fjgbxi7mzcf","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-509","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F781.pdf","evert-2004-statistical",[9193],{"paper_id":9186,"author_seq":247,"given_name":961,"surname":6071,"affiliation":63,"orcid":63},"This paper describes methods for the statistical analysis of quantitative data on the distribution of morphosyntactic features. A key problem is the large amount of ambiguity in automatically extracted data. In the paper, I argue for a conservative approach that treats ambiguous instances as counter-evidence. 
It is nonetheless possible to obtain detailed morphosyntactic information from the corpus data with the help of partial disambiguation and by exploiting systematic ambiguity classes.",{"paper_id":9196,"title":9197,"year":197,"month":855,"day":63,"doi":9198,"resource_url":9199,"first_page":63,"last_page":63,"pdf_url":9200,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9201,"paper_type":860,"authors":9202,"abstract":9210},"lrec2004-main-510","CHeM: A System for the Automatic Analysis of e-mails in the Restoration and Conservation Domain","10.63317\u002F3wcs5bkv5bv7","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-510","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F784.pdf","bordoni-etal-2004-chem",[9203,9204,9207],{"paper_id":9196,"author_seq":247,"given_name":1375,"surname":1376,"affiliation":63,"orcid":63},{"paper_id":9196,"author_seq":232,"given_name":9205,"surname":9206,"affiliation":63,"orcid":63},"Leonardo","Pasqualini",{"paper_id":9196,"author_seq":218,"given_name":9208,"surname":9209,"affiliation":63,"orcid":63},"Filippo","Sciarrone","In this paper, we present the CHeM system, Cultural Heritage e-mail Manager, a support system for the analysis of e-mails of the Restoration and Conservation newsgroup, hosted by the Yahoo portal from December 2000 to January 2003. The complexity of the domain as well as the specificity of the e-mails, prompted us to build the first system prototype based on a client-side architecture, to help less expert users in classifying information contained in e-mails. The system goal is therefore to provide an instrument capable of classifying the received messages, downloaded onto the users' desktops, into standard categories, based on their content, using the well-known techniques of Data Mining and Information Retrieval. 
The categories thus obtained are then used to label the messages in order to provide valuable information on the domain and therefore support specific information retrieval and produce new user groups by an automatic generation of mailing lists. The methodology presented and the first test results are encouraging with a view to porting the system in other similar domains.",{"paper_id":9212,"title":9213,"year":197,"month":855,"day":63,"doi":9214,"resource_url":9215,"first_page":63,"last_page":63,"pdf_url":9216,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9217,"paper_type":860,"authors":9218,"abstract":9223},"lrec2004-main-511","Resources for Place Name Analysis","10.63317\u002F4duwd69pskua","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-511","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F785.pdf","irie-sundheim-2004-resources",[9219,9221],{"paper_id":9212,"author_seq":247,"given_name":2607,"surname":9220,"affiliation":63,"orcid":63},"Irie",{"paper_id":9212,"author_seq":232,"given_name":4831,"surname":9222,"affiliation":63,"orcid":63},"Sundheim","We present a new resource for annotating and visualizing the meaning of place names in natural language text, along with insights gained from analysis of manual annotations. The work addresses the issue of place name (toponym) meaning resolution in text, moving beyond simple named entity recognition to address the problem of grounding textual references, i.e., making a connection between the references and the real-world entities that they denote. The name, \"San Francisco,\" for example, can be mapped to more than 900 distinct place entities that differ in terms of location and\u002For type, according to commonly available databases of named geographical entities, called gazetteers. 
Gazetteers serve as knowledge bases that can be exploited to support the analysis and disambiguation of named places, and the grounding of textual references in real-world entities. This process of grounding text in gazetteers offers a way of normalizing the meaning of the place name references that are found in the gazetteers, and essentially subsumes the text analysis process of determining when instances of a given name are being used to refer to the same or to different entities.",{"paper_id":9225,"title":9226,"year":197,"month":855,"day":63,"doi":9227,"resource_url":9228,"first_page":63,"last_page":63,"pdf_url":9229,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9230,"paper_type":860,"authors":9231,"abstract":9233},"lrec2004-main-512","NEMLAR - An Arabic Language Resources Project","10.63317\u002F4u7wvimttafj","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-512","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F786.pdf","maegaard-2004-nemlar-arabic",[9232],{"paper_id":9225,"author_seq":247,"given_name":918,"surname":919,"affiliation":63,"orcid":63},"The NEMLAR project is a European Commission supported project with partners from the EU and from Arabic speaking countries in the Mediterranean region. The project aims at surveying the state of the art of language resources and tools for Arabic in the region, at developing a BLARK definition for Arabic, and at starting development of language resources or updating of existing language resources. 
The project also aims to create visibility for Arabic language technology, through a newsletter and through an international conference.",{"paper_id":9235,"title":9236,"year":197,"month":855,"day":63,"doi":9237,"resource_url":9238,"first_page":63,"last_page":63,"pdf_url":9239,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9240,"paper_type":860,"authors":9241,"abstract":9260},"lrec2004-main-513","Korean-Chinese-Japanese Multilingual Wordnet with Shared Semantic Hierarchy","10.63317\u002F3ouzj5b9z3y9","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-513","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F787.pdf","choi-etal-2004-korean",[9242,9243,9245,9247,9249,9251,9253,9255,9257],{"paper_id":9235,"author_seq":247,"given_name":4963,"surname":1727,"affiliation":63,"orcid":63},{"paper_id":9235,"author_seq":232,"given_name":9244,"surname":4961,"affiliation":63,"orcid":63},"Hee-Sook",{"paper_id":9235,"author_seq":218,"given_name":9246,"surname":1706,"affiliation":63,"orcid":63},"Wonseok",{"paper_id":9235,"author_seq":203,"given_name":9248,"surname":1045,"affiliation":63,"orcid":63},"Juho",{"paper_id":9235,"author_seq":188,"given_name":9250,"surname":1709,"affiliation":63,"orcid":63},"Eunhe",{"paper_id":9235,"author_seq":172,"given_name":9252,"surname":1709,"affiliation":63,"orcid":63},"Hekyeong",{"paper_id":9235,"author_seq":155,"given_name":9254,"surname":1709,"affiliation":63,"orcid":63},"Donghee",{"paper_id":9235,"author_seq":138,"given_name":9256,"surname":7980,"affiliation":63,"orcid":63},"Youngbin",{"paper_id":9235,"author_seq":121,"given_name":9258,"surname":9259,"affiliation":63,"orcid":63},"Hyosik","Shin","A Chinese-Japanese-Korean wordnet is introduced. It is constructed based on a shared semantic hierarchy that is originated from NTT Goidaikei (Lexical Hierarchical System). Korean wordnet was constructed through the semantic category assignment to every sense of Korean words in a dictionary. 
Verbs and adjectives’ senses are assigned to the same semantic hierarchy as that of nouns. Each sense of verbs is investigated from corpora for their usage, and compared with Japanese translation. Chinese wordnet with the same semantic hierarchy was built up based on the comparison with Korean wordnet. Each sense of Chinese verb corresponds to Korean with its argument structure. These works have lasted since 1994.",{"paper_id":9262,"title":9263,"year":197,"month":855,"day":63,"doi":9264,"resource_url":9265,"first_page":63,"last_page":63,"pdf_url":9266,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9267,"paper_type":860,"authors":9268,"abstract":9274},"lrec2004-main-514","Intranet Try To Find Project (ITTF): An Approach for the Search of Relevant Information Inside an Organization","10.63317\u002F463d84md6czu","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-514","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F788.pdf","jouis-ferru-2004-intranet",[9269,9271],{"paper_id":9262,"author_seq":247,"given_name":4283,"surname":9270,"affiliation":63,"orcid":63},"Jouis",{"paper_id":9262,"author_seq":232,"given_name":9272,"surname":9273,"affiliation":63,"orcid":63},"Jean-Marie","Ferru","We propose to demonstrate that from the point of view of the information flow (analyze, diffusion and\u002For information retrieval), an  organization cannot be described with one and only one exclusive thesaurus (or structured terminology with a simple link such as  “synonym”). In fact, generally, any human being organization (firm, university, company, etc.) is understood and then, represented  like a pyramid. The components of the pyramid are employees connected to each other in particular by a link of subordination. A  leader (or “CEO”, “captain”, “director”, “General”, etc.) is at the top of this pyramid. The leader must take the great decisions. The  leader is generally assisted by a “staff” (management team). 
But every organization is at the same time composed of different kinds  of members (employees) having different categories of knowledges, goals and needs. If we consider an organization, the flow of  informations provided to an employee inside this organization must be filtered according to his needs and only those informations.  Consequently, flows of information are absolutely not proportional to the hierarchical levels of the employees. For instance, the  \"head\" of the organization could not make a decision if she\u002Fhe was informed of all the informations related to its own organization:  he\u002Fshe would be completely submerged! We thus deduce from it that it is necessary to distinguish various categories of employees by  functional properties. Then, for each category, it is necessary to specify the real needs for information. Lastly, it becomes possible to  reconstitute a thesaurus by category.  ITTF is an ongoing project organizing and filtering flow of external informations inside an organization using linguistic resources  about the organization. 
Relevant informations are presented to a particular user by the intermediary of a simplified and personalized  HTML interfaces.",{"paper_id":9276,"title":9277,"year":197,"month":855,"day":63,"doi":9278,"resource_url":9279,"first_page":63,"last_page":63,"pdf_url":9280,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9281,"paper_type":860,"authors":9282,"abstract":9286},"lrec2004-main-515","A Progress Report from the Linguistic Data Consortium: Recent Activities in Resource Creation and Distribution and the Development of Tools and Standards","10.63317\u002F3omnnfup9kzh","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-515","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F789.pdf","cieri-liberman-2004-progress",[9283,9284],{"paper_id":9276,"author_seq":247,"given_name":3852,"surname":4640,"affiliation":63,"orcid":63},{"paper_id":9276,"author_seq":232,"given_name":1024,"surname":9285,"affiliation":63,"orcid":63},"Liberman","This paper describes recent activities of the Linguistic Data Consortium in the collection, annotation and distribution of language data the developments of tools and standards for using that data, the creation of metadata to facilitate the search for linguistic resources.",{"paper_id":9288,"title":9289,"year":197,"month":855,"day":63,"doi":9290,"resource_url":9291,"first_page":63,"last_page":63,"pdf_url":9292,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9293,"paper_type":860,"authors":9294,"abstract":9296},"lrec2004-main-516","Recent Activities within the European Language Resources Association: Issues on Sharing Language Resources and Evaluation","10.63317\u002F3asuwaixua5t","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-516","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F791.pdf","choukri-2004-recent",[9295],{"paper_id":9288,"author_seq":247,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},"This 
paper aims at describing the recent activities within the European Language Resources Association (ELRA) on issues covering its main missions: making available Language Resources and providing Human Language Technologies Evaluation packages.",{"paper_id":9298,"title":9299,"year":197,"month":855,"day":63,"doi":9300,"resource_url":9301,"first_page":63,"last_page":63,"pdf_url":9302,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9303,"paper_type":860,"authors":9304,"abstract":9314},"lrec2004-main-517","EVALDA-CESART Project: Terminological Resources Acquisition Tools Evaluation Campaign","10.63317\u002F3jovg83r4ew3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-517","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F792.pdf","hadi-etal-2004-evalda",[9305,9308,9311],{"paper_id":9298,"author_seq":247,"given_name":9306,"surname":9307,"affiliation":63,"orcid":63},"Widad Mustafa El","Hadi",{"paper_id":9298,"author_seq":232,"given_name":9309,"surname":9310,"affiliation":63,"orcid":63},"Ismail","Timimi",{"paper_id":9298,"author_seq":218,"given_name":9312,"surname":9313,"affiliation":63,"orcid":63},"Marianne","Dabbadie","This paper describes the ongoing evaluation work in CESART research project supported by the French Ministry of Research and Technology and coordinated by the University of Lille 3 and ELDA. The project deals with the evaluation of term and semantic relation extraction from corpora in French. CESART logically follows on the evaluation project achieved within the framework of the Concerted Research Project ARC A3 supported by the AUF, former Aupelf-Uref. 
This article sets the context, briefly mention the project objectives and reports on the suggested evaluation protocol.",{"paper_id":9316,"title":9317,"year":197,"month":855,"day":63,"doi":9318,"resource_url":9319,"first_page":63,"last_page":63,"pdf_url":9320,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9321,"paper_type":860,"authors":9322,"abstract":9330},"lrec2004-main-518","From Weaver to the ALPAC Report","10.63317\u002F2pr4okyxuqd7","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-518","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F794.pdf","pardelli-etal-2004-weaver",[9323,9326,9327],{"paper_id":9316,"author_seq":247,"given_name":9324,"surname":9325,"affiliation":63,"orcid":63},"Gabriella","Pardelli",{"paper_id":9316,"author_seq":232,"given_name":5779,"surname":5780,"affiliation":63,"orcid":63},{"paper_id":9316,"author_seq":218,"given_name":9328,"surname":9329,"affiliation":63,"orcid":63},"Sara","Goggi","This paper presents a sample pertaining to the creation and the use of words in the field of Natural Language Processing (NLP) in the years 1949-1966. These words have been statistically sorted and the results could be taken as a proof that electronic processing of linguistic data leads to the diffusion of clear and concise words for describing a complex concept which would need a circumlocution to be described instead. The aim of this article is to provide an evolutionary overview of these new lexical forms in the various languages for the period taken into account and, whereas possible, a data register and a tabular representation have been prepared as well.",{"paper_id":9332,"title":9333,"year":197,"month":855,"day":63,"doi":9334,"resource_url":9335,"first_page":63,"last_page":63,"pdf_url":9336,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9337,"paper_type":860,"authors":9338,"abstract":63},"lrec2004-main-519","The Verb in the Terminological Collocations. 
Contribution to the Development of a Morphological Analyser: MorphoCom","10.63317\u002F5mh4v37frkya","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-519","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F795.pdf","costa-silva-2004-verb",[9339,9342],{"paper_id":9332,"author_seq":247,"given_name":9340,"surname":9341,"affiliation":63,"orcid":63},"Rute","Costa",{"paper_id":9332,"author_seq":232,"given_name":3589,"surname":6708,"affiliation":63,"orcid":63},{"paper_id":9344,"title":9345,"year":197,"month":855,"day":63,"doi":9346,"resource_url":9347,"first_page":63,"last_page":63,"pdf_url":9348,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9349,"paper_type":860,"authors":9350,"abstract":9360},"lrec2004-main-520","Cluster Analysis and Classification of Named Entities","10.63317\u002F3xbgqf5airh3","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-520","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F796.pdf","da-silva-etal-2004-cluster",[9351,9354,9357],{"paper_id":9344,"author_seq":247,"given_name":9352,"surname":9353,"affiliation":63,"orcid":63},"Joaquim F. Ferreira","da Silva",{"paper_id":9344,"author_seq":232,"given_name":9355,"surname":9356,"affiliation":63,"orcid":63},"Zornitsa","Kozareva",{"paper_id":9344,"author_seq":218,"given_name":9358,"surname":9359,"affiliation":63,"orcid":63},"José Gabriel Pereira","Lopes","This paper presents a statistics-based and language independent unsupervised approach for clustering possible named entities. We describe and motivate the features and statistical filters used by our clustering process. Using the Model-Based Clustering Analysis software we obtained different clusters of named entities. The method was applied to Bulgarian and English. For some clusters, precision is close to 100%; this helps human validation and saves time. Other clusters still need further refinement. 
Based on the obtained clusters, it is possible to classify new named entities.",{"paper_id":9362,"title":9363,"year":197,"month":855,"day":63,"doi":9364,"resource_url":9365,"first_page":63,"last_page":63,"pdf_url":9366,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9367,"paper_type":860,"authors":9368,"abstract":9376},"lrec2004-main-521","Network of Data Centres (NetDC): BNSC - An Arabic Broadcast News Speech Corpus","10.63317\u002F2qw28eurrigi","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-521","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F797.pdf","choukri-etal-2004-network",[9369,9370,9373],{"paper_id":9362,"author_seq":247,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},{"paper_id":9362,"author_seq":232,"given_name":9371,"surname":9372,"affiliation":63,"orcid":63},"Mahtab","Nikkhou",{"paper_id":9362,"author_seq":218,"given_name":9374,"surname":9375,"affiliation":63,"orcid":63},"Niklas","Paulsson","Broadcast news is a very rich source of Language Resources that has been exploited to develop and assess a large set of Human Language Technologies. Some examples include systems to: automatically produce text transcriptions of spoken data; identify the language of a text; translate a text from one language to another; identify topics in the news and retrieve all stories discussing a target topic; retrieve stories directly from the broadcast audio and extract summaries of the content of news stories. BNSC is a broadcast news speech corpus developed in the framework of the European-funded project Network of Data Centres (NetDC). The corpus contains more than 20 hours of Arabic news recordings in modern standard Arabic. The news was recorded over a period of 3 months and were transcribed in Arabic script. The project was done in corporation with the LDC (Linguistic Data Consortium), which has produced a similar corpus of its Voice of America Arabic in the United States. 
This paper presents the BNSC corpus production from data collection to final product.",{"paper_id":9378,"title":9379,"year":197,"month":855,"day":63,"doi":9380,"resource_url":9381,"first_page":63,"last_page":63,"pdf_url":9382,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9383,"paper_type":860,"authors":9384,"abstract":9393},"lrec2004-main-522","Technolangue: A Permanent Evaluation and Information Infrastructure","10.63317\u002F2n7mv6ozs3ps","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-522","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F798.pdf","mapelli-etal-2004-technolangue",[9385,9386,9388,9391,9392],{"paper_id":9378,"author_seq":247,"given_name":6402,"surname":6403,"affiliation":63,"orcid":63},{"paper_id":9378,"author_seq":232,"given_name":5540,"surname":9387,"affiliation":63,"orcid":63},"Nava",{"paper_id":9378,"author_seq":218,"given_name":9389,"surname":9390,"affiliation":63,"orcid":63},"Sylvain","Surcin",{"paper_id":9378,"author_seq":203,"given_name":7315,"surname":4209,"affiliation":63,"orcid":63},{"paper_id":9378,"author_seq":188,"given_name":3403,"surname":3404,"affiliation":63,"orcid":63},"In France, Human Language Technologies (HLT) projects, though regularly supported by the French authorities, are usually limited in time and therefore do not offer long-term, continuous exploitation of their results. As an answer to these limitations and needs, the Technolangue programme, launched in 2002 by the three French funding bodies, namely the ministries of Research, Culture and Industry, aims at offering permanent infrastructures, allowing to capitalize on the results of industrial and academic R&D projects, both at national and international levels. The first section of this paper presents an overview of the Technolangue programme and some selected projects. 
In addition to its involvement in a number of LR projects, ELDA is coordinating two crucial projects in the Evaluation and Technology watch action lines of Technolangue, namely the organisation of an evaluation platform called EVALDA and the implementation of an HLT-oriented information portal known as Technolangue.net. As developed in this paper, the setting up of an evaluation infrastructure and the implementation of a HLT web portal both aim to fit one of the main objectives of the programme, i.e. the creation of permanent actions in the HLT field.",{"paper_id":9395,"title":9396,"year":197,"month":855,"day":63,"doi":9397,"resource_url":9398,"first_page":63,"last_page":63,"pdf_url":9399,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9400,"paper_type":860,"authors":9401,"abstract":9403},"lrec2004-main-523","Extending Wordnets To Implicit Information","10.63317\u002F2j6qywj9y3bd","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-523","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F800.pdf","marrafa-2004-extending",[9402],{"paper_id":9395,"author_seq":247,"given_name":6693,"surname":6694,"affiliation":63,"orcid":63},"WordNets mostly deal with lexicalized expressions and lexical-semantic relations among them. Concepts are represented by sets of synonyms (synsets), which constitute the edges of the network. Each synset includes the lexicalized expressions that correspond to a given concept. This paper adduces evidences which support the claim that some concepts, expressed by a subtype of complex telic predicates, are semi-lexicalized, in the sense that the lexicalized expressions corresponding to them do not express, let us say, the whole concept. Since concepts are the basic units of WordNets, they have to be fully represented. Accordingly, a representation which includes the non-lexicalized information is defended. 
Besides, a new internal semantic relation is proposed, in order to capture appropriately the relationship between the semantics of these predicates and the semantics of their troponyms.",{"paper_id":9405,"title":9406,"year":197,"month":855,"day":63,"doi":9407,"resource_url":9408,"first_page":63,"last_page":63,"pdf_url":9409,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":9410,"paper_type":860,"authors":9411,"abstract":9424},"lrec2004-main-524","Russian Information Retrieval Evaluation Seminar","10.63317\u002F5huqk6x7sru6","https:\u002F\u002Flrec.elra.info\u002Flrec2004-main-524","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2004\u002Fpdf\u002F802.pdf","dobrov-etal-2004-russian",[9412,9414,9417,9419,9421],{"paper_id":9405,"author_seq":247,"given_name":9413,"surname":4019,"affiliation":63,"orcid":63},"Boris",{"paper_id":9405,"author_seq":232,"given_name":9415,"surname":9416,"affiliation":63,"orcid":63},"Igor","Kuralenok",{"paper_id":9405,"author_seq":218,"given_name":9418,"surname":4016,"affiliation":63,"orcid":63},"Natalia",{"paper_id":9405,"author_seq":203,"given_name":9415,"surname":9420,"affiliation":63,"orcid":63},"Nekrestyanov",{"paper_id":9405,"author_seq":188,"given_name":9422,"surname":9423,"affiliation":63,"orcid":63},"Ilya","Segalovich","This paper presents Russian information retrieval evaluation initiative and results obtained during first year. In particular, we describe first ROMIP seminar, used Cyrillic Web collection and search tasks as well as ongoing efforts on ROMIP’2004."]