[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"sidebar-data":3,"breadcrumb-conf-2002":849,"conference-2002":850,"papers-2002":851},{"conferences":4,"tutorials":254,"workshops":260},[5,27,47,65,79,96,113,130,147,164,180,195,211,225,240],{"conference_id":6,"year":7,"proceedings_title":8,"venue_ids":9,"isbn":10,"issn":11,"doi":12,"publisher":13,"editors":14,"conference_name":15,"conference_acronym":16,"conference_number":17,"conference_location":18,"conference_city":19,"conference_country":20,"conference_start_date":21,"conference_end_date":22,"conference_url":23,"pdf_url":24,"img_conf_url":25,"paperCount":26},"lrec2026","2026","Proceedings of the Fifteenth Language Resources and Evaluation Conference (LREC 2026)","lrec","978-2-493814-49-4","2522-2686","10.63317\u002F4fxzgre27xzj","European Language Resources Association (ELRA)","Stelios Piperidis, Núria Bel, Henk van den Heuvel, Nancy Ide, Simon Krek, Antonio Toral","The Fifteenth Language Resources and Evaluation Conference (LREC 2026)","LREC","15","Palau de Congressos de Palma","Palma, Mallorca","Spain","2026-05-11","2026-05-16","https:\u002F\u002Flrec2026.info","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2026\u002FLREC-2026.pdf",null,944,{"conference_id":28,"year":29,"proceedings_title":30,"venue_ids":31,"isbn":32,"issn":11,"doi":33,"publisher":34,"editors":35,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_city":40,"conference_country":41,"conference_start_date":42,"conference_end_date":43,"conference_url":44,"pdf_url":45,"img_conf_url":25,"paperCount":46},"lrec2024","2024","Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024)","lrec|coling","979-10-95546-34-4","10.63317\u002F375ba8vd9q2v","European Language Resources Association (ELRA) and ICCL","Nicoletta Calzolari, Min-Yen Kan, Veronique Hoste, Alessandro Lenci, Sakriani Sakti, Nianwen 
Xue","Joint International Conference on Computational Linguistics, Language Resources and Evaluation","LREC-COLING","14","Lingotto Conference Centre","Turin","Italy","2024-05-20","2024-05-25","https:\u002F\u002Flrec-coling-2024.org","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002FLREC-2024.pdf",1554,{"conference_id":48,"year":49,"proceedings_title":50,"venue_ids":9,"isbn":51,"issn":11,"doi":52,"publisher":13,"editors":53,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_city":57,"conference_country":58,"conference_start_date":59,"conference_end_date":60,"conference_url":61,"pdf_url":62,"img_conf_url":63,"paperCount":64},"lrec2022","2022","Proceedings of the Thirteenth International Conference on Language Resources and Evaluation (LREC 2022)","79-10-95546-38-2","10.63317\u002F296vkvmh42ye","Nicoletta Calzolari, Frédéric Béchet, Philippe Blache, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Jan Odijk, Stelios Piperidis2020","Thirteenth Language Resources and Evaluation Conference","13","Palais du Pharo","Marseille","France","2022-06-20","2022-06-25","https:\u002F\u002Flrec2022.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2022\u002FLREC-2022.pdf","",804,{"conference_id":66,"year":67,"proceedings_title":68,"venue_ids":9,"isbn":69,"issn":11,"doi":70,"publisher":13,"editors":71,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_city":57,"conference_country":58,"conference_start_date":74,"conference_end_date":75,"conference_url":76,"pdf_url":77,"img_conf_url":63,"paperCount":78},"lrec2020","2020","Proceedings of the Twelfth International Conference on Language Resources and Evaluation (LREC 2020)","79-10-95546-34-4","10.63317\u002F4j46u44gnpwr","Nicoletta Calzolari, Frédéric Béchet, Philippe Blache, Khalid 
Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Twelfth Language Resources and Evaluation Conference","12","2020-05-11","2020-05-16","https:\u002F\u002Flrec2020.lrec-conf.org\u002Fen\u002Findex.html","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002FLREC-2020.pdf",895,{"conference_id":80,"year":81,"proceedings_title":82,"venue_ids":9,"isbn":83,"issn":11,"doi":84,"publisher":13,"editors":85,"conference_name":86,"conference_acronym":16,"conference_number":87,"conference_location":88,"conference_city":89,"conference_country":90,"conference_start_date":91,"conference_end_date":92,"conference_url":93,"pdf_url":94,"img_conf_url":63,"paperCount":95},"lrec2018","2018","Proceedings of the Eleventh International Conference on Language Resources and Evaluation (LREC 2018)","79-10-95546-00-9","10.63317\u002F25jzjyk647iz","Nicoletta Calzolari, Khalid Choukri, Christopher Cieri, Thierry Declerck, Sara Goggi, Koiti Hasida, Hitoshi Isahara, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asuncion Moreno, Jan Odijk, Stelios Piperidis, Takenobu Tokunaga","Eleventh International Conference on Language Resources and Evaluation","11","Phoenix Seagaia Resort","Miyazaki","Japan","2018-05-07","2018-05-12","http:\u002F\u002Flrec2018.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2018\u002FLREC2018_Proceedings.zip",728,{"conference_id":97,"year":98,"proceedings_title":99,"venue_ids":9,"isbn":100,"issn":11,"doi":101,"publisher":13,"editors":102,"conference_name":103,"conference_acronym":16,"conference_number":104,"conference_location":105,"conference_city":106,"conference_country":107,"conference_start_date":108,"conference_end_date":109,"conference_url":110,"pdf_url":111,"img_conf_url":63,"paperCount":112},"lrec2016","2016","Proceedings of the Tenth International Conference on Language Resources and Evaluation 
(LREC 2016)","978-2-9517408-9-1","10.63317\u002F5mruwrazrwbg","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Sara Goggi, Marko Grobelnik, Bente Maegaard, Joseph Mariani, Hélène Mazo, Asunción Moreno, Jan Odijk, Stelios Piperidis","Tenth International Conference on Language Resources and Evaluation","10","Bernardinsko Naselje","Portorož","Slovenia","2016-05-23","2016-05-28","http:\u002F\u002Flrec2016.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2016\u002FLREC2016_Proceedings.zip",745,{"conference_id":114,"year":115,"proceedings_title":116,"venue_ids":9,"isbn":117,"issn":11,"doi":118,"publisher":13,"editors":119,"conference_name":120,"conference_acronym":16,"conference_number":121,"conference_location":122,"conference_city":123,"conference_country":124,"conference_start_date":125,"conference_end_date":126,"conference_url":127,"pdf_url":128,"img_conf_url":63,"paperCount":129},"lrec2014","2014","Proceedings of the Ninth International Conference on Language Resources and Evaluation (LREC 2014)","978-2-9517408-8-4","10.63317\u002F3ebxpiqq4ikp","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Hrafn Loftsson, Bente Maegaard, Joseph Mariani, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Ninth International Conference on Language Resources and Evaluation","9","Harpa Concert Hall and Conference 
Centre","Reykjavik","Iceland","2014-05-26","2014-05-31","http:\u002F\u002Flrec2014.lrec-conf.org\u002Fen\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2014\u002FLREC2014_Proceedings.zip",746,{"conference_id":131,"year":132,"proceedings_title":133,"venue_ids":9,"isbn":134,"issn":11,"doi":135,"publisher":13,"editors":136,"conference_name":137,"conference_acronym":16,"conference_number":138,"conference_location":139,"conference_city":140,"conference_country":141,"conference_start_date":142,"conference_end_date":143,"conference_url":144,"pdf_url":145,"img_conf_url":63,"paperCount":146},"lrec2012","2012","Proceedings of the Eighth International Conference on Language Resources and Evaluation (LREC 2012)","978-2-9517408-7-7","10.63317\u002F42za3jv29xvs","Nicoletta Calzolari, Khalid Choukri, Thierry Declerck, Mehmet Doğan, Bente Maegaard, Joseph Mariani, Asuncion Moreno, Jan Odijk, Stelios Piperidis","Eighth International Conference on Language Resources and Evaluation","8","Istanbul Convention & Exhibition Centre (ICEC) (Lütfi Kırdar)","Istanbul","Turkey","2012-05-21","2012-05-27","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2012\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2012\u002FLREC2012_Proceedings.zip",670,{"conference_id":148,"year":149,"proceedings_title":150,"venue_ids":9,"isbn":151,"issn":11,"doi":152,"publisher":13,"editors":153,"conference_name":154,"conference_acronym":16,"conference_number":155,"conference_location":156,"conference_city":157,"conference_country":158,"conference_start_date":159,"conference_end_date":160,"conference_url":161,"pdf_url":162,"img_conf_url":63,"paperCount":163},"lrec2010","2010","Proceedings of the Seventh International Conference on Language Resources and Evaluation (LREC 2010)","2-9517408-6-7","10.63317\u002F32m6vov78mmv","Nicoletta Calzolari, Khalid Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis, Mike Rosner, Daniel Tapias","Seventh International Conference 
on Language Resources and Evaluation","7","Mediterranean Conference Centre (MCC)","Valletta","Malta","2010-05-17","2010-05-23","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2010\u002F","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2010\u002FLREC2010_Proceedings.zip",645,{"conference_id":165,"year":166,"proceedings_title":167,"venue_ids":9,"isbn":168,"issn":11,"doi":169,"publisher":13,"editors":170,"conference_name":171,"conference_acronym":16,"conference_number":172,"conference_location":173,"conference_city":174,"conference_country":175,"conference_start_date":176,"conference_end_date":177,"conference_url":178,"pdf_url":63,"img_conf_url":63,"paperCount":179},"lrec2008","2008","Proceedings of the Sixth International Conference on Language Resources and Evaluation (LREC 2008)","2-9517408-4-0","10.63317\u002F3c6xa89msnta","Nicoletta Calzolari, Khalid Choukri, Bente Maegaard, Joseph Mariani, Jan Odijk, Stelios Piperidis, Daniel Tapias","Sixth International Conference on Language Resources and Evaluation","6","Palais des Congrès","Marrakech","Morocco","2008-05-28","2008-05-30","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2008\u002F",620,{"conference_id":181,"year":182,"proceedings_title":183,"venue_ids":9,"isbn":184,"issn":11,"doi":185,"publisher":13,"editors":186,"conference_name":187,"conference_acronym":16,"conference_number":188,"conference_location":189,"conference_city":190,"conference_country":41,"conference_start_date":191,"conference_end_date":192,"conference_url":193,"pdf_url":63,"img_conf_url":63,"paperCount":194},"lrec2006","2006","Proceedings of the Fifth International Conference on Language Resources and Evaluation (LREC 2006)","2-9517408-2-4","10.63317\u002F2xx3x75ppppa","Nicoletta Calzolari, Khalid Choukri, Aldo Gangemi, Bente Maegaard, Joseph Mariani, Jan Odijk, Daniel Tapias","Fifth International Conference on Language Resources and Evaluation","5","Magazzini del 
Cotone","Genoa","2006-05-24","2006-05-26","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2006\u002F",513,{"conference_id":196,"year":197,"proceedings_title":198,"venue_ids":9,"isbn":199,"issn":11,"doi":200,"publisher":13,"editors":201,"conference_name":202,"conference_acronym":16,"conference_number":203,"conference_location":204,"conference_city":205,"conference_country":206,"conference_start_date":207,"conference_end_date":208,"conference_url":209,"pdf_url":63,"img_conf_url":63,"paperCount":210},"lrec2004","2004","Proceedings of the Fourth International Conference on Language Resources and Evaluation (LREC 2004)","2-9517408-1-6","10.63317\u002F2s47745g6zhw","Maria Teresa Lino, Maria Francisca Xavier, Fatima Ferreira, Rute Costa, Raquel Silva","Fourth International Conference on Language Resources and Evaluation","4","Centro Cultural de Belém","Lisbon","Portugal","2004-05-26","2004-05-28","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2004\u002F",524,{"conference_id":212,"year":213,"proceedings_title":214,"venue_ids":9,"isbn":63,"issn":11,"doi":215,"publisher":13,"editors":216,"conference_name":217,"conference_acronym":16,"conference_number":218,"conference_location":219,"conference_city":220,"conference_country":20,"conference_start_date":221,"conference_end_date":222,"conference_url":223,"pdf_url":63,"img_conf_url":63,"paperCount":224},"lrec2002","2002","Proceedings of the Third International Conference on Language Resources and Evaluation (LREC 2002)","10.63317\u002F3ha6dpna2o97","Manuel González Rodríguez, Carmen Paz Suarez Araujo","Third International Conference on Language Resources and Evaluation","3","Auditorio Alfredo Kraus","Las 
Palmas","2002-05-29","2002-05-31","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2002\u002F",354,{"conference_id":226,"year":227,"proceedings_title":228,"venue_ids":9,"isbn":63,"issn":11,"doi":229,"publisher":13,"editors":230,"conference_name":231,"conference_acronym":16,"conference_number":232,"conference_location":233,"conference_city":234,"conference_country":235,"conference_start_date":236,"conference_end_date":237,"conference_url":238,"pdf_url":63,"img_conf_url":63,"paperCount":239},"lrec2000","2000","Proceedings of the Second International Conference on Language Resources and Evaluation (LREC 2000)","10.63317\u002F3yosukd7w6sn","Maria Gavrilidou, George Carayannis, Stella Markantonatou, Stelios Piperidis, Greg Stainhauer","Second International Conference on Language Resources and Evaluation","2","Zappeion Megaron","Athens","Greece","2000-05-31","2000-06-02","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec2000\u002F",280,{"conference_id":241,"year":242,"proceedings_title":243,"venue_ids":9,"isbn":63,"issn":11,"doi":244,"publisher":13,"editors":245,"conference_name":246,"conference_acronym":16,"conference_number":247,"conference_location":248,"conference_city":249,"conference_country":20,"conference_start_date":250,"conference_end_date":251,"conference_url":252,"pdf_url":63,"img_conf_url":63,"paperCount":253},"lrec1998","1998","Proceedings of the First International Conference on Language Resources and Evaluation (LREC 1998)","10.63317\u002F5a986fnjefzm","Antonio Rubio,Natividad Gallardo, Rosa Castro, Antonio Tejada","Language Resources and Evaluation Conference","1","Palacio de Congresos de 
Granada","Granada","1998-05-28","1998-05-30","http:\u002F\u002Fwww.lrec-conf.org\u002Flrec1998\u002F",212,[255],{"year":29,"proceedings_title":256,"paperCount":257,"doi":258,"pdf_url":259,"venue_ids":31,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"Proceedings of the 2024 Joint International Conference on Computational Linguistics, Language Resources and Evaluation (LREC-COLING 2024): Tutorial Summaries",13,"10.63317\u002F3piy8jnqffp3","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftutorials\u002FLREC-2024-Tutorials.pdf",{"2020":261,"2022":452,"2024":617},[262,269,276,282,289,296,302,308,315,322,328,335,341,348,354,359,364,370,376,382,387,392,397,402,407,413,419,425,431,436,442,447],{"workshop_id":263,"year":67,"full_workshop_id":264,"proceedings_title":265,"paperCount":266,"doi":267,"pdf_url":268,"venue_ids":263,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"aespen","lrec2020_ws_aespen","Proceedings of the Workshop on Automated Extraction of Socio-political Events from News 2020",11,"10.63317\u002F58onsa8rnrrz","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FAESPEN2020\u002FAESPEN-2020.pdf",{"workshop_id":270,"year":67,"full_workshop_id":271,"proceedings_title":272,"paperCount":273,"doi":274,"pdf_url":275,"venue_ids":270,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"ai4hi","lrec2020_ws_ai4hi","Proceedings of the 1st International Workshop on Artificial Intelligence for Historical Image Enrichment and 
Access",5,"10.63317\u002F3m5ep69cw7jj","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FAI4HI2020\u002FAI4HI-2020.pdf",{"workshop_id":277,"year":67,"full_workshop_id":278,"proceedings_title":279,"paperCount":266,"doi":280,"pdf_url":281,"venue_ids":277,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"bucc","lrec2020_ws_bucc","Proceedings of the 13th Workshop on Building and Using Comparable Corpora","10.63317\u002F2fx83jms4c9r","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FBUCC2020\u002FBUCC-2020.pdf",{"workshop_id":283,"year":67,"full_workshop_id":284,"proceedings_title":285,"paperCount":286,"doi":287,"pdf_url":288,"venue_ids":283,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"calcs","lrec2020_ws_calcs","Proceedings of the 4th Workshop on Computational Approaches to Code Switching",9,"10.63317\u002F3jbxrkvj6qkv","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCS2020\u002FCALCS-2020.pdf",{"workshop_id":290,"year":67,"full_workshop_id":291,"proceedings_title":292,"paperCount":293,"doi":294,"pdf_url":295,"venue_ids":290,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"cllrd","lrec2020_ws_cllrd","Proceedings of the LREC 2020 Workshop on \"Citizen Linguistics in Language Resource 
Development\"",8,"10.63317\u002F3qo9e6q6vq5f","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002Fcllrd2020\u002FCLLRD-2020.pdf",{"workshop_id":297,"year":67,"full_workshop_id":298,"proceedings_title":299,"paperCount":266,"doi":300,"pdf_url":301,"venue_ids":297,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"clssts","lrec2020_ws_clssts","Proceedings of the workshop on Cross-Language Search and Summarization of Text and Speech (CLSSTS2020)","10.63317\u002F3p6twmv6mhc5","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCLSSTS2020\u002FCLSSTS-2020.pdf",{"workshop_id":303,"year":67,"full_workshop_id":304,"proceedings_title":305,"paperCount":286,"doi":306,"pdf_url":307,"venue_ids":303,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"cmlc","lrec2020_ws_cmlc","Proceedings of the 8th Workshop on Challenges in the Management of Large Corpora","10.63317\u002F236pt6g4g4s4","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCMLC-8\u002FCMLC-2020.pdf",{"workshop_id":309,"year":67,"full_workshop_id":310,"proceedings_title":311,"paperCount":312,"doi":313,"pdf_url":314,"venue_ids":309,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"computerm","lrec2020_ws_computerm","Proceedings of the 6th International Workshop on Computational 
Terminology",15,"10.63317\u002F4jp43md9xe2q","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FCOMPUTERM2020\u002FCOMPUTERM-2020.pdf",{"workshop_id":316,"year":67,"full_workshop_id":317,"proceedings_title":318,"paperCount":319,"doi":320,"pdf_url":321,"venue_ids":316,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"framenet","lrec2020_ws_framenet","Proceedings of the International FrameNet Workshop 2020: Towards a Global, Multilingual FrameNet",12,"10.63317\u002F4tjynpg2ohf3","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002Fframenet2020\u002FFrameNet-2020.pdf",{"workshop_id":323,"year":67,"full_workshop_id":324,"proceedings_title":325,"paperCount":319,"doi":326,"pdf_url":327,"venue_ids":323,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"gamnlp","lrec2020_ws_gamnlp","Proceedings of the Workshop on Games and Natural Language Processing","10.63317\u002F5ahttrxdfnza","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FGames-NLP\u002FGAMNLP-2020.pdf",{"workshop_id":329,"year":67,"full_workshop_id":330,"proceedings_title":331,"paperCount":332,"doi":333,"pdf_url":334,"venue_ids":329,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"globalex","lrec2020_ws_globalex","Proceedings of the 2020 Globalex Workshop on Linked 
Lexicography",18,"10.63317\u002F34yjjfrnwvj8","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FGLOBALEX2020\u002FGLOBALEX-2020.pdf",{"workshop_id":336,"year":67,"full_workshop_id":337,"proceedings_title":338,"paperCount":319,"doi":339,"pdf_url":340,"venue_ids":336,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"isa","lrec2020_ws_isa","Proceedings of the 16th Joint ACL-ISO Workshop on Interoperable Semantic Annotation","10.63317\u002F5id7rv8izjcd","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FISA16\u002FISA-2020.pdf",{"workshop_id":342,"year":67,"full_workshop_id":343,"proceedings_title":344,"paperCount":345,"doi":346,"pdf_url":347,"venue_ids":342,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"iwltp","lrec2020_ws_iwltp","Proceedings of the 1st International Workshop on Language Technology Platforms",17,"10.63317\u002F4hc34do825yz","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FIWLTP2020\u002FIWLTP-2020.pdf",{"workshop_id":349,"year":67,"full_workshop_id":350,"proceedings_title":351,"paperCount":319,"doi":352,"pdf_url":353,"venue_ids":349,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"ldl","lrec2020_ws_ldl","Proceedings of the 7th Workshop on Linked Data in Linguistics 
(LDL-2020)","10.63317\u002F3mn9ttzvdbxs","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2020\u002Fworkshops\u002FLDL2020\u002FLDL-2020.pdf",{"workshop_id":355,"year":67,"full_workshop_id":356,"proceedings_title":357,"paperCount":293,"doi":358,"pdf_url":63,"venue_ids":355,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lincr","lrec2020_ws_lincr","Proceedings of the Second Workshop on Linguistic and Neurocognitive Resources","10.63317\u002F24gnv8q9cz94",{"workshop_id":360,"year":67,"full_workshop_id":361,"proceedings_title":362,"paperCount":286,"doi":363,"pdf_url":63,"venue_ids":360,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lr4sshoc","lrec2020_ws_lr4sshoc","Proceedings of the Workshop about Language Resources for the SSH Cloud","10.63317\u002F5j7vesdm7yia",{"workshop_id":365,"year":67,"full_workshop_id":366,"proceedings_title":367,"paperCount":368,"doi":369,"pdf_url":63,"venue_ids":365,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lt4gov","lrec2020_ws_lt4gov","Proceedings of the 1st Workshop on Language Technologies for Government and Public Administration (LT4Gov)",6,"10.63317\u002F5i8su82ish3i",{"workshop_id":371,"year":67,"full_workshop_id":372,"proceedings_title":373,"paperCount":374,"doi":375,"pdf_url":63,"venue_ids":371,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"lt4hala","lrec2020_ws_lt4hala","Proceedings of LT4HALA 2020 - 1st Workshop on Language Technologies for Historical and Ancient 
Languages",21,"10.63317\u002F4jnfg39ctsra",{"workshop_id":377,"year":67,"full_workshop_id":378,"proceedings_title":379,"paperCount":380,"doi":381,"pdf_url":63,"venue_ids":377,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"mmw","lrec2020_ws_mmw","Proceedings of the LREC 2020 Workshop on Multimodal Wordnets (MMW2020)",7,"10.63317\u002F5pcp4c88d6n8",{"workshop_id":383,"year":67,"full_workshop_id":384,"proceedings_title":385,"paperCount":368,"doi":386,"pdf_url":63,"venue_ids":383,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"multilingualbio","lrec2020_ws_multilingualbio","Proceedings of the LREC 2020 Workshop on Multilingual Biomedical Text Processing (MultilingualBIO 2020)","10.63317\u002F4pfckaywoxxa",{"workshop_id":388,"year":67,"full_workshop_id":389,"proceedings_title":390,"paperCount":273,"doi":391,"pdf_url":63,"venue_ids":388,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"onion","lrec2020_ws_onion","Proceedings of LREC2020 Workshop \"People in language, vision and the mind\" (ONION2020)","10.63317\u002F2oxdsr8tue27",{"workshop_id":393,"year":67,"full_workshop_id":394,"proceedings_title":395,"paperCount":332,"doi":396,"pdf_url":63,"venue_ids":393,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"osact","lrec2020_ws_osact","Proceedings of the 4th Workshop on Open-Source Arabic Corpora and Processing Tools, with a Shared Task on Offensive Language 
Detection","10.63317\u002F2xjmcg9vsxcp",{"workshop_id":398,"year":67,"full_workshop_id":399,"proceedings_title":400,"paperCount":257,"doi":401,"pdf_url":63,"venue_ids":398,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"parlaclarin","lrec2020_ws_parlaclarin","Proceedings of the Second ParlaCLARIN Workshop","10.63317\u002F3qhkh6dmemmn",{"workshop_id":403,"year":67,"full_workshop_id":404,"proceedings_title":405,"paperCount":286,"doi":406,"pdf_url":63,"venue_ids":403,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"rail","lrec2020_ws_rail","Proceedings of the first workshop on Resources for African Indigenous Languages","10.63317\u002F3fjhbkudhcmc",{"workshop_id":408,"year":67,"full_workshop_id":409,"proceedings_title":410,"paperCount":411,"doi":412,"pdf_url":63,"venue_ids":408,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"readi","lrec2020_ws_readi","Proceedings of the 1st Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI)",14,"10.63317\u002F4p5m2euxriim",{"workshop_id":414,"year":67,"full_workshop_id":415,"proceedings_title":416,"paperCount":417,"doi":418,"pdf_url":63,"venue_ids":414,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"restup","lrec2020_ws_restup","Proceedings of the Workshop on Resources and Techniques for User and Author Profiling in Abusive 
Language",4,"10.63317\u002F3y3vzhsp3qb7",{"workshop_id":420,"year":67,"full_workshop_id":421,"proceedings_title":422,"paperCount":423,"doi":424,"pdf_url":63,"venue_ids":420,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"signlang","lrec2020_ws_signlang","Proceedings of the LREC2020 9th Workshop on the Representation and Processing of Sign Languages: Sign Language Resources in the Service of the Language Community, Technological Challenges and Application Perspectives",36,"10.63317\u002F3nocn9xntuki",{"workshop_id":426,"year":67,"full_workshop_id":427,"proceedings_title":428,"paperCount":429,"doi":430,"pdf_url":63,"venue_ids":426,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"sltu","lrec2020_ws_sltu","Proceedings of the 1st Joint Workshop on Spoken Language Technologies for Under-resourced languages (SLTU) and Collaboration and Computing for Under-Resourced Languages (CCURL)",52,"10.63317\u002F25p2yts6fk3q",{"workshop_id":432,"year":67,"full_workshop_id":433,"proceedings_title":434,"paperCount":293,"doi":435,"pdf_url":63,"venue_ids":432,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"stoc","lrec2020_ws_stoc","Proceedings for the First International Workshop on Social Threats in Online Conversations: Understanding and Management","10.63317\u002F4p7j6t9bjg8m",{"workshop_id":437,"year":67,"full_workshop_id":438,"proceedings_title":439,"paperCount":440,"doi":441,"pdf_url":63,"venue_ids":437,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"trac","lrec2020_ws_trac","Proceedings 
of the Second Workshop on Trolling, Aggression and Cyberbullying",25,"10.63317\u002F27yyhn22v2fc",{"workshop_id":443,"year":67,"full_workshop_id":444,"proceedings_title":445,"paperCount":293,"doi":446,"pdf_url":63,"venue_ids":443,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"wac","lrec2020_ws_wac","Proceedings of the 12th Web as Corpus Workshop","10.63317\u002F2va68regv5ni",{"workshop_id":448,"year":67,"full_workshop_id":449,"proceedings_title":450,"paperCount":319,"doi":451,"pdf_url":63,"venue_ids":448,"publisher":13,"editor":63,"conference_name":72,"conference_acronym":16,"conference_number":73,"conference_location":56,"conference_start_date":74,"conference_end_date":75},"wildre","lrec2020_ws_wildre","Proceedings of the WILDRE5– 5th Workshop on Indian Language Data: Resources and Evaluation","10.63317\u002F2ydivss2veo9",[453,458,463,467,472,478,483,488,494,499,504,509,514,520,525,530,535,540,545,550,554,559,564,569,573,577,582,587,593,598,603,608,613],{"workshop_id":277,"year":49,"full_workshop_id":454,"proceedings_title":455,"paperCount":286,"doi":456,"pdf_url":457,"venue_ids":277,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_bucc","Proceedings of the BUCC Workshop within LREC 2022","10.63317\u002F2mqwgvrp7zkn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2022\u002Fworkshops\u002FBUCC\u002F2022.bucc-1.0.pdf",{"workshop_id":459,"year":49,"full_workshop_id":460,"proceedings_title":461,"paperCount":332,"doi":462,"pdf_url":63,"venue_ids":459,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"cltw","lrec2022_ws_cltw","Proceedings of the 4th Celtic Language Technology 
Workshop within LREC2022","10.63317\u002F3x8fjtq6m25s",{"workshop_id":303,"year":49,"full_workshop_id":464,"proceedings_title":465,"paperCount":368,"doi":466,"pdf_url":63,"venue_ids":303,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_cmlc","Proceedings of the Workshop on Challenges in the Management of Large Corpora (CMLC-10)","10.63317\u002F2ajestpwy3c8",{"workshop_id":468,"year":49,"full_workshop_id":469,"proceedings_title":470,"paperCount":293,"doi":471,"pdf_url":63,"venue_ids":468,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"csrnlp","lrec2022_ws_csrnlp","Proceedings of the First Computing Social Responsibility Workshop within the 13th Language Resources and Evaluation Conference","10.63317\u002F3xphwxosghv8",{"workshop_id":473,"year":49,"full_workshop_id":474,"proceedings_title":475,"paperCount":476,"doi":477,"pdf_url":63,"venue_ids":473,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"dclrl","lrec2022_ws_dclrl","Proceedings of the Workshop on Dataset Creation for Lower-Resourced Languages within the 13th Language Resources and Evaluation Conference",10,"10.63317\u002F4652bsvzarmy",{"workshop_id":479,"year":49,"full_workshop_id":480,"proceedings_title":481,"paperCount":368,"doi":482,"pdf_url":63,"venue_ids":479,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"digitam","lrec2022_ws_digitam","Proceedings of the Workshop on Processing Language Variation: Digital Armenian (DigitAm) within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F369nz2tcm6qc",{"workshop_id":484,"year":49,"full_workshop_id":485,"proceedings_title":486,"paperCount":332,"doi":487,"pdf_url":63,"venue_ids":484,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"eurali","lrec2022_ws_eurali","Proceedings of the Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia within the 13th Language Resources and Evaluation Conference","10.63317\u002F4dhjcavy7q7y",{"workshop_id":489,"year":49,"full_workshop_id":490,"proceedings_title":491,"paperCount":492,"doi":493,"pdf_url":63,"venue_ids":489,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"fnp","lrec2022_ws_fnp","Proceedings of the 4th Financial Narrative Processing Workshop @LREC2022",24,"10.63317\u002F29xpoafy85p4",{"workshop_id":495,"year":49,"full_workshop_id":496,"proceedings_title":497,"paperCount":380,"doi":498,"pdf_url":63,"venue_ids":495,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"games","lrec2022_ws_games","Proceedings of the 9th Workshop on Games and Natural Language Processing within the 13th Language Resources and Evaluation Conference","10.63317\u002F5om6f5meam4s",{"workshop_id":500,"year":49,"full_workshop_id":501,"proceedings_title":502,"paperCount":257,"doi":503,"pdf_url":63,"venue_ids":500,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"gwll","lrec2022_ws_gwll","Proceedings of Globalex Workshop on Linked Lexicography within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F5knvvemaz9uw",{"workshop_id":336,"year":49,"full_workshop_id":505,"proceedings_title":506,"paperCount":507,"doi":508,"pdf_url":63,"venue_ids":336,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_isa","Proceedings of the 18th Joint ACL - ISO Workshop on Interoperable Semantic Annotation within LREC2022",19,"10.63317\u002F4h3ue6m3sam4",{"workshop_id":510,"year":49,"full_workshop_id":511,"proceedings_title":512,"paperCount":368,"doi":513,"pdf_url":63,"venue_ids":510,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lateraisse","lrec2022_ws_lateraisse","Proceedings of the First Workshop on Language Technology and Resources for a Fair, Inclusive, and Safe Society within the 13th Language Resources and Evaluation Conference","10.63317\u002F5osn5jjjbomp",{"workshop_id":515,"year":49,"full_workshop_id":516,"proceedings_title":517,"paperCount":518,"doi":519,"pdf_url":63,"venue_ids":515,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"law","lrec2022_ws_law","Proceedings of the 16th Linguistic Annotation Workshop (LAW-XVI) within LREC2022",20,"10.63317\u002F3fysdho22dbb",{"workshop_id":521,"year":49,"full_workshop_id":522,"proceedings_title":523,"paperCount":312,"doi":524,"pdf_url":63,"venue_ids":521,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"legal","lrec2022_ws_legal","Proceedings of the Workshop on Ethical and Legal Issues in Human Language Technologies and Multilingual De-Identification of Sensitive Data In Language Resources within the 
13th Language Resources and Evaluation Conference","10.63317\u002F273whfjsjapd",{"workshop_id":371,"year":49,"full_workshop_id":526,"proceedings_title":527,"paperCount":528,"doi":529,"pdf_url":63,"venue_ids":371,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_lt4hala","Proceedings of the Second Workshop on Language Technologies for Historical and Ancient Languages",31,"10.63317\u002F3dte53mz4zvu",{"workshop_id":531,"year":49,"full_workshop_id":532,"proceedings_title":533,"paperCount":345,"doi":534,"pdf_url":63,"venue_ids":531,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"mwe","lrec2022_ws_mwe","Proceedings of the 18th Workshop on Multiword Expressions @LREC2022","10.63317\u002F2fftdmypb747",{"workshop_id":536,"year":49,"full_workshop_id":537,"proceedings_title":538,"paperCount":286,"doi":539,"pdf_url":63,"venue_ids":536,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"nidcp","lrec2022_ws_nidcp","Proceedings of the 2nd Workshop on Novel Incentives in Data Collection from People: models, implementations, challenges and results within LREC 2022","10.63317\u002F2dox4kgfq3mg",{"workshop_id":541,"year":49,"full_workshop_id":542,"proceedings_title":543,"paperCount":312,"doi":544,"pdf_url":63,"venue_ids":541,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"nlperspectives","lrec2022_ws_nlperspectives","Proceedings of the 1st Workshop on Perspectivist Approaches to NLP 
@LREC2022","10.63317\u002F5nzs42fwjimz",{"workshop_id":393,"year":49,"full_workshop_id":546,"proceedings_title":547,"paperCount":548,"doi":549,"pdf_url":63,"venue_ids":393,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_osact","Proceedings of the 5th Workshop on Open-Source Arabic Corpora and Processing Tools with Shared Tasks on Qur'an QA and Fine-Grained Hate Speech Detection",28,"10.63317\u002F4u4quhegagc5",{"workshop_id":398,"year":49,"full_workshop_id":551,"proceedings_title":552,"paperCount":507,"doi":553,"pdf_url":63,"venue_ids":398,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_parlaclarin","Proceedings of the Workshop ParlaCLARIN III within the 13th Language Resources and Evaluation Conference","10.63317\u002F4zzb69hz9ebb",{"workshop_id":555,"year":49,"full_workshop_id":556,"proceedings_title":557,"paperCount":411,"doi":558,"pdf_url":63,"venue_ids":555,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"politicalnlp","lrec2022_ws_politicalnlp","Proceedings of the LREC 2022 workshop on Natural Language Processing for Political Sciences","10.63317\u002F5h778npybpti",{"workshop_id":560,"year":49,"full_workshop_id":561,"proceedings_title":562,"paperCount":368,"doi":563,"pdf_url":63,"venue_ids":560,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"pvlam","lrec2022_ws_pvlam","Proceedings of the 2nd Workshop on People in Vision, Language, and the 
Mind","10.63317\u002F52rissdzp475",{"workshop_id":565,"year":49,"full_workshop_id":566,"proceedings_title":567,"paperCount":319,"doi":568,"pdf_url":63,"venue_ids":565,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"rapid","lrec2022_ws_rapid","Proceedings of the RaPID Workshop - Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive\u002Fpsychiatric\u002Fdevelopmental impairments - within the 13th Language Resources and Evaluation Conference","10.63317\u002F4jch25mm92pa",{"workshop_id":408,"year":49,"full_workshop_id":570,"proceedings_title":571,"paperCount":286,"doi":572,"pdf_url":63,"venue_ids":408,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_readi","Proceedings of the 2nd Workshop on Tools and Resources to Empower People with REAding DIfficulties (READI) within the 13th Language Resources and Evaluation Conference","10.63317\u002F56pm6ipunttk",{"workshop_id":414,"year":49,"full_workshop_id":574,"proceedings_title":575,"paperCount":417,"doi":576,"pdf_url":63,"venue_ids":414,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_restup","Proceedings of the Second International Workshop on Resources and Techniques for User Information in Abusive Language 
Analysis","10.63317\u002F53due7cg7w44",{"workshop_id":578,"year":49,"full_workshop_id":579,"proceedings_title":580,"paperCount":368,"doi":581,"pdf_url":63,"venue_ids":578,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"salld","lrec2022_ws_salld","Proceedings of the 2nd Workshop on Sentiment Analysis and Linguistic Linked Data","10.63317\u002F2ueor4yvpz4s",{"workshop_id":420,"year":49,"full_workshop_id":583,"proceedings_title":584,"paperCount":585,"doi":586,"pdf_url":63,"venue_ids":420,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_signlang","Proceedings of the LREC2022 10th Workshop on the Representation and Processing of Sign Languages: Multilingual Sign Language Resources",32,"10.63317\u002F2rifm6bf4efz",{"workshop_id":588,"year":49,"full_workshop_id":589,"proceedings_title":590,"paperCount":591,"doi":592,"pdf_url":63,"venue_ids":588,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"sigul","lrec2022_ws_sigul","Proceedings of the 1st Annual Meeting of the ELRA\u002FISCA Special Interest Group on Under-Resourced Languages",27,"10.63317\u002F5nb3qu29q9zi",{"workshop_id":594,"year":49,"full_workshop_id":595,"proceedings_title":596,"paperCount":507,"doi":597,"pdf_url":63,"venue_ids":594,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"sltat","lrec2022_ws_sltat","Proceedings of the 7th International Workshop on Sign Language Translation and Avatar Technology: The Junction of the Visual and the Textual: Challenges and 
Perspectives","10.63317\u002F3xfoevzar6ig",{"workshop_id":599,"year":49,"full_workshop_id":600,"proceedings_title":601,"paperCount":476,"doi":602,"pdf_url":63,"venue_ids":599,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"smila","lrec2022_ws_smila","Proceedings of the Workshop on Smiling and Laughter across Contexts and the Life-span within the 13th Language Resources and Evaluation Conference","10.63317\u002F47g2oou8nqdu",{"workshop_id":604,"year":49,"full_workshop_id":605,"proceedings_title":606,"paperCount":368,"doi":607,"pdf_url":63,"venue_ids":604,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"tdle","lrec2022_ws_tdle","Proceedings of the Workshop Towards Digital Language Equality within the 13th Language Resources and Evaluation Conference","10.63317\u002F3cx3opcocn9i",{"workshop_id":609,"year":49,"full_workshop_id":610,"proceedings_title":611,"paperCount":380,"doi":612,"pdf_url":63,"venue_ids":609,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"term","lrec2022_ws_term","Proceedings of the Workshop on Terminology in the 21st century: many faces, many places","10.63317\u002F23pdrqa3onr3",{"workshop_id":448,"year":49,"full_workshop_id":614,"proceedings_title":615,"paperCount":345,"doi":616,"pdf_url":63,"venue_ids":448,"publisher":13,"editor":63,"conference_name":54,"conference_acronym":16,"conference_number":55,"conference_location":56,"conference_start_date":59,"conference_end_date":60},"lrec2022_ws_wildre","Proceedings of the WILDRE-6 Workshop within the 13th Language Resources and Evaluation 
Conference","10.63317\u002F34agbocrmxe4",[618,623,630,638,644,649,656,663,670,677,683,690,696,703,711,717,723,729,735,742,748,755,762,768,774,780,786,792,798,805,812,818,825,831,837,843],{"workshop_id":277,"year":29,"full_workshop_id":619,"proceedings_title":620,"paperCount":312,"doi":621,"pdf_url":622,"venue_ids":277,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_bucc","Proceedings of the 17th Workshop on Building and Using Comparable Corpora (BUCC) @ LREC-COLING 2024","10.63317\u002F3tk8bqt3knqn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fbucc\u002F2024.bucc-1.0.pdf",{"workshop_id":624,"year":29,"full_workshop_id":625,"proceedings_title":626,"paperCount":293,"doi":627,"pdf_url":628,"venue_ids":629,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cawl","lrec2024_ws_cawl","Proceedings of the Second Workshop on Computation and Written Language (CAWL) @ LREC-COLING 2024","10.63317\u002F5jv5da4ct2px","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fcawl\u002F2024.cawl-1.0.pdf","cawl|ws",{"workshop_id":631,"year":29,"full_workshop_id":632,"proceedings_title":633,"paperCount":634,"doi":635,"pdf_url":636,"venue_ids":637,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cl4health","lrec2024_ws_cl4health","Proceedings of the First Workshop on Patient-Oriented Language Processing (CL4Health) @ LREC-COLING 
2024",33,"10.63317\u002F3keuurbv54de","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fpolp\u002F2024.cl4health-1.0.pdf","cl4health|ws",{"workshop_id":639,"year":29,"full_workshop_id":640,"proceedings_title":641,"paperCount":507,"doi":642,"pdf_url":643,"venue_ids":639,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"cogalex","lrec2024_ws_cogalex","Proceedings of the Workshop on Cognitive Aspects of the Lexicon @ LREC-COLING 2024","10.63317\u002F2gq7359pqznx","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdelite\u002F2024.delite-1.0.pdf",{"workshop_id":645,"year":29,"full_workshop_id":646,"proceedings_title":647,"paperCount":380,"doi":648,"pdf_url":643,"venue_ids":645,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"delite","lrec2024_ws_delite","Proceedings of the First Workshop on Language-driven Deliberation Technology (DELITE) @ LREC-COLING 2024","10.63317\u002F3pcpupr4j9wb",{"workshop_id":650,"year":29,"full_workshop_id":651,"proceedings_title":652,"paperCount":332,"doi":653,"pdf_url":654,"venue_ids":655,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"determit","lrec2024_ws_determit","Proceedings of the Workshop on DeTermIt! 
Evaluating Text Difficulty in a Multilingual Context @ LREC-COLING 2024","10.63317\u002F32qtrrr46eau","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdetermit\u002F2024.determit-1.0.pdf","determit|ws",{"workshop_id":657,"year":29,"full_workshop_id":658,"proceedings_title":659,"paperCount":293,"doi":660,"pdf_url":661,"venue_ids":662,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"dlnld","lrec2024_ws_dlnld","Proceedings of the Workshop on Deep Learning and Linked Data (DLnLD) @ LREC-COLING 2024","10.63317\u002F543pjjgkbst9","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdlnld\u002F2024.dlnld-1.0.pdf","dlnld|ws",{"workshop_id":664,"year":29,"full_workshop_id":665,"proceedings_title":666,"paperCount":345,"doi":667,"pdf_url":668,"venue_ids":669,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"dmr","lrec2024_ws_dmr","Proceedings of the Fifth International Workshop on Designing Meaning Representations @ LREC-COLING 2024","10.63317\u002F5q4wbidauaxn","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fdmr\u002F2024.dmr-1.0.pdf","dmr|ws",{"workshop_id":671,"year":29,"full_workshop_id":672,"proceedings_title":673,"paperCount":312,"doi":674,"pdf_url":675,"venue_ids":676,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"ecnlp","lrec2024_ws_ecnlp","Proceedings of the Seventh Workshop on e-Commerce and NLP @ LREC-COLING 
2024","10.63317\u002F4upp3i6m57nt","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fecnlp\u002F2024.ecnlp-1.0.pdf","ecnlp|ws",{"workshop_id":484,"year":29,"full_workshop_id":678,"proceedings_title":679,"paperCount":293,"doi":680,"pdf_url":681,"venue_ids":682,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_eurali","Proceedings of the 2nd Workshop on Resources and Technologies for Indigenous, Endangered and Lesser-resourced Languages in Eurasia (EURALI) @ LREC-COLING 2024","10.63317\u002F3z633pd4tyg2","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Feurali\u002F2024.eurali-1.0.pdf","eurali|ws",{"workshop_id":684,"year":29,"full_workshop_id":685,"proceedings_title":686,"paperCount":687,"doi":688,"pdf_url":689,"venue_ids":684,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"finnlp","lrec2024_ws_finnlp","Proceedings of the Joint Workshop of the 7th Financial Technology and Natural Language Processing, the 5th Knowledge Discovery from Unstructured Data in Financial Services, and the 4th Workshop on Economics and Natural Language Processing",34,"10.63317\u002F46uvxxoj8prq","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ffinnlp\u002F2024.finnlp-1.0.pdf",{"workshop_id":495,"year":29,"full_workshop_id":691,"proceedings_title":692,"paperCount":319,"doi":693,"pdf_url":694,"venue_ids":695,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_games","Proceedings of the 10th Workshop on Games and Natural Language Processing @ LREC-COLING 
2024","10.63317\u002F4d46836qy76p","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fgames\u002F2024.games-1.0.pdf","games|ws",{"workshop_id":697,"year":29,"full_workshop_id":698,"proceedings_title":699,"paperCount":286,"doi":700,"pdf_url":701,"venue_ids":702,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"htres","lrec2024_ws_htres","Proceedings of the First Workshop on Holocaust Testimonies as Language Resources (HTRes) @ LREC-COLING 2024","10.63317\u002F47iakwwytvs8","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fhtres\u002F2024.htres-1.0.pdf","htres|ws",{"workshop_id":704,"year":29,"full_workshop_id":705,"proceedings_title":706,"paperCount":707,"doi":708,"pdf_url":709,"venue_ids":710,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"humeval","lrec2024_ws_humeval","Proceedings of the Fourth Workshop on Human Evaluation of NLP Systems (HumEval) @ LREC-COLING 2024",26,"10.63317\u002F3jfrug2yvkgc","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fhumeval\u002F2024.humeval-1.0.pdf","humeval|ws",{"workshop_id":336,"year":29,"full_workshop_id":712,"proceedings_title":713,"paperCount":332,"doi":714,"pdf_url":715,"venue_ids":716,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_isa","Proceedings of the 20th Joint ACL - ISO Workshop on Interoperable Semantic Annotation @ LREC-COLING 
2024","10.63317\u002F5g5ddg8i3y47","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fisa\u002F2024.isa-1.0.pdf","isa|ws",{"workshop_id":349,"year":29,"full_workshop_id":718,"proceedings_title":719,"paperCount":312,"doi":720,"pdf_url":721,"venue_ids":722,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_ldl","Proceedings of the 9th Workshop on Linked Data in Linguistics @ LREC-COLING 2024","10.63317\u002F4gz96nfw2gdk","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fldl\u002F2024.ldl-1.0.pdf","ldl|ws",{"workshop_id":521,"year":29,"full_workshop_id":724,"proceedings_title":725,"paperCount":266,"doi":726,"pdf_url":727,"venue_ids":728,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_legal","Proceedings of the Workshop on Legal and Ethical Issues in Human Language Technologies @ LREC-COLING 2024","10.63317\u002F2wkziwv5fb97","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Flegal\u002F2024.legal-1.0.pdf","legal|ws",{"workshop_id":371,"year":29,"full_workshop_id":730,"proceedings_title":731,"paperCount":634,"doi":732,"pdf_url":733,"venue_ids":734,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_lt4hala","Proceedings of the Third Workshop on Language Technologies for Historical and Ancient Languages (LT4HALA) @ 
LREC-COLING-2024","10.63317\u002F2vavxjcscp8z","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Flt4hala\u002F2024.lt4hala-1.0.pdf","lt4hala|ws",{"workshop_id":736,"year":29,"full_workshop_id":737,"proceedings_title":738,"paperCount":273,"doi":739,"pdf_url":740,"venue_ids":741,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"mathnlp","lrec2024_ws_mathnlp","Proceedings of the 2nd Workshop on Mathematical Natural Language Processing @ LREC-COLING 2024","10.63317\u002F2ydwrzo67zpj","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fmathnlp\u002F2024.mathnlp-1.0.pdf","mathnlp|ws",{"workshop_id":531,"year":29,"full_workshop_id":743,"proceedings_title":744,"paperCount":591,"doi":745,"pdf_url":746,"venue_ids":747,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_mwe","Proceedings of the Joint Workshop on Multiword Expressions and Universal Dependencies (MWE-UD) @ LREC-COLING 2024","10.63317\u002F42csaq87z39r","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fmwe\u002F2024.mweud-1.0.pdf","mwe|udw|ws",{"workshop_id":749,"year":29,"full_workshop_id":750,"proceedings_title":751,"paperCount":273,"doi":752,"pdf_url":753,"venue_ids":754,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"neusymbridge","lrec2024_ws_neusymbridge","Proceedings of the Workshop: Bridging Neurons and Symbols for Natural Language Processing and Knowledge Graphs Reasoning (NeusymBridge) @ 
LREC-COLING-2024","10.63317\u002F2vsheftp3ti9","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fneusymbridge\u002F2024.neusymbridge-1.0.pdf","neusymbridge|ws",{"workshop_id":541,"year":29,"full_workshop_id":756,"proceedings_title":757,"paperCount":758,"doi":759,"pdf_url":760,"venue_ids":761,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_nlperspectives","Proceedings of the 3rd Workshop on Perspectivist Approaches to NLP (NLPerspectives) @ LREC-COLING 2024",16,"10.63317\u002F2cojnfknheph","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fnlperspectives\u002F2024.nlperspectives-1.0.pdf","nlperspectives|ws",{"workshop_id":393,"year":29,"full_workshop_id":763,"proceedings_title":764,"paperCount":345,"doi":765,"pdf_url":766,"venue_ids":767,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_osact","Proceedings of the 6th Workshop on Open-Source Arabic Corpora and Processing Tools (OSACT) with Shared Tasks on Arabic LLMs Hallucination and Dialect to MSA Machine Translation @ LREC-COLING 2024","10.63317\u002F5d5qxytkajay","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fosact\u002F2024.osact-1.0.pdf","osact|ws",{"workshop_id":398,"year":29,"full_workshop_id":769,"proceedings_title":770,"paperCount":440,"doi":771,"pdf_url":772,"venue_ids":773,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_parlaclarin","Proceedings of the IV Workshop on Creating, Analysing, and Increasing Accessibility of Parliamentary Corpora (ParlaCLARIN) @ LREC-COLING 
2024","10.63317\u002F46c8xka7m8f7","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fparlaclarin\u002F2024.parlaclarin-1.0.pdf","parlaclarin|ws",{"workshop_id":555,"year":29,"full_workshop_id":775,"proceedings_title":776,"paperCount":476,"doi":777,"pdf_url":778,"venue_ids":779,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_politicalnlp","Proceedings of the Second Workshop on Natural Language Processing for Political Sciences @ LREC-COLING 2024","10.63317\u002F3qf3r8pwtkvp","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fpoliticalnlp\u002F2024.politicalnlp-1.0.pdf","politicalnlp|ws",{"workshop_id":403,"year":29,"full_workshop_id":781,"proceedings_title":782,"paperCount":345,"doi":783,"pdf_url":784,"venue_ids":785,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_rail","Proceedings of the Fifth Workshop on Resources for African Indigenous Languages @ LREC-COLING 2024","10.63317\u002F2iyqymd34fup","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frail\u002F2024.rail-1.0.pdf","rail|ws",{"workshop_id":565,"year":29,"full_workshop_id":787,"proceedings_title":788,"paperCount":266,"doi":789,"pdf_url":790,"venue_ids":791,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_rapid","Proceedings of the Fifth Workshop on Resources and ProcessIng of linguistic, para-linguistic and extra-linguistic Data from people with various forms of cognitive\u002Fpsychiatric\u002Fdevelopmental impairments @LREC-COLING 
2024","10.63317\u002F5pc4wtot6r3x","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frapid\u002F2024.rapid-1.0.pdf","rapid|ws",{"workshop_id":408,"year":29,"full_workshop_id":793,"proceedings_title":794,"paperCount":286,"doi":795,"pdf_url":796,"venue_ids":797,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_readi","Proceedings of the 3rd Workshop on Tools and Resources for People with REAding DIfficulties (READI) @ LREC-COLING 2024","10.63317\u002F4b546asxrjr6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Freadi\u002F2024.readi-1.0.pdf","readi|ws",{"workshop_id":799,"year":29,"full_workshop_id":800,"proceedings_title":801,"paperCount":273,"doi":802,"pdf_url":803,"venue_ids":804,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"rfp","lrec2024_ws_rfp","Proceedings of the First Workshop on Reference, Framing, and Perspective @ LREC-COLING 2024","10.63317\u002F4xwx3twp9qoy","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Frfp\u002F2024.rfp-1.0.pdf","rfp|ws",{"workshop_id":806,"year":29,"full_workshop_id":807,"proceedings_title":808,"paperCount":273,"doi":809,"pdf_url":810,"venue_ids":811,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"safety4convai","lrec2024_ws_safety4convai","Proceedings of Safety4ConvAI: The Third Workshop on Safety for Conversational AI @ LREC-COLING 
2024","10.63317\u002F4johe7jpagg6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsafeai\u002F2024.safety4convai-1.0.pdf","safety4convai|ws",{"workshop_id":420,"year":29,"full_workshop_id":813,"proceedings_title":814,"paperCount":815,"doi":816,"pdf_url":817,"venue_ids":420,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_signlang","Proceedings of the LREC-COLING 2024 11th Workshop on the Representation and Processing of Sign Languages: Evaluation of Sign Language Resources",45,"10.63317\u002F4e7aayu2htd6","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsignlang\u002F2024.signlang-1.0.pdf",{"workshop_id":588,"year":29,"full_workshop_id":819,"proceedings_title":820,"paperCount":821,"doi":822,"pdf_url":823,"venue_ids":824,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_sigul","Proceedings of the 3rd Annual Meeting of the Special Interest Group on Under-resourced Languages @ LREC-COLING 2024",50,"10.63317\u002F55wjiy53vy99","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fsigul\u002F2024.sigul-1.0.pdf","sigul|ws",{"workshop_id":604,"year":29,"full_workshop_id":826,"proceedings_title":827,"paperCount":368,"doi":828,"pdf_url":829,"venue_ids":830,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_tdle","Proceedings of the Second International Workshop Towards Digital Language Equality (TDLE): Focusing on Sustainability @ LREC-COLING 
2024","10.63317\u002F3p5nrhhwdhbe","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftdle\u002F2024.tdle-1.0.pdf","tdle|ws",{"workshop_id":437,"year":29,"full_workshop_id":832,"proceedings_title":833,"paperCount":345,"doi":834,"pdf_url":835,"venue_ids":836,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_trac","Proceedings of the Fourth Workshop on Threat, Aggression & Cyberbullying @ LREC-COLING-2024","10.63317\u002F2ev2ox49nijy","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Ftrac\u002F2024.trac-1.0.pdf","trac|ws",{"workshop_id":838,"year":29,"full_workshop_id":839,"proceedings_title":840,"paperCount":758,"doi":841,"pdf_url":842,"venue_ids":838,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"unlp","lrec2024_ws_unlp","Proceedings of the Third Ukrainian Natural Language Processing Workshop (UNLP) @ LREC-COLING 2024","10.63317\u002F5bwu58575ghh","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Funlp\u002F2024.unlp-1.0.pdf",{"workshop_id":448,"year":29,"full_workshop_id":844,"proceedings_title":845,"paperCount":266,"doi":846,"pdf_url":847,"venue_ids":848,"publisher":34,"editor":63,"conference_name":36,"conference_acronym":37,"conference_number":38,"conference_location":39,"conference_start_date":42,"conference_end_date":43},"lrec2024_ws_wildre","Proceedings of the 7th Workshop on Indian Language Data: Resources and 
Evaluation","10.63317\u002F52j5bum2j3fk","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec-coling-2024\u002Fwildre\u002F2024.wildre-1.0.pdf","wildre|ws",{"conference_id":212,"year":213,"proceedings_title":214,"venue_ids":9,"isbn":63,"issn":11,"doi":215,"publisher":13,"editors":216,"conference_name":217,"conference_acronym":16,"conference_number":218,"conference_location":219,"conference_city":220,"conference_country":20,"conference_start_date":221,"conference_end_date":222,"conference_url":223,"pdf_url":63,"img_conf_url":63,"paperCount":224},{"conference_id":212,"year":213,"proceedings_title":214,"venue_ids":9,"isbn":63,"issn":11,"doi":215,"publisher":13,"editors":216,"conference_name":217,"conference_acronym":16,"conference_number":218,"conference_location":219,"conference_city":220,"conference_country":20,"conference_start_date":221,"conference_end_date":222,"conference_url":223,"pdf_url":63,"img_conf_url":63,"paperCount":224},[852,875,887,913,925,943,955,973,988,1006,1027,1042,1060,1078,1105,1118,1133,1151,1172,1186,1201,1213,1225,1237,1252,1264,1284,1299,1311,1321,1336,1362,1377,1395,1416,1440,1455,1470,1481,1493,1508,1523,1544,1559,1580,1592,1617,1632,1647,1663,1683,1704,1716,1728,1742,1755,1773,1793,1837,1855,1867,1881,1902,1917,1933,1944,1962,1974,2003,2029,2064,2088,2103,2117,2135,2150,2170,2185,2200,2215,2233,2248,2265,2276,2292,2304,2319,2331,2346,2362,2380,2393,2407,2428,2443,2457,2482,2503,2526,2538,2557,2575,2588,2606,2621,2641,2670,2681,2693,2708,2724,2746,2758,2773,2783,2798,2812,2831,2843,2859,2873,2885,2897,2914,2932,2952,2971,2992,3006,3025,3043,3060,3072,3090,3104,3119,3129,3146,3162,3180,3192,3210,3225,3243,3273,3299,3317,3329,3343,3357,3368,3388,3402,3415,3435,3462,3478,3506,3526,3553,3564,3591,3603,3617,3648,3663,3683,3705,3719,3731,3743,3758,3771,3785,3806,3817,3832,3852,3879,3894,3912,3929,3947,3965,3980,3995,4011,4026,4041,4051,4067,4090,4106,4123,4134,4146,4159,4188,4200,4217,4238,4253,4267,4285,4299,4317,4329,4339,4365,4377,43
92,4415,4431,4446,4460,4483,4501,4513,4528,4548,4563,4578,4592,4609,4626,4640,4654,4666,4680,4694,4707,4722,4734,4767,4790,4805,4845,4864,4880,4898,4910,4922,4950,4965,4988,5002,5014,5024,5036,5048,5060,5078,5095,5110,5128,5146,5157,5168,5201,5219,5244,5255,5274,5288,5304,5318,5332,5359,5375,5402,5420,5431,5447,5468,5488,5502,5517,5534,5550,5569,5588,5600,5617,5629,5655,5674,5688,5703,5721,5734,5769,5780,5797,5812,5824,5840,5851,5864,5878,5893,5905,5924,5942,5958,5970,5984,5997,6009,6024,6040,6052,6068,6084,6101,6114,6126,6141,6152,6170,6186,6196,6208,6220,6235,6245,6266,6279,6290,6304,6321,6333,6361,6376,6388,6399,6415,6427,6448,6465,6490,6500,6515,6536,6548,6563,6587,6598,6610,6624,6640,6657,6673,6685,6695],{"paper_id":853,"title":854,"year":213,"month":855,"day":63,"doi":856,"resource_url":857,"first_page":63,"last_page":63,"pdf_url":858,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":859,"paper_type":860,"authors":861,"abstract":874},"lrec2002-main-001","Floresta Sintá(c)tica: A treebank for Portuguese","05","10.63317\u002F45dcr6ebdxoy","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-001","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F1.pdf","afonso-etal-2002-floresta","main",[862,865,868,871],{"paper_id":853,"author_seq":247,"given_name":863,"surname":864,"affiliation":63,"orcid":63},"Susana","Afonso",{"paper_id":853,"author_seq":232,"given_name":866,"surname":867,"affiliation":63,"orcid":63},"Eckhard","Bick",{"paper_id":853,"author_seq":218,"given_name":869,"surname":870,"affiliation":63,"orcid":63},"Renato","Haber",{"paper_id":853,"author_seq":203,"given_name":872,"surname":873,"affiliation":63,"orcid":63},"Diana","Santos","This paper reviews the first year of the creation of  a publicly available treebank for Portuguese, Floresta Sintá(c)tica, a  collaboration project between the VISL and the Computational Processing  of Portuguese projects. 
After briefly describing the main goals and the  organization of the project, the creation of the annotated objects is  presented in detail: preparing the text to be annotated, applying the  Constraint Grammar based PALAVRAS parser, revising its output manually  in a two-stage process, and carefully documenting the linguistic  options. Some examples of the kind of interesting problems dealt with  are presented, and the paper ends with a brief description of the tools  developed, the project results so far, and a mention to a preliminary  inter-annotator test and what was learned from it.",{"paper_id":876,"title":877,"year":213,"month":855,"day":63,"doi":878,"resource_url":879,"first_page":63,"last_page":63,"pdf_url":880,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":881,"paper_type":860,"authors":882,"abstract":886},"lrec2002-main-002","Learning of word sense disambiguation rules by Co-training, checking co-occurrence of features","10.63317\u002F3xeqgrk4od47","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-002","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F2.pdf","shinnou-2002-learning",[883],{"paper_id":876,"author_seq":247,"given_name":884,"surname":885,"affiliation":63,"orcid":63},"Hiroyuki","Shinnou","In this paper, we propose a method to improve  Co-training and apply it to word sense disambiguation problems.  Co-training is an unsupervised learning method to overcome the problem  that labeled training data is fairly expensive to obtain. Co-training is  theoretically promising, but it requires two feature sets with the  conditional independence assumption. This assumption is too rigid. In  fact there is no choice but to use incomplete feature sets, and then the  accuracy of learned rules reaches a limit. In this paper, we check  co-occurrence between two feature sets to avoid such undesirable  situation when we add unlabeled instances to training data. 
In  experiments, we applied our method to word sense disambiguation problems  for the three Japanese words ‘koe’, ‘toppu’ and ‘kabe’ and  demonstrated that it improved Co-training.",{"paper_id":888,"title":889,"year":213,"month":855,"day":63,"doi":890,"resource_url":891,"first_page":63,"last_page":63,"pdf_url":892,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":893,"paper_type":860,"authors":894,"abstract":912},"lrec2002-main-003","ELSST: a broad-based Multilingual Thesaurus for the Social Sciences","10.63317\u002F2cymfjcviz85","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-003","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F3.pdf","balkan-etal-2002-elsst",[895,898,901,904,907,910],{"paper_id":888,"author_seq":247,"given_name":896,"surname":897,"affiliation":63,"orcid":63},"Lorna","Balkan",{"paper_id":888,"author_seq":232,"given_name":899,"surname":900,"affiliation":63,"orcid":63},"Ken","Miller",{"paper_id":888,"author_seq":218,"given_name":902,"surname":903,"affiliation":63,"orcid":63},"Birgit","Austin",{"paper_id":888,"author_seq":203,"given_name":905,"surname":906,"affiliation":63,"orcid":63},"Anne","Etheridge",{"paper_id":888,"author_seq":188,"given_name":908,"surname":909,"affiliation":63,"orcid":63},"Myriam Garcia","Bernabé",{"paper_id":888,"author_seq":172,"given_name":911,"surname":900,"affiliation":63,"orcid":63},"Pam","This paper describes the motivation for, and methodology behind the creation of ELSST (European Language Social Science Thesaurus), a broad-based multilingual thesaurus for the social sciences. The thesaurus was produced by the UK Data Archive (UKDA) as part of the EU-funded LIMBER (Language Independent Metadata Browsing of European Resources) project and was derived from their in-house English monolingual thesaurus, HASSET (Humanities and Social Science Electronic Thesaurus).  
The multilingual thesaurus is currently available in four languages English, French, German and Spanish and in various formats, including RDF (Resource Description Framework).",{"paper_id":914,"title":915,"year":213,"month":855,"day":63,"doi":916,"resource_url":917,"first_page":63,"last_page":63,"pdf_url":918,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":919,"paper_type":860,"authors":920,"abstract":924},"lrec2002-main-004","Lexicon Optimization: Maximizing Lexical Coverage in Speech Recognition through Automated Compounding","10.63317\u002F4b2rzn4itj7z","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-004","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F4.pdf","vandeghinste-2002-lexicon",[921],{"paper_id":914,"author_seq":247,"given_name":922,"surname":923,"affiliation":63,"orcid":63},"Vincent","Vandeghinste","In this report we show that a lexicon can be designed in such a way  that lexical coverage can be maximized by real-time lexicon expansion and a limited word part lexicon for Dutch speech recognition. More  specifically, we describe how the lexicon is designed and how the real-time expansion module was built and tested. Tests were performed  using a 36.000 entries lexicon. The test results show that out-of-vocabulary  rates are rather small, due to automated rule-based compounding of the lexical building blocks. Statistical information was included  to improve the accuracy of the rule-based compounding system. 
This approach proved to be successful.",{"paper_id":926,"title":927,"year":213,"month":855,"day":63,"doi":928,"resource_url":929,"first_page":63,"last_page":63,"pdf_url":930,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":931,"paper_type":860,"authors":932,"abstract":942},"lrec2002-main-005","Computer-Aided Specification of Quality Models for Machine Translation Evaluation","10.63317\u002F3pzoy2dpds3k","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-005","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F5.pdf","hovy-etal-2002-computer",[933,936,939],{"paper_id":926,"author_seq":247,"given_name":934,"surname":935,"affiliation":63,"orcid":63},"Eduard","Hovy",{"paper_id":926,"author_seq":232,"given_name":937,"surname":938,"affiliation":63,"orcid":63},"Margaret","King",{"paper_id":926,"author_seq":218,"given_name":940,"surname":941,"affiliation":63,"orcid":63},"Andrei","Popescu-Belis","This article describes the principles and mechanism  of an integrative effort in machine translation (MT) evaluation.  Building upon previous standardization initiatives, above all ISO\u002FIEC  9126, 14598 and EAGLES, we attempt to classify into a coherent taxonomy  most of the characteristics, attributes and metrics that have been  proposed for MT evaluation. The main articulation of this flexible  framework is the link between a taxonomy that helps evaluators define a  context of use for the evaluated software, and a taxonomy of the quality  characteristics and associated metrics. 
The article explains the  theoretical grounds of this articulation, along with an overview of the  taxonomies in their present state, and a perspective on ongoing work in  MT evaluation standardization.",{"paper_id":944,"title":945,"year":213,"month":855,"day":63,"doi":946,"resource_url":947,"first_page":63,"last_page":63,"pdf_url":948,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":949,"paper_type":860,"authors":950,"abstract":954},"lrec2002-main-006","Meaning as use: exploitation of aligned corpora for the contrastive study of lexical semantics","10.63317\u002F4oti7nwhqb22","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-006","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F6.pdf","sharoff-2002-meaning",[951],{"paper_id":944,"author_seq":247,"given_name":952,"surname":953,"affiliation":63,"orcid":63},"Serge","Sharoff","The paper discusses the use of corpora for  experimental studies in contrastive lexical semantics, in particular,  for comparing how a state of affairs is expressed in different languages  and by different translators. 
Three topics are addressed: (1) a  lexicographic database, which is aimed at storing and maintaining  contrastive descriptions of a class of lexical items in several  languages; (2) an aligned parallel English-Russian corpus, including  several literary texts and software manuals (the total size is about one  million words), together with tools for querying the corpus by means of  Perl-based regular expressions; and (3) an example of development of a  lexicographical database of the most frequent size adjectives in  English, German and Russian.",{"paper_id":956,"title":957,"year":213,"month":855,"day":63,"doi":958,"resource_url":959,"first_page":63,"last_page":63,"pdf_url":960,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":961,"paper_type":860,"authors":962,"abstract":972},"lrec2002-main-007","Using the Annotated Bibliography as a Resource for Indicative Summarization","10.63317\u002F3q9bjzc335qj","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-007","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F7.pdf","kan-etal-2002-using",[963,966,969],{"paper_id":956,"author_seq":247,"given_name":964,"surname":965,"affiliation":63,"orcid":63},"Min-Yen","Kan",{"paper_id":956,"author_seq":232,"given_name":967,"surname":968,"affiliation":63,"orcid":63},"Judith L.","Klavans",{"paper_id":956,"author_seq":218,"given_name":970,"surname":971,"affiliation":63,"orcid":63},"Kathleen R.","McKeown","We report on a language resource consisting of 2000 annotated bibliography entries, which is being analyzed as part of our  research on indicative document summarization.  We show how annotated bibliographies cover certain aspects of summarization that have not  been covered by other summary corpora, and motivate why it is an important form to study for information retrieval.  We detail our  methodology for collecting the corpus, and overview our document feature markup that we introduced to facilitate summary analysis.  
We  present the characteristics of the corpus, methods of collection, and show its use in finding the distribution of types of information  included in indicative summaries and their relative ordering within the summaries.",{"paper_id":974,"title":975,"year":213,"month":855,"day":63,"doi":976,"resource_url":977,"first_page":63,"last_page":63,"pdf_url":978,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":979,"paper_type":860,"authors":980,"abstract":987},"lrec2002-main-008","Computational Linguistics at Universiti Sains Malaysia","10.63317\u002F2hfgm9ts3cjh","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-008","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F8.pdf","chuah-yusoff-2002-computational",[981,984],{"paper_id":974,"author_seq":247,"given_name":982,"surname":983,"affiliation":63,"orcid":63},"Choy-Kim","Chuah",{"paper_id":974,"author_seq":232,"given_name":985,"surname":986,"affiliation":63,"orcid":63},"Zaharin","Yusoff","This paper gives a brief history of UTMK, a computer-aided translation unit, and reports on her projects and research co-operations. After its beginnings as a thesis project on Malay affixation, UTMK's interest moved from machine translation to the development of tools for translation. Today, UTMK's focus is on the development of natural language processing applications and tools (internet browsers, and corpus and dictionary databases). And, continuing with its policy for research collaborations, UTMK is leading a three-country project to pool computing and linguistic resources and expertise on Malay. Due to historical reasons, bahasa Indonesia and bahasa Melayu, the Malay used respectively in Indonesia and in Malaysia have diverged with differences in vocabulary, pronunciation and spelling. For effective communication, a council was set up in 1972 to standardize the spelling and terminology used in the two countries. Brunei joined this council in 1985. 
To encourage studies on Malay, texts need to be available. However, resources in digital form are wanting. At a recent meeting, the council proposed to set up a Malay language portal to make linguistic resources from the three countries available on-line, and also to popularise Malay as a South-East Asian language. The participation of non-member countries in the portal is welcomed.",{"paper_id":989,"title":990,"year":213,"month":855,"day":63,"doi":991,"resource_url":992,"first_page":63,"last_page":63,"pdf_url":993,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":994,"paper_type":860,"authors":995,"abstract":1005},"lrec2002-main-009","Towards an Ontology for a Human Genome Knowledge Base","10.63317\u002F32donqv3pb22","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-009","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F9.pdf","feliu-etal-2002-towards",[996,999,1002],{"paper_id":989,"author_seq":247,"given_name":997,"surname":998,"affiliation":63,"orcid":63},"Judit","Feliu",{"paper_id":989,"author_seq":232,"given_name":1000,"surname":1001,"affiliation":63,"orcid":63},"Jorge","Vivaldi",{"paper_id":989,"author_seq":218,"given_name":1003,"surname":1004,"affiliation":63,"orcid":63},"M. Teresa","Cabré","Ontology, usually understood as a particular  representation of a given domain, will become an essential item in the  information retrieval system we aim to build. Our research activities  are developed on the communicative terminology framework, that is, we  mainly deal with units effectively contained in specialized discourse.  Bearing in mind this theoretical approach, we consider essential   to establish a link between the specialized knowledge units appearing in  specialized texts and the concepts organized in a particular ontology.  
Having the specialized knowledge units closely linked to a conceptual  organization will lead us to propose an information retrieval system  based on a Human Genome Ontology that should perform better than the  current state-of-the-art systems.",{"paper_id":1007,"title":1008,"year":213,"month":855,"day":63,"doi":1009,"resource_url":1010,"first_page":63,"last_page":63,"pdf_url":1011,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1012,"paper_type":860,"authors":1013,"abstract":1026},"lrec2002-main-010","An Improved Algorithm for the Automatic Segmentation of Speech Corpora","10.63317\u002F2ud5367m2vke","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-010","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F10.pdf","laureys-etal-2002-improved",[1014,1017,1020,1023],{"paper_id":1007,"author_seq":247,"given_name":1015,"surname":1016,"affiliation":63,"orcid":63},"Tom","Laureys",{"paper_id":1007,"author_seq":232,"given_name":1018,"surname":1019,"affiliation":63,"orcid":63},"Kris","Demuynck",{"paper_id":1007,"author_seq":218,"given_name":1021,"surname":1022,"affiliation":63,"orcid":63},"Jacques","Duchateau",{"paper_id":1007,"author_seq":203,"given_name":1024,"surname":1025,"affiliation":63,"orcid":63},"Patrick","Wambacq","In this paper we describe an improved algorithm for the automatic  segmentation of speech corpora. Apart from their usefulness in several speech technology domains, segmentations  provide easy access to speech  corpora by using time stamps to couple the orthographic transcription to the speech signal. The segmentation tool we propose is based on the  Forward-Backward algorithm. The Forward-Backward method not only produces more  accurate segmentation results than the traditionally  used Viterbi method, it also provides us with a confidence interval for each of the generated boundaries.  
These confidence  intervals  allow us to perform some advanced post-processing operations, leading  to further improvement of the quality of automatic segmentations.",{"paper_id":1028,"title":1029,"year":213,"month":855,"day":63,"doi":1030,"resource_url":1031,"first_page":63,"last_page":63,"pdf_url":1032,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1033,"paper_type":860,"authors":1034,"abstract":1041},"lrec2002-main-011","Towards a Corpus Annotated for Metonymies: the Case of Location Names","10.63317\u002F5d6b8td5qh2b","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-011","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F11.pdf","markert-nissim-2002-towards",[1035,1038],{"paper_id":1028,"author_seq":247,"given_name":1036,"surname":1037,"affiliation":63,"orcid":63},"Katja","Markert",{"paper_id":1028,"author_seq":232,"given_name":1039,"surname":1040,"affiliation":63,"orcid":63},"Malvina","Nissim","At the moment, language resources do not contain the necessary information for  large-scale  metonymy processing. As a contribution, we here present a corpus  annotated for metonymies. We describe a framework for  annotating metonymies in domain-independent text that considers the regularity, productivity and  underspecification of metonymic usage. We then present a fully  worked out annotation scheme for location names and a gold standard  corpus containing 2000 annotated location names. The  annotation scheme is rigorously evaluated as to  its reliability and  compared to previous metonymy classification proposals. 
In  particular, we show that it is not sufficient to rely  on intuitions for reliable metonymy identification and  that an  annotation effort with trained annotators and explicit guidelines is  necessary.",{"paper_id":1043,"title":1044,"year":213,"month":855,"day":63,"doi":1045,"resource_url":1046,"first_page":63,"last_page":63,"pdf_url":1047,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1048,"paper_type":860,"authors":1049,"abstract":1059},"lrec2002-main-012","Translators at work with TRANSTYPE: Resource and Evaluation.","10.63317\u002F5hjzdcgnwwdb","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-012","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F12.pdf","langlais-etal-2002-translators",[1050,1053,1056],{"paper_id":1043,"author_seq":247,"given_name":1051,"surname":1052,"affiliation":63,"orcid":63},"Philippe","Langlais",{"paper_id":1043,"author_seq":232,"given_name":1054,"surname":1055,"affiliation":63,"orcid":63},"Marie","Loranger",{"paper_id":1043,"author_seq":218,"given_name":1057,"surname":1058,"affiliation":63,"orcid":63},"Guy","Lapalme","TransType is an interactive machine translation prototype which has  been designed at RALI with the hope that it could efficiently assist translators in their day work.  During spring 2001, the latest version  of our prototype was evaluated in an in situ setting. We first describe our evaluation protocol and then analyse the data we  obtained.  Second, we know that TransType has been emulated and a few clones of it are already available.  Therefore we have decided to open  our log-files to the small but growing TransType community. We believe such a database to be a helpful resource for both the researchers  involved in the TransType project, and also for all those interested in observing  translators at work.  
Third, we briefly describe a free  TransType Player tool that we have developped; that is, a JAVA application which allows a given log-file to be played back.",{"paper_id":1061,"title":1062,"year":213,"month":855,"day":63,"doi":1063,"resource_url":1064,"first_page":63,"last_page":63,"pdf_url":1065,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1066,"paper_type":860,"authors":1067,"abstract":1077},"lrec2002-main-013","Annotating the functional chunks in Chinese sentences","10.63317\u002F58efnm4k25h3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-013","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F13.pdf","zhou-etal-2002-annotating",[1068,1071,1074],{"paper_id":1061,"author_seq":247,"given_name":1069,"surname":1070,"affiliation":63,"orcid":63},"Qiang","Zhou",{"paper_id":1061,"author_seq":232,"given_name":1072,"surname":1073,"affiliation":63,"orcid":63},"Elliott Franco","Drabek",{"paper_id":1061,"author_seq":218,"given_name":1075,"surname":1076,"affiliation":63,"orcid":63},"Fuji","Ren","The paper proposed a new syntactic annotation scheme --- functional chunk, which tried to represent information about grammatical relations between sentence-level predicates and their arguments. Under this scheme, we built a Chinese chunk bank with about two million Chinese characters, and developed some learned models for automatically annotating fresh text with functional chunks. We also proposed a two-stages approach to build Chinese tree bank on the top of chunk bank, and gave some experimental results of chunk-based syntactic parser to show the advantage of functional chunk for parsing performance increase. 
All these work lays good foundations for further research project to build a large scale Chinese tree bank.",{"paper_id":1079,"title":1080,"year":213,"month":855,"day":63,"doi":1081,"resource_url":1082,"first_page":63,"last_page":63,"pdf_url":1083,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1084,"paper_type":860,"authors":1085,"abstract":1104},"lrec2002-main-014","The Present Status of Speech Database in Japan: Development, Management, and Application to Speech Research","10.63317\u002F2fteay2ptc34","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-014","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F14.pdf","kuwabara-etal-2002-present",[1086,1089,1092,1095,1098,1101],{"paper_id":1079,"author_seq":247,"given_name":1087,"surname":1088,"affiliation":63,"orcid":63},"Hisao","Kuwabara",{"paper_id":1079,"author_seq":232,"given_name":1090,"surname":1091,"affiliation":63,"orcid":63},"Shuich","Itahashi",{"paper_id":1079,"author_seq":218,"given_name":1093,"surname":1094,"affiliation":63,"orcid":63},"Mikio","Yamamoto",{"paper_id":1079,"author_seq":203,"given_name":1096,"surname":1097,"affiliation":63,"orcid":63},"Toshiyuki","Takezawa",{"paper_id":1079,"author_seq":188,"given_name":1099,"surname":1100,"affiliation":63,"orcid":63},"Satoshi","Nakamura",{"paper_id":1079,"author_seq":172,"given_name":1102,"surname":1103,"affiliation":63,"orcid":63},"Kazuya","Takeda","The present status of Japanese speech database has  been described. The database project in Japan started in early 1980s.  The first one was a committee of Japan Electronic Industry Development  Association, abbreviated as JEIDA, which aimed at creating a speech  database that can commonly evaluate performance of the then existing  speech input\u002Foutput machines and systems. 
Several database projects have  been undertaken since then including the one initiated by the Advanced  Telecommunication Research Institute (ATR) and now it has come to the  point where an enormous amount of spontaneous speech data is available.  A survey has been conducted recently about the usage of the presently  existing speech databases among industry and university institutions in  Japan where speech research is now actively going on. It has been  revealed that the ATR’s continuous speech database is the most  frequently used followed by the equivalent version of the Acoustical  Society of Japan.",{"paper_id":1106,"title":1107,"year":213,"month":855,"day":63,"doi":1108,"resource_url":1109,"first_page":63,"last_page":63,"pdf_url":1110,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1111,"paper_type":860,"authors":1112,"abstract":1117},"lrec2002-main-015","Evaluation of parsed corpora: Experiments in user-transparent and user-visible evaluation","10.63317\u002F3ewwi6wbfa9u","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-015","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F15.pdf","santos-gasperin-2002-evaluation",[1113,1114],{"paper_id":1106,"author_seq":247,"given_name":872,"surname":873,"affiliation":63,"orcid":63},{"paper_id":1106,"author_seq":232,"given_name":1115,"surname":1116,"affiliation":63,"orcid":63},"Caroline","Gasperin","In the present paper, we describe and discuss the evaluation of parsed corpora, namely the ones that are available on the Web for querying in the AC\u002FDC project. The paper has two parts: the first one suggests a set of different evaluation parameters and measures that are much more illuminating than commonly used simple precision measures, while the second evaluates the parsed corpus for a particular task -- that of automatic thesaurus building. 
The two evaluations are thus complementary, in that, in Gaizauskas (1998) terminology, the first is a typical user-transparent evaluation, while the second is user-visible.",{"paper_id":1119,"title":1120,"year":213,"month":855,"day":63,"doi":1121,"resource_url":1122,"first_page":63,"last_page":63,"pdf_url":1123,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1124,"paper_type":860,"authors":1125,"abstract":1132},"lrec2002-main-016","Acoustic Modeling and Training of a Bilingual ASR System when a Minority Language is Involved","10.63317\u002F47cqoviag6ii","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-016","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F16.pdf","docio-fernandez-garcia-mateo-2002-acoustic",[1126,1129],{"paper_id":1119,"author_seq":247,"given_name":1127,"surname":1128,"affiliation":63,"orcid":63},"Laura","Docío-Fernández",{"paper_id":1119,"author_seq":232,"given_name":1130,"surname":1131,"affiliation":63,"orcid":63},"Carmen","García-Mateo","This paper describes our work in developing a bilingual speech recognition system using two SpeechDat databases. The bilingual aspect of this work is of particular importance in the Galician region of Spain where both languages Galician and Spanish coexist and one of the languages, the Galician one, is a minority language. Based on a global   Spanish-Galician phoneme set we built a bilingual speech recognition system which can handle both languages: Spanish and Galician. The recognizer makes use of context dependent acoustic models based on continuous density hidden Markov models. The system has been evaluated on an isolated-word large-vocabulary task. The tests show that the Spanish system exhibits a better performance than the Galician system due to its better training. 
The bilingual system provides an equivalent performance to that achieved by the language specific systems.",{"paper_id":1134,"title":1135,"year":213,"month":855,"day":63,"doi":1136,"resource_url":1137,"first_page":63,"last_page":63,"pdf_url":1138,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1139,"paper_type":860,"authors":1140,"abstract":1150},"lrec2002-main-017","The Open Language Archives Community","10.63317\u002F2w64gqufbnhp","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-017","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F17.pdf","bird-etal-2002-open",[1141,1144,1147],{"paper_id":1134,"author_seq":247,"given_name":1142,"surname":1143,"affiliation":63,"orcid":63},"Steven","Bird",{"paper_id":1134,"author_seq":232,"given_name":1145,"surname":1146,"affiliation":63,"orcid":63},"Hans","Uszkoreit",{"paper_id":1134,"author_seq":218,"given_name":1148,"surname":1149,"affiliation":63,"orcid":63},"Gary","Simons","The goal of this symposium is to disseminate the OLAC  vision to the language resources community, and to the European research  community more broadly. We hope to encourage the community to archive  and publish their resources using archival formats, and to document them  using standard metadata. Presentations will address the following  questions: What is the Open Language Archives Community? Why is language  archiving important? What does it take to participate in OLAC?  Discussion time will be used to clarify the OLAC model and to identify  and address any concerns raised by the audience. Substantive feedback  will help to guide the future evolution of OLAC. 
This symposium will  mark the official launch of OLAC in Europe.",{"paper_id":1152,"title":1153,"year":213,"month":855,"day":63,"doi":1154,"resource_url":1155,"first_page":63,"last_page":63,"pdf_url":1156,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1157,"paper_type":860,"authors":1158,"abstract":1171},"lrec2002-main-018","A Flexible XML-based Regular Compiler for Creation and Conversion of Linguistic Resources","10.63317\u002F5bicy9bkmeb3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-018","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F18.pdf","piskorski-etal-2002-flexible",[1159,1162,1165,1168],{"paper_id":1152,"author_seq":247,"given_name":1160,"surname":1161,"affiliation":63,"orcid":63},"Jakub","Piskorski",{"paper_id":1152,"author_seq":232,"given_name":1163,"surname":1164,"affiliation":63,"orcid":63},"Witold","Drożdżyński",{"paper_id":1152,"author_seq":218,"given_name":1166,"surname":1167,"affiliation":63,"orcid":63},"Oliver","Scherf",{"paper_id":1152,"author_seq":203,"given_name":1169,"surname":1170,"affiliation":63,"orcid":63},"Feiyu","Xu","Finite-state devices are widely used to compactly  model linguistic phenomena, whereas regular expressions are regarded as  the adequate level of abstraction for thinking about finite-state  languages. In this paper we present a flexible XML-based and  Unicode-compatible regular compiler for creating, and integrating  existing linguistic resources. Our tool provides a user-friendly graphical  interface which enables the transparent control of the compilation  process and allows for testing generated finite-state grammars with  several diagnostic tools. 
Through the direct database connection,  existing  linguistic resources can be converted into user-definable  finite-state representations.",{"paper_id":1173,"title":1174,"year":213,"month":855,"day":63,"doi":1175,"resource_url":1176,"first_page":63,"last_page":63,"pdf_url":1177,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1178,"paper_type":860,"authors":1179,"abstract":63},"lrec2002-main-019","A Contrastive Acoustic-Phonetic Analysis of Slovenian and English Diphthongs","10.63317\u002F53uha2p8kdcm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-019","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F19.pdf","modic-petek-2002-contrastive",[1180,1183],{"paper_id":1173,"author_seq":247,"given_name":1181,"surname":1182,"affiliation":63,"orcid":63},"Robert","Modic",{"paper_id":1173,"author_seq":232,"given_name":1184,"surname":1185,"affiliation":63,"orcid":63},"Bojan","Petek",{"paper_id":1187,"title":1188,"year":213,"month":855,"day":63,"doi":1189,"resource_url":1190,"first_page":63,"last_page":63,"pdf_url":1191,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1192,"paper_type":860,"authors":1193,"abstract":1200},"lrec2002-main-020","Three New Corpora at the Bavarian Archive for Speech Signals – and a First Step Towards Distributed Web-Based Recording","10.63317\u002F3ek4x8i4xeyy","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-020","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F20.pdf","draxler-schiel-2002-three",[1194,1197],{"paper_id":1187,"author_seq":247,"given_name":1195,"surname":1196,"affiliation":63,"orcid":63},"Christoph","Draxler",{"paper_id":1187,"author_seq":232,"given_name":1198,"surname":1199,"affiliation":63,"orcid":63},"Florian","Schiel","The Bavarian Archive for Speech Signals has released three new speech corpora for both industrial and academic use: a)  Hempels Sofa contains recordings of up to 60 seconds of 
non-scripted telephone speech, b) ZipTel is a corpus with telephone speech  covering postal addresses and telephone numbers from a real world application, and c) RVG-J, an extension of the original Regional  Variants of German corpus with juvenile speakers. All three corpora were transcribed orthographically according to the SpeechDat  annotation guidelines using the WWWTranscribe annotation software. Recently, BAS has begun to investigate performing large-scale audio  recordings via the web, and RVG-J has become the testbed for this type of recording.",{"paper_id":1202,"title":1203,"year":213,"month":855,"day":63,"doi":1204,"resource_url":1205,"first_page":63,"last_page":63,"pdf_url":1206,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1207,"paper_type":860,"authors":1208,"abstract":1212},"lrec2002-main-021","n-grams of Seeds: A Hybrid System for Corpus-Based Text Summarization","10.63317\u002F2mg7qeehcnp8","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-021","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F21.pdf","schneider-2002-n",[1209],{"paper_id":1202,"author_seq":247,"given_name":1210,"surname":1211,"affiliation":63,"orcid":63},"René","Schneider","This paper presents a hybrid system for automatic text summarization which combines statistical and knowledge-based methods.  In particular, it demonstrates how two corpus-based learning and indexing algorithms, namely an n-gram and a seed-oriented approach, may be combined to bring out the best of both approaches.  This system selects sentences from an input text to construct a highly compressed, generic, and informative summary.  
The hybrid algorithm described here was developed and tested with a corpus of movie reviews collected from several on-line data bases.",{"paper_id":1214,"title":1215,"year":213,"month":855,"day":63,"doi":1216,"resource_url":1217,"first_page":63,"last_page":63,"pdf_url":1218,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1219,"paper_type":860,"authors":1220,"abstract":1224},"lrec2002-main-022","Building a Resource for Evaluating the Importance of Sentences","10.63317\u002F5dgbsfvaknyd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-022","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F22.pdf","schiffman-2002-building",[1221],{"paper_id":1214,"author_seq":247,"given_name":1222,"surname":1223,"affiliation":63,"orcid":63},"Barry","Schiffman","This paper will introduce a new lexical resource for  measuring the importance of short segments of text, such as sentences.  The resource, a list of words compiled automatically from a large  background corpus of news articles, can provide evidence that a text  segment is globally important, that is intrinsically interesting, not  only interesting in relation to a specified topic or set of documents.",{"paper_id":1226,"title":1227,"year":213,"month":855,"day":63,"doi":1228,"resource_url":1229,"first_page":63,"last_page":63,"pdf_url":1230,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1231,"paper_type":860,"authors":1232,"abstract":1236},"lrec2002-main-023","A Subcategorisation Lexicon for German Verbs induced from a Lexicalised PCFG","10.63317\u002F4eow6fntm7jk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-023","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F23.pdf","schulte-im-walde-2002-subcategorisation",[1233],{"paper_id":1226,"author_seq":247,"given_name":1234,"surname":1235,"affiliation":63,"orcid":63},"Sabine","Schulte im Walde","The paper presents a large-scale computational 
subcategorisation  lexicon for several thousand German verbs. The lexical entries were obtained by unsupervised learning in a statistical grammar framework:  a German context-free grammar containing frame-predicting grammar rules and information about lexical heads was trained on 18.7 million  words of a large German newspaper corpus. We developed a simple methodology to utilise frequency distributions in the lexicalised  version of the probabilistic grammar for inducing syntactic verb frame descriptions. The frame definition is variable with respect to the  inclusion of prepositional phrase refinement. An evaluation against a manual dictionary justifies the utilisation of the machine-readable  lexicon as a valuable component for supporting NLP-tasks. To our knowledge, no  former computational approach has obtained a  subcategorisation lexicon for German comparable in size (the number of verbs in the lexicon), restriction (no limit concerning  the  frequencies of the verbs), or verified reliability (successful extensive evaluation  against dictionary).",{"paper_id":1238,"title":1239,"year":213,"month":855,"day":63,"doi":1240,"resource_url":1241,"first_page":63,"last_page":63,"pdf_url":1242,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1243,"paper_type":860,"authors":1244,"abstract":1251},"lrec2002-main-024","Evaluation of Pronunciation Variants in the ASR Lexicon for Different Speaking Styles","10.63317\u002F5kscipdmkgqs","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-024","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F24.pdf","amdal-svendsen-2002-evaluation",[1245,1248],{"paper_id":1238,"author_seq":247,"given_name":1246,"surname":1247,"affiliation":63,"orcid":63},"Ingunn","Amdal",{"paper_id":1238,"author_seq":232,"given_name":1249,"surname":1250,"affiliation":63,"orcid":63},"Torbjørn","Svendsen","One of the challenges in automatic speech recognition is how to handle  pronunciation 
variation. The main causes for pronunciation variation  are the speaker (voice  characteristics, accent, non-nativeness etc.) and the speaking style (reading, spontaneous  responses, conversation  etc.). An ASR system has basically two options for modelling the\n  variation on the word and sub-word level: lexical modelling of the  pronunciation  variation or adaptation, i.e. re-training of the  acoustic models. The answer to the question  of which technique to  choose, or how to combine them, may depend on the speaking style.  We  have therefore investigated the effects of using pronunciation  variants for recognition  of read speech, spontaneous dictation, and  non-native speech. The variants in the standard  purpose lexicon tested  gave modest improvements and best results for read speech, which  is  the speaking style of the acoustic model training set.",{"paper_id":1253,"title":1254,"year":213,"month":855,"day":63,"doi":1255,"resource_url":1256,"first_page":63,"last_page":63,"pdf_url":1257,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1258,"paper_type":860,"authors":1259,"abstract":1263},"lrec2002-main-025","LAperLA: an integrated graphical-linguistic System for old printed Latin Texts","10.63317\u002F3fzqgyjj3hn6","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-025","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F25.pdf","bozzi-2002-laperla",[1260],{"paper_id":1253,"author_seq":247,"given_name":1261,"surname":1262,"affiliation":63,"orcid":63},"Andrea","Bozzi","LAperLA (Lettore Automatico per Libri Antichi) is a  prototype for the automatic  recognition of Latin texts in old  printed books. The strengths of the system are the neural architecture  and the post-processing linguistic tool that is represented by an index  of Latin forms (more than 500,000) and by a query management system  which uses the information of the index to check and correct the  interpreted words. 
The images have been taken from the text of  \"Contradicentium Medicorum\" by Girolamo Cardano in the edition  printed in 1663; the main textual material consists of a set of 40  image-files (11 for the training and 29 for testing) with a resolution  of 118 DPI. We would like to point out that the  interpretation  results produced on images chosen as benchmarks by LAperLA have been  compared with FineReader 4.0 by ABBYY and Omnipage Pro 10 by Caere.  FineReader reaches a correctness percentage of 61.19%; Omnipage gets to  54.41%, while LAperLA recognises 80.95% of words which increases  with the aid of the specific linguistic module (93.22%). A very easy to  use system interface has been developed not only for the training of the  net, but also to select the parts of the image-files to be interpreted.",{"paper_id":1265,"title":1266,"year":213,"month":855,"day":63,"doi":1267,"resource_url":1268,"first_page":63,"last_page":63,"pdf_url":1269,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1270,"paper_type":860,"authors":1271,"abstract":1283},"lrec2002-main-026","Computerized linguistic resources of the research laboratory ATILF for lexical and textual analysis: Frantext, TLFi, and the software Stella","10.63317\u002F5jma2krptxh7","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-026","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F26.pdf","bernard-etal-2002-computerized",[1272,1275,1278,1280],{"paper_id":1265,"author_seq":247,"given_name":1273,"surname":1274,"affiliation":63,"orcid":63},"Pascale","Bernard",{"paper_id":1265,"author_seq":232,"given_name":1276,"surname":1277,"affiliation":63,"orcid":63},"Josette","Lecomte",{"paper_id":1265,"author_seq":218,"given_name":1021,"surname":1279,"affiliation":63,"orcid":63},"Dendien",{"paper_id":1265,"author_seq":203,"given_name":1281,"surname":1282,"affiliation":63,"orcid":63},"Jean-Marie","Pierrel","This paper presents some of the computerized linguistic 
resources of the Research Laboratory ATILF (Analyse et Traitement Informatique de la Langue Française) available via the Web, and will serve as a helping document for demonstrations planned within the framework of LREC 2002. The Research Laboratory ATILF is the new UMR (Unité Mixte de Recherche) created in association between the CNRS and the University of Nancy 2 since 2001-January 2nd, and succeeds to the local component of the INaLF situated in Nancy. This considerable amount of resources concerning French language consists in a set of more than 3400 literary works grouped together in Frantext, plus a number of dictionaries, lexis and other databases. These web available resources are operated and run through the potentialities and powerful capacities of a software called Stella, a search engine specially dedicated to textual databases and relying on a new theory of textual objects.",{"paper_id":1285,"title":1286,"year":213,"month":855,"day":63,"doi":1287,"resource_url":1288,"first_page":63,"last_page":63,"pdf_url":1289,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1290,"paper_type":860,"authors":1291,"abstract":1298},"lrec2002-main-027","Automatic extraction of differences between spoken and written languages, and automatic translation from the written to the spoken language","10.63317\u002F4c4okq3oz29i","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-027","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F27.pdf","murata-isahara-2002-automatic",[1292,1295],{"paper_id":1285,"author_seq":247,"given_name":1293,"surname":1294,"affiliation":63,"orcid":63},"Masaki","Murata",{"paper_id":1285,"author_seq":232,"given_name":1296,"surname":1297,"affiliation":63,"orcid":63},"Hitoshi","Isahara","We extracted the differences between spoken language and written  language from a spoken-language corpus and a written-language corpus by using the UNIX command ``diff'' and examined the differences to  determine 
the construction of the grammars of the two corpora. We also transformed written-language sentences into spoken-language sentences  by using rules based on the extracted differences.",{"paper_id":1300,"title":1301,"year":213,"month":855,"day":63,"doi":1302,"resource_url":1303,"first_page":63,"last_page":63,"pdf_url":1304,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1305,"paper_type":860,"authors":1306,"abstract":1310},"lrec2002-main-028","Automatic detection of prosodic prominence in continuous speech","10.63317\u002F5ppd4yn5c93b","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-028","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F28.pdf","tamburini-2002-automatic",[1307],{"paper_id":1300,"author_seq":247,"given_name":1308,"surname":1309,"affiliation":63,"orcid":63},"Fabio","Tamburini","This paper presents work in progress on the automatic detection of prosodic prominence  in continuous speech. Prosodic prominence involves two different phonetic features:  pitch  accents, connected with fundamental frequency (F0) movements and syllable overall  energy,  and stress, which exhibits a strong correlation with syllable duration and  high-frequency  emphasis. By deriving a set of acoustic parameters it is possible to build  syllable-stress  detectors as well as pitch-accent detectors and combine them to build an  automatic system  devoted to prominence detection. 
Starting from a syllable-segmented utterance, the system  presented here is capable of correctly identifying prominent syllables with an agreement, with  human-tagged data, comparable with the inter-human agreement reported in the literature.",{"paper_id":1312,"title":1313,"year":213,"month":855,"day":63,"doi":1314,"resource_url":1315,"first_page":63,"last_page":63,"pdf_url":1316,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1317,"paper_type":860,"authors":1318,"abstract":1320},"lrec2002-main-029","A dynamic model for reference corpora structure definition","10.63317\u002F3cb3viwiypr2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-029","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F29.pdf","tamburini-2002-dynamic",[1319],{"paper_id":1312,"author_seq":247,"given_name":1308,"surname":1309,"affiliation":63,"orcid":63},"A representative corpus of written Italian – CORIS – constructed at the Centre for  Theoretical and Applied Linguistics of Bologna University (CILTA) is available on-line.  Considering the importance of the comparability of reference corpora in interlinguistic  studies, a further corpus – CODIS – was designed. Aimed at specialist needs, CODIS  presents a dynamic and adaptive structure providing for the selection of the subcorpora  pertinent to a specific research project and allowing the researcher to define the size  of each subcorpus. CODIS is designed to be dynamically adapted by the scholar to different  comparative needs by a careful combination of small corpus chunks of various types and  sizes. The chunk sizes were carefully selected in order to allow for various combinations  creating subcorpora of different sizes, ranging from 0 to the maximum size of each CORIS  subcorpus. 
This fine granularity provides a wide range of corpora composition options,  satisfying almost all comparative needs.",{"paper_id":1322,"title":1323,"year":213,"month":855,"day":63,"doi":1324,"resource_url":1325,"first_page":63,"last_page":63,"pdf_url":1326,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1327,"paper_type":860,"authors":1328,"abstract":1335},"lrec2002-main-030","An ontology-based approach in the literary research: two case-studies","10.63317\u002F4x4nmkbxe3vb","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-030","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F30.pdf","alderuccio-bordoni-2002-ontology",[1329,1332],{"paper_id":1322,"author_seq":247,"given_name":1330,"surname":1331,"affiliation":63,"orcid":63},"Daniela","Alderuccio",{"paper_id":1322,"author_seq":232,"given_name":1333,"surname":1334,"affiliation":63,"orcid":63},"Luciana","Bordoni","The present paper suggests an application of ontologies in the analysis of a literary phenomenon. Starting from the needs  of humanistic research and from the availability on the Web of Language Resources, the adoption of such methodology, whose reference tool is an ontology, has led to a deeper analysis of the \"Dualism Truth vs. 
Propaganda\" in Karl Kraus's writings and of the novel \"Cloud, castle and lake\" of Vladimir Nabokov.",{"paper_id":1337,"title":1338,"year":213,"month":855,"day":63,"doi":1339,"resource_url":1340,"first_page":63,"last_page":63,"pdf_url":1341,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1342,"paper_type":860,"authors":1343,"abstract":1361},"lrec2002-main-031","A Multilingual Speaker Verification System: Architecture and Performance Evaluation","10.63317\u002F4csavhsa4vwx","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-031","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F31.pdf","caminero-etal-2002-multilingual",[1344,1347,1350,1352,1355,1358],{"paper_id":1337,"author_seq":247,"given_name":1345,"surname":1346,"affiliation":63,"orcid":63},"Javier","Caminero",{"paper_id":1337,"author_seq":232,"given_name":1348,"surname":1349,"affiliation":63,"orcid":63},"Joaquín","González-Rodríguez",{"paper_id":1337,"author_seq":218,"given_name":1345,"surname":1351,"affiliation":63,"orcid":63},"Ortega-García",{"paper_id":1337,"author_seq":203,"given_name":1353,"surname":1354,"affiliation":63,"orcid":63},"Daniel","Tapias",{"paper_id":1337,"author_seq":188,"given_name":1356,"surname":1357,"affiliation":63,"orcid":63},"Pedro M.","Ruz",{"paper_id":1337,"author_seq":172,"given_name":1359,"surname":1360,"affiliation":63,"orcid":63},"Mercedes","Solá","In this contribution we present a multilingual secure access front-end  that checks the identity of the user of a service through the mobile, PSTN and the IP networks (G.723, G.729). Our system prototype is based  on speech recognition and speaker verification technologies and it uses a decision mechanism to combine the outputs of both modules. The main  objective of the system is to increase the services access security with no  increase of the service complexity. 
The system initially works in six European Languages (Spanish, English, French, Catalan, Galician  and Basque) even though the system architecture easily allows the addition of new languages. The system has been developed through an EC-funded project called SAFE (Secure-Access Front End, IST-1999-20959).",{"paper_id":1363,"title":1364,"year":213,"month":855,"day":63,"doi":1365,"resource_url":1366,"first_page":63,"last_page":63,"pdf_url":1367,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1368,"paper_type":860,"authors":1369,"abstract":1376},"lrec2002-main-032","Lexical token alignment: experiments, results and applications","10.63317\u002F5bptnb3g7hxa","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-032","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F32.pdf","tufis-barbu-2002-lexical",[1370,1373],{"paper_id":1363,"author_seq":247,"given_name":1371,"surname":1372,"affiliation":63,"orcid":63},"Dan","Tufiş",{"paper_id":1363,"author_seq":232,"given_name":1374,"surname":1375,"affiliation":63,"orcid":63},"Ana-Maria","Barbu","Lexical alignment is one of the most challenging  tasks in processing and exploiting  parallel texts. There are  numerous applications that may benefit from an accurate multilingual  lexical alignment of bi- and multi-language corpora. We describe in this  paper a hypothesis-testing approach to the problem of automatic  extraction of translation equivalents from sentence-aligned and tagged  parallel corpora. The algorithm was used for automatic extraction of 6  bi-lingual lexicons with English as source language and Bulgarian,  Czech, Estonian, Hungarian, Romanian and Slovene as the target one, as  well as a 7-language lexicon with English as a hub and the other 6 CEE  languages. 
For the experiments described here we used the 7-language  aligned corpus based on Orwell’s \"1984\" novel.",{"paper_id":1378,"title":1379,"year":213,"month":855,"day":63,"doi":1380,"resource_url":1381,"first_page":63,"last_page":63,"pdf_url":1382,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1383,"paper_type":860,"authors":1384,"abstract":1394},"lrec2002-main-033","Designing Prosodic Databases for Automatic Modeling of Slovenian Language in a Multilingual TTS System","10.63317\u002F35huqupwrrbk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-033","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F33.pdf","muller-etal-2002-designing",[1385,1388,1391],{"paper_id":1378,"author_seq":247,"given_name":1386,"surname":1387,"affiliation":63,"orcid":63},"Achim F.","Müller",{"paper_id":1378,"author_seq":232,"given_name":1389,"surname":1390,"affiliation":63,"orcid":63},"Janez","Stergar",{"paper_id":1378,"author_seq":218,"given_name":1392,"surname":1393,"affiliation":63,"orcid":63},"Bogomir","Horvat","In this paper the design of a prosodic data base and the data driven  prediction of phrase breaks for modeling Slovenian language in a multilingual text-to-speech (TTS) system are  presented. Automatic  learning techniques offer a solution in adapting prosodic models to a new language, voice or a new application, because they allow prosodic  regularities to be automatically extracted from a prosodic database of natural speech. Such techniques  depend on the construction of a large  corpus labeled with symbolic prosody labels. The labeling can be done either automatically or by hand. While automatic labeling can be  less  accurate than hand labeling, the latter is very time consuming. Therefore an  interactive tool for semi-automatic labeling that uses the segmented spoken counterpart of  the text as input will be  presented. 
The tool combines the advantage of hand labeling and  automatic labeling by achieving a high consistency in labeling and reducing the time that  would be needed for hand labeling. The labeled  Slovenian corpus has been used to train our phrase break prediction module. Experiments for the data driven prediction of major  and minor  phrase break labels have been performed. The achieved prediction accuracy  marks state-of-the art for phrase break prediction accuracy  for Slovenian language.",{"paper_id":1396,"title":1397,"year":213,"month":855,"day":63,"doi":1398,"resource_url":1399,"first_page":63,"last_page":63,"pdf_url":1400,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1401,"paper_type":860,"authors":1402,"abstract":1415},"lrec2002-main-034","PILLS: Multilingual generation of medical information documents with overlapping content","10.63317\u002F54yqt2ccfngk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-034","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F34.pdf","bouayad-agha-etal-2002-pills",[1403,1406,1409,1412],{"paper_id":1396,"author_seq":247,"given_name":1404,"surname":1405,"affiliation":63,"orcid":63},"Nadjet","Bouayad-Agha",{"paper_id":1396,"author_seq":232,"given_name":1407,"surname":1408,"affiliation":63,"orcid":63},"Richard","Power",{"paper_id":1396,"author_seq":218,"given_name":1410,"surname":1411,"affiliation":63,"orcid":63},"Donia","Scott",{"paper_id":1396,"author_seq":203,"given_name":1413,"surname":1414,"affiliation":63,"orcid":63},"Anja","Belz","In the pharmaceutical industry, products have to be described by a range of document  types with overlapping  content. Moreover, much of this documentation has to be produced in many languages. This situation is commonplace in many commercial  domains, and leads to well-known problems in maintaining a set of related documents and their  translations. We describe a potential solution explored in the PILLS project. 
All relevant  knowledge about a  product is entered only once, through a natural-language interface to   a knowledge base. From this `master model', specialised models for a range of document  types are derived automatically; from each  specialised model, documents are generated automatically in all supported languages. As an illustration of this approach, the PILLS  demonstrator generates three medical document types in English, German and French.",{"paper_id":1417,"title":1418,"year":213,"month":855,"day":63,"doi":1419,"resource_url":1420,"first_page":63,"last_page":63,"pdf_url":1421,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1422,"paper_type":860,"authors":1423,"abstract":1439},"lrec2002-main-035","Co-reference annotation and resources: A multilingual corpus of typologically diverse languages","10.63317\u002F2qafhpkicmra","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-035","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F35.pdf","sasaki-etal-2002-co",[1424,1427,1430,1433,1436],{"paper_id":1417,"author_seq":247,"given_name":1425,"surname":1426,"affiliation":63,"orcid":63},"Felix","Sasaki",{"paper_id":1417,"author_seq":232,"given_name":1428,"surname":1429,"affiliation":63,"orcid":63},"Claudia","Wegener",{"paper_id":1417,"author_seq":218,"given_name":1431,"surname":1432,"affiliation":63,"orcid":63},"Andreas","Witt",{"paper_id":1417,"author_seq":203,"given_name":1434,"surname":1435,"affiliation":63,"orcid":63},"Dieter","Metzing",{"paper_id":1417,"author_seq":188,"given_name":1437,"surname":1438,"affiliation":63,"orcid":63},"Jens","Pönninghaus","This article introduces a dialogue corpus containing data from two typologically different languages, Japanese and Kilivila. The corpus is annotated in accordance with language specific annotation schemes for co-referential and similar relations. 
The article describes the corpus data, the properties of language specific co-reference in the two languages and a methodology for its annotation. Examples from the corpus show how this methodology is used in the workflow of the annotation process.",{"paper_id":1441,"title":1442,"year":213,"month":855,"day":63,"doi":1443,"resource_url":1444,"first_page":63,"last_page":63,"pdf_url":1445,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1446,"paper_type":860,"authors":1447,"abstract":1454},"lrec2002-main-036","Towards Very Large Ontologies for Medical Language Processing","10.63317\u002F2wpbgebtfcob","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-036","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F36.pdf","hahn-schulz-2002-towards",[1448,1451],{"paper_id":1441,"author_seq":247,"given_name":1449,"surname":1450,"affiliation":63,"orcid":63},"Udo","Hahn",{"paper_id":1441,"author_seq":232,"given_name":1452,"surname":1453,"affiliation":63,"orcid":63},"Stefan","Schulz","We describe an ontology engineering methodology by  which conceptual knowledge is extracted from an informal medical  thesaurus (UMLS) and automatically converted into a formal description  logics system. Our approach consists of four steps: concept definitions  are automatically generated from the UMLS source, integrity checking of  taxonomic and partonomic hierarchies is performed by the terminological  classifier, cycles and inconsistencies are eliminated, and incremental  refinement of the evolving knowledge base is performed by a domain  expert. 
We report on experiments with a knowledge base  composed of  164,000 concepts and 76,000 relations.",{"paper_id":1456,"title":1457,"year":213,"month":855,"day":63,"doi":1458,"resource_url":1459,"first_page":63,"last_page":63,"pdf_url":1460,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1461,"paper_type":860,"authors":1462,"abstract":1469},"lrec2002-main-037","Improving an Ontology Refinement Method with Hyponymy Patterns","10.63317\u002F29hzhoingp7q","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-037","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F37.pdf","alfonseca-manandhar-2002-improving",[1463,1466],{"paper_id":1456,"author_seq":247,"given_name":1464,"surname":1465,"affiliation":63,"orcid":63},"Enrique","Alfonseca",{"paper_id":1456,"author_seq":232,"given_name":1467,"surname":1468,"affiliation":63,"orcid":63},"Suresh","Manandhar","We describe here a procedure to combine two different existing  techniques for Ontology Enrichment with domain-specific concepts. The resulting algorithm is fully unsupervised, and the level of precision  is higher than when they are used separately, so we believe that both algorithms benefit from each other. 
The experiments have been  performed by extending WordNet with concepts extracted from \"The Lord of the Rings\"",{"paper_id":1471,"title":1472,"year":213,"month":855,"day":63,"doi":1473,"resource_url":1474,"first_page":63,"last_page":63,"pdf_url":1475,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1476,"paper_type":860,"authors":1477,"abstract":1480},"lrec2002-main-038","Proposal for Evaluating Ontology Refinement Methods","10.63317\u002F4vykzxym8yba","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-038","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F38.pdf","alfonseca-manandhar-2002-proposal",[1478,1479],{"paper_id":1471,"author_seq":247,"given_name":1464,"surname":1465,"affiliation":63,"orcid":63},{"paper_id":1471,"author_seq":232,"given_name":1467,"surname":1468,"affiliation":63,"orcid":63},"Ontologies are a tool for Knowledge Representation that is now widely  used, but the effort employed to build an ontology is still high. There are a few automatic and  semi-automatic methods for extending ontologies with domain-specific information, but they use  different training and test data, and different evaluation metrics. The work  described in this paper is an attempt to build a benchmark corpus that can be used for comparing these systems. 
We provide  standard evaluation metrics as well as two different annotated corpora: one in which every unknown word has been labelled with the places where it should be added onto the ontology, and other in which only the  high-frequency unknown terms have been annotated.",{"paper_id":1482,"title":1483,"year":213,"month":855,"day":63,"doi":1484,"resource_url":1485,"first_page":63,"last_page":63,"pdf_url":1486,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1487,"paper_type":860,"authors":1488,"abstract":1492},"lrec2002-main-039","Methods for Constructing Lexicon-Grammar Resources: The Example of Measure Expressions","10.63317\u002F49cp9r5mrots","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-039","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F39.pdf","constant-2002-methods",[1489],{"paper_id":1482,"author_seq":247,"given_name":1490,"surname":1491,"affiliation":63,"orcid":63},"Matthieu","Constant","We construct, in the framework of the lexicon-grammar theory, a set of grammars dealing with measure expressions. First, we manually represent compounds with graphs:  determiners such as ten pounds of and prepositions such as 34 miles from. Then, by the means of lexicon-grammar matrices, graphs and a semi-automatic process, we build a set of grammars of kernel sentences e.g. the door is 2-meter high. 
Finally, we evaluate our methods and grammars according to three points: production, maintenance and concrete application.",{"paper_id":1494,"title":1495,"year":213,"month":855,"day":63,"doi":1496,"resource_url":1497,"first_page":63,"last_page":63,"pdf_url":1498,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1499,"paper_type":860,"authors":1500,"abstract":1507},"lrec2002-main-040","Living off the land: The Web as a source of practice texts for learners of less prevalent languages","10.63317\u002F22gu49utxeui","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-040","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F40.pdf","nilsson-borin-2002-living",[1501,1504],{"paper_id":1494,"author_seq":247,"given_name":1502,"surname":1503,"affiliation":63,"orcid":63},"Kristina","Nilsson",{"paper_id":1494,"author_seq":232,"given_name":1505,"surname":1506,"affiliation":63,"orcid":63},"Lars","Borin","This study focuses on how to automatically locate text sources  published on the World Wide Web in order to produce adequate and up-to-date learning materials for second  language learners of Nordic  languages. The Web is an excellent source of authentic text  materials. However, the large amount of information available on the Web makes search  services necessary. Hence, we are developing  Squirrel, a prototype Web meta-search service, described in this paper, which collects text material in the Nordic languages  according  to language, topic and difficulty level. Our primary target group consists of  exchange students to Nordic institutions of higher  education, and their language teachers, although in the longer perspective, we would also  like to be able to do something for  minority language communities. We describe the basic implementation of Squirrel, and  present preliminary results from trying it out. 
Finally  we discuss the (lack of) Web resources in less prevalent languages, and how we imagine that applications like Squirrel  could fit into a  second or foreign language learning situation.",{"paper_id":1509,"title":1510,"year":213,"month":855,"day":63,"doi":1511,"resource_url":1512,"first_page":63,"last_page":63,"pdf_url":1513,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1514,"paper_type":860,"authors":1515,"abstract":1522},"lrec2002-main-041","Diagnostic Assessment of Telephone Transmission Impact on ASR Performance and Human-to-Human Speech Quality","10.63317\u002F58nn3dbk8kjf","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-041","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F41.pdf","moller-kavallieratou-2002-diagnostic",[1516,1519],{"paper_id":1509,"author_seq":247,"given_name":1517,"surname":1518,"affiliation":63,"orcid":63},"Sebastian","Möller",{"paper_id":1509,"author_seq":232,"given_name":1520,"surname":1521,"affiliation":63,"orcid":63},"Ergina","Kavallieratou","This paper addresses the transmission channel impact  on human-to-human speech communication quality as well as on ASR  performance. Transmission channels include standard wireline or mobile  telephone networks and IP-based networks, which can be operated via  different types of user interfaces. In order to gain control over the  transmission channel, a simulation model is developed. It implements all  types of stationary impairments which can be found in the mentioned networks. Human-to-human speech communication quality in these  situations is estimated using a network planning model. Experiments are carried out for assessing ASR performance over the same  channel, with three  different types of recognizers: two prototypical recognizers used in a  telephone-based information server, and a standardized set-up developed under  the AURORA framework for distributed ASR. 
It turns out that some interesting  differences exist in behavior between the ASR system performance and speech  quality in human-to-human communication. The differences should be taken into  account by both developers of ASR systems and transmission network planners.",{"paper_id":1524,"title":1525,"year":213,"month":855,"day":63,"doi":1526,"resource_url":1527,"first_page":63,"last_page":63,"pdf_url":1528,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1529,"paper_type":860,"authors":1530,"abstract":1543},"lrec2002-main-042","A Labelling Proposal to Annotate Dialogues","10.63317\u002F4ck256e6ppwn","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-042","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F42.pdf","martinez-hinarejos-etal-2002-labelling",[1531,1534,1537,1540],{"paper_id":1524,"author_seq":247,"given_name":1532,"surname":1533,"affiliation":63,"orcid":63},"Carlos D.","Martínez-Hinarejos",{"paper_id":1524,"author_seq":232,"given_name":1535,"surname":1536,"affiliation":63,"orcid":63},"Emilio","Sanchís",{"paper_id":1524,"author_seq":218,"given_name":1538,"surname":1539,"affiliation":63,"orcid":63},"Fernando","García-Granada",{"paper_id":1524,"author_seq":203,"given_name":1541,"surname":1542,"affiliation":63,"orcid":63},"Pablo","Aibar","Stochastic models are widely used in some fields of Language Technology.  Dialogue systems are one interesting application in Language Technology. In  recent years, the stochastic modelling approach of dialogue systems has  gained interest. These stochastic models are estimated from a set of  annotated dialogues. The definition of the set of labels to annotate  dialogues is therefore an important issue in the development of stochastic  dialogue models. We propose a set of labels, which is composed of three  levels, and a set of rules for using them. The application of this labelling  to a specific set of dialogues is reported. 
The adequacy of the set of  labels for stochastic modelling is also  demonstrated.",{"paper_id":1545,"title":1546,"year":213,"month":855,"day":63,"doi":1547,"resource_url":1548,"first_page":63,"last_page":63,"pdf_url":1549,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1550,"paper_type":860,"authors":1551,"abstract":1558},"lrec2002-main-043","Collection and linguistic processing of a large-scale corpus of medical articles","10.63317\u002F283ht4g5aqdo","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-043","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F43.pdf","teufel-elhadad-2002-collection",[1552,1555],{"paper_id":1545,"author_seq":247,"given_name":1553,"surname":1554,"affiliation":63,"orcid":63},"Simone","Teufel",{"paper_id":1545,"author_seq":232,"given_name":1556,"surname":1557,"affiliation":63,"orcid":63},"Noemie","Elhadad","We have collected a large-scale corpus of electronic  articles in the cardiology domain (85 million+ words) in the framework  of a digital library project that tailors the presentation of online  medical literature to both patients and healthcare providers. We  describe the webbased and XML technologies we used for the collection,  encoding and linguistic processing of the corpus. This resulted in a  largescale, high-quality, thoroughly marked-up resource which is used by  many researchers in our project, in the areas of natural language  processing, information retrieval and medical informatics. We show how  the final use of the resource has influenced the design of its  structural and linguistic encoding. 
The procedure we describe is general  enough to be of use to researchers in a similar position wishing to  compile, encode and linguistically annotate their own corpus from the  web.",{"paper_id":1560,"title":1561,"year":213,"month":855,"day":63,"doi":1562,"resource_url":1563,"first_page":63,"last_page":63,"pdf_url":1564,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1565,"paper_type":860,"authors":1566,"abstract":1579},"lrec2002-main-044","Constructing a lexicon of action","10.63317\u002F3yikv6bzrv2c","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-044","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F44.pdf","tokunaga-etal-2002-constructing",[1567,1570,1573,1576],{"paper_id":1560,"author_seq":247,"given_name":1568,"surname":1569,"affiliation":63,"orcid":63},"Takenobu","Tokunaga",{"paper_id":1560,"author_seq":232,"given_name":1571,"surname":1572,"affiliation":63,"orcid":63},"Manabu","Okumura",{"paper_id":1560,"author_seq":218,"given_name":1574,"surname":1575,"affiliation":63,"orcid":63},"Suguru","Saitô",{"paper_id":1560,"author_seq":203,"given_name":1577,"surname":1578,"affiliation":63,"orcid":63},"Hozumi","Tanaka","This paper describes a Japanese speech dialogue system that enables a user to interact with agents in a virtual world and proposes a design framework for building a lexicon of action. This lexicon is used to realize the behavior of the agents in response to the user's commands. The lexicon has two levels - a macro and micro level.  The system uses the  macro-level lexicon, which is similar to a conventional plan library, to translate the  user's goal to a sequence of basic movements. This process is the same as conventional planning with symbol manipulation. The micro-level lexicon is used to translate the basic movements into animation, which is represented by a sequence of avatar postures. 
We discuss how to define a set of basic movements and how to make these basic movements reusable.",{"paper_id":1581,"title":1582,"year":213,"month":855,"day":63,"doi":1583,"resource_url":1584,"first_page":63,"last_page":63,"pdf_url":1585,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1586,"paper_type":860,"authors":1587,"abstract":1591},"lrec2002-main-045","Building Concept Frames based on Text Corpora","10.63317\u002F2mvzcs2rqavn","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-045","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F45.pdf","lonneker-2002-building",[1588],{"paper_id":1581,"author_seq":247,"given_name":1589,"surname":1590,"affiliation":63,"orcid":63},"Birte","Lönneker","Linguists have been using different kinds of frame representation since the  emergence of the notion \"frame\". The main goal of the annotation system described in this paper is to provide an interactive and easy-to-use tool  for structuring concept-specific information in linguistic frames for discourse analysis or cultural studies. These frames take into account  background or \"world\" knowledge associated with the concepts, which is not necessarily  present in lexicographic frames. A frame hierarchy  providing default information, example texts containing specific information on a concept, and the annotations made by a  user are  combined together in one database. All frames have a predefined structure,  and the information they contain is represented in natural language. 
The  collected information can also be used as input to knowledge bases, or for  defining patterns for Information Extraction.",{"paper_id":1593,"title":1594,"year":213,"month":855,"day":63,"doi":1595,"resource_url":1596,"first_page":63,"last_page":63,"pdf_url":1597,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1598,"paper_type":860,"authors":1599,"abstract":1616},"lrec2002-main-046","BIZKAIFON: A sound archive of dialectal varieties of spoken Basque","10.63317\u002F2nkxjw7sspbt","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-046","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F46.pdf","hernaez-etal-2002-bizkaifon",[1600,1603,1606,1609,1611,1613],{"paper_id":1593,"author_seq":247,"given_name":1601,"surname":1602,"affiliation":63,"orcid":63},"I.","Hernáez",{"paper_id":1593,"author_seq":232,"given_name":1604,"surname":1605,"affiliation":63,"orcid":63},"E.","Navas",{"paper_id":1593,"author_seq":218,"given_name":1607,"surname":1608,"affiliation":63,"orcid":63},"J.","Sánchez",{"paper_id":1593,"author_seq":203,"given_name":1601,"surname":1610,"affiliation":63,"orcid":63},"Madariaga",{"paper_id":1593,"author_seq":188,"given_name":1601,"surname":1612,"affiliation":63,"orcid":63},"Gaminde",{"paper_id":1593,"author_seq":172,"given_name":1614,"surname":1615,"affiliation":63,"orcid":63},"X.","Zalbide","This work presents the sound archive of dialectal varieties of spoken Basque called BIZKAIFON. This database contains sound archives with their associated information and it is accessible via a web interface. 
A prototype of BIZKAIFON is available at http:\u002F\u002Fbizkaifon.ehu.es\u002F.",{"paper_id":1618,"title":1619,"year":213,"month":855,"day":63,"doi":1620,"resource_url":1621,"first_page":63,"last_page":63,"pdf_url":1622,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1623,"paper_type":860,"authors":1624,"abstract":1631},"lrec2002-main-047","Automatic Adaptation of WordNet to Domains","10.63317\u002F4bkpxgznmjcn","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-047","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F47.pdf","navigli-velardi-2002-automatic",[1625,1628],{"paper_id":1618,"author_seq":247,"given_name":1626,"surname":1627,"affiliation":63,"orcid":63},"Roberto","Navigli",{"paper_id":1618,"author_seq":232,"given_name":1629,"surname":1630,"affiliation":63,"orcid":63},"Paola","Velardi","The objective of this paper is to present a method to  automatically enrich WordNet with sub-trees of concepts in a given  language domain. WordNet is then trimmed to reduce unnecessary ambiguity  and singleton nodes. The process is based on the use of statistical  method and linguistic processing to extract candidate domain terms.  Multiword terms are semantically disambiguated and interpreted using  ontological and contextual knowledge stored in WordNet on singleton  words.",{"paper_id":1633,"title":1634,"year":213,"month":855,"day":63,"doi":1635,"resource_url":1636,"first_page":63,"last_page":63,"pdf_url":1637,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1638,"paper_type":860,"authors":1639,"abstract":1646},"lrec2002-main-048","From DTD to relational dB. 
An automatic generation of a lexicographical station out off ISLE guidelines","10.63317\u002F5arcaxoi826x","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-048","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F48.pdf","villegas-bel-2002-dtd",[1640,1643],{"paper_id":1633,"author_seq":247,"given_name":1641,"surname":1642,"affiliation":63,"orcid":63},"Marta","Villegas",{"paper_id":1633,"author_seq":232,"given_name":1644,"surname":1645,"affiliation":63,"orcid":63},"Nuria","Bel","This paper describes the Lexicographic Station  Development Platform and how it has been used to implement the resulting  lexicon guidelines and standards generated by ISLE Computational Lexicon  Group in a prototype tool for lexical encoding. The aims of the work  described here were to (i) exemplify and disseminate the Multilingual  ISLE Lexical Entry (MILE) using an actual model and available  monolingual data (ii) make extensive use of already existing PAROLE and  SIMPLE lexicons and (iii) to eventually test the goodness of the  guidelines by using a real scenario. To cope with these aims, the LSDP  was designed as a tool generator which could automatically generate a  prototype lexicographic station out of ISLE guidelines when formally  expressed in a DTD. 
Thus, we have tested and exemplified the  recommendations expressed in MILE but in addition we have also proved  that MILE can be implemented on already existing monolingual resources.",{"paper_id":1648,"title":1649,"year":213,"month":855,"day":63,"doi":1650,"resource_url":1651,"first_page":63,"last_page":63,"pdf_url":1652,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1653,"paper_type":860,"authors":1654,"abstract":1662},"lrec2002-main-049","The SmartKom Multimodal Corpus at BAS","10.63317\u002F33yzs36hh5o2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-049","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F49.pdf","schiel-etal-2002-smartkom",[1655,1656,1659],{"paper_id":1648,"author_seq":247,"given_name":1198,"surname":1199,"affiliation":63,"orcid":63},{"paper_id":1648,"author_seq":232,"given_name":1657,"surname":1658,"affiliation":63,"orcid":63},"Silke","Steininger",{"paper_id":1648,"author_seq":218,"given_name":1660,"surname":1661,"affiliation":63,"orcid":63},"Ulrich","Türk","In this contribution we announce and describe in detail the new  multimodal corpus evolving from the publicly funded German SmartKom project. The first release of the corpus (BAS SK-P 1.0) has been  finished end of 2001 and will be ready for distribution to the scientific community in July  2002. The SmartKom corpus will be the first of a new generation of Language Resources (LR) designed for a more or less complete data  gathering of human-machine communication combining acoustic, visual and tactile input and output modalities. 
Since the funding of about EU 2 Mio  for this LR is 100% public, the corpus will be available without royalties via the Bavarian Archive for Speech  Signals (BAS) at the University of Munich.",{"paper_id":1664,"title":1665,"year":213,"month":855,"day":63,"doi":1666,"resource_url":1667,"first_page":63,"last_page":63,"pdf_url":1668,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1669,"paper_type":860,"authors":1670,"abstract":1682},"lrec2002-main-050","End-to-End Evaluation of Multimodal Dialogue Systems – can we Transfer Established Methods?","10.63317\u002F2mxz8wdt8o6g","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-050","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F50.pdf","beringer-etal-2002-end",[1671,1674,1677,1680],{"paper_id":1664,"author_seq":247,"given_name":1672,"surname":1673,"affiliation":63,"orcid":63},"Nicole","Beringer",{"paper_id":1664,"author_seq":232,"given_name":1675,"surname":1676,"affiliation":63,"orcid":63},"Katerina","Louka",{"paper_id":1664,"author_seq":218,"given_name":1678,"surname":1679,"affiliation":63,"orcid":63},"Victoria","Penide-Lopez",{"paper_id":1664,"author_seq":203,"given_name":1681,"surname":1661,"affiliation":63,"orcid":63},"Uli","The goal of this paper is to define a methodology for the\t\t\tend-to-end evaluation of the  multimodal dialogue system SmartKom along the lines of the DARPA  guidelines for spoken dialogue systems. 
The methodology consists of an extended   framework for the evaluation of a multimodal dialogue system,\t\t\tevaluation metrics for its various  components, and an approach\t\t\tto compare the user satisfaction with the system's technical performance.",{"paper_id":1684,"title":1685,"year":213,"month":855,"day":63,"doi":1686,"resource_url":1687,"first_page":63,"last_page":63,"pdf_url":1688,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1689,"paper_type":860,"authors":1690,"abstract":1703},"lrec2002-main-051","Word Sense Disambiguation using Statistical Models and WordNet","10.63317\u002F3ktoywfurnv2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-051","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F51.pdf","molina-etal-2002-word",[1691,1694,1697,1700],{"paper_id":1684,"author_seq":247,"given_name":1692,"surname":1693,"affiliation":63,"orcid":63},"Antonio","Molina",{"paper_id":1684,"author_seq":232,"given_name":1695,"surname":1696,"affiliation":63,"orcid":63},"Ferran","Pla",{"paper_id":1684,"author_seq":218,"given_name":1698,"surname":1699,"affiliation":63,"orcid":63},"Encarna","Segarra",{"paper_id":1684,"author_seq":203,"given_name":1701,"surname":1702,"affiliation":63,"orcid":63},"Lidia","Moreno","One of the main problems in Natural Language  Processing is lexical ambiguity, words often have multiple lexical  functionalities (i.e. they can have various parts-of-speech) or have  several semantic meanings. Nowadays, the semantic ambiguity problem,  most known as Word Sense Disambiguation, is still an open problem in this  area. The accuracy of the different approaches for semantic  disambiguation is much lower than the accuracy of the systems which  solve other kinds of ambiguity, such as part-of-speech tagging.  Corpus-based approaches have been widely used in nearly all natural  language processing tasks. 
In this work, we propose a Word Sense  Disambiguation system which is based on Hidden Markov Models and the use  of WordNet. Some experimental results of our system on the SemCor corpus  are provided.",{"paper_id":1705,"title":1706,"year":213,"month":855,"day":63,"doi":1707,"resource_url":1708,"first_page":63,"last_page":63,"pdf_url":1709,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1710,"paper_type":860,"authors":1711,"abstract":1715},"lrec2002-main-052","Bilingual FrameNet Dictionaries for Machine Translation","10.63317\u002F4otx22fqqe89","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-052","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F52.pdf","boas-2002-bilingual",[1712],{"paper_id":1705,"author_seq":247,"given_name":1713,"surname":1714,"affiliation":63,"orcid":63},"Hans C.","Boas","This paper describes issues surrounding the planning and design of  GermanFrameNet (GFN), a counterpart to the English-based FrameNet project. The goals  of GFN are (a) to create lexical entries for German nouns, verbs, and adjectives that  correspond to existing FrameNet entries, and (b) to link the parallel lexicon fragments by  means of common semantic frames and numerical indexing mechanisms. GFN will take a  fine-grained approach towards polysemy that seeks to split word senses based on the  semantic frames that underlie their analysis. 
The parallel lexicon fragments represent an  important step towards capturing valuable information about the different syntactic  realizations of frame semantic concepts across languages, which is relevant for  information retrieval, machine translation, and language generation.",{"paper_id":1717,"title":1718,"year":213,"month":855,"day":63,"doi":1719,"resource_url":1720,"first_page":63,"last_page":63,"pdf_url":1721,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1722,"paper_type":860,"authors":1723,"abstract":1727},"lrec2002-main-053","Experiments in Topic Detection","10.63317\u002F5agbhnncc4sh","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-053","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F53.pdf","chali-2002-experiments",[1724],{"paper_id":1717,"author_seq":247,"given_name":1725,"surname":1726,"affiliation":63,"orcid":63},"Yllias","Chali","Dividing documents into topically-coherent units and discovering their topic might have many uses. We present a system that  proceeds in two steps: (1) the input text is segmented at places where there is a probable topic shift, (2) lexical chains are extracted from  each segment as indicators of its topic. Two implementations, based on public domain  resources, are presented: one based on WordNet and the second one based on Roget's thesaurus. 
An evaluation of the algorithm  shows that lexical chains are acceptable as topic indicator with 44.5% of precision and  63.8% of recall.",{"paper_id":1729,"title":1730,"year":213,"month":855,"day":63,"doi":1731,"resource_url":1732,"first_page":63,"last_page":63,"pdf_url":1733,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1734,"paper_type":860,"authors":1735,"abstract":63},"lrec2002-main-054","Querying Dependency Treebanks in XML","10.63317\u002F27p3gfdn63uz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-054","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F54.pdf","bouma-kloosterman-2002-querying",[1736,1739],{"paper_id":1729,"author_seq":247,"given_name":1737,"surname":1738,"affiliation":63,"orcid":63},"Gosse","Bouma",{"paper_id":1729,"author_seq":232,"given_name":1740,"surname":1741,"affiliation":63,"orcid":63},"Geert","Kloosterman",{"paper_id":1743,"title":1744,"year":213,"month":855,"day":63,"doi":1745,"resource_url":1746,"first_page":63,"last_page":63,"pdf_url":1747,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1748,"paper_type":860,"authors":1749,"abstract":1754},"lrec2002-main-055","Corpus-based Evaluation of a French Spelling and Grammar Checker","10.63317\u002F3k576mxaty7h","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-055","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F55.pdf","starlander-popescu-belis-2002-corpus",[1750,1753],{"paper_id":1743,"author_seq":247,"given_name":1751,"surname":1752,"affiliation":63,"orcid":63},"Marianne","Starlander",{"paper_id":1743,"author_seq":232,"given_name":940,"surname":941,"affiliation":63,"orcid":63},"This article describes an evaluation method for spelling and grammar checkers and gives the results of its application to two French checkers. 
The evaluation process follows  closely the ISO\u002FIEC and EAGLES guidelines, and defines precisely the evaluation  metrics, so that they can be easily reproduced. The choice of professional translators as user profile entails the use of a corpus of spelling mistakes, which was collected and annotated. The metrics are divided into three sets: classification of perfect vs. imperfect sentences; detection of mistakes; correction of mistakes. The results show in which respect the two systems are the most adapted to the user needs, and the points on which they could be improved.",{"paper_id":1756,"title":1757,"year":213,"month":855,"day":63,"doi":1758,"resource_url":1759,"first_page":63,"last_page":63,"pdf_url":1760,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1761,"paper_type":860,"authors":1762,"abstract":1772},"lrec2002-main-056","Formal Mechanisms for Capturing Regularizations","10.63317\u002F4hyq2u6dgx9j","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-056","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F56.pdf","meyers-etal-2002-formal",[1763,1766,1769],{"paper_id":1756,"author_seq":247,"given_name":1764,"surname":1765,"affiliation":63,"orcid":63},"Adam","Meyers",{"paper_id":1756,"author_seq":232,"given_name":1767,"surname":1768,"affiliation":63,"orcid":63},"Ralph","Grishman",{"paper_id":1756,"author_seq":218,"given_name":1770,"surname":1771,"affiliation":63,"orcid":63},"Michiko","Kosaka","While initial treebanks and treebank parsers primarily  involved surface analysis, recent work focuses on predicate argument (PA) structure. PA structure provides means to  regularize variants  (e.g., actives\u002Fpassives) of sentences so that individual patterns may have better coverage (in MT, QA, IE, etc.), offsetting the  sparse data problem. We encode such PA information in the GLARF framework.  Our previous work discusses  procedures for  producing GLARF from treebanks and parsed data. 
This paper shows   that GLARF is particularly well-suited for capturing regularization.  We discuss crucial  components of GLARF and demonstrate that other frameworks would require equivalent  components to adequately express regularization.",{"paper_id":1774,"title":1775,"year":213,"month":855,"day":63,"doi":1776,"resource_url":1777,"first_page":63,"last_page":63,"pdf_url":1778,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1779,"paper_type":860,"authors":1780,"abstract":1792},"lrec2002-main-057","A Hybrid Architecture for Robust Parsing of German","10.63317\u002F4cjkdbs5jzb9","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-057","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F57.pdf","hinrichs-etal-2002-hybrid",[1781,1784,1787,1789],{"paper_id":1774,"author_seq":247,"given_name":1782,"surname":1783,"affiliation":63,"orcid":63},"Erhard W.","Hinrichs",{"paper_id":1774,"author_seq":232,"given_name":1785,"surname":1786,"affiliation":63,"orcid":63},"Sandra","Kübler",{"paper_id":1774,"author_seq":218,"given_name":1788,"surname":1387,"affiliation":63,"orcid":63},"Frank H.",{"paper_id":1774,"author_seq":203,"given_name":1790,"surname":1791,"affiliation":63,"orcid":63},"Tylman","Ule","This paper provides an overview of current research  on a hybrid and robust parsing architecture for the morphological,  syntactic and semantic annotation of German text corpora. The novel  contribution of this research lies not in the individual parsing  modules, each of which relies on state-of-the-art algorithms and  techniques. Rather what is new about the present approach is the  combination of these modules into a single architecture. 
This  combination provides a means to significantly optimize the performance  of each component, resulting in an increased accuracy of annotation.",{"paper_id":1794,"title":1795,"year":213,"month":855,"day":63,"doi":1796,"resource_url":1797,"first_page":63,"last_page":63,"pdf_url":1798,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1799,"paper_type":860,"authors":1800,"abstract":1836},"lrec2002-main-058","OrienTel - Multilingual access to interactive communication services for the Mediterranean and the Middle East","10.63317\u002F5cirfcxsxip7","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-058","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F58.pdf","siemund-etal-2002-orientel",[1801,1804,1807,1810,1813,1816,1819,1822,1825,1827,1830,1833],{"paper_id":1794,"author_seq":247,"given_name":1802,"surname":1803,"affiliation":63,"orcid":63},"Rainer","Siemund",{"paper_id":1794,"author_seq":232,"given_name":1805,"surname":1806,"affiliation":63,"orcid":63},"Barbara","Heuft",{"paper_id":1794,"author_seq":218,"given_name":1808,"surname":1809,"affiliation":63,"orcid":63},"Khalid","Choukri",{"paper_id":1794,"author_seq":203,"given_name":1811,"surname":1812,"affiliation":63,"orcid":63},"Ossama","Emam",{"paper_id":1794,"author_seq":188,"given_name":1814,"surname":1815,"affiliation":63,"orcid":63},"Emmanuel","Maragoudakis",{"paper_id":1794,"author_seq":172,"given_name":1817,"surname":1818,"affiliation":63,"orcid":63},"Herbert","Tropf",{"paper_id":1794,"author_seq":155,"given_name":1820,"surname":1821,"affiliation":63,"orcid":63},"Oren","Gedge",{"paper_id":1794,"author_seq":138,"given_name":1823,"surname":1824,"affiliation":63,"orcid":63},"Sherrie","Shammass",{"paper_id":1794,"author_seq":121,"given_name":1826,"surname":1702,"affiliation":63,"orcid":63},"Asuncion",{"paper_id":1794,"author_seq":104,"given_name":1828,"surname":1829,"affiliation":63,"orcid":63},"Albino 
Nogueiras","Rodriguez",{"paper_id":1794,"author_seq":87,"given_name":1831,"surname":1832,"affiliation":63,"orcid":63},"Imed","Zitouni",{"paper_id":1794,"author_seq":73,"given_name":1834,"surname":1835,"affiliation":63,"orcid":63},"Dorota","Iskra","OrienTel is a project funded within the European Commission's IST framework that focuses on collecting linguistic data for telephony-based IT applications across the Mediterranean and the Middle East. Languages covered in this SpeechDat-based project are Cypriote Greek, Turkish, Hebrew, different varieties of Arabic, French, English and German. Within the project's lifetime of 30 months, starting in September 2001, OrienTel will produce a set of 22 databases, develop dialect adaptation techniques, conduct research into multilingual acoustic modelling and deploy two demonstrators as a proof of concept.",{"paper_id":1838,"title":1839,"year":213,"month":855,"day":63,"doi":1840,"resource_url":1841,"first_page":63,"last_page":63,"pdf_url":1842,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1843,"paper_type":860,"authors":1844,"abstract":1854},"lrec2002-main-059","Comparing and Extracting Paraphrasing Words with 2-Way Bilingual Dictionaries","10.63317\u002F4trujtfoa4uf","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-059","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F59.pdf","takao-etal-2002-comparing",[1845,1848,1851],{"paper_id":1838,"author_seq":247,"given_name":1846,"surname":1847,"affiliation":63,"orcid":63},"Kazutaka","Takao",{"paper_id":1838,"author_seq":232,"given_name":1849,"surname":1850,"affiliation":63,"orcid":63},"Kenji","Imamura",{"paper_id":1838,"author_seq":218,"given_name":1852,"surname":1853,"affiliation":63,"orcid":63},"Hideki","Kashioka","We analyze a variety of lexical expressions with 2-way bilingual dictionaries and  propose a method for extracting paraphrasing words. 
First, we compare the coverage between an English-Japanese dictionary and a Japanese-English dictionary from the viewpoint of the returnability of the words by translating English to Japanese, and then back to English again. The variety is shown using examples. Next, we propose a method of automatically extracting English paraphrasing word groups; we gathered the English index words which have the same Japanese translation words in the E-J dictionary. The English words which are difficult to distinguish for native speakers of Japanese were then extracted into a paraphrasing group. We also extract the Japanese paraphrasing word groups for comparison. This method will be useful for sentence matching, especially in order to accept the variety of expressions.",{"paper_id":1856,"title":1857,"year":213,"month":855,"day":63,"doi":1858,"resource_url":1859,"first_page":63,"last_page":63,"pdf_url":1860,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1861,"paper_type":860,"authors":1862,"abstract":1866},"lrec2002-main-060","A Part-of-Speech-Based Search Algorithm for Translation Memories","10.63317\u002F3rynbnb434ca","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-060","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F60.pdf","rapp-2002-part",[1863],{"paper_id":1856,"author_seq":247,"given_name":1864,"surname":1865,"affiliation":63,"orcid":63},"Reinhard","Rapp","The retrieval of related sentences in state-of-the-art translation memory systems is based on orthographic similarities. This often leads to poor search results, since  orthographically similar sentences are not necessarily semantically related. In this paper we propose a search algorithm that aims to reduce this problem by taking part-of-speech information into account. It requires that the parallel sentences stored in the translation memory are processed using standard tools for word alignment and part-of-speech tagging. 
The work described is part of an ongoing project in example-based machine translation.",{"paper_id":1868,"title":1869,"year":213,"month":855,"day":63,"doi":1870,"resource_url":1871,"first_page":63,"last_page":63,"pdf_url":1872,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1873,"paper_type":860,"authors":1874,"abstract":1880},"lrec2002-main-061","Developments in the TIGER Annotation Scheme and their Realization in the Corpus","10.63317\u002F3u7eqtzgtbig","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-061","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F61.pdf","brants-hansen-2002-developments",[1875,1877],{"paper_id":1868,"author_seq":247,"given_name":1234,"surname":1876,"affiliation":63,"orcid":63},"Brants",{"paper_id":1868,"author_seq":232,"given_name":1878,"surname":1879,"affiliation":63,"orcid":63},"Silvia","Hansen","This paper presents the annotation of the German TIGER Treebank. First,  issues concerning the annotation, representation as well as querying of the treebank are  discussed. Within this context, the annotation tool ANNOTATE, the export and XML formats of the TIGER Treebank  and the TIGER search tool are briefly introduced. Secondly, the developments of the TIGER annotation scheme and their  realization in the corpus are introduced focussing on the differences between the underlying NEGRA annotation scheme and  the further developed TIGER annotation scheme. The main differences are concerned with verb-subcategorization,  coordination, appositions and parentheses as well as proper nouns.  Thirdly, the annotation scheme is assessed through an  evaluation and a problem discussion of the above mentioned changes. For this  purpose, inter-annotator agreement in the TIGER project has been analyzed focussing on exactly these  changes. This analysis shows where the annotators' decision problems are. These difficulties are discussed in greater detail  on the basis of annotation examples. 
The paper concludes with some suggestions for the improvement of the TIGER annotation  scheme.",{"paper_id":1882,"title":1883,"year":213,"month":855,"day":63,"doi":1884,"resource_url":1885,"first_page":63,"last_page":63,"pdf_url":1886,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1887,"paper_type":860,"authors":1888,"abstract":1901},"lrec2002-main-062","Nexing Corpus: a corpus of verbal protocols on syllogistic reasoning","10.63317\u002F3a2636j4o83j","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-062","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F62.pdf","branco-etal-2002-nexing",[1889,1892,1895,1898],{"paper_id":1882,"author_seq":247,"given_name":1890,"surname":1891,"affiliation":63,"orcid":63},"António","Branco",{"paper_id":1882,"author_seq":232,"given_name":1893,"surname":1894,"affiliation":63,"orcid":63},"José","Leitão",{"paper_id":1882,"author_seq":218,"given_name":1896,"surname":1897,"affiliation":63,"orcid":63},"João","Silva",{"paper_id":1882,"author_seq":203,"given_name":1899,"surname":1900,"affiliation":63,"orcid":63},"Luís","Gomes","In this paper, we describe the Nexing Corpus and report on the tools implemented and the tasks undertaken for its development. The Nexing Corpus includes (i) a collection of  written transcriptions of verbal data elicited during a psycholinguistic experiment on syllogistic reasoning; and (ii) performance data concerning that experiment, such as latencies, confidence levels and accuracy of answers provided. The verbal productions recorded in the corpus are of a specific linguistic type that is seldom, if at all, represented in corpora. These data are relevant for the development of human language technologies aimed at modeling this type of linguistic behavior, which is not uncommon in evolved interactions of cooperative agents. 
This corpus with thinking aloud data on syllogistic reasoning is also an important source of material for cognitive science, in particular for research on the nature of human deductive reasoning.",{"paper_id":1903,"title":1904,"year":213,"month":855,"day":63,"doi":1905,"resource_url":1906,"first_page":63,"last_page":63,"pdf_url":1907,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1908,"paper_type":860,"authors":1909,"abstract":1916},"lrec2002-main-063","Argument\u002FValency Structure in PropBank, LCS Database and Prague Dependency Treebank: A Comparative Pilot Study","10.63317\u002F3mmh35dmk8sz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-063","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F63.pdf","hajicova-kucerova-2002-argument",[1910,1913],{"paper_id":1903,"author_seq":247,"given_name":1911,"surname":1912,"affiliation":63,"orcid":63},"Eva","Hajičová",{"paper_id":1903,"author_seq":232,"given_name":1914,"surname":1915,"affiliation":63,"orcid":63},"Ivona","Kučerová","Three scenarios of corpora annotation on underlying  syntactic level (PropBank, LCS Database and Prague Dependency Treebank)  are compared as for the classification of values and structures  assigned, and tentative steps of a mapping from PropBank to PDT are  formulated.",{"paper_id":1918,"title":1919,"year":213,"month":855,"day":63,"doi":1920,"resource_url":1921,"first_page":63,"last_page":63,"pdf_url":1922,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1923,"paper_type":860,"authors":1924,"abstract":1932},"lrec2002-main-064","Multi-Tier Annotations in the Verbmobil 
Corpus","10.63317\u002F4xwm8nehfn43","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-064","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F64.pdf","weilhammer-etal-2002-multi",[1925,1928,1931],{"paper_id":1918,"author_seq":247,"given_name":1926,"surname":1927,"affiliation":63,"orcid":63},"Karl","Weilhammer",{"paper_id":1918,"author_seq":232,"given_name":1929,"surname":1930,"affiliation":63,"orcid":63},"Uwe","Reichel",{"paper_id":1918,"author_seq":218,"given_name":1198,"surname":1199,"affiliation":63,"orcid":63},"In very large and diverse scientific projects where as different groups as linguists and engineers with different intentions work on the same signal data or its orthographic transcript and annotate new valuable information, it will not be easy to build a homogeneous corpus. We will describe how this can be achieved, considering the fact that some of these annotations have not been updated properly, or are based on erroneous or deliberately changed versions of the basis transcription. We used an algorithm similar to dynamic programming to detect differences between the transcription on which the annotation depends and the reference transcription for the whole corpus. These differences are automatically mapped on a set of repair operations for the transcriptions such as splitting compound words and merging neighbouring words. On the basis of these operations the correction process in the annotation is carried out. It always depends on the type of the annotation as well as on the position and the nature of the difference, whether a correction can be carried out automatically or has to be fixed manually. 
Finally we present an investigation in which we exploit the multi-tier annotations of the Verbmobil corpus to find out how breathing is correlated with prosodic-syntactic boundaries and dialog acts.",{"paper_id":1934,"title":1935,"year":213,"month":855,"day":63,"doi":1936,"resource_url":1937,"first_page":63,"last_page":63,"pdf_url":1938,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1939,"paper_type":860,"authors":1940,"abstract":1943},"lrec2002-main-065","A Database for the Analysis of Cross-Lingual Pronunciation Variants of European City Names","10.63317\u002F5hhujgnzi6mg","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-065","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F65.pdf","schaden-2002-database",[1941],{"paper_id":1934,"author_seq":247,"given_name":1452,"surname":1942,"affiliation":63,"orcid":63},"Schaden","This paper reports on a speech database that includes non-native pronunciation variants of city  names\u002Ftown names from several European languages. The database is designed as a research tool  for the study of pronunciation variants in this specific domain that occur in different groups  of non-native speakers. The ongoing data collection currently comprises 20 to 27 native speakers  of 3 languages each who pronounce material from 5 languages. The languages covered are English,  German, French, Italian, and Dutch. All languages are examined as the source language (L1) and  as the target language (L2). For the first stage of the data collection, the targeted status is  a collection of 5 x 5 language directions with at least 20 speakers per native language.  
After a brief overview of related studies and an outline of some specifics of proper names  (place names in particular) in the context of speech technology applications, the database  design and the current stage of the data collection is described.",{"paper_id":1945,"title":1946,"year":213,"month":855,"day":63,"doi":1947,"resource_url":1948,"first_page":63,"last_page":63,"pdf_url":1949,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1950,"paper_type":860,"authors":1951,"abstract":1961},"lrec2002-main-066","SAM: System for Multi-criteria Text Alignment.","10.63317\u002F46y2ysr49jxk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-066","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F66.pdf","ghorbel-etal-2002-sam",[1952,1955,1958],{"paper_id":1945,"author_seq":247,"given_name":1953,"surname":1954,"affiliation":63,"orcid":63},"Hatem","Ghorbel",{"paper_id":1945,"author_seq":232,"given_name":1956,"surname":1957,"affiliation":63,"orcid":63},"Giovanni","Coray",{"paper_id":1945,"author_seq":218,"given_name":1959,"surname":1960,"affiliation":63,"orcid":63},"André","Linden","The problem of text alignment is to establish the  correspondence between subparts of two or more translations or versions  of the same document. Most of the methods used in alignment are based on  the statistical analysis of word or character frequencies or of string  occurrences. 
In order to achieve more accurate results, other methods  have incorporated some structural properties of the documents as further  criteria.",{"paper_id":1963,"title":1964,"year":213,"month":855,"day":63,"doi":1965,"resource_url":1966,"first_page":63,"last_page":63,"pdf_url":1967,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1968,"paper_type":860,"authors":1969,"abstract":1973},"lrec2002-main-067","Word Formation and the Validation of Lexical Resources","10.63317\u002F2new96bmmoxg","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-067","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F67.pdf","ten-hacken-2002-word-formation",[1970],{"paper_id":1963,"author_seq":247,"given_name":1971,"surname":1972,"affiliation":63,"orcid":63},"Pius","ten Hacken","In the framework of Word Manager (WM), morphological  dictionaries are produced by the classification of lexemes in terms of a  rule database. The intricate structure of the resulting lexical  resources, conceived primarily for flexible use, also offers novel  opportunities for the validation of the lexical specification. Many of  the inconsistencies and errors encountered in lexical specification in a  text file are excluded in WM, because the lexicographer's interface  supports decisions by the exploitation of the procedural nature of  inflection and word formation rules. There remains a set of  lexicographic decisions, based on facts of the language and on the  theoretical analysis of these facts, which cannot be supported in this  formal way. They include the contents of the lexicographic guidelines.  For the validation of these decisions, two types of browser are  provided, the tree browser which gives access to partitionings of the  set of lexemes, and the lexeme browser which concentrates on information  for a single lexeme and on its links to other lexemes. 
The possibilities  available because of the structure in the database constitute a  challenge for the generality of the approach to validation described by  Underwood & Navarretta (1997), which requires the reduction of  lexical databases to text files.",{"paper_id":1975,"title":1976,"year":213,"month":855,"day":63,"doi":1977,"resource_url":1978,"first_page":63,"last_page":63,"pdf_url":1979,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":1980,"paper_type":860,"authors":1981,"abstract":2002},"lrec2002-main-068","Knowledge Mining and Discovery for Searching in Literary Texts","10.63317\u002F49tawmharkez","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-068","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F68.pdf","cappelli-etal-2002-knowledge",[1982,1985,1988,1991,1994,1997,2000],{"paper_id":1975,"author_seq":247,"given_name":1983,"surname":1984,"affiliation":63,"orcid":63},"A.","Cappelli",{"paper_id":1975,"author_seq":232,"given_name":1986,"surname":1987,"affiliation":63,"orcid":63},"M. N.","Catarsi",{"paper_id":1975,"author_seq":218,"given_name":1989,"surname":1990,"affiliation":63,"orcid":63},"P.","Michelassi",{"paper_id":1975,"author_seq":203,"given_name":1992,"surname":1993,"affiliation":63,"orcid":63},"L.","Moretti",{"paper_id":1975,"author_seq":188,"given_name":1995,"surname":1996,"affiliation":63,"orcid":63},"M.","Baglioni",{"paper_id":1975,"author_seq":172,"given_name":1998,"surname":1999,"affiliation":63,"orcid":63},"F.","Turini",{"paper_id":1975,"author_seq":155,"given_name":1995,"surname":2001,"affiliation":63,"orcid":63},"Tavoni","The article describes a query system on texts and  literary material with advanced information retrieval tools suitable to  retrieve the content of a text, either as material specifically  organized with respect to linguistic, stylistic and rhetorical features,  and in its historical, social and cultural context. 
As a test bed we  chose the Dante’s characters of al di là. This method of  investigation should help a scholar of a literary text to realize part  of his interpretative intentions. For this purpose, we will adopt  advanced methodologies in knowledge management and knowledge discovery  to be applied to a rich representation of the tagged content of a text.",{"paper_id":2004,"title":2005,"year":213,"month":855,"day":63,"doi":2006,"resource_url":2007,"first_page":63,"last_page":63,"pdf_url":2008,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2009,"paper_type":860,"authors":2010,"abstract":2028},"lrec2002-main-069","SiSSA: An Infrastructure for Developing NLP Applications","10.63317\u002F3aakbygvhr9k","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-069","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F69.pdf","lavelli-etal-2002-sissa",[2011,2014,2016,2019,2022,2025],{"paper_id":2004,"author_seq":247,"given_name":2012,"surname":2013,"affiliation":63,"orcid":63},"Alberto","Lavelli",{"paper_id":2004,"author_seq":232,"given_name":1308,"surname":2015,"affiliation":63,"orcid":63},"Pianesi",{"paper_id":2004,"author_seq":218,"given_name":2017,"surname":2018,"affiliation":63,"orcid":63},"Ermanno","Maci",{"paper_id":2004,"author_seq":203,"given_name":2020,"surname":2021,"affiliation":63,"orcid":63},"Irina","Prodanof",{"paper_id":2004,"author_seq":188,"given_name":2023,"surname":2024,"affiliation":63,"orcid":63},"Luca","Dini",{"paper_id":2004,"author_seq":172,"given_name":2026,"surname":2027,"affiliation":63,"orcid":63},"Giampaolo","Mazzini","In recent years there has been a growing interest in the commercial  deployment of NLP  technologies. 
This paper presents SiSSA, a project  whose main aim is that of developing  an infrastructure for  prototyping, editing and validation of NLP application architectures.\n  The system will provide the user with a graphical environment for  (1) selecting the NLP  activities relevant for the particular NLP  task and the associated linguistic processors that  execute them;  (2) connecting new linguistic processors to SiSSA; (3) checking that  the  chosen architectural hypothesis corresponds to the functional  specifications of the given  application. The proposed infrastructure  makes crucial use of state-of-the-art software  technologies (CORBA,  XML, RDF) to integrate different linguistic processors in an  effective way. In the paper the definition of a metaformalism for  the unification of  different formalisms for grammar description is also briefly presented.",{"paper_id":2030,"title":2031,"year":213,"month":855,"day":63,"doi":2032,"resource_url":2033,"first_page":63,"last_page":63,"pdf_url":2034,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2035,"paper_type":860,"authors":2036,"abstract":2063},"lrec2002-main-070","Building a Linguistically Interpreted Corpus of Bulgarian: the 
BulTreeBank","10.63317\u002F2a8yxamoowrk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-070","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F70.pdf","simov-etal-2002-building",[2037,2040,2043,2046,2049,2052,2055,2058,2060],{"paper_id":2030,"author_seq":247,"given_name":2038,"surname":2039,"affiliation":63,"orcid":63},"Kiril","Simov",{"paper_id":2030,"author_seq":232,"given_name":2041,"surname":2042,"affiliation":63,"orcid":63},"Petya","Osenova",{"paper_id":2030,"author_seq":218,"given_name":2044,"surname":2045,"affiliation":63,"orcid":63},"Milena","Slavcheva",{"paper_id":2030,"author_seq":203,"given_name":2047,"surname":2048,"affiliation":63,"orcid":63},"Sia","Kolkovska",{"paper_id":2030,"author_seq":188,"given_name":2050,"surname":2051,"affiliation":63,"orcid":63},"Elisaveta","Balabanova",{"paper_id":2030,"author_seq":172,"given_name":2053,"surname":2054,"affiliation":63,"orcid":63},"Dimitar","Doikoff",{"paper_id":2030,"author_seq":155,"given_name":2056,"surname":2057,"affiliation":63,"orcid":63},"Krassimira","Ivanova",{"paper_id":2030,"author_seq":138,"given_name":2059,"surname":2039,"affiliation":63,"orcid":63},"Alexander",{"paper_id":2030,"author_seq":121,"given_name":2061,"surname":2062,"affiliation":63,"orcid":63},"Milen","Kouylekov","In the field of Human Language Technology (HLT), the existence of linguistically  interpreted real-world texts provides the license necessary for a given language to enter the area of  high-tech applications. The significance of BulTreeBank is the granting of an HLT license to a ``less processed'' language like  Bulgarian which, until recently, has been formally modelled and processed mainly on the morphology level. The BulTreeBank project  aims at the creation of syntactically annotated data for Bulgarian and the tools for their production, management and automatic  processing. 
It provides not only language resources, but develops an infrastructure of research solutions, production scenarios and",{"paper_id":2065,"title":2066,"year":213,"month":855,"day":63,"doi":2067,"resource_url":2068,"first_page":63,"last_page":63,"pdf_url":2069,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2070,"paper_type":860,"authors":2071,"abstract":2087},"lrec2002-main-071","Syntactic Analysis in the Spoken Dutch Corpus (CGN)","10.63317\u002F5h535agjtv8p","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-071","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F71.pdf","van-der-wouden-etal-2002-syntactic",[2072,2075,2078,2081,2084],{"paper_id":2065,"author_seq":247,"given_name":2073,"surname":2074,"affiliation":63,"orcid":63},"Ton","van der Wouden",{"paper_id":2065,"author_seq":232,"given_name":2076,"surname":2077,"affiliation":63,"orcid":63},"Heleen","Hoekstra",{"paper_id":2065,"author_seq":218,"given_name":2079,"surname":2080,"affiliation":63,"orcid":63},"Michael","Moortgat",{"paper_id":2065,"author_seq":203,"given_name":2082,"surname":2083,"affiliation":63,"orcid":63},"Bram","Renmans",{"paper_id":2065,"author_seq":188,"given_name":2085,"surname":2086,"affiliation":63,"orcid":63},"Ineke","Schuurman","The paper describes the syntactic annotation of the  Spoken Dutch Corpus (\"Corpus Gesproken Nederlands\" or CGN),  the Dutch-Flemish project (1998-2003) aiming at the collection,  description and annotation of ten million words of spoken Dutch. In the  first part, the background of the parsing strategy is discussed, as well  as some details concerning the actual implementation of the parsing  process. 
The second part discusses some examples of practical  applications of the result of the parsing process.",{"paper_id":2089,"title":2090,"year":213,"month":855,"day":63,"doi":2091,"resource_url":2092,"first_page":63,"last_page":63,"pdf_url":2093,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2094,"paper_type":860,"authors":2095,"abstract":2102},"lrec2002-main-072","Electronic Dictionaries - from Publisher Data to a Distribution Server: the DicoPro, DicoEast and RERO Projects","10.63317\u002F3qbxo97pb9cm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-072","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F72.pdf","popescu-belis-etal-2002-electronic",[2096,2097,2100],{"paper_id":2089,"author_seq":247,"given_name":940,"surname":941,"affiliation":63,"orcid":63},{"paper_id":2089,"author_seq":232,"given_name":2098,"surname":2099,"affiliation":63,"orcid":63},"Susan","Armstrong",{"paper_id":2089,"author_seq":218,"given_name":2101,"surname":1181,"affiliation":63,"orcid":63},"Gilbert","This article describes a set of initiatives in the domain of electronic dictionary  distribution. Their basis is the DicoPro server, which enables secure access to dictionary data on a server. In the DicoEast and RERO projects, the goal is to acquire high-quality  publisher data, convert it into numeric format, and provide access to dictionary entries for the participating institutions. 
We analyze the various problems that appear throughout this process and describe the solutions we found.",{"paper_id":2104,"title":2105,"year":213,"month":855,"day":63,"doi":2106,"resource_url":2107,"first_page":63,"last_page":63,"pdf_url":2108,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2109,"paper_type":860,"authors":2110,"abstract":2116},"lrec2002-main-073","GermaNet - representation, visualization, application","10.63317\u002F5epbd5kgb46i","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-073","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F73.pdf","kunze-lemnitzer-2002-germanet",[2111,2113],{"paper_id":2104,"author_seq":247,"given_name":1428,"surname":2112,"affiliation":63,"orcid":63},"Kunze",{"paper_id":2104,"author_seq":232,"given_name":2114,"surname":2115,"affiliation":63,"orcid":63},"Lothar","Lemnitzer","This paper outlines current developments centering  around the lexical-semantic database GermaNet and its applicability both  within language engineering tasks and on the Semantic Web. From this  perspective, representation and standardization are considered to be  crucial issues as regards compatibility and interoperability with other  language resources. Representation variants of lexical data, e.g.  XML-based formats, enable various web applications and data exchange.  
Furthermore, visualization tools for exploring ontologies can be adopted  and enhanced.",{"paper_id":2118,"title":2119,"year":213,"month":855,"day":63,"doi":2120,"resource_url":2121,"first_page":63,"last_page":63,"pdf_url":2122,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2123,"paper_type":860,"authors":2124,"abstract":2134},"lrec2002-main-074","Design of the VICO Spoken Dialogue System: Evaluation of User Expectations by Wizard-of-Oz Experiments","10.63317\u002F5ape7iytam3p","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-074","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F74.pdf","geutner-etal-2002-design",[2125,2128,2131],{"paper_id":2118,"author_seq":247,"given_name":2126,"surname":2127,"affiliation":63,"orcid":63},"Petra","Geutner",{"paper_id":2118,"author_seq":232,"given_name":2129,"surname":2130,"affiliation":63,"orcid":63},"Frank","Steffens",{"paper_id":2118,"author_seq":218,"given_name":2132,"surname":2133,"affiliation":63,"orcid":63},"Dietrich","Manstetten","Steadily increasing dissemination of computer  applications has resulted in an ever-growing functional complexity of  electronic services and devices. Hence the utilization of natural  language is highly desirable to facilitate their usage, especially in  the automotive environment where safety is a mandatory requirement.  User-friendly, comfortable and safe vocal interfaces that ensure natural  interactivity are needed. VICO, the Virtual Intelligent Co-Driver, aims  at the development of an intelligent conversational agent enabling  ubiquitous natural interaction between humans, and digital devices and  services in the car. This paper gives an introduction into the key  objectives and goals of the VICO project. It presents detailed  information about design and experimental setup of the performed  Wizard-of-Oz experiments to evaluate expectations of potential users  early in the design and development process. 
The results of the  conducted experiments are introduced. The subjective ratings of test  persons towards the evaluated simulated prototype system were very high,  and the speech-controlled approach considered as extremely easy-to-use.  Finally, conclusions as well as consequences of the perceived results on  design and development of the first prototype VICO system are described.",{"paper_id":2136,"title":2137,"year":213,"month":855,"day":63,"doi":2138,"resource_url":2139,"first_page":63,"last_page":63,"pdf_url":2140,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2141,"paper_type":860,"authors":2142,"abstract":2149},"lrec2002-main-075","The Lexico-semantic Annotation of an Italian Treebank","10.63317\u002F5267gu5p89po","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-075","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F75.pdf","mana-corazzari-2002-lexico",[2143,2146],{"paper_id":2136,"author_seq":247,"given_name":2144,"surname":2145,"affiliation":63,"orcid":63},"Nadia","Mana",{"paper_id":2136,"author_seq":232,"given_name":2147,"surname":2148,"affiliation":63,"orcid":63},"Ornella","Corazzari","Corpora annotated at semantic level play a crucial role both in  research and in applicative contexts in which systems of natural language processing are studied and developed. In this paper we  present the lexico-semantic annotation of an Italian treebank, a first attempt to recover the lack of such resource for Italian. 
We will  describe the annotation realized, focusing on the methodology followed, the results achieved, and possible further work and  applications.",{"paper_id":2151,"title":2152,"year":213,"month":855,"day":63,"doi":2153,"resource_url":2154,"first_page":63,"last_page":63,"pdf_url":2155,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2156,"paper_type":860,"authors":2157,"abstract":2169},"lrec2002-main-076","Towards Automatic Evaluation of Question\u002FAnswering Systems","10.63317\u002F2mi3mywgccyc","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-076","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F76.pdf","magnini-etal-2002-towards",[2158,2161,2164,2166],{"paper_id":2151,"author_seq":247,"given_name":2159,"surname":2160,"affiliation":63,"orcid":63},"Bernardo","Magnini",{"paper_id":2151,"author_seq":232,"given_name":2162,"surname":2163,"affiliation":63,"orcid":63},"Matteo","Negri",{"paper_id":2151,"author_seq":218,"given_name":1626,"surname":2165,"affiliation":63,"orcid":63},"Prevete",{"paper_id":2151,"author_seq":203,"given_name":2167,"surname":2168,"affiliation":63,"orcid":63},"Hristo","Tanev","This paper presents an innovative approach to the automatic evaluation  of Question Answering systems.  The methodology  relies on the use of the Web,  considered as an ``oracle''   containing all the information  needed to check the relevance of a  candidate answer with respect to a given question.  The procedure is completely automatic (i.e.  no human  intervention is required)  and it is  based on the assumption that the answers'  relevance  can be    assessed  from a   purely  quantitative perspective.  The methodology is based on a  Web search using patterns  derived both from the question and from the answer. Different kinds of patterns have been identified, ranging from ``lenient'' (i.e.  boolean  combinations of  single words),  to  ``strict'' patterns  (i.e.  
whole sentences   or  combinations  of   phrases).   A   statistically-based  algorithm  has been developed   which  considers  both the   kinds  of patterns used in the search and  the number of documents returned from  the Web.  Experiments carried out on the TREC-10  corpus show that the approach achieves a high level of performance (i.e.80% success rate).",{"paper_id":2171,"title":2172,"year":213,"month":855,"day":63,"doi":2173,"resource_url":2174,"first_page":63,"last_page":63,"pdf_url":2175,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2176,"paper_type":860,"authors":2177,"abstract":2184},"lrec2002-main-077","Automatic Ranking of MT Systems","10.63317\u002F3uzqckr7pwwm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-077","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F77.pdf","rajman-hartley-2002-automatic",[2178,2181],{"paper_id":2171,"author_seq":247,"given_name":2179,"surname":2180,"affiliation":63,"orcid":63},"Martin","Rajman",{"paper_id":2171,"author_seq":232,"given_name":2182,"surname":2183,"affiliation":63,"orcid":63},"Anthony","Hartley","In earlier work, we succeeded in automatically predicting  the relative rankings of MT systems derived from human judgments on the Fluency, Adequacy or Informativeness of  their output. In this paper, we present an experiment - using human evaluators and  additional data - designed to test the robustness of our earlier results. These had yielded  two promising automatically computable predictors, the D-score based on semantic  features of the MT output, and the X-score based on syntactic features. 
We conclude that  the X-score is indeed a robust and reliable predictor, even on new data for  which it has not been specifically tuned.",{"paper_id":2186,"title":2187,"year":213,"month":855,"day":63,"doi":2188,"resource_url":2189,"first_page":63,"last_page":63,"pdf_url":2190,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2191,"paper_type":860,"authors":2192,"abstract":2199},"lrec2002-main-078","Opportunistic Semantic Tagging","10.63317\u002F577g9fi2edt9","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-078","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F78.pdf","bentivogli-pianta-2002-opportunistic",[2193,2196],{"paper_id":2186,"author_seq":247,"given_name":2194,"surname":2195,"affiliation":63,"orcid":63},"Luisa","Bentivogli",{"paper_id":2186,"author_seq":232,"given_name":2197,"surname":2198,"affiliation":63,"orcid":63},"Emanuele","Pianta","Building semantically annotated corpora from scratch is a time consuming activity  requiring very specialized resources. In this paper we present a pilot study carried out to  test a methodology that can be used to create a semantically annotated corpus by  exploiting information contained in an already annotated corpus. The main hypothesis underlying the proposed methodology is that, given a text and its translation into another language, the translation preserves to a large extent the meaning of the source target. This means that if one of the two texts is already semantically tagged, and if we can align at the appropriate level the parallel texts, it should be possible to transfer the semantic annotation from the tagged text to its translation. More specifically, in our experiment we considered word level semantic annotation. The pilot study has been carried out on six texts taken from the SemCor corpus and their Italian translations. 
To test the methodology we implemented an annotation transfer system based on an English\u002FItalian word aligner, developed at ITC-irst, which relies mostly on information contained in bilingual  dictionaries.",{"paper_id":2201,"title":2202,"year":213,"month":855,"day":63,"doi":2203,"resource_url":2204,"first_page":63,"last_page":63,"pdf_url":2205,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2206,"paper_type":860,"authors":2207,"abstract":2214},"lrec2002-main-079","Tool for Czech Pronunciation Generation Combining Fixed Rules with Pronunciation Lexicon and Lexicon Management Tool","10.63317\u002F4r6c5th6p8hd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-079","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F79.pdf","pollak-hanzl-2002-tool",[2208,2211],{"paper_id":2201,"author_seq":247,"given_name":2209,"surname":2210,"affiliation":63,"orcid":63},"Petr","Pollák",{"paper_id":2201,"author_seq":232,"given_name":2212,"surname":2213,"affiliation":63,"orcid":63},"Václav","Hanžl","This paper presents two different tools which may be used as a support  of speech recognition. The tool \"transc\" is the first one and it generates the phonetic transcription (pronunciation) of given  utterance. It is based mainly on fixed rules which can be defined for Czech pronunciation but it can work also with specified list of  exceptions which is defined on lexicon basis. It allows the usage of \"transc\" for unknown text with high  probability of correct phonetic transcription generation. The second part is devoted to  lexicon management tool \"lexedit\" which may be useful in the phase of generation of  pronunciation lexicon for collected corpora. 
The presented tool allows editing of  pronunciation, playing examples of pronunciation, comparison with reference lexicon,  updating of reference lexicon, etc.",{"paper_id":2216,"title":2217,"year":213,"month":855,"day":63,"doi":2218,"resource_url":2219,"first_page":63,"last_page":63,"pdf_url":2220,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2221,"paper_type":860,"authors":2222,"abstract":2232},"lrec2002-main-080","The Reuters Corpus Volume 1 -from Yesterday’s News to Tomorrow’s Language Resources","10.63317\u002F5aeuc8zmytsy","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-080","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F80.pdf","rose-etal-2002-reuters",[2223,2226,2229],{"paper_id":2216,"author_seq":247,"given_name":2224,"surname":2225,"affiliation":63,"orcid":63},"Tony","Rose",{"paper_id":2216,"author_seq":232,"given_name":2227,"surname":2228,"affiliation":63,"orcid":63},"Mark","Stevenson",{"paper_id":2216,"author_seq":218,"given_name":2230,"surname":2231,"affiliation":63,"orcid":63},"Miles","Whitehead","Reuters, the global information, news and technology group, has for the first time made  available free of charge, large quantities of archived Reuters news stories for use by  research communities around the world. The Reuters Corpus Volume 1 (RCV1) includes  over 800,000 news stories - typical of the annual English language news output of  Reuters. This paper describes the origins of RCV1, the motivations behind its creation,  and how it differs from previous corpora. In addition we discuss the system of category  coding, whereby each story is annotated for topic, region and industry sector. 
We also  discuss the process by which these codes were applied, and examine the issues involved  in maintaining quality and consistency of coding in an operational, commercial  environment.",{"paper_id":2234,"title":2235,"year":213,"month":855,"day":63,"doi":2236,"resource_url":2237,"first_page":63,"last_page":63,"pdf_url":2238,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2239,"paper_type":860,"authors":2240,"abstract":2247},"lrec2002-main-081","Implementation and Evaluation of PAROLE PoS in a National Context","10.63317\u002F5ftzd44ee38h","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-081","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F81.pdf","dutilh-kruyt-2002-implementation",[2241,2244],{"paper_id":2234,"author_seq":247,"given_name":2242,"surname":2243,"affiliation":63,"orcid":63},"Tilly","Dutilh",{"paper_id":2234,"author_seq":232,"given_name":2245,"surname":2246,"affiliation":63,"orcid":63},"Truus","Kruyt","We are annotating the complete 20 million Dutch PAROLE corpus with PoS and lemma.  The morphosyntactic tagging of 250,000 words during the PAROLE project was the first  confrontation of the fine-grained Dutch PAROLE tagset and its 'functional' mode of  application, with real corpus data. The correction of the manual tagging and the  compilation of a 100,000 words training corpus for the automatic tagger initiated the  evaluation of the suitability of the tagset and the methodology of tag assignment, which  topics will both be discussed in this paper. The reality of corpus data brought about a  number of adaptations, linguistic restrictions and generalisations. The most salient tagger  results will be presented. Our experience is relevant for a new project: the Integrated  Language Database of 8th - 21st Century Dutch (ILD), which will contain a text corpus  covering all these centuries. The corpus will be annotated with lemma and PoS, in which  process historical lexica will be used. 
Obviously, we will have to tailor tagset and  methodology of tag assignment optimally to these purposes.",{"paper_id":2249,"title":2250,"year":213,"month":855,"day":63,"doi":2251,"resource_url":2252,"first_page":63,"last_page":63,"pdf_url":2253,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2254,"paper_type":860,"authors":2255,"abstract":2264},"lrec2002-main-082","A Machine Learning Approach to Automatic Functor Assignment in the Prague Dependency Treebank","10.63317\u002F5geikyhnbg9f","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-082","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F82.pdf","zabokrtsky-etal-2002-machine",[2256,2259,2261],{"paper_id":2249,"author_seq":247,"given_name":2257,"surname":2258,"affiliation":63,"orcid":63},"Zdeněk","Žabokrtský",{"paper_id":2249,"author_seq":232,"given_name":2209,"surname":2260,"affiliation":63,"orcid":63},"Sgall",{"paper_id":2249,"author_seq":218,"given_name":2262,"surname":2263,"affiliation":63,"orcid":63},"Sašo","Džeroski","The aim of this paper is to describe and evaluate a  system that automates a part of the transition from analytical to  tectogrammatical tree structures within the Prague Dependency Treebank.  In particular, it assigns functors to autosemantic words. The system is  based on the machine learning approach of decision tree induction. 
The  resulting software tool is incorporated into the annotation process and  significantly reduces the manual annotation effort during the transition  from analytical tree structures to the tectogrammatical tree structures,  which consumes a huge amount of time of linguistic experts.",{"paper_id":2266,"title":2267,"year":213,"month":855,"day":63,"doi":2268,"resource_url":2269,"first_page":63,"last_page":63,"pdf_url":2270,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2271,"paper_type":860,"authors":2272,"abstract":63},"lrec2002-main-083","How to build a multilingual inheritance-based lexicon","10.63317\u002F566v7ujtn65u","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-083","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F83.pdf","tiberius-2002-build",[2273],{"paper_id":2266,"author_seq":247,"given_name":2274,"surname":2275,"affiliation":63,"orcid":63},"Carole","Tiberius",{"paper_id":2277,"title":2278,"year":213,"month":855,"day":63,"doi":2279,"resource_url":2280,"first_page":63,"last_page":63,"pdf_url":2281,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2282,"paper_type":860,"authors":2283,"abstract":2291},"lrec2002-main-084","A typological database of agreement","10.63317\u002F3jgmnxauncws","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-084","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F84.pdf","tiberius-etal-2002-typological",[2284,2285,2288],{"paper_id":2277,"author_seq":247,"given_name":2274,"surname":2275,"affiliation":63,"orcid":63},{"paper_id":2277,"author_seq":232,"given_name":2286,"surname":2287,"affiliation":63,"orcid":63},"Dunstan","Brown",{"paper_id":2277,"author_seq":218,"given_name":2289,"surname":2290,"affiliation":63,"orcid":63},"Greville","Corbett","This paper discusses the construction of a typological  database of agreement on the basis of fifteen languages taken from different language families so as to 
maximise diversity.  For each of these languages, the database will contain detailed information about  agreement controllers, targets, domains, categories, and conditions. Thus the database is  designed to help us to develop a general typology of agreement systems which predicts  what is, and what is not a possible agreement system in natural language. This is  primarily a theoretical aim, but the database may also have practical applications in that  agreement has implications for the design of parsers in natural language systems.",{"paper_id":2293,"title":2294,"year":213,"month":855,"day":63,"doi":2295,"resource_url":2296,"first_page":63,"last_page":63,"pdf_url":2297,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2298,"paper_type":860,"authors":2299,"abstract":2303},"lrec2002-main-085","The Web as a Resource for Question Answering: Perspectives and Challenges","10.63317\u002F5dhm4mrjh979","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-085","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F85.pdf","lin-2002-web",[2300],{"paper_id":2293,"author_seq":247,"given_name":2301,"surname":2302,"affiliation":63,"orcid":63},"Jimmy","Lin","The vast amounts of information readily available on the World Wide  Web can be effectively used for question answering in two fundamentally different ways.  In the  federated approach, techniques for handling semistructured data are applied to access  Web sources as if they were databases, allowing large classes of common questions to  be  answered uniformly.  In the distributed approach, large-scale text-processing techniques  are used to extract answers directly from unstructured Web documents.  Because the Web   is orders of magnitude larger than any human-collected corpus, question answering  systems can capitalize on its unparalleled-levels of data redundancy.  
Analysis of  real-world user questions reveals that the federated and distributed approaches  complement each other nicely, suggesting a hybrid approach in future question answering  systems.",{"paper_id":2305,"title":2306,"year":213,"month":855,"day":63,"doi":2307,"resource_url":2308,"first_page":63,"last_page":63,"pdf_url":2309,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2310,"paper_type":860,"authors":2311,"abstract":2318},"lrec2002-main-086","Automatic paraphrasing based on parallel corpus for normalization","10.63317\u002F3sguu3wb9wga","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-086","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F86.pdf","shimohata-sumita-2002-automatic",[2312,2315],{"paper_id":2305,"author_seq":247,"given_name":2313,"surname":2314,"affiliation":63,"orcid":63},"Mitsuo","Shimohata",{"paper_id":2305,"author_seq":232,"given_name":2316,"surname":2317,"affiliation":63,"orcid":63},"Eiichiro","Sumita","There are various ways to express the same meaning in natural language.  This  diversity causes difficulty in many fields of natural language processing.  It  can be reduced by normalization of synonymous expressions, which is done by  replacing various synonymous expressions with a standard one.  In this paper, we  propose a method for extracting paraphrases from a parallel corpus automatically  and utilizing them for normalization.  First, synonymous sentences are grouped  by the equivalence of translation. Then, synonymous expressions are extracted  by the differences between synonymous sentences.  Synonymous expressions contain  not only interchangeable words but also surrounding words in order to consider  contextual condition.  
Our method has two advantages: 1) only a parallel corpus  is required, and 2) various types of paraphrases can be acquired.",{"paper_id":2320,"title":2321,"year":213,"month":855,"day":63,"doi":2322,"resource_url":2323,"first_page":63,"last_page":63,"pdf_url":2324,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2325,"paper_type":860,"authors":2326,"abstract":2330},"lrec2002-main-087","Speech to Speech Translation: Present and Future Challenges","10.63317\u002F4ev7xif639bd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-087","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F87.pdf","lazzari-2002-speech",[2327],{"paper_id":2320,"author_seq":247,"given_name":2328,"surname":2329,"affiliation":63,"orcid":63},"Gianni","Lazzari","Significant progress has been made in the field of  human language technologies. Various tasks like continuous speech  recognition for large vocabulary, speaker and language identification,  spoken information inquiry, information extraction and cross-language  retrieval in restricted domains are today feasible and different  prototypes and systems are running. The spoken translation problem on  the other hand is still a significant challenge: \"Good text  translation was hard enough to pull off. Speech to speech MT was beyond  going to the Moon – it was Mars…\" [Steve Silbermann, Wired  Magazine]. Research issues and approaches to the spoken translation  problem will be reviewed by considering present projects and achieved  results. Moreover foreseen applications offered by the Web to  multilingual person to person communication will be introduced. Issues  related to portability, language resources and evaluation will also be  discussed. Finally a video of NESPOLE! 
project, a common EU NSF funded  project, exploring future applications in the e-commerce and e-service  sectors will be shown.",{"paper_id":2332,"title":2333,"year":213,"month":855,"day":63,"doi":2334,"resource_url":2335,"first_page":63,"last_page":63,"pdf_url":2336,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2337,"paper_type":860,"authors":2338,"abstract":2345},"lrec2002-main-088","Databases of Heterogeneous Segments for Concatenative Speech Synthesis","10.63317\u002F2cbwpktcyfys","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-088","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F88.pdf","kopecek-pala-2002-databases",[2339,2342],{"paper_id":2332,"author_seq":247,"given_name":2340,"surname":2341,"affiliation":63,"orcid":63},"Ivan","Kopeček",{"paper_id":2332,"author_seq":232,"given_name":2343,"surname":2344,"affiliation":63,"orcid":63},"Karel","Pala","Heterogeneous segments can enhance the quality of  concatenative speech synthesis especially for highly inflected  languages. In this paper we present a brief analysis of the segment  types on a general level and discuss the problems related to optimising  databases of heterogeneous segments. We present a brief discussion of  the algorithmical complexity for the proposed approach and offer some  heuristics for optimizing databases of heterogeneous segments. 
We also  mention the syllable and morphemic segments in relation to the  development of the Czech speech synthesis system Demosthenes.",{"paper_id":2347,"title":2348,"year":213,"month":855,"day":63,"doi":2349,"resource_url":2350,"first_page":63,"last_page":63,"pdf_url":2351,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2352,"paper_type":860,"authors":2353,"abstract":2361},"lrec2002-main-089","Preliminary Evaluation of Slovenian Mobile Database PoliDat","10.63317\u002F2epsufmjt93y","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-089","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F89.pdf","zgank-etal-2002-preliminary",[2354,2357,2360],{"paper_id":2347,"author_seq":247,"given_name":2355,"surname":2356,"affiliation":63,"orcid":63},"Andrej","Žgank",{"paper_id":2347,"author_seq":232,"given_name":2358,"surname":2359,"affiliation":63,"orcid":63},"Zdravko","Kačič",{"paper_id":2347,"author_seq":218,"given_name":1392,"surname":1393,"affiliation":63,"orcid":63},"The following paper describes the preliminary speech recognition  evaluation of PoliDat database. This new database contains Slovenian speech  captured over mobile telephones.  The design of database is modeled  according to the SpeechDat(II) specifications. The recording of speech material and the format of the database are shortly described. The  speech recognition experiment is based on slightly modified COST 249 refrec0.96 script.  Acoustic HMM speech models are trained on the fixed telephone Slovenian 1000 FDB  SpeechDat(II) database. 40 speakers were taken from mobile PoliDat database, 20 for  test set and 20 for adaptation set. First the signal to noise ratio of all recordings was  calculated, then the speech recognition with unadapted acoustic models was performed.  In the next step the retraining of acoustic models and maximum likelihood linear  regression procedure were used for adaptation. 
In the last step, the adapted acoustic  models were used for speech recognition with the PoliDat database. The adaptation  procedures significantly improved the mobile speech recognition with fixed acoustic  models. The overall word error rate decreased from 46.5% for unadapted models to  19.1% and 5.2% for adapted models.",{"paper_id":2363,"title":2364,"year":213,"month":855,"day":63,"doi":2365,"resource_url":2366,"first_page":63,"last_page":63,"pdf_url":2367,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2368,"paper_type":860,"authors":2369,"abstract":2379},"lrec2002-main-090","Evaluating resource acquisition tools for Information Extraction","10.63317\u002F28pyza9dd65r","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-090","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F90.pdf","poibeau-etal-2002-evaluating",[2370,2373,2376],{"paper_id":2363,"author_seq":247,"given_name":2371,"surname":2372,"affiliation":63,"orcid":63},"Thierry","Poibeau",{"paper_id":2363,"author_seq":232,"given_name":2374,"surname":2375,"affiliation":63,"orcid":63},"Dominique","Dutoit",{"paper_id":2363,"author_seq":218,"given_name":2377,"surname":2378,"affiliation":63,"orcid":63},"Sophie","Bizouard","This paper evaluates two different approaches for the  elaboration of semantic classes. The framework is an Information  Extraction, which needs large amount of domain-dependent resources. An  endogenous approach (corpus-based learning) is contrasted with a  heterogeneous one (the use of a large semantic network). The two  techniques are evaluated. Cet article vise à évaluer deux approches  différentes pour la constitution de classes sémantiques. Nous nous  plaçons dans la perspective d’une application d’extraction d’information,  pour laquelle la notion de classe sémantique est primordiale. Une  approche endogène (acquisition à partir d’un corpus) est contrastée   avec une approche exogène (à travers un réseau sémantique riche). 
L’article  présente une évaluation fine de ces deux techniques et leur  complémentarité possible.",{"paper_id":2381,"title":2382,"year":213,"month":855,"day":63,"doi":2383,"resource_url":2384,"first_page":63,"last_page":63,"pdf_url":2385,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2386,"paper_type":860,"authors":2387,"abstract":2392},"lrec2002-main-091","An Algorithm to Find Words from Definitions","10.63317\u002F4mef2zdnxztr","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-091","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F91.pdf","dutoit-nugues-2002-algorithm",[2388,2389],{"paper_id":2381,"author_seq":247,"given_name":2374,"surname":2375,"affiliation":63,"orcid":63},{"paper_id":2381,"author_seq":232,"given_name":2390,"surname":2391,"affiliation":63,"orcid":63},"Pierre","Nugues","This paper presents a system to find automatically  words from a definition or a paraphrase. The system uses a lexical  database of French words that is comparable in its size to WordNet and  an algorithm that evaluates distances in the semantic graph between  hypernyms and hyponyms of the words in the definition. The paper first  outlines the structure of the lexical network on which the method is  based. It then describes the algorithm. 
Finally, it concludes with  examples of results we have obtained.",{"paper_id":2394,"title":2395,"year":213,"month":855,"day":63,"doi":2396,"resource_url":2397,"first_page":63,"last_page":63,"pdf_url":2398,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2399,"paper_type":860,"authors":2400,"abstract":2406},"lrec2002-main-092","Lithuanian Speech Database LTDIGITS","10.63317\u002F27vwuhqttbro","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-092","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F92.pdf","rudzionis-rudzionis-2002-lithuanian",[2401,2404],{"paper_id":2394,"author_seq":247,"given_name":2402,"surname":2403,"affiliation":63,"orcid":63},"Algimantas","Rudzionis",{"paper_id":2394,"author_seq":232,"given_name":2405,"surname":2403,"affiliation":63,"orcid":63},"Vytautas","The Lithuanian speech database LTDIGITS was  developed. Some details of this database could be of more general  interest. These features are related with collected set of nasal  consonant realizations in different vowel contexts. First, LTDIGITS  contains nasal – vowel syllables where nasal is before open, middle  and closed vowels. Second, the database includes special continuous  phrase with above mentioned nasal – vowel syllables. These nasal-vowel  pairs are in the stressed positions in the front of short 2 – 3  syllable words. 
Third, both the utterances to words and word to phones  marking and labeling procedures were applied and are presented here.",{"paper_id":2408,"title":2409,"year":213,"month":855,"day":63,"doi":2410,"resource_url":2411,"first_page":63,"last_page":63,"pdf_url":2412,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2413,"paper_type":860,"authors":2414,"abstract":2427},"lrec2002-main-093","Building domain specific lexical hierarchies from corpora","10.63317\u002F4jemifg3sv4o","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-093","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F93.pdf","ferret-etal-2002-building",[2415,2418,2421,2424],{"paper_id":2408,"author_seq":247,"given_name":2416,"surname":2417,"affiliation":63,"orcid":63},"Olivier","Ferret",{"paper_id":2408,"author_seq":232,"given_name":2419,"surname":2420,"affiliation":63,"orcid":63},"Christian","Fluhr",{"paper_id":2408,"author_seq":218,"given_name":2422,"surname":2423,"affiliation":63,"orcid":63},"Françoise","Rousseau-Hans",{"paper_id":2408,"author_seq":203,"given_name":2425,"surname":2426,"affiliation":63,"orcid":63},"Jean-Luc","Simoni","In this article, we present a new algorithm for building domain specific lexical  hierarchies from texts. The basic elements of such a hierarchy are the normalized terms - mono and multi-word terms - extracted from a large corpus by a terminological extractor.  The algorithm relies on collocations for representing the meaning of these terms, finding hierarchical relations between them and finally, organizing them into a hierarchy.  Moreover, it takes into account the polysemy of terms while it builds the hierarchy. 
We also present the results of its application on a part of the corpus designed for the ARC A3 of the Francil network and we go through its possible applications.",{"paper_id":2429,"title":2430,"year":213,"month":855,"day":63,"doi":2431,"resource_url":2432,"first_page":63,"last_page":63,"pdf_url":2433,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2434,"paper_type":860,"authors":2435,"abstract":2442},"lrec2002-main-094","Evaluation of Machine Learning Methods for Natural Language Processing Tasks","10.63317\u002F2wmcmskiy5tm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-094","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F94.pdf","daelemans-hoste-2002-evaluation",[2436,2439],{"paper_id":2429,"author_seq":247,"given_name":2437,"surname":2438,"affiliation":63,"orcid":63},"Walter","Daelemans",{"paper_id":2429,"author_seq":232,"given_name":2440,"surname":2441,"affiliation":63,"orcid":63},"Véronique","Hoste","We show that the methodology currently in use for comparing symbolic  supervised learning methods applied to human language technology tasks is unreliable. We show that  the interaction between algorithm parameter settings and feature selection within a single  algorithm often accounts for a higher variation in results than differences  between different algorithms or information sources.  We illustrate this with experiments on a  number of linguistic datasets. 
The consequences of this phenomenon are far-reaching, and  we discuss possible solutions to this methodological problem.",{"paper_id":2444,"title":2445,"year":213,"month":855,"day":63,"doi":2446,"resource_url":2447,"first_page":63,"last_page":63,"pdf_url":2448,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2449,"paper_type":860,"authors":2450,"abstract":2456},"lrec2002-main-095","An evaluation of different symbolic shallow parsing techniques","10.63317\u002F5md2jzyzhthx","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-095","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F95.pdf","van-rullen-blache-2002-evaluation",[2451,2454],{"paper_id":2444,"author_seq":247,"given_name":2452,"surname":2453,"affiliation":63,"orcid":63},"Tristan","Van Rullen",{"paper_id":2444,"author_seq":232,"given_name":1051,"surname":2455,"affiliation":63,"orcid":63},"Blache","This paper presents an evaluation of four shallow  parsers. The interest of each of these parsers led us to imagine a  parameterized multiplexer for syntactic information based on the  principle of merging the common boundaries of the outputs given by each  of these programs. The question of evaluating the parsers as well as the  multiplexer came in the foreground with the problem of not owning  reference corpora. We attempt here to demonstrate the interest of  observing the ‘common boundaries’ produced by different parsers as  good indices for the evaluation of these algorithms. 
Such an evaluation  is proposed and tested with a set of two experiences.",{"paper_id":2458,"title":2459,"year":213,"month":855,"day":63,"doi":2460,"resource_url":2461,"first_page":63,"last_page":63,"pdf_url":2462,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2463,"paper_type":860,"authors":2464,"abstract":2481},"lrec2002-main-096","Annotation of prominent words, prosodic boundaries and segmental lengthening by non-expert transcribers in the Spoken Dutch Corpus","10.63317\u002F548nbjpage3j","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-096","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F96.pdf","buhmann-etal-2002-annotation",[2465,2468,2471,2474,2475,2478],{"paper_id":2458,"author_seq":247,"given_name":2466,"surname":2467,"affiliation":63,"orcid":63},"Jeska","Buhmann",{"paper_id":2458,"author_seq":232,"given_name":2469,"surname":2470,"affiliation":63,"orcid":63},"Johanneke","Caspers",{"paper_id":2458,"author_seq":218,"given_name":2472,"surname":2473,"affiliation":63,"orcid":63},"Vincent J.","van Heuven",{"paper_id":2458,"author_seq":203,"given_name":2076,"surname":2077,"affiliation":63,"orcid":63},{"paper_id":2458,"author_seq":188,"given_name":2476,"surname":2477,"affiliation":63,"orcid":63},"Jean-Pierre","Martens",{"paper_id":2458,"author_seq":172,"given_name":2479,"surname":2480,"affiliation":63,"orcid":63},"Marc","Swerts","This paper first describes the aims of the prosodic  annotation for (part of) the Spoken Dutch Corpus (Corpus Gesproken  Nederlands, CGN), and the procedures that are currently being developed  to produce the annotation. It further reports on a pilot study that was  run to estimate the costs and the attainable quality (in terms of  inter-transcriber consistency) of the envisaged annotation. 
It is our  claim that high-quality prosodic annotation (of prominence, prosodic  breaks, and unusual segmental lengthening) can be obtained by  nonexperts, provided these are given a strict, written protocol and a  short period of supervision and feedback.",{"paper_id":2483,"title":2484,"year":213,"month":855,"day":63,"doi":2485,"resource_url":2486,"first_page":63,"last_page":63,"pdf_url":2487,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2488,"paper_type":860,"authors":2489,"abstract":2502},"lrec2002-main-097","Word Segmentation in the Spoken Dutch Corpus","10.63317\u002F4mnid7cw7be5","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-097","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F97.pdf","martens-etal-2002-word",[2490,2491,2493,2494,2497,2498,2501],{"paper_id":2483,"author_seq":247,"given_name":2476,"surname":2477,"affiliation":63,"orcid":63},{"paper_id":2483,"author_seq":232,"given_name":872,"surname":2492,"affiliation":63,"orcid":63},"Binnenpoorte",{"paper_id":2483,"author_seq":218,"given_name":1018,"surname":1019,"affiliation":63,"orcid":63},{"paper_id":2483,"author_seq":203,"given_name":2495,"surname":2496,"affiliation":63,"orcid":63},"Ruben","Van Parys",{"paper_id":2483,"author_seq":188,"given_name":1015,"surname":1016,"affiliation":63,"orcid":63},{"paper_id":2483,"author_seq":172,"given_name":2499,"surname":2500,"affiliation":63,"orcid":63},"Wim","Goedertier",{"paper_id":2483,"author_seq":155,"given_name":1021,"surname":1022,"affiliation":63,"orcid":63},"This paper describes the aims of the word  segmentation in the Spoken Dutch Corpus  (Corpus Gesproken  Nederlands, CGN), and the procedures to create it. For one million  words, a manually verified segmentation will be created, whereas the  remaining nine million words will only come with an automatically  generated segmentation. 
Described are our efforts to create the best  possible automatic word segmentation from an auditory verified phonetic  transcription, and the development of a protocol for the manual  verification  of that automatic segmentation. The paper also mentions  some figures concerning  the manual verification of the first hundred  thousand words.",{"paper_id":2504,"title":2505,"year":213,"month":855,"day":63,"doi":2506,"resource_url":2507,"first_page":63,"last_page":63,"pdf_url":2508,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2509,"paper_type":860,"authors":2510,"abstract":2525},"lrec2002-main-098","Experiences from the Spoken Dutch Corpus Project","10.63317\u002F23v53bk3v83c","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-098","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F98.pdf","oostdijk-etal-2002-experiences",[2511,2514,2515,2517,2520,2521,2522],{"paper_id":2504,"author_seq":247,"given_name":2512,"surname":2513,"affiliation":63,"orcid":63},"Nelleke","Oostdijk",{"paper_id":2504,"author_seq":232,"given_name":2499,"surname":2500,"affiliation":63,"orcid":63},{"paper_id":2504,"author_seq":218,"given_name":2129,"surname":2516,"affiliation":63,"orcid":63},"van Eynde",{"paper_id":2504,"author_seq":203,"given_name":2518,"surname":2519,"affiliation":63,"orcid":63},"Louis","Boves",{"paper_id":2504,"author_seq":188,"given_name":2476,"surname":2477,"affiliation":63,"orcid":63},{"paper_id":2504,"author_seq":172,"given_name":2079,"surname":2080,"affiliation":63,"orcid":63},{"paper_id":2504,"author_seq":155,"given_name":2523,"surname":2524,"affiliation":63,"orcid":63},"Harald","Baayen","This paper provides an overview of the ongoing  development of a large corpus of spoken Dutch in Flanders and the  Netherlands. We outline the design of this corpus and the various layers  of annotation with which the speech signal is enriched. 
Special  attention is paid to the problems we have encountered, and to the tools  and protocols developed for obtaining consistent and reliable  annotations. We also discuss the outcome of a recent external evaluation  of our project by an international committee of experts.",{"paper_id":2527,"title":2528,"year":213,"month":855,"day":63,"doi":2529,"resource_url":2530,"first_page":63,"last_page":63,"pdf_url":2531,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2532,"paper_type":860,"authors":2533,"abstract":2537},"lrec2002-main-099","Quantitative parameters in corpus design: Estimating the optimum text size in Modern Greek language","10.63317\u002F4vt2iedto5j7","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-099","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F99.pdf","mikros-2002-quantitative",[2534],{"paper_id":2527,"author_seq":247,"given_name":2535,"surname":2536,"affiliation":63,"orcid":63},"George","Mikros","The aim of this paper is to investigate the major quantitative parameters related to the  definition of the optimum text size in Modern Greek corpus development. Using the  Hellenic National Corpus (HNC) (Hatzigeorgiu et al., 2000) as a reference point we  estimated a number of critical statistical measures regarding feature counting in different  text sizes. 
The results indicate that frequent linguistic features behave differently from the  medium frequency and the rare ones and the text size increase does not affect them  uniformly.",{"paper_id":2539,"title":2540,"year":213,"month":855,"day":63,"doi":2541,"resource_url":2542,"first_page":63,"last_page":63,"pdf_url":2543,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2544,"paper_type":860,"authors":2545,"abstract":2556},"lrec2002-main-100","Acquisition of Qualia Elements from Corpora - Evaluation of a Symbolic Learning Method","10.63317\u002F5fgzmugic89f","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-100","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F100.pdf","bouillon-etal-2002-acquisition",[2546,2549,2551,2554],{"paper_id":2539,"author_seq":247,"given_name":2547,"surname":2548,"affiliation":63,"orcid":63},"Pierrette","Bouillon",{"paper_id":2539,"author_seq":232,"given_name":922,"surname":2550,"affiliation":63,"orcid":63},"Claveau",{"paper_id":2539,"author_seq":218,"given_name":2552,"surname":2553,"affiliation":63,"orcid":63},"Cécile","Fabre",{"paper_id":2539,"author_seq":203,"given_name":1273,"surname":2555,"affiliation":63,"orcid":63},"Sébillot","This paper presents and evaluates a system extracting from a corpus  noun-verb pairs whose components are related by a special kind of link: the qualia roles as defined in the   Generative Lexicon. This system is based on a symbolic learning method that  automatically learns, from noun-verb pairs that are or are not related by a qualia link,  rules characterizing positive examples from negative ones in terms of their surrounding  part-of-speech or semantic contexts. The qualia noun-verb pair extraction is thus  performed by applying the learnt rules on a part-of-speech or semantically tagged text.  Stress is put on the quality of the learning when compared with traditional  statistical or syntactical-based approaches. 
The linguistic relevance of the  rules is also evaluated through a comparison with manually acquired qualia patterns.",{"paper_id":2558,"title":2559,"year":213,"month":855,"day":63,"doi":2560,"resource_url":2561,"first_page":63,"last_page":63,"pdf_url":2562,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2563,"paper_type":860,"authors":2564,"abstract":2574},"lrec2002-main-101","Methods and Tools for Prosodic Analysis of a Spoken Italian Corpus","10.63317\u002F43tckpzrbc4n","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-101","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F101.pdf","savino-etal-2002-methods",[2565,2568,2571],{"paper_id":2558,"author_seq":247,"given_name":2566,"surname":2567,"affiliation":63,"orcid":63},"Michelina","Savino",{"paper_id":2558,"author_seq":232,"given_name":2569,"surname":2570,"affiliation":63,"orcid":63},"Mario","Refice",{"paper_id":2558,"author_seq":218,"given_name":2572,"surname":2573,"affiliation":63,"orcid":63},"Domenico","Daleno","In the last few years, a number of actions has been carried out in Italy with the goal of  collecting, annotating and making available a considerable amount of data of spoken  Italian varieties. After a first phase, in which the AVIP corpus has been collected and  transcribed at both segmental and suprasegmental levels, now research efforts have been  concentrating on corpus analysis, starting from two preliminary yet crucial aspect,  namely: a) developing strategies and software tools for controlling the semantic  coherence of the AVIP database; and b) designing a DBMS scheme for allowing easy  access to the data and for rendering the results of the online queries in a user-friendly  manner, also by means of special graphical interfaces. 
In this paper both aspects are  presented and discussed, focussing on the prosodic analysis of the database, in terms of  the methodologies followed in the intonation labelling phase as well as the consequent  strategies adopted in the implementation of software tools for prosodic analysis.",{"paper_id":2576,"title":2577,"year":213,"month":855,"day":63,"doi":2578,"resource_url":2579,"first_page":63,"last_page":63,"pdf_url":2580,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2581,"paper_type":860,"authors":2582,"abstract":2587},"lrec2002-main-102","Language Resources for Multi-Modal Dialogue Systems.","10.63317\u002F5p52ufzim6yk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-102","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F102.pdf","lemon-gruenstein-2002-language",[2583,2585],{"paper_id":2576,"author_seq":247,"given_name":1166,"surname":2584,"affiliation":63,"orcid":63},"Lemon",{"paper_id":2576,"author_seq":232,"given_name":2059,"surname":2586,"affiliation":63,"orcid":63},"Gruenstein","This paper reviews a resource base of software agents  for hub-based architectures, which can be used generally for advanced  dialogue systems research and deployment. The problem of  domain-specificity of dialogue managers is discussed, and we describe an  approach to it developed at CSLI, involving a domain-general dialogue  manager with application specific “Activity Models”. 
We also describe  relevant grammar development tools.",{"paper_id":2589,"title":2590,"year":213,"month":855,"day":63,"doi":2591,"resource_url":2592,"first_page":63,"last_page":63,"pdf_url":2593,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2594,"paper_type":860,"authors":2595,"abstract":2605},"lrec2002-main-103","Using Parallel Corpora to enrich Multilingual Lexical Resources","10.63317\u002F22my2kcxk9c4","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-103","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F103.pdf","widdows-etal-2002-using",[2596,2599,2602],{"paper_id":2589,"author_seq":247,"given_name":2597,"surname":2598,"affiliation":63,"orcid":63},"Dominic","Widdows",{"paper_id":2589,"author_seq":232,"given_name":2600,"surname":2601,"affiliation":63,"orcid":63},"Beate","Dorow",{"paper_id":2589,"author_seq":218,"given_name":2603,"surname":2604,"affiliation":63,"orcid":63},"Chiu-Ki","Chan","This paper describes the use of a bilingual vector  model for the automatic discovery of German translations of English  terms. The model is built by analysing co-occurence patterns in a  parallel corpus of English and German medical abstracts, a method also  used for Cross- Lingual Information Retrieval. The model generates  candidate German translations of English words using the cosine  similarity measure between terms in the bilingual vector space. The  correct translations could be added to UMLS, the multilingual dictionary  in question. The accuracy of the translations is evaluated by measuring  how many of the existing UMLS translations are correctly predicted by  the vector translations. The model also detects synonymy, particularly  acronyms. 
An online public demonstration of the model is available.",{"paper_id":2607,"title":2608,"year":213,"month":855,"day":63,"doi":2609,"resource_url":2610,"first_page":63,"last_page":63,"pdf_url":2611,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2612,"paper_type":860,"authors":2613,"abstract":2620},"lrec2002-main-104","Evaluation and collection of proper name pronunciations online","10.63317\u002F455iha9rzt4h","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-104","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F104.pdf","font-llitjos-black-2002-evaluation",[2614,2617],{"paper_id":2607,"author_seq":247,"given_name":2615,"surname":2616,"affiliation":63,"orcid":63},"Ariadna","Font Llitjós",{"paper_id":2607,"author_seq":232,"given_name":2618,"surname":2619,"affiliation":63,"orcid":63},"Alan W.","Black","Objective evaluation allows a model to be compared with other similar models.  However, automatic pronunciation models should also be extensively evaluated by  humans, since the ultimate goal of any pronunciation model is to produce an accurate  pronunciation as judged by most people. This paper describes an initiative to evaluate  and collect proper name pronunciations online, the development of the US Pronunciation  of Proper Names Site (www.pronounce-names.org), and the results obtained so far. The  internet, through our web-based interface, has already proven to be a very successful  medium both in terms of number of evaluations and in terms of data collection. In 5  weeks, it has brought to our site 601 users, which have evaluated 477 names and  corrected 281 pronunciations. 
The information gathered is useful to improve our  pronunciation models, as well as to (automatically) correct the pronunciations in the  CMU dictionary.",{"paper_id":2622,"title":2623,"year":213,"month":855,"day":63,"doi":2624,"resource_url":2625,"first_page":63,"last_page":63,"pdf_url":2626,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2627,"paper_type":860,"authors":2628,"abstract":2640},"lrec2002-main-105","Modal Expressions in Natural Language Sentence and Their Similarity","10.63317\u002F2c98bbmftoyv","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-105","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F105.pdf","tanabe-etal-2002-modal",[2629,2632,2635,2637],{"paper_id":2622,"author_seq":247,"given_name":2630,"surname":2631,"affiliation":63,"orcid":63},"Toshifumi","Tanabe",{"paper_id":2622,"author_seq":232,"given_name":2633,"surname":2634,"affiliation":63,"orcid":63},"Yasuo","Koyama",{"paper_id":2622,"author_seq":218,"given_name":1849,"surname":2636,"affiliation":63,"orcid":63},"Yoshimura",{"paper_id":2622,"author_seq":203,"given_name":2638,"surname":2639,"affiliation":63,"orcid":63},"Kosho","Shudo","This paper is concerned with the treatment of modal  information in natural language processing (NLP). Modal information,  which fleshes out the kernel sentence, providing temporal,  interpersonal, contingent or subjective information, i.e., polarity,  tense, aspect, mood, modality in narrow sense, specific kinds of speaker’s  judgment or attitude, etc plays an important role especially in  discourse understanding, man-machine dialogue, inference system, etc. On  the other hand, It is important for future NLP systems to formulate the  semantic similarity of natural language expressions. In particular,  paraphrasing, full text information retrieval, example-based MT and  document compression technology require the effective similarity  criterion for linguistic expressions. 
In this paper, first, we discuss  the meaning of Japanese sentence-final modality expressions (ME) and  second, we present similarity rules.",{"paper_id":2642,"title":2643,"year":213,"month":855,"day":63,"doi":2644,"resource_url":2645,"first_page":63,"last_page":63,"pdf_url":2646,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2647,"paper_type":860,"authors":2648,"abstract":2669},"lrec2002-main-106","CATCG: a general purpose parsing tool applied","10.63317\u002F2ki2pdt5tjvm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-106","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F106.pdf","alsina-etal-2002-catcg",[2649,2652,2655,2658,2660,2663,2666],{"paper_id":2642,"author_seq":247,"given_name":2650,"surname":2651,"affiliation":63,"orcid":63},"Alex","Alsina",{"paper_id":2642,"author_seq":232,"given_name":2653,"surname":2654,"affiliation":63,"orcid":63},"Toni","Badia",{"paper_id":2642,"author_seq":218,"given_name":2656,"surname":2657,"affiliation":63,"orcid":63},"Gemma","Boleda",{"paper_id":2642,"author_seq":203,"given_name":1452,"surname":2659,"affiliation":63,"orcid":63},"Bott",{"paper_id":2642,"author_seq":188,"given_name":2661,"surname":2662,"affiliation":63,"orcid":63},"Àngel","Gil",{"paper_id":2642,"author_seq":172,"given_name":2664,"surname":2665,"affiliation":63,"orcid":63},"Martí","Quixal",{"paper_id":2642,"author_seq":155,"given_name":2667,"surname":2668,"affiliation":63,"orcid":63},"Oriol","Valentín","This paper focuses on the language processing tool being developed at our centre and briefly describes two of its applications. CATCG, our morphosyntactic analyser, is designed to deal with general written Catalan text. In CATCG the whole processing task has been divided into specific subtasks and for each one of them we try to apply the best strategy available. 
The most relevant properties of our system are its robustness, the fact that we have given reusability a very high priority, and the goal of acquiring linguistic information by fully automatic means.",{"paper_id":2671,"title":2672,"year":213,"month":855,"day":63,"doi":2673,"resource_url":2674,"first_page":63,"last_page":63,"pdf_url":2675,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2676,"paper_type":860,"authors":2677,"abstract":2680},"lrec2002-main-107","Does the Content of Speech Influence its Perceived Sound Quality?","10.63317\u002F2u59jtw2hw8c","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-107","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F107.pdf","raake-2002-content",[2678],{"paper_id":2671,"author_seq":247,"given_name":2059,"surname":2679,"affiliation":63,"orcid":63},"Raake","From a user’s perspective, the speech quality of modern telecommunication systems often differs from that of traditional wireline telephone systems. One aspect is a changed sound of the interlocutor’s voice ­ introduced by an expansion of the transmission-bandwidth to wide-band, by low-bitrate coding and\u002For by the acoustic properties of specific  user-interfaces. In order to quantify the effect of transmission on speech quality,  subjective data to be correlated to transmission characteristics have to be collected in auditory tests. In this paper, a study is presented investigating in how far the content of specific speech material used in a listening-only test impacts its perceived sound quality. A set of French speech data was presented to two different groups of listeners: French native speakers and listeners without knowledge of French. The speech material consists of different text types, such as everyday speech or semantically unpredictable sentences (SUS). The listeners were asked to rate the sound quality of the transmitted voice on a one-dimensional category rating scale. 
The French listeners’ ratings were found to be lower for SUS, while those of the non-French listeners did not show any major dependency on text material. Hence, it can be stated that if a given speech sign is understood by the listeners, they are unable to separate form from function and reflect content in their ratings of sound.",{"paper_id":2682,"title":2683,"year":213,"month":855,"day":63,"doi":2684,"resource_url":2685,"first_page":63,"last_page":63,"pdf_url":2686,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2687,"paper_type":860,"authors":2688,"abstract":2692},"lrec2002-main-108","Issues in the design, construction and use of Language Resources (LR) for Endangered Languages (Els)","10.63317\u002F2umcdggvyjco","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-108","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F108.pdf","ward-2002-issues",[2689],{"paper_id":2682,"author_seq":247,"given_name":2690,"surname":2691,"affiliation":63,"orcid":63},"Monica","Ward","Growth in the development of Human Language  Technologies (HLT) means that it is easier to document and archive  languages than has been the case in the past. This is especially  important in the Endangered Language (EL) context where it is imperative  to document the language while its remaining speakers are still alive.  This paper outlines the additional constraints that prevail when  documenting languages in the EL context and how Computer Assisted  Language Learning (CALL) development can help in language documentation  exercises. It also highlights the importance of the management of the  Language Resources (LR) once they have been procured, including the need  to provide different access rights to the material depending on the EL  community requirements. 
A forward looking, flexible technology is  essential to ensure that current LR are not made obsolete by changes in  technology and XML technologies offer a suitable platform in this  regard. The paper presents a case study of the development of CALL  materials for Nawat, an EL of El Salvador and the ensuing language  documentation benefits that arose from the project.",{"paper_id":2694,"title":2695,"year":213,"month":855,"day":63,"doi":2696,"resource_url":2697,"first_page":63,"last_page":63,"pdf_url":2698,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2699,"paper_type":860,"authors":2700,"abstract":2707},"lrec2002-main-109","TTS - A Treebank Tool Suite","10.63317\u002F27kdnj849dk8","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-109","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F109.pdf","cahill-van-genabith-2002-tts",[2701,2704],{"paper_id":2694,"author_seq":247,"given_name":2702,"surname":2703,"affiliation":63,"orcid":63},"Aoife","Cahill",{"paper_id":2694,"author_seq":232,"given_name":2705,"surname":2706,"affiliation":63,"orcid":63},"Josef","van Genabith","Treebanks are important resources in descriptive,  theoretical and computational linguistic research, development and  teaching. This paper presents a treebank tool suite (TTS) for and  derived from the Penn-II treebank resource (Marcus et al, 1993). The  tools include treebank inspection and viewing options which support  search for CF-PSG rule tokens extracted from the treebank, graphical  display of complete trees containing the rule instance, display of  subtrees rooted by the rule instance and display of the yield of the  subtree (with or without context). The search can be further restricted  by constraining the yield to contain particular strings. Rules can be  ordered by frequency and the user can set frequency thresholds. 
To  process new text, the tool suite provides a PCFG chart parser   (based on the CYK algorithm) operating on CFG grammars extracted from  the treebank following the method of (Charniak, 1996) as well as a HMM  bi-\u002Ftrigram tagger trained on  the tagged version of the treebank  resource. The system is implemented in Java and Perl. We employ the  InterArbora module based on the Thistle display engine (LTG, 2001) as  our tree grapher.",{"paper_id":2709,"title":2710,"year":213,"month":855,"day":63,"doi":2711,"resource_url":2712,"first_page":63,"last_page":63,"pdf_url":2713,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2714,"paper_type":860,"authors":2715,"abstract":2723},"lrec2002-main-110","The Pronouncing Dictionary of Austrian German and the other Major Varieties of German - A Phonetic Resources Database on the Pronunciation of German","10.63317\u002F3afnoso8m73v","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-110","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F110.pdf","muhr-etal-2002-pronouncing",[2716,2719,2721],{"paper_id":2709,"author_seq":247,"given_name":2717,"surname":2718,"affiliation":63,"orcid":63},"Rudolf","Muhr",{"paper_id":2709,"author_seq":232,"given_name":1181,"surname":2720,"affiliation":63,"orcid":63},"Hölrdich",{"paper_id":2709,"author_seq":218,"given_name":1911,"surname":2722,"affiliation":63,"orcid":63},"Wächter-Kollpache","The paper gives a comprehensive overview on the project \"Varieties of Austrian German - Standard pronunciation and varieties of standard pronunciation\" whose primary goal is the creation of a pronouncing dictionary of Austrian German and the creation of a large data base of audio samples for research on spoken language and different forms of pronunciation in Austria. The contents of the dictionary and the database are described in detail. 
The project is based on the idea that German is a pluricentric language which means that German and Swiss model speaker realisations will be also included in the database alongside with the Austrian model speakers. A corpus of 86.000 words spoken by 6 model speakers and a large number of texts will be published together with the dictionary on CD-ROM in 2003. The paper also gives an overview on the theoretical and methodological foundations of the project which is supported by the Austrian national broadcasting corporation and funded by the Austrian national bank. Finally the database and the user-interface is described in detail which allows a number of different queries and will have a built-in tool for the acoustic analysis of sound files chosen by the user.",{"paper_id":2725,"title":2726,"year":213,"month":855,"day":63,"doi":2727,"resource_url":2728,"first_page":63,"last_page":63,"pdf_url":2729,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2730,"paper_type":860,"authors":2731,"abstract":2745},"lrec2002-main-111","Towards a large corpus of spoken dialogue in French that will be freely available: the “Parole Publique” project and its first 
realisations","10.63317\u002F4giaq2w746pm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-111","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F111.pdf","nicolas-etal-2002-towards",[2732,2734,2736,2739,2742],{"paper_id":2725,"author_seq":247,"given_name":1273,"surname":2733,"affiliation":63,"orcid":63},"Nicolas",{"paper_id":2725,"author_seq":232,"given_name":1234,"surname":2735,"affiliation":63,"orcid":63},"Letellier-Zarshenas",{"paper_id":2725,"author_seq":218,"given_name":2737,"surname":2738,"affiliation":63,"orcid":63},"Igor","Schadle",{"paper_id":2725,"author_seq":203,"given_name":2740,"surname":2741,"affiliation":63,"orcid":63},"Jean-Yves","Antoine",{"paper_id":2725,"author_seq":188,"given_name":2743,"surname":2744,"affiliation":63,"orcid":63},"Jean","Caelen","This paper presents two corpora (OTG et ECOLE_MASSY)  which are the first delivery of the Parole_Publique (in English : Public  Speech) project held by the VALORIA laboratory. This project aims at the  achievement of a large corpus (orthographic transcription and  morpho-syntactic annotation) of spoken French dialogues. It is primarily  intended for researches on man-machine communication and will gather  various types (human-human, Wizard of Oz, man-machine) of dialogues  restricted to several specific tasks. 
The Parole Publique corpus will be  freely distributed on the WWW.",{"paper_id":2747,"title":2748,"year":213,"month":855,"day":63,"doi":2749,"resource_url":2750,"first_page":63,"last_page":63,"pdf_url":2751,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2752,"paper_type":860,"authors":2753,"abstract":2757},"lrec2002-main-112","XQuery as an Annotation Query Language: a Use Case Analysis","10.63317\u002F4awmkauo8fds","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-112","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F112.pdf","cassidy-2002-xquery",[2754],{"paper_id":2747,"author_seq":247,"given_name":2755,"surname":2756,"affiliation":63,"orcid":63},"Steve","Cassidy","Recent work has shown that single data model can represent  many different kinds of  Linguistic annotation. This data model can be  expressed equivalently as a directed graph  of temporal nodes (Bird  and Liberman, Speech Communication, 2000) as a set of  intersecting  hierarchies (Cassidy and Harrington, Speech Communication, 2000).  While some tools are being built to support this data model, there is  as yet no query language  that can be used to search annotations  stored in this way.  Since the hierarchical view of  annotations has  much in common with the XML data model, this paper examines a recent  proposal for an XML query language as a candidate annotation query  language. The  methodology used is a use case analysis.  
The result of  the analysis shows that XQuery  provides many useful features  particularly when queries include hierarchical constraints  but that  it is weak in expressing sequential constraints.",{"paper_id":2759,"title":2760,"year":213,"month":855,"day":63,"doi":2761,"resource_url":2762,"first_page":63,"last_page":63,"pdf_url":2763,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2764,"paper_type":860,"authors":2765,"abstract":2772},"lrec2002-main-113","A corpus-based investigation of junk emails","10.63317\u002F2n3no7e4qykc","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-113","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F113.pdf","orasan-krishnamurthy-2002-corpus",[2766,2769],{"paper_id":2759,"author_seq":247,"given_name":2767,"surname":2768,"affiliation":63,"orcid":63},"Constantin","Orasan",{"paper_id":2759,"author_seq":232,"given_name":2770,"surname":2771,"affiliation":63,"orcid":63},"Ramesh","Krishnamurthy","Almost everyone who has an email account receives from time to time  unwanted emails. These emails can be jokes from friends or commercial product offers from unknown people. In this paper we focus on these  unwanted messages which try to promote a product or service, or to offer some \"hot\" business opportunities. These messages are  called junk emails. Several methods to filter junk emails were proposed, but  none considers the linguistic characteristics of junk emails. In this paper, we investigate the  linguistic features of a corpus of junk emails, and try to decide if they constitute a distinct genre. Our  corpus of junk emails was build from the messages received by the authors  over a period of time. Initially, the corpus consisted of 1563, but after eliminating the   duplications automatically we kept only 673 files, totalising just over 373,000 tokens. 
In order to  decide if the junk emails constitute a different genre, a comparison with a corpus of leaflets extracted from BNC and with the whole BNC  corpus is carried out. Several characteristics at the lexical and grammatical levels were identified.",{"paper_id":2774,"title":2775,"year":213,"month":855,"day":63,"doi":2776,"resource_url":2777,"first_page":63,"last_page":63,"pdf_url":2778,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2779,"paper_type":860,"authors":2780,"abstract":2782},"lrec2002-main-114","Building annotated resources for automatic text summarisation","10.63317\u002F2u6cmrvbmgmy","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-114","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F114.pdf","orasan-2002-building",[2781],{"paper_id":2774,"author_seq":247,"given_name":2767,"surname":2768,"affiliation":63,"orcid":63},"Annotated corpora are necessary for automatic summarisation, but given  how difficult is to produce them there are only few available. This paper presents an annotation tool  which helps the human annotator to select the important units from a text. In addition to the tool, a new  annotation scheme is proposed so that phenomena which such as presence of anaphoric expressions and redundancy can be marked. 
We argue that  by annotating these phenomena the results of evaluation can be made more reliable.",{"paper_id":2784,"title":2785,"year":213,"month":855,"day":63,"doi":2786,"resource_url":2787,"first_page":63,"last_page":63,"pdf_url":2788,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2789,"paper_type":860,"authors":2790,"abstract":2797},"lrec2002-main-115","Translation Tracking System: A tool for managing translation archives","10.63317\u002F4aw8a3x6xh5h","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-115","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F115.pdf","bowker-bennison-2002-translation",[2791,2794],{"paper_id":2784,"author_seq":247,"given_name":2792,"surname":2793,"affiliation":63,"orcid":63},"Lynne","Bowker",{"paper_id":2784,"author_seq":232,"given_name":2795,"surname":2796,"affiliation":63,"orcid":63},"Peter","Bennison","The Translation Tracking System (TTS) is a database management tool intended to help translation researchers, translator trainers and translators to collect and organize archives of translated material. Relevant corpora can then be extracted from the archive in order to be further processed and analyzed using other natural language processing tools. This paper briefly describes the design and development of TTS, and it then goes on to explore how this tool has been successfully applied in an academic environment to help translator trainers identify areas of difficulty that have been encountered by their students. 
Some other applications of TTS are also discussed.",{"paper_id":2799,"title":2800,"year":213,"month":855,"day":63,"doi":2801,"resource_url":2802,"first_page":63,"last_page":63,"pdf_url":2803,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2804,"paper_type":860,"authors":2805,"abstract":2811},"lrec2002-main-116","VIQTORYA – A Visual Query Tool for Syntactically Annotated Corpora","10.63317\u002F5ku6sp6f2ybf","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-116","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F116.pdf","steiner-kallmeyer-2002-viqtorya",[2806,2809],{"paper_id":2799,"author_seq":247,"given_name":2807,"surname":2808,"affiliation":63,"orcid":63},"Ilona","Steiner",{"paper_id":2799,"author_seq":232,"given_name":1127,"surname":2810,"affiliation":63,"orcid":63},"Kallmeyer","This paper presents a query tool for syntactically                annotated corpora. The query tool is developed to                search the Tübingen Treebanks annotated at the                University of Tübingen. However, in principle it                also can be adapted to other corpora. The tool uses a                query language that allows to search for tokens,                syntactic categories, grammatical functions and binary                relations of (immediate) dominance and linear                precedence between nodes. The overall idea is to                extract in an initializing phase the relevant                information from the corpus and store it in a compact                way in a relational database.  An incoming query is                then  translated into a corresponding SQL query that is                evaluated on the database. 
A graphical user interface                allows to specify queries in a user-friendly way.",{"paper_id":2813,"title":2814,"year":213,"month":855,"day":63,"doi":2815,"resource_url":2816,"first_page":63,"last_page":63,"pdf_url":2817,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2818,"paper_type":860,"authors":2819,"abstract":2830},"lrec2002-main-117","Acquiring Lexical Knowledge for Anaphora Resolution","10.63317\u002F3vp7obnb6oct","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-117","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F117.pdf","poesio-etal-2002-acquiring",[2820,2823,2826,2827],{"paper_id":2813,"author_seq":247,"given_name":2821,"surname":2822,"affiliation":63,"orcid":63},"Massimo","Poesio",{"paper_id":2813,"author_seq":232,"given_name":2824,"surname":2825,"affiliation":63,"orcid":63},"Tomonori","Ishikawa",{"paper_id":2813,"author_seq":218,"given_name":1234,"surname":1235,"affiliation":63,"orcid":63},{"paper_id":2813,"author_seq":203,"given_name":2828,"surname":2829,"affiliation":63,"orcid":63},"Renata","Vieira","The lack of adequate bases of commonsense or even  lexical knowledge is perhaps the main obstacle to the development of  highperformance, robust tools for semantic interpretation. It is also  generally accepted that, notwithstanding the increasing availability in  recent years of substantial hand-coded lexical resources such as WordNet  and EuroWordNet, addressing the commonsense knowledge bottleneck will  eventually require the development of effective techniques for acquiring  such information automatically, e.g., from corpora. We discuss research  aimed at improving the performance of anaphora resolution systems by  acquiring the commonsense knowledge require to resolve the more complex  cases of anaphora, such as bridging references. 
We focus in particular  on the problem of acquiring information about part-of relations.",{"paper_id":2832,"title":2833,"year":213,"month":855,"day":63,"doi":2834,"resource_url":2835,"first_page":63,"last_page":63,"pdf_url":2836,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2837,"paper_type":860,"authors":2838,"abstract":2842},"lrec2002-main-118","Resources for Morphology Learning and Evaluation","10.63317\u002F2ga2crtwom38","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-118","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F118.pdf","maxwell-2002-resources",[2839],{"paper_id":2832,"author_seq":247,"given_name":2840,"surname":2841,"affiliation":63,"orcid":63},"Mike","Maxwell","Recently, there has been a proliferation of research into the acquisition of morphological grammars—that is, grammars and lexicons required for computer-based morphological analysis and synthesis. The approaches to acquiring such grammars range from tools  which structure data provided by native speakers and linguists, to unsupervised machine learning. Despite this flurry of research into morphology learning, a means of comparing results among different approaches is largely lacking. This paper describes a test bench for morphology learning, which would assist designers of morphology learning programs by providing both training and evaluation data, and would allow comparison across  programs. 
This paper is simultaneously a description of the projected form of the test bench, and a call for further input.",{"paper_id":2844,"title":2845,"year":213,"month":855,"day":63,"doi":2846,"resource_url":2847,"first_page":63,"last_page":63,"pdf_url":2848,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2849,"paper_type":860,"authors":2850,"abstract":2858},"lrec2002-main-119","Summarization System Integrated with Named Entity Tagging and IE pattern Discovery","10.63317\u002F4juinjjhcf2h","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-119","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F119.pdf","nobata-etal-2002-summarization",[2851,2854,2856,2857],{"paper_id":2844,"author_seq":247,"given_name":2852,"surname":2853,"affiliation":63,"orcid":63},"Chikashi","Nobata",{"paper_id":2844,"author_seq":232,"given_name":1099,"surname":2855,"affiliation":63,"orcid":63},"Sekine",{"paper_id":2844,"author_seq":218,"given_name":1296,"surname":1297,"affiliation":63,"orcid":63},{"paper_id":2844,"author_seq":203,"given_name":1767,"surname":1768,"affiliation":63,"orcid":63},"We have introduced information extraction technique  such as named entity tagging and pattern discovery to a summarization  system based on sentence extraction technique, and evaluated the  performance in the Document Understanding Conference 2001 (DUC-2001). 
We  participated in the Single Document Summarization task in DUC-2001 and  achieved one of the best performance in subjective evaluation of  summarization results.",{"paper_id":2860,"title":2861,"year":213,"month":855,"day":63,"doi":2862,"resource_url":2863,"first_page":63,"last_page":63,"pdf_url":2864,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2865,"paper_type":860,"authors":2866,"abstract":2872},"lrec2002-main-120","Extended Named Entity Hierarchy","10.63317\u002F26uien2xmdj7","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-120","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F120.pdf","sekine-etal-2002-extended",[2867,2868,2871],{"paper_id":2860,"author_seq":247,"given_name":1099,"surname":2855,"affiliation":63,"orcid":63},{"paper_id":2860,"author_seq":232,"given_name":2869,"surname":2870,"affiliation":63,"orcid":63},"Kiyoshi","Sudo",{"paper_id":2860,"author_seq":218,"given_name":2852,"surname":2853,"affiliation":63,"orcid":63},"The tagging of Named Entities (NE), the names of  particular things or classes and numeric expressions, is regarded as an  important component technology for many NLP applications. These  applications include Information Extraction, from which it was born,  Question-Answering, Summarization and Information Retrieval. However, up  to now, the number of NE types has been quite limited, 7 in MUC, 8 in  IREX and 5 in the ACE program. Many more kinds of things have proper  names or proper classes of expressions, and also finer distinctions are  needed for some applications. We now propose a Named Entity hierarchy  which contains about 150 NE types. The focus of this paper is the design   of the hierarchy and we would like to provide this resource for any  application. 
We report the design and development procedure of the  hierarchy.",{"paper_id":2874,"title":2875,"year":213,"month":855,"day":63,"doi":2876,"resource_url":2877,"first_page":63,"last_page":63,"pdf_url":2878,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2879,"paper_type":860,"authors":2880,"abstract":2884},"lrec2002-main-121","Recording techniques for capturing natural every-day speech","10.63317\u002F4tmshuw8diqn","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-121","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F121.pdf","campbell-2002-recording",[2881],{"paper_id":2874,"author_seq":247,"given_name":2882,"surname":2883,"affiliation":63,"orcid":63},"Nick","Campbell","This paper describes techniques for the collection of  natural spontaneous speech from daily conversational interactions for a  large  corpus that is currently being produced by the Japan Science  and Technology Agency. This corpus will form the basis for further  development of tools and software for the improvement of concatenative  speech synthesis and for the development of spoken-language interfaces  for information-providing devices that will be sensitive not only to the  content of an utterance, but also to the manner in which it is spoken,  so as to be able to detect speaker emotions and attitudes.",{"paper_id":2886,"title":2887,"year":213,"month":855,"day":63,"doi":2888,"resource_url":2889,"first_page":63,"last_page":63,"pdf_url":2890,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2891,"paper_type":860,"authors":2892,"abstract":2896},"lrec2002-main-122","Automatic Alignment of Japanese and English Newspaper Articles using an MT System and a Bilingual Company Name 
Dictionary","10.63317\u002F2jy2uqwv2mrs","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-122","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F122.pdf","matsumoto-tanaka-2002-automatic",[2893,2895],{"paper_id":2886,"author_seq":247,"given_name":1849,"surname":2894,"affiliation":63,"orcid":63},"Matsumoto",{"paper_id":2886,"author_seq":232,"given_name":1852,"surname":1578,"affiliation":63,"orcid":63},"One of the crucial parts of any corpus-based machine translation system is a large-scale bilingual corpus that is aligned at various levels such, as the sentence and phrase levels. This kind of corpus, however, is not easy to obtain, and accordingly, there is a great need for an efficient construction method. We approach this problem by integrating two large monolingual corpora in two different languages sharing the same source of information. We often see such a situation in journalistic texts where the same events are reported in many languages. Unfortunately, they often lack article-level alignment information and the recovery of this is the first problem to solve. In this paper, we report a method of automatically aligning Japanese and English newspaper articles in the financial and economic news domain. Although conventional methods require some manual work, the proposed method works fully automatically. 
We show that our method can align such newspaper articles with an accuracy of 97%.",{"paper_id":2898,"title":2899,"year":213,"month":855,"day":63,"doi":2900,"resource_url":2901,"first_page":63,"last_page":63,"pdf_url":2902,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2903,"paper_type":860,"authors":2904,"abstract":2913},"lrec2002-main-123","Towards a Thesaurus of Predicates","10.63317\u002F4apyoman6iec","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-123","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F123.pdf","shirai-etal-2002-towards",[2905,2907,2909,2912],{"paper_id":2898,"author_seq":247,"given_name":1099,"surname":2906,"affiliation":63,"orcid":63},"Shirai",{"paper_id":2898,"author_seq":232,"given_name":2908,"surname":1094,"affiliation":63,"orcid":63},"Kazuhide",{"paper_id":2898,"author_seq":218,"given_name":2910,"surname":2911,"affiliation":63,"orcid":63},"Francis","Bond",{"paper_id":2898,"author_seq":203,"given_name":1577,"surname":1578,"affiliation":63,"orcid":63},"We propose a thesaurus of predicates that can help to  resolve pre-editing and\u002For post-editing problems in machine translation  environments. It differs from earlier approaches such as conventional  dictionaries in that we are aiming to link a wide range of near-synonyms  and paraphrases. We are compiling such similar examples through both  introspection and the use of translation data, giving us a large  collection of monolingual and bilingual equivalences. This thesaurus  enables the following machine translation techniques. (a) Unification of  synonymous expressions in the source language (source language  paraphrasing). (b) Conversion of homonymous expressions to more easily  translated ones (source language rewriting). 
(c) Development of  expressions appearing in the target language into various expressions  (target language paraphrasing).",{"paper_id":2915,"title":2916,"year":213,"month":855,"day":63,"doi":2917,"resource_url":2918,"first_page":63,"last_page":63,"pdf_url":2919,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2920,"paper_type":860,"authors":2921,"abstract":2931},"lrec2002-main-124","Speech Information Technology & Industry Promotion Center in Korea: Activities and Directions","10.63317\u002F2ik88jiyhwp7","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-124","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F124.pdf","lee-etal-2002-speech",[2922,2925,2928],{"paper_id":2915,"author_seq":247,"given_name":2923,"surname":2924,"affiliation":63,"orcid":63},"Yong-Ju","Lee",{"paper_id":2915,"author_seq":232,"given_name":2926,"surname":2927,"affiliation":63,"orcid":63},"Bong-Wan","Kim",{"paper_id":2915,"author_seq":218,"given_name":2929,"surname":2930,"affiliation":63,"orcid":63},"Yongnam","Um","We will introduce current developments of language  resources and efforts to promote the systematic creation and sharing of  speech corpora in Korea. Recently speech information technology has  developed substantially through the research and development of  academia, industry and institute in Korea. However, in the country there was no national organization to manage language resources systematically and communicate with related overseas organizations. As practical uses of speech information technologies such as  speech recognition and synthesis increase, difficulties arise in academic and industrial areas with respect to speech information technology, such as speech corpora and  assessment for speech recognition and synthesis. 
Thus, in May 2001 SITEC (Speech Information Technology & Industry Promotion Center) was founded at Wonkwang  University, funded by the Ministry of Commerce, Industry and Energy and the concerned companies, modeling after LDC (Linguistic Data Consortium) in the U. S, and ELRA  (European Language Resources Association) in Europe. It was founded to help solve the common difficulties in the field and to manage systematic creation and distribution of  speech resources in Korea. Up to now organizations and companies have constructed  speech corpora individually for their own use, but now a publicly recognized  organization has been founded to coordinate the activities for speech resources. This will contribute to economic management of speech corpora and application development as well as to active speech research in Korea.",{"paper_id":2933,"title":2934,"year":213,"month":855,"day":63,"doi":2935,"resource_url":2936,"first_page":63,"last_page":63,"pdf_url":2937,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2938,"paper_type":860,"authors":2939,"abstract":63},"lrec2002-main-125","Database Adaptation for Speech Recognition in Cross-Environmental 
Conditions","10.63317\u002F4c94qdakkpar","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-125","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F125.pdf","gedge-etal-2002-database",[2940,2941,2944,2947,2949],{"paper_id":2933,"author_seq":247,"given_name":1820,"surname":1821,"affiliation":63,"orcid":63},{"paper_id":2933,"author_seq":232,"given_name":2942,"surname":2943,"affiliation":63,"orcid":63},"Christophe","Couvreur",{"paper_id":2933,"author_seq":218,"given_name":2945,"surname":2946,"affiliation":63,"orcid":63},"Klaus","Linhard",{"paper_id":2933,"author_seq":203,"given_name":2948,"surname":1824,"affiliation":63,"orcid":63},"Shaunie",{"paper_id":2933,"author_seq":188,"given_name":2950,"surname":2951,"affiliation":63,"orcid":63},"Ami","Moyal",{"paper_id":2953,"title":2954,"year":213,"month":855,"day":63,"doi":2955,"resource_url":2956,"first_page":63,"last_page":63,"pdf_url":2957,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2958,"paper_type":860,"authors":2959,"abstract":2970},"lrec2002-main-126","Combining Bayesian and Support Vector Machines Learning to automatically complete Syntactical Information for HPSG-like Formalisms","10.63317\u002F4j4nti2nfkzj","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-126","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F126.pdf","maragoudakis-etal-2002-combining",[2960,2962,2965,2968],{"paper_id":2953,"author_seq":247,"given_name":2961,"surname":1815,"affiliation":63,"orcid":63},"Manolis",{"paper_id":2953,"author_seq":232,"given_name":2963,"surname":2964,"affiliation":63,"orcid":63},"Katia","Kermanidis",{"paper_id":2953,"author_seq":218,"given_name":2966,"surname":2967,"affiliation":63,"orcid":63},"Nikos","Fakotakis",{"paper_id":2953,"author_seq":203,"given_name":2535,"surname":2969,"affiliation":63,"orcid":63},"Kokkinakis","Learning Bayesian Belief Networks (BBN) from corpora and incorporating the extracted inferring 
knowledge with a Support Vector Machines (SVM) classifier has been applied to the automatic acquisition of verb subcategorization frames for Modern Greek. We have made use of minimal linguistic resources, such as basic morphological tagging and phrase chunking, to demonstrate that verb subcategorization, which is of great significance for  developing robust natural language human computer interaction systems, could be  achieved using large corpora, without having any general-purpose syntactic parser at all. Moreover, by taking advantage of the plethora in unlabeled data found in text corpora in addition to some available labeled examples, we overcome the expensive task of  annotating the whole set of training data and the performance of the subcategorization  frames learner is increased. We argue that a classifier generated from BBN and SVM is well suited for learning to identify verb subcategorization frames. Empirical results will support this claim. Performance has been methodically evaluated using two different  corpora, one balanced and one domain-specific in order to determine the unbiased  behavior of the trained models. Limited training data are proved to endow with  satisfactory results. We have been able to achieve precision exceeding 90% on the  identification of subcategorization frames which were not known beforehand. 
The  obtained valid frames have been used to fill out the subcategorization field of verb entries  in an HPSG-like lexicon using the LKB grammar development environment.",{"paper_id":2972,"title":2973,"year":213,"month":855,"day":63,"doi":2974,"resource_url":2975,"first_page":63,"last_page":63,"pdf_url":2976,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2977,"paper_type":860,"authors":2978,"abstract":2991},"lrec2002-main-127","Automatic machine translation selection scheme to output the best result","10.63317\u002F5ns6hxn865rh","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-127","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F127.pdf","yasuda-etal-2002-automatic",[2979,2982,2985,2986,2988],{"paper_id":2972,"author_seq":247,"given_name":2980,"surname":2981,"affiliation":63,"orcid":63},"Keiji","Yasuda",{"paper_id":2972,"author_seq":232,"given_name":2983,"surname":2984,"affiliation":63,"orcid":63},"Fumiaki","Sugaya",{"paper_id":2972,"author_seq":218,"given_name":1096,"surname":1097,"affiliation":63,"orcid":63},{"paper_id":2972,"author_seq":203,"given_name":2987,"surname":1094,"affiliation":63,"orcid":63},"Seiichi",{"paper_id":2972,"author_seq":188,"given_name":2989,"surname":2990,"affiliation":63,"orcid":63},"Masuzo","Yanagida","An automatic selection method for an integrated multiple MT system is proposed. This method employs a machine learning approach to build an automatic MT selector. The selector learns based on the parameters of MT systems and the evaluation result provided by a human evaluator. An experiment is conducted on two MT systems developed in our laboratories. Experimental results show the effectiveness of the proposed method. The  ratio of correct selection is 76%. 
According to the system performance evaluation result, the integrated MT system using the proposed method gives a better performance than each individual MT system.",{"paper_id":2993,"title":2994,"year":213,"month":855,"day":63,"doi":2995,"resource_url":2996,"first_page":63,"last_page":63,"pdf_url":2997,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":2998,"paper_type":860,"authors":2999,"abstract":3005},"lrec2002-main-128","Comparative Evaluation of Collocation Extraction Metrics","10.63317\u002F3mh93w77x296","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-128","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F128.pdf","thanopoulos-etal-2002-comparative",[3000,3003,3004],{"paper_id":2993,"author_seq":247,"given_name":3001,"surname":3002,"affiliation":63,"orcid":63},"Aristomenis","Thanopoulos",{"paper_id":2993,"author_seq":232,"given_name":2966,"surname":2967,"affiliation":63,"orcid":63},{"paper_id":2993,"author_seq":218,"given_name":2535,"surname":2969,"affiliation":63,"orcid":63},"Corpus-based automatic extraction of collocations is typically carried out employing some statistic indicating concurrency in order to identify words that co-occur more often than expected by chance. In this paper we are concerned with some typical measures such as the t-score, Pearson’s  X-square test, log-likelihood ratio, pointwise mutual information and a novel information theoretic measure, namely mutual dependency. Apart from some theoretical discussion about their correlation, we perform comparative evaluation experiments judging performance by their ability to identify lexically associated bigrams. We use two different gold standards: WordNet and lists of named-entities. 
Besides  discovering that a frequency-biased version of mutual dependency performs the best,  followed close by likelihood ratio, we point out some implications that usage of  available electronic dictionaries such as the WordNet for evaluation of collocation  extraction encompasses.",{"paper_id":3007,"title":3008,"year":213,"month":855,"day":63,"doi":3009,"resource_url":3010,"first_page":63,"last_page":63,"pdf_url":3011,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3012,"paper_type":860,"authors":3013,"abstract":63},"lrec2002-main-129","A Pratical Introduction to ATLAS","10.63317\u002F2b3przbzyj9j","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-129","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F129.pdf","laprun-etal-2002-pratical",[3014,3016,3019,3022],{"paper_id":3007,"author_seq":247,"given_name":2942,"surname":3015,"affiliation":63,"orcid":63},"Laprun",{"paper_id":3007,"author_seq":232,"given_name":3017,"surname":3018,"affiliation":63,"orcid":63},"Jonathan G.","Fiscus",{"paper_id":3007,"author_seq":218,"given_name":3020,"surname":3021,"affiliation":63,"orcid":63},"John","Garofolo",{"paper_id":3007,"author_seq":203,"given_name":3023,"surname":3024,"affiliation":63,"orcid":63},"Sylvain","Pajot",{"paper_id":3026,"title":3027,"year":213,"month":855,"day":63,"doi":3028,"resource_url":3029,"first_page":63,"last_page":63,"pdf_url":3030,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3031,"paper_type":860,"authors":3032,"abstract":3042},"lrec2002-main-130","NIST Rich Transcription 2002 Evaluation: A 
Preview","10.63317\u002F3ot89b6sjpob","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-130","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F130.pdf","garofolo-etal-2002-nist",[3033,3034,3035,3037,3040],{"paper_id":3026,"author_seq":247,"given_name":3020,"surname":3021,"affiliation":63,"orcid":63},{"paper_id":3026,"author_seq":232,"given_name":3017,"surname":3018,"affiliation":63,"orcid":63},{"paper_id":3026,"author_seq":218,"given_name":3036,"surname":2179,"affiliation":63,"orcid":63},"Alvin",{"paper_id":3026,"author_seq":203,"given_name":3038,"surname":3039,"affiliation":63,"orcid":63},"David","Pallett",{"paper_id":3026,"author_seq":188,"given_name":2227,"surname":3041,"affiliation":63,"orcid":63},"Przybocki","The National Institute of Standards and Technology  (NIST) has been implementing evaluations of automatic speech  transcription technologies for over 15 years. NIST has  helped  guide progress in these technologies by: creating increasingly  challenging and realistic tests, helping to provide associated  linguistic resources, employing uniform metrics and analyses across  systems to assess performance, and sponsoring evaluation-related  technology workshops. Over time, this approach has shown great progress  in the technology as the test domains have become more difficult and  error rates  have almost consistently decreased. In conjunction  with the new DARPA Effective,  Affordable, Reusable Speech (EARS)  Program, NIST has begun an evaluation effort to help move the stateof-  the-art to the next level in the form of a Rich Transcription (RT)   evaluation program. RT is defined to be an integrated combination of  speech-to-text  generation (STT) and metadata (MD) annotation as  applied to multiple domains such as speech from Broadcast  News,  telephone conversations, and meetings. 
The Rich Transcription 2002  (RT-02) evaluation will have been the first in an annual  series of  evaluations and workshops focusing on this technology.",{"paper_id":3044,"title":3045,"year":213,"month":855,"day":63,"doi":3046,"resource_url":3047,"first_page":63,"last_page":63,"pdf_url":3048,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3049,"paper_type":860,"authors":3050,"abstract":3059},"lrec2002-main-131","Integrating Spanish Linguistic Resources in a Web Site Assistant","10.63317\u002F24f9k2hz7zw4","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-131","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F131.pdf","martinez-etal-2002-integrating",[3051,3054,3057],{"paper_id":3044,"author_seq":247,"given_name":3052,"surname":3053,"affiliation":63,"orcid":63},"Paloma","Martínez",{"paper_id":3044,"author_seq":232,"given_name":3055,"surname":3056,"affiliation":63,"orcid":63},"Ana","García-Serrano",{"paper_id":3044,"author_seq":218,"given_name":2012,"surname":3058,"affiliation":63,"orcid":63},"Ruiz-Cristina","This work describes a proposal to improve web  document retrieval by facing the main problems in document searching:  first, traditional web search engines miss documents that are relevant  to the user query and retrieve many that are not. Second, the query   formulation is not as accessible as it could be, and some users have  difficulties in expressing boolean queries. To improve the quality of  Internet search engines, two main approaches have typically been  adopted: One is the creation of a metasearch engine that makes use of  multiple search engines by unifying both the query language and the type  of results returned by the different search engines; the other one  involves applying NLP techniques for query extensions in order to handle  morphological, lexical, semantic and syntactic variations. 
Focusing on  the second approach, we present the research project MESIA (project CAM  07T\u002F0017\u002F1998) for the Madrid Local Government web site  (www.comadrid.es). Its main goal is to exploit general purpose  linguistic resources to extend user queries in order to enhance the  answers provided by AltaVista search engine.",{"paper_id":3061,"title":3062,"year":213,"month":855,"day":63,"doi":3063,"resource_url":3064,"first_page":63,"last_page":63,"pdf_url":3065,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3066,"paper_type":860,"authors":3067,"abstract":3071},"lrec2002-main-132","Creation and Evaluation of Extensible Language Resources for Maltese","10.63317\u002F3bsq7teff7b8","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-132","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F132.pdf","dalli-2002-creation",[3068],{"paper_id":3061,"author_seq":247,"given_name":3069,"surname":3070,"affiliation":63,"orcid":63},"Angelo","Dalli","The creation of Language Resources is a labour  intensive process whose difficulty is further compounded when minority  languages are concerned (Cunningham, 1999). This paper discusses the  creation of an extensible set of Language Resources for Maltese   developed by the Maltilex Project at the University of Malta (Rosner et.  
al., 1999), together with quality evaluation mechanisms for minority  languages.",{"paper_id":3073,"title":3074,"year":213,"month":855,"day":63,"doi":3075,"resource_url":3076,"first_page":63,"last_page":63,"pdf_url":3077,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3078,"paper_type":860,"authors":3079,"abstract":3089},"lrec2002-main-133","Expanding lexicons by inducing paradigms and validating attested forms","10.63317\u002F4uss54c5fpo5","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-133","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F133.pdf","grefenstette-etal-2002-expanding",[3080,3083,3086],{"paper_id":3073,"author_seq":247,"given_name":3081,"surname":3082,"affiliation":63,"orcid":63},"Gregory","Grefenstette",{"paper_id":3073,"author_seq":232,"given_name":3084,"surname":3085,"affiliation":63,"orcid":63},"Yan","Qu",{"paper_id":3073,"author_seq":218,"given_name":3087,"surname":3088,"affiliation":63,"orcid":63},"David A.","Evans","One of the bottlenecks in Natural Language Processing  for a given language is creating a lexicon that covers the language. The  morphological lexicon provides two important pieces of information for  NLP applications: 1) the normalization of a word, its lemmatization,  which allows the application to recognize two variants of the same word;  and 2) the part-of-speech roles that the word can play, which allows the  application to parse the text, creating relations between the words in a  text. Many NLP applications, e.g. Information Retrieval, Classification,  Terminology Extraction, etc., depend upon the normalization and parsing  information found in lexicons. When words are not present in these  lexicons, it is difficult to predict what their proper lemmatizations  and  parts-of-speech are. 
In this paper we present a technique for  updating a lexicon given an unknown word via induction of paradigms from  an existing, but incomplete, lexicon and validation of the paradigm  using corpus evidence.",{"paper_id":3091,"title":3092,"year":213,"month":855,"day":63,"doi":3093,"resource_url":3094,"first_page":63,"last_page":63,"pdf_url":3095,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3096,"paper_type":860,"authors":3097,"abstract":3103},"lrec2002-main-134","Statistical Machine Translation on Paraphrased Corpora","10.63317\u002F4xuyhdcugqpe","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-134","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F134.pdf","watanabe-etal-2002-statistical",[3098,3101,3102],{"paper_id":3091,"author_seq":247,"given_name":3099,"surname":3100,"affiliation":63,"orcid":63},"Taro","Watanabe",{"paper_id":3091,"author_seq":232,"given_name":2313,"surname":2314,"affiliation":63,"orcid":63},{"paper_id":3091,"author_seq":218,"given_name":2316,"surname":2317,"affiliation":63,"orcid":63},"This paper presents a statistical machine translation  trained on normalized corpora. The automatic paraphrasing is carried out  by inducing paraphrasing expressions from a bilingual corpus. Then, the  normalization is treated as a specific paraphrase of a given input  determined by the frequency in a corpus. 
The experimental results on  Japanese-to-English translation with normalized English corpus exhibited  the reduction of word-error-rate by 8% and the improvement of subjective  evaluation from 70% into 72.5%.",{"paper_id":3105,"title":3106,"year":213,"month":855,"day":63,"doi":3107,"resource_url":3108,"first_page":63,"last_page":63,"pdf_url":3109,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3110,"paper_type":860,"authors":3111,"abstract":3118},"lrec2002-main-135","Building ancient Spanish dictionaries for spell-checking of DL texts","10.63317\u002F4vqpaaa8qxgh","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-135","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F135.pdf","bia-quero-2002-building",[3112,3115],{"paper_id":3105,"author_seq":247,"given_name":3113,"surname":3114,"affiliation":63,"orcid":63},"Alejandro","Bia",{"paper_id":3105,"author_seq":232,"given_name":3116,"surname":3117,"affiliation":63,"orcid":63},"Manuel Sánchez","Quero","Being aware of the usefulness of spell-checkers on the  correction of modern works, and lacking this facility for ancient texts, we decided to build dictionaries for ancient Spanish. This  decision led to new problems and new questions. We have built a time-aware  system of dictionaries that takes into account the temporal dynamics of language, to help solve the problem of  ancient Spanish spell-checking. 
In this paper we present the problems we have found, the decisions we have made and the  conclusions and results we arrived at.",{"paper_id":3120,"title":3121,"year":213,"month":855,"day":63,"doi":3122,"resource_url":3123,"first_page":63,"last_page":63,"pdf_url":3124,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3125,"paper_type":860,"authors":3126,"abstract":3128},"lrec2002-main-136","Translation Unit Concerning Timing of Simultaneous Translation","10.63317\u002F3q6ciitm2bsd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-136","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F136.pdf","kashioka-2002-translation",[3127],{"paper_id":3120,"author_seq":247,"given_name":1852,"surname":1853,"affiliation":63,"orcid":63},"This paper discusses and proposes a translation unit for simultaneous  translation using a machine translation system. Monologues, such as lectures or broadcast news, are used as  the target of simultaneous speech translation. To date, a lot of research on speech  translation has dealt with dialogues, especially travel conversations. Most of  the speech translation systems in MT have treated a sentence as a translation unit. In the ATR travel  conversation database, sentence length is less than 10 words on average. Therefore, most  of the sentences are simple and almost all of the utterances are constructed  in one or two sentences. However, the sentences of monologues are longer than travel dialogues. They  have over 30 words (as in ``ASU-wo-YOMU,'' a TV news commentary program) on  average, and most of the sentences are complex or compound. Accordingly, it is difficult  to treat a sentence as a translation unit for monologues, and thus an appropriate translation  unit needs to be found. Considering this, we hypothesized that an adequate translation unit  of speech translation systems relates to the translation unit of a human simultaneous  translator. 
Therefore, we collected simultaneous translation data from lectures by human  translators and investigated the characteristics of monologues and simultaneous translatio",{"paper_id":3130,"title":3131,"year":213,"month":855,"day":63,"doi":3132,"resource_url":3133,"first_page":63,"last_page":63,"pdf_url":3134,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3135,"paper_type":860,"authors":3136,"abstract":3145},"lrec2002-main-137","A Web-based English Abstract Writing Tool Using a Tagged E-J Parallel Corpus","10.63317\u002F35idt8dyrr3p","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-137","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F137.pdf","narita-etal-2002-web",[3137,3140,3142],{"paper_id":3130,"author_seq":247,"given_name":3138,"surname":3139,"affiliation":63,"orcid":63},"Masumi","Narita",{"paper_id":3130,"author_seq":232,"given_name":1102,"surname":3141,"affiliation":63,"orcid":63},"Kurokawa",{"paper_id":3130,"author_seq":218,"given_name":3143,"surname":3144,"affiliation":63,"orcid":63},"Takehito","Utsuro","In this paper, we present a Web-based English abstract writing tool, the \"BEAR  (Building English Abstracts by Ricoh).\" This English writing tool is aimed at helping  Japanese software engineers improve the organization of their writing by enabling them to  select a rhetorical template of the target abstract and to build up component sentences  while having access to good-quality sample sentences. To provide this kind of language  assistance, we constructed an E-J parallel corpus of 539 sample abstracts as the core  language resource. After analyzing the rhetorical structure of these sample abstracts,  we tagged the corpus with textual and linguistic information. The \"BEAR\" is not designed  for beginners but for intermediate to advanced EFL learners who very often need to write  a research paper or a technical report in English. 
Software development has not yet been  completed, but we have already gathered some user feedback at preliminary user trials.  We show that the \"BEAR\" has been positively evaluated by our users and thus our tagged  E-J parallel corpus of sample abstracts can support our users in the difficult task of  working with a foreign language. We also discuss the outlook for further development of  the \"BEAR.\"",{"paper_id":3147,"title":3148,"year":213,"month":855,"day":63,"doi":3149,"resource_url":3150,"first_page":63,"last_page":63,"pdf_url":3151,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3152,"paper_type":860,"authors":3153,"abstract":63},"lrec2002-main-138","Morphosyntactic Disambiguation for TTS Systems","10.63317\u002F3htzveyuvy37","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-138","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F138.pdf","ribeiro-etal-2002-morphosyntactic",[3154,3157,3159],{"paper_id":3147,"author_seq":247,"given_name":3155,"surname":3156,"affiliation":63,"orcid":63},"Ricardo","Ribeiro",{"paper_id":3147,"author_seq":232,"given_name":1899,"surname":3158,"affiliation":63,"orcid":63},"Oliveira",{"paper_id":3147,"author_seq":218,"given_name":3160,"surname":3161,"affiliation":63,"orcid":63},"Isabel","Trancoso",{"paper_id":3163,"title":3164,"year":213,"month":855,"day":63,"doi":3165,"resource_url":3166,"first_page":63,"last_page":63,"pdf_url":3167,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3168,"paper_type":860,"authors":3169,"abstract":3179},"lrec2002-main-139","Seeing Arguments through Transparent Structures","10.63317\u002F35sqkhwxc5oo","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-139","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F139.pdf","fillmore-etal-2002-seeing",[3170,3173,3176],{"paper_id":3163,"author_seq":247,"given_name":3171,"surname":3172,"affiliation":63,"orcid":63},"Charles 
J.","Fillmore",{"paper_id":3163,"author_seq":232,"given_name":3174,"surname":3175,"affiliation":63,"orcid":63},"Collin F.","Baker",{"paper_id":3163,"author_seq":218,"given_name":3177,"surname":3178,"affiliation":63,"orcid":63},"Hiroaki","Sato","This paper describes a research effort that exploits  information available in the FrameNet database and seeks to find, for  argument-structure-bearing verbs, nouns, and adjectives, the lexical  heads of the phrases that satisfy the core semantic roles of those  predicates, and to create from the database of annotated sentences  collections of structured clusters of words, called kernel dependency  graphs. These KDGs when thus extracted from a collection of annotated  sentences can be studied as candidates for the status of special  collocations, but the same kinds of clusters, when discovered in raw  text, can serve, in NLP applications, as indicators of the salient  topics or issues in the passage from which they have been extracted.  Unfortunately, there are sometimes discrepancies between syntactic and  semantic \"heads\", and for our purposes it is the semantic head  that is significant; it is thus necessary to identify grammatical  structures - and the words which mark them - that can intervene,  structurally, between a predicate and its arguments. 
When these  \"transparency\" structures are the familiar control structures  seen in various kinds of embedding predicates, we should be able to rely  on ordinary parsers to identify them; but the concern in this paper is  with two additional phenomena, the support verbs that separate arguments  from predications that are expressed as nouns (typically deverbal  nouns), and transparent nouns that syntactically take the semantically  relevant nouns as their complements.",{"paper_id":3181,"title":3182,"year":213,"month":855,"day":63,"doi":3183,"resource_url":3184,"first_page":63,"last_page":63,"pdf_url":3185,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3186,"paper_type":860,"authors":3187,"abstract":3191},"lrec2002-main-140","The FrameNet Database and Software Tools","10.63317\u002F42tfhpzywopx","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-140","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F140.pdf","fillmore-etal-2002-framenet",[3188,3189,3190],{"paper_id":3181,"author_seq":247,"given_name":3171,"surname":3172,"affiliation":63,"orcid":63},{"paper_id":3181,"author_seq":232,"given_name":3174,"surname":3175,"affiliation":63,"orcid":63},{"paper_id":3181,"author_seq":218,"given_name":3177,"surname":3178,"affiliation":63,"orcid":63},"The FrameNet Project  (http:\u002F\u002Fframenet.icsi.berkeley.edu\u002F~framenet) is producing a lexicon of  English for both human use and NLP applications, based on the principles  of Frame Semantics, in which sentences are described on the basis of  predicators which evoke semantic frames and other constituents which  express the participants (frame elements) in these frames. Our lexicon  contains detailed information about the possible syntactic realizations  of frame elements, derived from annotated corpus examples. 
In the   process, we have developed a suite of tools for the definition of  semantic frames, for annotating sentences, for searching the results,  and for creating a variety of reports. We  will discuss the  conceptual basis of our work and demonstrate the tools we work with, the  results we produce, and how they may be of use to other NLP projects.",{"paper_id":3193,"title":3194,"year":213,"month":855,"day":63,"doi":3195,"resource_url":3196,"first_page":63,"last_page":63,"pdf_url":3197,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3198,"paper_type":860,"authors":3199,"abstract":3209},"lrec2002-main-141","Models and Tools for Collaborative Annotation","10.63317\u002F3hvcm58td6c8","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-141","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F141.pdf","ma-etal-2002-models",[3200,3203,3205,3206],{"paper_id":3193,"author_seq":247,"given_name":3201,"surname":3202,"affiliation":63,"orcid":63},"Xiaoyi","Ma",{"paper_id":3193,"author_seq":232,"given_name":3204,"surname":2924,"affiliation":63,"orcid":63},"Haejoong",{"paper_id":3193,"author_seq":218,"given_name":1142,"surname":1143,"affiliation":63,"orcid":63},{"paper_id":3193,"author_seq":203,"given_name":3207,"surname":3208,"affiliation":63,"orcid":63},"Kazuaki","Maeda","The Annotation Graph Toolkit (AGTK) is a collection  of software which facilitates development of linguistic annotation  tools. AGTK provides a database interface which allows applications to  use a database server for persistent storage. This paper discusses  various modes of collaborative annotation and how they can be supported  with tools built using AGTK and its database interface. We describe the  relational database schema and API, and describe a version of the  TableTrans tool which supports collaborative  annotation. 
The  remainder of the paper discusses a high-level query language for  annotation graphs, along with optimizations, in support of expressive  and efficient access to the annotations held on a large central server.  The paper demonstrates that it is straightforward to support a variety  of different levels of collaborative annotation with existing AGTK-based  tools, with a minimum of additional programming effort.",{"paper_id":3211,"title":3212,"year":213,"month":855,"day":63,"doi":3213,"resource_url":3214,"first_page":63,"last_page":63,"pdf_url":3215,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3216,"paper_type":860,"authors":3217,"abstract":3224},"lrec2002-main-142","HMMs for Automatic Phonetic Segmentation","10.63317\u002F4ryigjau7yeu","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-142","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F142.pdf","toledano-gomez-2002-hmms",[3218,3221],{"paper_id":3211,"author_seq":247,"given_name":3219,"surname":3220,"affiliation":63,"orcid":63},"Doroteo Torre","Toledano",{"paper_id":3211,"author_seq":232,"given_name":3222,"surname":3223,"affiliation":63,"orcid":63},"Luis A. Hernández","Gómez","This paper presents an analysis of the most frequently used approach in automatic phonetic segmentation ­ computing forced alignments using HMMs and features similar to those used in speech recognition. We start by analyzing the segmentation accuracy of context-dependent and context-independent HMMs, and proposing an explanation for the results. We focus our attention on the loss of correspondence between phones and context-dependent HMMs. This effect was already proposed to explain the surprisingly worse segmentation accuracy of context-dependent HMMs, given its clear superiority in speech recognition. We argue that this effect should lead to systematic segmentation errors. 
Therefore, we propose a new method, called Statistical Correction of Context Dependent Boundary Marks (SCCDBM), which partially corrects these systematic errors making segmentation results for context-dependent HMMs followed by SCCDBM clearly superior to those obtained with context-independent HMMs. This observation empirically proves the existence of systematic segmentation errors and adds empirical evidence to the explanation for the worse segmentation accuracy of context-dependent HMMs. Finally, we analyze how speaker adaptation improves segmentation accuracy, and how speaker adaptation hardly modifies the systematic errors produced by context-dependent HMMs.",{"paper_id":3226,"title":3227,"year":213,"month":855,"day":63,"doi":3228,"resource_url":3229,"first_page":63,"last_page":63,"pdf_url":3230,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3231,"paper_type":860,"authors":3232,"abstract":3242},"lrec2002-main-143","Automatic Evaluation: Using a DATE Dialogue Act Tagger for User Satisfaction and Task Completion Prediction","10.63317\u002F2m58wpesch86","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-143","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F143.pdf","hastie-etal-2002-automatic",[3233,3236,3239],{"paper_id":3226,"author_seq":247,"given_name":3234,"surname":3235,"affiliation":63,"orcid":63},"Helen Wright","Hastie",{"paper_id":3226,"author_seq":232,"given_name":3237,"surname":3238,"affiliation":63,"orcid":63},"Rashmi","Prasad",{"paper_id":3226,"author_seq":218,"given_name":3240,"surname":3241,"affiliation":63,"orcid":63},"Marilyn","Walker","The objective of the DARPA Communicator project is to  support rapid, cost-effective development of multi-modal speech-enabled  dialogue systems with advanced conversational capabilities. 
During the  course of the Communicator program, we have been involved in developing  methods for measuring progress towards the program goals  and  assessing advances in the component technologies required to achieve  such goals.  Our goal has been to develop a lightweight evaluation  paradigm for heterogeneous systems. In this paper, we utilize the  Communicator evaluation corpus from 2001 and build on previous work  applying the PARADISE evaluation framework to establish a  baseline  for fully automatic system evaluation. We train a regression tree to  predict User Satisfaction using a random 80 of the dialogues for  training. The metrics (features) we use  for prediction are a fully   automatic Task Success Measure, Efficiency Measures, and System Dialogue  Act Behaviors extracted from the dialogue logfiles using the DATE  (Dialogue Act Tagging for Evaluation) tagging scheme. The learned tree  with the DATE metrics has a correlation of 0.614 (R",{"paper_id":3244,"title":3245,"year":213,"month":855,"day":63,"doi":3246,"resource_url":3247,"first_page":63,"last_page":63,"pdf_url":3248,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3249,"paper_type":860,"authors":3250,"abstract":3272},"lrec2002-main-144","Design and Evaluation of a SLDS for E-Mail Access through the 
Telephone","10.63317\u002F2hdq69d29rdj","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-144","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F144.pdf","bel-etal-2002-design",[3251,3252,3253,3256,3259,3262,3265,3267,3270,3271],{"paper_id":3244,"author_seq":247,"given_name":1644,"surname":1645,"affiliation":63,"orcid":63},{"paper_id":3244,"author_seq":232,"given_name":1345,"surname":1346,"affiliation":63,"orcid":63},{"paper_id":3244,"author_seq":218,"given_name":3254,"surname":3255,"affiliation":63,"orcid":63},"Luis","Hernández",{"paper_id":3244,"author_seq":203,"given_name":3257,"surname":3258,"affiliation":63,"orcid":63},"Montserrat","Marimón",{"paper_id":3244,"author_seq":188,"given_name":3260,"surname":3261,"affiliation":63,"orcid":63},"José F.","Morlesín",{"paper_id":3244,"author_seq":172,"given_name":3263,"surname":3264,"affiliation":63,"orcid":63},"Josep M.","Otero",{"paper_id":3244,"author_seq":155,"given_name":1893,"surname":3266,"affiliation":63,"orcid":63},"Relaño",{"paper_id":3244,"author_seq":138,"given_name":3268,"surname":3269,"affiliation":63,"orcid":63},"M. Carmen","Rodríguez",{"paper_id":3244,"author_seq":121,"given_name":1356,"surname":1357,"affiliation":63,"orcid":63},{"paper_id":3244,"author_seq":104,"given_name":1353,"surname":1354,"affiliation":63,"orcid":63},"E-MATTER (E-Mail Access through the Telephone Using Speech Technology  Resources) is a Trial EC project (IST-1999-21042) directed to make e-mail universally  and seamlessly accessible to a broad population of potential users through an affordable  telephone-based service. Thus, the main objective of E-MATTER was to develop a  Spoken Language Dialogue System (SLDS) for an e-mail access service that uses a  multilingual spoken language interface (both input and output) and that takes into account  the cultural and the linguistic diversity nature of the e-mail messages.  
This paper addresses the different linguistic resources involved in the design and evaluation  of the E-MATTER prototype.  In the first part of the paper, we describe the guidelines for  the design of  the principal linguistic technologies involved in the development of the system:  multilingual speech recognition, multilingual text-to-speech conversion, semantic parsing,  dialogue management, language identification and advanced text verification. Then we  present an evaluation methodology we have followed to obtain a complete analysis of  both module deficiencies and global system behaviour. This methodology has been used  to show us how to improve the prototype system, and we hope it will be general enough  to be useful for testing other similar SLDS’s.",{"paper_id":3274,"title":3275,"year":213,"month":855,"day":63,"doi":3276,"resource_url":3277,"first_page":63,"last_page":63,"pdf_url":3278,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3279,"paper_type":860,"authors":3280,"abstract":3298},"lrec2002-main-145","Multiword expressions: linguistic precision and 
reusability","10.63317\u002F323zr2e2bw6g","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-145","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F145.pdf","copestake-etal-2002-multiword",[3281,3284,3286,3289,3290,3293,3296],{"paper_id":3274,"author_seq":247,"given_name":3282,"surname":3283,"affiliation":63,"orcid":63},"Ann","Copestake",{"paper_id":3274,"author_seq":232,"given_name":2553,"surname":3285,"affiliation":63,"orcid":63},"Lambeau",{"paper_id":3274,"author_seq":218,"given_name":3287,"surname":3288,"affiliation":63,"orcid":63},"Aline","Villavicencio",{"paper_id":3274,"author_seq":203,"given_name":2910,"surname":2911,"affiliation":63,"orcid":63},{"paper_id":3274,"author_seq":188,"given_name":3291,"surname":3292,"affiliation":63,"orcid":63},"Timothy","Baldwin",{"paper_id":3274,"author_seq":172,"given_name":3294,"surname":3295,"affiliation":63,"orcid":63},"Ivan A.","Sag",{"paper_id":3274,"author_seq":155,"given_name":1371,"surname":3297,"affiliation":63,"orcid":63},"Flickinger","This paper discusses the approach to multiword  expressions being adopted in the LinGO English Resource Grammar  (http:\u002F\u002Flingo.stanford.edu), a broad-scale bidirectional grammar of  English in the HPSG framework. 
We discuss how the lexicon of multiword  expressions is encoded in a database and describe the implications for  building a reusable lexical resource.",{"paper_id":3300,"title":3301,"year":213,"month":855,"day":63,"doi":3302,"resource_url":3303,"first_page":63,"last_page":63,"pdf_url":3304,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3305,"paper_type":860,"authors":3306,"abstract":3316},"lrec2002-main-146","Extracting French-Japanese Word Pairs from Bilingual Corpora based on Transliteration Rules","10.63317\u002F2tpyykcdzsjk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-146","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F146.pdf","tsuji-etal-2002-extracting",[3307,3310,3313],{"paper_id":3300,"author_seq":247,"given_name":3308,"surname":3309,"affiliation":63,"orcid":63},"Keita","Tsuji",{"paper_id":3300,"author_seq":232,"given_name":3311,"surname":3312,"affiliation":63,"orcid":63},"Beatrice","Daille",{"paper_id":3300,"author_seq":218,"given_name":3314,"surname":3315,"affiliation":63,"orcid":63},"Kyo","Kageura","It has been shown so far that using transliteration  rules to extract Japanese Katakana and English word pairs is highly  useful and promising. But for Japanese-French pairs, the method is not  guaranteed to work, because only a very few Japanese Katakana words are  borrowed directly from French. In this paper we will show the  possibility of extracting Japanese Katakana and French word pairs based  on transliteration from loosely aligned Japanese French bilingual  corpora. The method applies all the existing transliteration rules to  each mora unit in a Katakana word, and extracts the French word which  matches or partially-matches one of these transliteration candidates as  translation. For instance, if we have `Ot' in the Japanese part of a  bilingual corpora, we generate such transliteration candidates as  \u003Cgraf>, \u003Cgraphe>, \u003Cgulerph>,... 
and identify similar  words from French part of the corpora. The method performed reasonably  well, achieving 80% precision at 20% recall. We had also observed that  Japanese-English transliteration rules worked well for  extracting  Katakana-French word pairs.",{"paper_id":3318,"title":3319,"year":213,"month":855,"day":63,"doi":3320,"resource_url":3321,"first_page":63,"last_page":63,"pdf_url":3322,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3323,"paper_type":860,"authors":3324,"abstract":3328},"lrec2002-main-147","A Two-level Morphological Analyser and Generator for Irish using Finite-State Transducers","10.63317\u002F5o53dkjtn4sr","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-147","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F147.pdf","ui-dhonnchadha-2002-two",[3325],{"paper_id":3318,"author_seq":247,"given_name":3326,"surname":3327,"affiliation":63,"orcid":63},"Elaine","Uí Dhonnchadha","Computational morphology is an important part of  natural language processing. Finite-state techniques have been applied  successfully in computational phonology and morphology to many of the  world’s major languages. Celtic languages such as Modern Irish present  challenging morphological features that to date have not been addressed  using finite-state technology. This paper presents a finite-state  two-level morphology of Irish developed using Xerox Finite-State Tools.  The system encodes the inflectional morphology of all inflected  parts-of-speech in Modern Irish. The morphotactics of stems and affixes  are encoded in the lexicon and word mutations are implemented as a  series of replace rules encoded as regular expressions. Both the  lexicons and rules are compiled into finite state transducers and  combined to produce a single lexical transducer for the language. 
A  major advantage of finite-state two-level implementations of morphology  is their inherent bi-directionality; the same system is used for both  analysis and generation of word forms in the language. This resource can  be used as a component part in many NLP applications such as spelling  checkers\u002Fcorrectors, stemmers, and text to speech synthesisers. It can  also be used in tokenising, lemmatising and part-of-speech tagging of a  corpus of text. The system, which is designed for broad coverage of the  language, is evaluated against the most frequently used words in a  corpus of contemporary Irish texts. Finally, possible extensions to the  system are suggested, such as derivational morphology and the inclusion  of dialectal or historical word-forms.",{"paper_id":3330,"title":3331,"year":213,"month":855,"day":63,"doi":3332,"resource_url":3333,"first_page":63,"last_page":63,"pdf_url":3334,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3335,"paper_type":860,"authors":3336,"abstract":3342},"lrec2002-main-148","A Method for Automatically Building and Evaluating Dictionary Resources","10.63317\u002F59v7d9ooxiyn","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-148","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F148.pdf","muresan-klavans-2002-method",[3337,3340],{"paper_id":3330,"author_seq":247,"given_name":3338,"surname":3339,"affiliation":63,"orcid":63},"Smaranda","Muresan",{"paper_id":3330,"author_seq":232,"given_name":3341,"surname":968,"affiliation":63,"orcid":63},"Judith","This paper describes a method toward automatically  building dictionaries from text. We present DEFINDER, a rule-based  system for extraction of definitions from on-line consumer-oriented  medical articles. 
We provide an extensive evaluation on three   dimensions: i) performance of the definition extraction technique in  terms of precision  and recall, ii) quality of the built dictionary  as judged both by specialists and lay users, iii) coverage of existing  on-line dictionaries. The corpus we used for the study is publicly  available. A major contribution of the paper is the range of  quantitative and qualitative evaluation methods.",{"paper_id":3344,"title":3345,"year":213,"month":855,"day":63,"doi":3346,"resource_url":3347,"first_page":63,"last_page":63,"pdf_url":3348,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3349,"paper_type":860,"authors":3350,"abstract":3356},"lrec2002-main-149","The Use of Referential Constraints in Structuring Discourse","10.63317\u002F2vnwdendm3hr","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-149","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F149.pdf","seretan-cristea-2002-use",[3351,3354],{"paper_id":3344,"author_seq":247,"given_name":3352,"surname":3353,"affiliation":63,"orcid":63},"Violeta","Seretan",{"paper_id":3344,"author_seq":232,"given_name":1371,"surname":3355,"affiliation":63,"orcid":63},"Cristea","The quality of discourse structure annotations is negatively influenced by the numerous difficulties that occur in the analysis process. In contrast, referential annotation resources are considerably more reliable, given the high precision of the anaphora resolution systems that exist. 
We present an approach based on the Veins Theory (Cristea, Ide, Romary, 1998), in which successful reference annotations of texts are exploited in order to improve arbitrary structural analyses; in this way, the large amount of corpora annotated at reference level can be used for the acquisition of structural annotation resources.",{"paper_id":3358,"title":3359,"year":213,"month":855,"day":63,"doi":3360,"resource_url":3361,"first_page":63,"last_page":63,"pdf_url":3362,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3363,"paper_type":860,"authors":3364,"abstract":3367},"lrec2002-main-150","Construction of a Word Sense Tagged Corpus for SENSEVAL-2 Japanese Dictionary Task","10.63317\u002F36zzdrcxg7d8","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-150","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F150.pdf","shirai-2002-construction",[3365],{"paper_id":3358,"author_seq":247,"given_name":3366,"surname":2906,"affiliation":63,"orcid":63},"Kiyoaki","This paper reports the details of a Japanese word sense tagged corpus  developed as evaluation data for SENSEVAL-2 Japanese dictionary task. The corpus is made up of 2,130 newspaper articles. Not all but only  10,000 words in the articles were manually annotated with sense IDs, which were used as gold standard data. Word senses were defined  according to the Iwanami Kokugo Jiten, a Japanese dictionary published by Iwanami Shoten. Two annotators chose a sense ID for each instance  separately. If they did not agree, the third annotator chose the correct sense ID between them. 
Inter-tagger agreement and Cohen's  κ were 86.3% and 0.677, respectively.",{"paper_id":3369,"title":3370,"year":213,"month":855,"day":63,"doi":3371,"resource_url":3372,"first_page":63,"last_page":63,"pdf_url":3373,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3374,"paper_type":860,"authors":3375,"abstract":3387},"lrec2002-main-151","Development and Evaluation of a Korean Treebank and its Application to NLP","10.63317\u002F2vpimyzt3yia","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-151","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F151.pdf","han-etal-2002-development",[3376,3379,3381,3384],{"paper_id":3369,"author_seq":247,"given_name":3377,"surname":3378,"affiliation":63,"orcid":63},"Chung-hye","Han",{"paper_id":3369,"author_seq":232,"given_name":3380,"surname":3378,"affiliation":63,"orcid":63},"Na-Rae",{"paper_id":3369,"author_seq":218,"given_name":3382,"surname":3383,"affiliation":63,"orcid":63},"Eon-Suk","Ko",{"paper_id":3369,"author_seq":203,"given_name":3385,"surname":3386,"affiliation":63,"orcid":63},"Martha","Palmer","This paper discusses issues in building a 54-thousand-word Korean  Treebank using a phrase structure annotation, along with developing annotation guidelines based on the morpho-syntactic phenomena represented  in the corpus. Various methods that were employed for quality control are  presented. 
The evaluation on the quality of the Treebank and some of the  NLP applications under development using the Treebank are also presented.",{"paper_id":3389,"title":3390,"year":213,"month":855,"day":63,"doi":3391,"resource_url":3392,"first_page":63,"last_page":63,"pdf_url":3393,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3394,"paper_type":860,"authors":3395,"abstract":63},"lrec2002-main-152","Identifying Verb Arguments and their Syntactic Function in the Penn Treebank","10.63317\u002F3m756i9i2fd3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-152","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F152.pdf","kinyon-prolo-2002-identifying",[3396,3399],{"paper_id":3389,"author_seq":247,"given_name":3397,"surname":3398,"affiliation":63,"orcid":63},"Alexandra","Kinyon",{"paper_id":3389,"author_seq":232,"given_name":3400,"surname":3401,"affiliation":63,"orcid":63},"Carlos A.","Prolo",{"paper_id":3403,"title":3404,"year":213,"month":855,"day":63,"doi":3405,"resource_url":3406,"first_page":63,"last_page":63,"pdf_url":3407,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3408,"paper_type":860,"authors":3409,"abstract":3414},"lrec2002-main-153","Automatic Detection of Acoustic Centres of Reliability for Tagging Paralinguistic Information in Expressive Speech","10.63317\u002F3g9o4288ugbj","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-153","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F153.pdf","mokhtari-campbell-2002-automatic",[3410,3413],{"paper_id":3403,"author_seq":247,"given_name":3411,"surname":3412,"affiliation":63,"orcid":63},"Parham","Mokhtari",{"paper_id":3403,"author_seq":232,"given_name":2882,"surname":2883,"affiliation":63,"orcid":63},"Preparation of a unit-database to be used in concatenative speech  synthesis demands sufficiently robust, unsupervised algorithms for processing the typically huge corpora. 
The demands are even  more stringent when considering a corpus large enough to capture a wide variety of speaking-styles and emotions, even of a  single speaker. This paper describes a method of combining robust acoustic-prosodic and cepstral analyses to locate centres  of acoustic-phonetic reliability in the speech stream, wherein physiologically meaningful parameters related to voice quality  can be estimated more reliably. These parameters which describe the state of glottal phonation and of supralaryngeal  articulation, can then provide a paralinguistic annotation of the unit-database,  thereby enabling speech synthesis with a greater variety of expressions and speaking-styles.",{"paper_id":3416,"title":3417,"year":213,"month":855,"day":63,"doi":3418,"resource_url":3419,"first_page":63,"last_page":63,"pdf_url":3420,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3421,"paper_type":860,"authors":3422,"abstract":3434},"lrec2002-main-154","Word Sense Disambiguation with Information Retrieval Technique","10.63317\u002F4zy8ehzsrraq","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-154","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F154.pdf","oh-etal-2002-word",[3423,3426,3429,3432],{"paper_id":3416,"author_seq":247,"given_name":3424,"surname":3425,"affiliation":63,"orcid":63},"Jong-Hoon","Oh",{"paper_id":3416,"author_seq":232,"given_name":3427,"surname":3428,"affiliation":63,"orcid":63},"Saim","Shin",{"paper_id":3416,"author_seq":218,"given_name":3430,"surname":3431,"affiliation":63,"orcid":63},"Yong-Seok","Choi",{"paper_id":3416,"author_seq":203,"given_name":3433,"surname":3431,"affiliation":63,"orcid":63},"Key-Sun","This paper reports on word sense disambiguation of Korean nouns with information retrieval technique. First, context vectors are constructed using contextual words in training data. Then, the words in the context vector are weighted with local density. 
Each sense of a target word is represented as ‘Static Sense Vector’ in word space, which is the centroid of the context vectors. Contextual noise is removed using selective sampling. A selective sampling method uses information retrieval technique, so as to enhance the discriminative power. We regard training samples as indexed documents and test samples as queries. We can retrieve relevant top-N training samples for a query (a test sample) and construct ‘Dynamic Sense Vector’ using the retrieved training samples. A word sense is estimated using the ‘Static Sense Vector’ and ‘Dynamic Sense Vector’. The Korean SENSEVAL test suite is used for this experiment and our method produces relatively good results.",{"paper_id":3436,"title":3437,"year":213,"month":855,"day":63,"doi":3438,"resource_url":3439,"first_page":63,"last_page":63,"pdf_url":3440,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3441,"paper_type":860,"authors":3442,"abstract":3461},"lrec2002-main-155","English Speech Database Read by Japanese Learners for CALL System 
Development","10.63317\u002F2h2uvuunc5if","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-155","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F155.pdf","minematsu-etal-2002-english",[3443,3446,3449,3452,3454,3457,3459],{"paper_id":3436,"author_seq":247,"given_name":3444,"surname":3445,"affiliation":63,"orcid":63},"N.","Minematsu",{"paper_id":3436,"author_seq":232,"given_name":3447,"surname":3448,"affiliation":63,"orcid":63},"Y.","Tomiyama",{"paper_id":3436,"author_seq":218,"given_name":3450,"surname":3451,"affiliation":63,"orcid":63},"K.","Yoshimoto",{"paper_id":3436,"author_seq":203,"given_name":3450,"surname":3453,"affiliation":63,"orcid":63},"Shimizu",{"paper_id":3436,"author_seq":188,"given_name":3455,"surname":3456,"affiliation":63,"orcid":63},"S.","Nakagawa",{"paper_id":3436,"author_seq":172,"given_name":1995,"surname":3458,"affiliation":63,"orcid":63},"Dantsuji",{"paper_id":3436,"author_seq":155,"given_name":3455,"surname":3460,"affiliation":63,"orcid":63},"Makino","With the help of recent advances in speech processing  techniques, we can see various kinds of practical speech applications in  both laboratories and the real world. One of the major applications in  Japan is CALL (Computer Assisted Language Learning) systems. It is  well-known that most of the recent speech technologies are based upon  statistical methods, which require a large amount of speech data.  Although we can find many speech corpora available from distribution  sites such as Linguistic Data Consortium, European Language Resources  Association, and so on, the number of speech corpora built especially  for CALL system development is very small. In this paper, we firstly  introduce a Japanese national project of \"Advanced Utilization of  Multimedia to Promote Higher Educational Reform,\" under which some  research groups are currently developing CALL systems. 
One of the main  objectives of the project is to construct an English speech database  read by Japanese students for CALL system development. This paper  describes specification of the database and strategies adopted to select  speakers and record their sentence\u002Fword utterances in addition to  preliminary discussions and investigations done  before the  database development. Further, by using the new database and WSJ  database, corpus-based analysis and comparison between Japanese English  and American English is done in view of the entire phonemic system of  English. Here, tree diagrams of the two kinds of English are drawn  through their HMM sets. Results show many interesting characteristics of  Japanese English.",{"paper_id":3463,"title":3464,"year":213,"month":855,"day":63,"doi":3465,"resource_url":3466,"first_page":63,"last_page":63,"pdf_url":3467,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3468,"paper_type":860,"authors":3469,"abstract":3477},"lrec2002-main-156","NESPOLE!’s Multilingual and Multimodal Corpus","10.63317\u002F58mj2ur38t4x","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-156","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F156.pdf","costantini-etal-2002-nespole",[3470,3473,3476],{"paper_id":3463,"author_seq":247,"given_name":3471,"surname":3472,"affiliation":63,"orcid":63},"Erica","Costantini",{"paper_id":3463,"author_seq":232,"given_name":3474,"surname":3475,"affiliation":63,"orcid":63},"Susanne","Burger",{"paper_id":3463,"author_seq":218,"given_name":1308,"surname":2015,"affiliation":63,"orcid":63},"NESPOLE! is a EU\u002FNSF jointly funded project exploring  multilingual (speech-to-speech translation) and multimodal communication  in e-services. 
The current system allows users speaking different  languages (English, French, German and Italian) to interact on the  tourism domain through the Internet using thin terminals (PCs with sound  and video cards  and H323 video-conferencing software). Web pages  and maps can be shared among users, by means of a special White Board.  NESPOLE! provides for multimodal communication by allowing users to  perform gestures on displayed maps, by means of a tablet and a pen. To  test the integration of multilinguality with multimodality, and the  impact of the latter on the former, we designed and executed an  experiment, involving 35 subjects, 28 playing the role of customers  (English and German) and 7 playing the role of agents (Italian).  Subjects communicated through the NESPOLE! system to accomplish an  assigned task (booking an hotel), meeting specific constraints as to  available budget, location, distance from relevant spots, etc. Two  experimental conditions were considered and compared, differing as to  whether multimodal resources were available: a speech-only condition  (SO), and a multimodal condition (MM). 
This paper reports on the  resulting corpus, and on the results of the experiment.",{"paper_id":3479,"title":3480,"year":213,"month":855,"day":63,"doi":3481,"resource_url":3482,"first_page":63,"last_page":63,"pdf_url":3483,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3484,"paper_type":860,"authors":3485,"abstract":3505},"lrec2002-main-157","Extracting Information for Automatic Indexing of Multimedia Material","10.63317\u002F3tmdgb2s67fi","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-157","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F157.pdf","saggion-etal-2002-extracting",[3486,3489,3492,3494,3497,3500,3502],{"paper_id":3479,"author_seq":247,"given_name":3487,"surname":3488,"affiliation":63,"orcid":63},"Horacio","Saggion",{"paper_id":3479,"author_seq":232,"given_name":3490,"surname":3491,"affiliation":63,"orcid":63},"Hamish","Cunningham",{"paper_id":3479,"author_seq":218,"given_name":872,"surname":3493,"affiliation":63,"orcid":63},"Maynard",{"paper_id":3479,"author_seq":203,"given_name":3495,"surname":3496,"affiliation":63,"orcid":63},"Kalina","Bontcheva",{"paper_id":3479,"author_seq":188,"given_name":3498,"surname":3499,"affiliation":63,"orcid":63},"Oana","Hamza",{"paper_id":3479,"author_seq":172,"given_name":2419,"surname":3501,"affiliation":63,"orcid":63},"Ursu",{"paper_id":3479,"author_seq":155,"given_name":3503,"surname":3504,"affiliation":63,"orcid":63},"Yorick","Wilks","This paper discusses our work on information  extraction (IE) from multi-lingual, multi-media, multi-genre Language  Resources, in a domain where there are many different event types. This  work is being carried out in the context of MUMIS, an EU-funded project  that aims at the development of basic technology for the creation of a  composite index from multiple and multi-lingual sources. 
Our approach to  IE relies on a finite state machinery provided by GATE, a General  Architecture for Text Engineering, pipelined with full syntactic  analysis and discourse interpretation implemented in Prolog.",{"paper_id":3507,"title":3508,"year":213,"month":855,"day":63,"doi":3509,"resource_url":3510,"first_page":63,"last_page":63,"pdf_url":3511,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3512,"paper_type":860,"authors":3513,"abstract":3525},"lrec2002-main-158","Developing Infrastructure for the Evaluation of Single and Multi-document Summarization Systems in a Cross-lingual Environment","10.63317\u002F5owjve6cqe7k","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-158","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F158.pdf","saggion-etal-2002-developing",[3514,3515,3518,3519,3522],{"paper_id":3507,"author_seq":247,"given_name":3487,"surname":3488,"affiliation":63,"orcid":63},{"paper_id":3507,"author_seq":232,"given_name":3516,"surname":3517,"affiliation":63,"orcid":63},"Dragomir","Radev",{"paper_id":3507,"author_seq":218,"given_name":1553,"surname":1554,"affiliation":63,"orcid":63},{"paper_id":3507,"author_seq":203,"given_name":3520,"surname":3521,"affiliation":63,"orcid":63},"Wai","Lam",{"paper_id":3507,"author_seq":188,"given_name":3523,"surname":3524,"affiliation":63,"orcid":63},"Stephanie M.","Strassel","We describe our work on the development of Language  and Evaluation Resources for the evaluation of summaries in English and  Chinese. The language resources include a parallel corpus of English and  Chinese texts which are translations of each other, a set of queries in  both languages, clusters of documents relevant to each query, sentence  relevance measures for each sentence in the document clusters, and  manual multi-document summaries at different compression rates. 
The  evaluation resources consist of metrics for measuring the content of  automatic summaries against reference summaries. The framework can be  used in the evaluation of extractive, non-extractive,  single and  multi-document summarization. We focus on the resources developed that  are made available for the research community.",{"paper_id":3527,"title":3528,"year":213,"month":855,"day":63,"doi":3529,"resource_url":3530,"first_page":63,"last_page":63,"pdf_url":3531,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3532,"paper_type":860,"authors":3533,"abstract":3552},"lrec2002-main-159","Multi-level XML-based Corpus Annotation","10.63317\u002F5k7fsvc8g3dp","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-159","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F159.pdf","papageorgiou-etal-2002-multi",[3534,3537,3540,3543,3546,3549],{"paper_id":3527,"author_seq":247,"given_name":3535,"surname":3536,"affiliation":63,"orcid":63},"Harris","Papageorgiou",{"paper_id":3527,"author_seq":232,"given_name":3538,"surname":3539,"affiliation":63,"orcid":63},"Prokopis","Prokopidis",{"paper_id":3527,"author_seq":218,"given_name":3541,"surname":3542,"affiliation":63,"orcid":63},"Voula","Giouli",{"paper_id":3527,"author_seq":203,"given_name":3544,"surname":3545,"affiliation":63,"orcid":63},"Iason","Demiros",{"paper_id":3527,"author_seq":188,"given_name":3547,"surname":3548,"affiliation":63,"orcid":63},"Alexis","Konstantinidis",{"paper_id":3527,"author_seq":172,"given_name":3550,"surname":3551,"affiliation":63,"orcid":63},"Stelios","Piperidis","In this paper we present the methodological principles and the implementation framework of text annotation process in an Information Extraction setting. Due to the recent prevalence of XML as a means for describing structured documents in a reusable format, our team has switched to an XML based annotation schema. 
In that framework, an XML annotation platform has been built, while processing tools, lexical resources and textual data communicate with each other via this platform. Editing\u002Fviewing tools have been implemented, endowed with functionalities that allow annotators to gain access to previous annotation levels as well as necessary lexical resources.",{"paper_id":3554,"title":3555,"year":213,"month":855,"day":63,"doi":3556,"resource_url":3557,"first_page":63,"last_page":63,"pdf_url":3558,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3559,"paper_type":860,"authors":3560,"abstract":3563},"lrec2002-main-160","Project Proposal TC-STAR - Make Speech to Speech Translation Real","10.63317\u002F2o4pipdy5j3a","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-160","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F160.pdf","hoge-2002-project",[3561],{"paper_id":3554,"author_seq":247,"given_name":2523,"surname":3562,"affiliation":63,"orcid":63},"Höge","The proposed project TC-STAR (technology and corpora  for speech to speech translation) is focused on technology,  platform and service development for speech to speech translation  components and systems. The components are speech recognition, speech  centered translation and speech synthesis. The project is aimed to be   launched as an integrated project in the 6th framework of the European  Commission. To prepare TC-STAR a preparatory action TC-STAR_P has been  launched to set up the infrastructure of TC-STAR. 
For further preparation  the EU-funded project LC-STAR has been started to standardize and to  create some corpora and lexica needed for all speech to speech  translation components.",{"paper_id":3565,"title":3566,"year":213,"month":855,"day":63,"doi":3567,"resource_url":3568,"first_page":63,"last_page":63,"pdf_url":3569,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3570,"paper_type":860,"authors":3571,"abstract":3590},"lrec2002-main-161","Development of a Dependency Treebank for Russian and its Possible Applications in NLP","10.63317\u002F2e2whabr2jv3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-161","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F161.pdf","boguslavsky-etal-2002-development",[3572,3574,3576,3579,3582,3585,3587],{"paper_id":3565,"author_seq":247,"given_name":2737,"surname":3573,"affiliation":63,"orcid":63},"Boguslavsky",{"paper_id":3565,"author_seq":232,"given_name":2340,"surname":3575,"affiliation":63,"orcid":63},"Chardin",{"paper_id":3565,"author_seq":218,"given_name":3577,"surname":3578,"affiliation":63,"orcid":63},"Svetlana","Grigorieva",{"paper_id":3565,"author_seq":203,"given_name":3580,"surname":3581,"affiliation":63,"orcid":63},"Nikolai","Grigoriev",{"paper_id":3565,"author_seq":188,"given_name":3583,"surname":3584,"affiliation":63,"orcid":63},"Leonid","Iomdin",{"paper_id":3565,"author_seq":172,"given_name":3583,"surname":3586,"affiliation":63,"orcid":63},"Kreidlin",{"paper_id":3565,"author_seq":155,"given_name":3588,"surname":3589,"affiliation":63,"orcid":63},"Nadezhda","Frid","The paper describes a tagging scheme designed for the  Russian Treebank and presents tools used for corpus 
creation.",{"paper_id":3592,"title":3593,"year":213,"month":855,"day":63,"doi":3594,"resource_url":3595,"first_page":63,"last_page":63,"pdf_url":3596,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3597,"paper_type":860,"authors":3598,"abstract":3602},"lrec2002-main-162","An Iterative Data Collection Approach for Multimodal Dialogue Systems","10.63317\u002F4g2nz6kjcprd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-162","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F162.pdf","rapp-strube-2002-iterative",[3599,3600],{"paper_id":3592,"author_seq":247,"given_name":1452,"surname":1865,"affiliation":63,"orcid":63},{"paper_id":3592,"author_seq":232,"given_name":2079,"surname":3601,"affiliation":63,"orcid":63},"Strube","This paper deals with the way in which data for  multimodal dialogue systems are  collected. We argue that for  multimodal data, an iterative data collection strategy should be  followed. Instead of a single major data collection effort using a  \"Wizard of OZ\" (WOZ) or \"prompting\" experimental  setup, several smaller data collections should accompany the system  development. We also describe the \"script\" experimental setup  we  developed. 
It is in between the WOZ and prompting setup, and  can be used as a cost  effective design for the first data  collection within the iterative data collection strategy.",{"paper_id":3604,"title":3605,"year":213,"month":855,"day":63,"doi":3606,"resource_url":3607,"first_page":63,"last_page":63,"pdf_url":3608,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3609,"paper_type":860,"authors":3610,"abstract":3616},"lrec2002-main-163","The Importance of Evaluation for Cross-Language System Development: the CLEF Experience","10.63317\u002F3gdvbffvi8m3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-163","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F163.pdf","peters-braschler-2002-importance",[3611,3614],{"paper_id":3604,"author_seq":247,"given_name":3612,"surname":3613,"affiliation":63,"orcid":63},"Carol","Peters",{"paper_id":3604,"author_seq":232,"given_name":2179,"surname":3615,"affiliation":63,"orcid":63},"Braschler","The aim of the Cross-Language Evaluation Forum (CLEF) is to develop and maintain an infrastructure for the evaluation of information retrieval systems operating on European languages in both monolingual and cross-language contexts, and to create test-suites of reusable data that can be employed by system developers for benchmarking purposes. Two CLEF evaluation campaigns have been held so far (CLEF 2000 and CLEF 2001); CLEF 2002 is now under way. The paper describes the objectives and the organisation of these campaigns, and gives a first assessment of the results. 
In conclusion, plans for future CLEF campaigns are reported.",{"paper_id":3618,"title":3619,"year":213,"month":855,"day":63,"doi":3620,"resource_url":3621,"first_page":63,"last_page":63,"pdf_url":3622,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3623,"paper_type":860,"authors":3624,"abstract":3647},"lrec2002-main-164","Bilingual alignment of anaphoric expressions","10.63317\u002F5drd7oetrdaw","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-164","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F164.pdf","munoz-etal-2002-bilingual",[3625,3628,3630,3632,3634,3635,3636,3638,3640,3642,3643,3645],{"paper_id":3618,"author_seq":247,"given_name":3626,"surname":3627,"affiliation":63,"orcid":63},"R.","Muñoz",{"paper_id":3618,"author_seq":232,"given_name":3626,"surname":3629,"affiliation":63,"orcid":63},"Mitkov",{"paper_id":3618,"author_seq":218,"given_name":1995,"surname":3631,"affiliation":63,"orcid":63},"Palomar",{"paper_id":3618,"author_seq":203,"given_name":1607,"surname":3633,"affiliation":63,"orcid":63},"Peral",{"paper_id":3618,"author_seq":188,"given_name":3626,"surname":3088,"affiliation":63,"orcid":63},{"paper_id":3618,"author_seq":172,"given_name":1992,"surname":1702,"affiliation":63,"orcid":63},{"paper_id":3618,"author_seq":155,"given_name":3637,"surname":2768,"affiliation":63,"orcid":63},"C.",{"paper_id":3618,"author_seq":138,"given_name":1995,"surname":3639,"affiliation":63,"orcid":63},"Saiz-Noeda",{"paper_id":3618,"author_seq":121,"given_name":1983,"surname":3641,"affiliation":63,"orcid":63},"Ferrández",{"paper_id":3618,"author_seq":104,"given_name":3637,"surname":1375,"affiliation":63,"orcid":63},{"paper_id":3618,"author_seq":87,"given_name":1989,"surname":3644,"affiliation":63,"orcid":63},"Martínez-Barco",{"paper_id":3618,"author_seq":73,"given_name":1983,"surname":3646,"affiliation":63,"orcid":63},"Suárez","In this paper we present an automatic mechanism for  bilingual (Spanish-English) 
alignment of anaphoric expressions. For this  purpose, two anaphora resolution systems were used. Both are based on  linguistic preferences and constraints, for Spanish (SUPPAR) and for  English (MARS). These systems have been independently developed and each  of them is presented individually with their evaluation results. The  majority of the paper presents an automatic alignment method for  pronominal anaphora in Spanish and English. Once an anaphor has been  solved (in both languages) this method matches anaphoric expressions and  antecedents from both texts. A bitext map method has been used for the  alignment with a set of bilingual texts for the evaluation. These texts  have been extracted from several European Community Official documents  (EUR-lex database). The alignment mechanism can be applied to different  tasks related to Machine Translation such as pattern learning for  translation or evaluation for automatic generation  of multilingual  anaphora.",{"paper_id":3649,"title":3650,"year":213,"month":855,"day":63,"doi":3651,"resource_url":3652,"first_page":63,"last_page":63,"pdf_url":3653,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3654,"paper_type":860,"authors":3655,"abstract":3662},"lrec2002-main-165","Using the Text Corpus to Create a Comprehensive List of Phrasal Verbs","10.63317\u002F26byg78236ko","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-165","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F165.pdf","kaalep-muischnek-2002-using",[3656,3659],{"paper_id":3649,"author_seq":247,"given_name":3657,"surname":3658,"affiliation":63,"orcid":63},"Heiki-Jaan","Kaalep",{"paper_id":3649,"author_seq":232,"given_name":3660,"surname":3661,"affiliation":63,"orcid":63},"Kadri","Muischnek","The paper describes extraction of Estonian multi-word  verbs from text corpora, using a language- and task-specific software  tool SENVA, which is based on a statistical language-independent  software 
tool SENTA (Dias et al, 2000). The outcome is a comprehensive  list of 16,000 phrasal verbs. We describe the extraction tool, manual  post-editing principles, and evaluate the outcome in terms of precision  and recall, comparing the results with man-made electronic dictionaries,  and with the results of a manual extraction experiment of a sub-set of  the MWV-s.",{"paper_id":3664,"title":3665,"year":213,"month":855,"day":63,"doi":3666,"resource_url":3667,"first_page":63,"last_page":63,"pdf_url":3668,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3669,"paper_type":860,"authors":3670,"abstract":3682},"lrec2002-main-166","Evaluation Corpora for Sense Disambiguation in the Medical Domain","10.63317\u002F3vrchchqzyrq","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-166","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F166.pdf","raileanu-etal-2002-evaluation",[3671,3673,3676,3679],{"paper_id":3664,"author_seq":247,"given_name":872,"surname":3672,"affiliation":63,"orcid":63},"Raileanu",{"paper_id":3664,"author_seq":232,"given_name":3674,"surname":3675,"affiliation":63,"orcid":63},"Paul","Buitelaar",{"paper_id":3664,"author_seq":218,"given_name":3677,"surname":3678,"affiliation":63,"orcid":63},"Spela","Vintar",{"paper_id":3664,"author_seq":203,"given_name":3680,"surname":3681,"affiliation":63,"orcid":63},"Jörg","Bay","An important aspect of word sense disambiguation is  the evaluation of different methods and parameters. Unfortunately, there  is a lack of test sets for evaluation, specifically for languages other  than English and even more so for specific domains like medicine. Given  that our work focuses on English as well as German text in the medical  domain, we had to develop our own evaluation corpora in order to test  our disambiguation methods. 
In this paper we describe the work on  developing these corpora, using GermaNet and UMLS as (lexical) semantic  resources, next to a description of the annotation tool KiC that we  developed for support of the annotation task.",{"paper_id":3684,"title":3685,"year":213,"month":855,"day":63,"doi":3686,"resource_url":3687,"first_page":63,"last_page":63,"pdf_url":3688,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3689,"paper_type":860,"authors":3690,"abstract":3704},"lrec2002-main-167","An Efficient and Flexible Format for Linguistic and Semantic Annotation","10.63317\u002F386nvb7rtshp","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-167","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F167.pdf","vintar-etal-2002-efficient",[3691,3693,3694,3697,3700,3701],{"paper_id":3684,"author_seq":247,"given_name":3692,"surname":3678,"affiliation":63,"orcid":63},"Špela",{"paper_id":3684,"author_seq":232,"given_name":3674,"surname":3675,"affiliation":63,"orcid":63},{"paper_id":3684,"author_seq":218,"given_name":3695,"surname":3696,"affiliation":63,"orcid":63},"Bärbel","Ripplinger",{"paper_id":3684,"author_seq":203,"given_name":3698,"surname":3699,"affiliation":63,"orcid":63},"Bogdan","Sacaleanu",{"paper_id":3684,"author_seq":188,"given_name":872,"surname":3672,"affiliation":63,"orcid":63},{"paper_id":3684,"author_seq":172,"given_name":3702,"surname":3703,"affiliation":63,"orcid":63},"Detlef","Prescher","The paper describes an XML annotation format and tool  developed within the MUCHMORE project. The annotation scheme was  designed specifically for the purposes of Cross-Lingual Information  Retrieval in the medical domain so as to allow both efficient and  flexible access to layers of information. 
We use a parallel  English-German corpus of medical abstracts and annotate it with  linguistic information (tokenisation, part-of-speech tagging,  lemmatisation and decomposition, phrase recognition, grammatical  functions) as well as semantic information from various sources. The  annotation of medical terms\u002Fconcepts, semantic types and semantic  relations is based on the Unified Medical Language System (UMLS).  Additionally, we use EuroWordNet as a general-language resource in  annotating word senses and to compare domain-specific and general  language use. A major aim of the project is also to complement existing  ontological resources by extracting new terms and new semantic  relations. We present the annotation scheme, which is conceptually  related to stand-off annotation, and describe our tool for automatic  semantic annotation.",{"paper_id":3706,"title":3707,"year":213,"month":855,"day":63,"doi":3708,"resource_url":3709,"first_page":63,"last_page":63,"pdf_url":3710,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3711,"paper_type":860,"authors":3712,"abstract":3718},"lrec2002-main-168","Valency Dictionary of Czech Verbs: Complex Tectogrammatical Annotation","10.63317\u002F3fyiew9szwj2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-168","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F168.pdf","stranakova-lopatkova-zabokrtsky-2002-valency",[3713,3716],{"paper_id":3706,"author_seq":247,"given_name":3714,"surname":3715,"affiliation":63,"orcid":63},"Markéta","Straňáková-Lopatková",{"paper_id":3706,"author_seq":232,"given_name":3717,"surname":2258,"affiliation":63,"orcid":63},"Zdenĕk","A lexicon containing a certain kind of syntactic  information about verbs is one of the crucial prerequisities for most  tasks in Natural Language Processing. 
The goal of the project described  in the paper is to create a human- and machine-readable lexicon  capturing in detail valency behavior of hundreds most frequent Czech  verbs. Manual annotation effort consumed at this project limits the  speed of its growth on the one hand, but guarantees significantly higher  data consistency than that of automatically acquired lexicons. In this  paper, we outline the theoretical background on which the lexicon is  based, and describe the annotation schema (lexicon data structure,  annotation tools, etc.).  Selected quantitative characteristics of  the lexicon are presented as well.",{"paper_id":3720,"title":3721,"year":213,"month":855,"day":63,"doi":3722,"resource_url":3723,"first_page":63,"last_page":63,"pdf_url":3724,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3725,"paper_type":860,"authors":3726,"abstract":3730},"lrec2002-main-169","A Comparative Evaluation of Collocation Extraction Techniques","10.63317\u002F283gn3b6z6pj","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-169","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F169.pdf","pearce-2002-comparative",[3727],{"paper_id":3720,"author_seq":247,"given_name":3728,"surname":3729,"affiliation":63,"orcid":63},"Darren","Pearce","This paper describes an experiment that attempts to compare a  range of existing collocation extraction techniques as well as\tthe implementation of a new technique based on tests for\tlexical substitutability. After a description of the\texperiment details, the techniques are discussed with\tparticular emphasis on any adaptations that are required in  order to evaluate it in the way proposed. This is followed by\ta discussion on the relative strengths and weaknesses of the\ttechniques with reference to the results obtained. Since there\tis no general agreement on the exact nature of collocation,\tevaluating techniques with reference to any single standard is\tsomewhat controversial. 
 Departing from this point, part of\tthe concluding discussion includes initial proposals for a\tcommon framework for evaluation of collocation extraction\ttechniques.",{"paper_id":3732,"title":3733,"year":213,"month":855,"day":63,"doi":3734,"resource_url":3735,"first_page":63,"last_page":63,"pdf_url":3736,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3737,"paper_type":860,"authors":3738,"abstract":3742},"lrec2002-main-170","Building the Croatian National Corpus","10.63317\u002F2rsns9f3rwmo","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-170","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F170.pdf","tadic-2002-building",[3739],{"paper_id":3732,"author_seq":247,"given_name":3740,"surname":3741,"affiliation":63,"orcid":63},"Marko","Tadić","The paper presents the work being done so far on the building of the Croatian National Corpus (HNK). It's being collected since 1998 at the Institute of Linguistics, Faculty of Philosophy, University of Zagreb. The size, time-span, its composition and criteria for text selection are being presented. The HNK consists of two parts: 1) 30-million corpus of contemporary Croatian language, 2) Croatian Electronic Textual Archive. The procedures of the corpus mark-up and processing are being discussed. One of the most interesting features of this corpus since its launch in 1998 is its availability for querying through the WWW. 
The future directions of 30m corpus enlargement to 100m in next few years, enhanced corpus management and querying as well as annotation and processing are being discussed at the end.",{"paper_id":3744,"title":3745,"year":213,"month":855,"day":63,"doi":3746,"resource_url":3747,"first_page":63,"last_page":63,"pdf_url":3748,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3749,"paper_type":860,"authors":3750,"abstract":3757},"lrec2002-main-171","Annotation Driven Concordancing: the PAX Toolkit","10.63317\u002F4jf4uvzno682","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-171","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F171.pdf","trippel-gibbon-2002-annotation",[3751,3754],{"paper_id":3744,"author_seq":247,"given_name":3752,"surname":3753,"affiliation":63,"orcid":63},"Thorsten","Trippel",{"paper_id":3744,"author_seq":232,"given_name":3755,"surname":3756,"affiliation":63,"orcid":63},"Dafydd","Gibbon","We describe PAX, \"Portable Audio Concordance System\", a\t  proof-of-concept prototype of a multipurpose, multilingual audio\t  concordance toolkit. The primary goal is to support efficient grammar\t  and lexicon construction in the documentation of unwritten languages;  languages currently included are Ega, Anyi, and Koulango (Ivory\t  Coast), additional samples in German and English. 
The approach\t  combines methods from corpus linguistics, annotation theory and\t  practice, phonetics and lexicography.",{"paper_id":3759,"title":3760,"year":213,"month":855,"day":63,"doi":3761,"resource_url":3762,"first_page":63,"last_page":63,"pdf_url":3763,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3764,"paper_type":860,"authors":3765,"abstract":3770},"lrec2002-main-172","DELOS: An Automatically Tagged Economic Corpus for Modern Greek","10.63317\u002F2g6d7oi9rn9j","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-172","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F172.pdf","kermanidis-etal-2002-delos",[3766,3768,3769],{"paper_id":3759,"author_seq":247,"given_name":3767,"surname":2964,"affiliation":63,"orcid":63},"Katia Lida",{"paper_id":3759,"author_seq":232,"given_name":2966,"surname":2967,"affiliation":63,"orcid":63},{"paper_id":3759,"author_seq":218,"given_name":2535,"surname":2969,"affiliation":63,"orcid":63},"Text corpora resources have become an essential tool for Natural Language Processing tasks over the past years. A wide range of applications like information retrieval, ontology and terminology extraction require a sufficiently large corpus but of restricted domain. Manual tagging of such a corpus is very costly, making automatic annotation by a set of linguistic tools a very challenging idea. DELOS, described in this paper, is a Modern Greek corpus of economic domain consisting of 5 million word tokens, which is automatically tagged for morphology and shallow syntactic relations. The annotating tools described are embodied in an integrated system and their application to the corpus is performed using the GATE text engineering platform. 
The system output is a textual database marked up with the annotation tagset in plain text as well as in XML format.",{"paper_id":3772,"title":3773,"year":213,"month":855,"day":63,"doi":3774,"resource_url":3775,"first_page":63,"last_page":63,"pdf_url":3776,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3777,"paper_type":860,"authors":3778,"abstract":3784},"lrec2002-main-173","Give me a bug. a framework for a bug report service","10.63317\u002F4ntx6jymg2xf","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-173","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F173.pdf","van-den-heuvel-etal-2002-give",[3779,3782,3783],{"paper_id":3772,"author_seq":247,"given_name":3780,"surname":3781,"affiliation":63,"orcid":63},"Henk","van den Heuvel",{"paper_id":3772,"author_seq":232,"given_name":1808,"surname":1809,"affiliation":63,"orcid":63},{"paper_id":3772,"author_seq":218,"given_name":2523,"surname":3562,"affiliation":63,"orcid":63},"Recently, ELRA initiated the development of a bug report mechanism for the speech databases in its catalogue. This paper reports on the framework of this new service and its practical implementation. Topics dealt with are bug administration, communication with the reporters, formal error listings, corrections of databases, and the release of corrective patches and updated versions of databases.  
The bug report service is now up and running at \u003Chttp:\u002F\u002Fwww.spex.nl\u002Fvalidationcentre\u002Fbugreport.html>",{"paper_id":3786,"title":3787,"year":213,"month":855,"day":63,"doi":3788,"resource_url":3789,"first_page":63,"last_page":63,"pdf_url":3790,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3791,"paper_type":860,"authors":3792,"abstract":3805},"lrec2002-main-174","Interface Databases: Design and Collection of a Multilingual Emotional Speech Database","10.63317\u002F35qmchms4f2w","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-174","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F174.pdf","hozjan-etal-2002-interface",[3793,3796,3798,3800,3802],{"paper_id":3786,"author_seq":247,"given_name":3794,"surname":3795,"affiliation":63,"orcid":63},"Vladimir","Hozjan",{"paper_id":3786,"author_seq":232,"given_name":2358,"surname":3797,"affiliation":63,"orcid":63},"Kacic",{"paper_id":3786,"author_seq":218,"given_name":3799,"surname":1702,"affiliation":63,"orcid":63},"Asunción",{"paper_id":3786,"author_seq":203,"given_name":1692,"surname":3801,"affiliation":63,"orcid":63},"Bonafonte",{"paper_id":3786,"author_seq":188,"given_name":3803,"surname":3804,"affiliation":63,"orcid":63},"Albino","Nogueiras","As a part of the IST project Interface (\"Multimodal Analysis\u002FSynthesis System  for Human Interaction to Virtual and Augmented environments\"), an emotional  speech database for Slovenian, English, Spanish, and French language has been  recorded. The database is designed for general study of emotional speech as  well as analysis of emotion characteristics for speech synthesis and for  automatic emotion classification purposes. Six emotions have been defined:  anger, sadness, joy, fear, disgust and surprise. The neutral styles were also  recorded. One male speaker and one female speaker have been recorded, except  for English language where two male and one female speaker have been recorded.  
All the speakers are actors. The corpora consist of 175-190 sentences for each  language. For Spanish and Slovenian databases subjective evaluation tests have  been made. The recorded Interface emotional speech database represents a good  basis for emotional speech analysis and is also useful in synthesis of emotional speech.",{"paper_id":3807,"title":3808,"year":213,"month":855,"day":63,"doi":3809,"resource_url":3810,"first_page":63,"last_page":63,"pdf_url":3811,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3812,"paper_type":860,"authors":3813,"abstract":3816},"lrec2002-main-175","Objective analysis of emotional speech for English and Slovenian Interface emotional speech databases","10.63317\u002F3ddkbtd2h99m","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-175","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F175.pdf","hozjan-kacic-2002-objective",[3814,3815],{"paper_id":3807,"author_seq":247,"given_name":3794,"surname":3795,"affiliation":63,"orcid":63},{"paper_id":3807,"author_seq":232,"given_name":2358,"surname":3797,"affiliation":63,"orcid":63},"In this paper we propose a new approach for analysis of emotional speech  prosody features. The aim of the analysis is definition of emotional features  that characterise emotions. Analysis was performed on emotional speech databases that were recorded in the framework of the project \"Multimodal  Analysis\u002FSynthesis System for Human Interaction to Virtual and Augmented  Environments\" (Interface). The new approach determines emotional features in  three steps. In the first step the low-level features were determined, second  step includes definition of high-level features and in the last step the  emotional features were determined. Emotional features for native English and  native Slovenian speakers were analysed. 
A comparison of emotional features  between English male and female speaker, English male and Slovenian male as  well as English female and Slovenian female speaker was performed. New approach  for analysis of emotional features enables consistent and objective analysis.  It enables comparison of emotional features between different speakers and at  the same time searches for new correlates of emotions in speech.",{"paper_id":3818,"title":3819,"year":213,"month":855,"day":63,"doi":3820,"resource_url":3821,"first_page":63,"last_page":63,"pdf_url":3822,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3823,"paper_type":860,"authors":3824,"abstract":3831},"lrec2002-main-176","Methods and Tools for Speech Data Acquisition exploiting a Database of German Parliamentary Speeches and Transcripts from the Internet","10.63317\u002F4mdpvf5y8yke","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-176","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F176.pdf","biatov-kohler-2002-methods",[3825,3828],{"paper_id":3818,"author_seq":247,"given_name":3826,"surname":3827,"affiliation":63,"orcid":63},"Konstantin","Biatov",{"paper_id":3818,"author_seq":232,"given_name":3829,"surname":3830,"affiliation":63,"orcid":63},"Joachim","Köhler","This paper describes methods that exploit  stenographic transcripts of the German  parliament to improve the  acoustic models of a speech recognition system for this domain. The  stenographic transcripts and the speech data are available on the  Internet. Using data from the Internet makes it possible to avoid the  costly process of the collection and annotation of a huge amount of  data. The automatic data acquisition technique works using the  stenographic transcripts and acoustic data from the German parliamentary  speeches plus general acoustic models, trained on different data. The  idea of this technique is to generate special finite state automata from  the stenographic transcripts. 
These finite state automata simulate  potential possible correspondences between the  stenographic  transcript and the spoken audio content, i.e. accurate transcript. The  first step is the recognition of the speech data using finite state  automaton as a language model. The next step is to find, to extract and  to verify the match between sections of recognized  words and  actually spoken audio content. After this, the automatically extracted  and verified data can be used for acoustic model training. Experiments  show that for a given  recognition task from the German Parliament  domain the absolute decrease of the word error rate is 20%.",{"paper_id":3833,"title":3834,"year":213,"month":855,"day":63,"doi":3835,"resource_url":3836,"first_page":63,"last_page":63,"pdf_url":3837,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3838,"paper_type":860,"authors":3839,"abstract":3851},"lrec2002-main-177","SPEECON – Speech Databases for Consumer Devices: Database Specification and Validation","10.63317\u002F3qdvv58t5zk4","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-177","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F177.pdf","iskra-etal-2002-speecon",[3840,3841,3843,3846,3847,3849],{"paper_id":3833,"author_seq":247,"given_name":1834,"surname":1835,"affiliation":63,"orcid":63},{"paper_id":3833,"author_seq":232,"given_name":2600,"surname":3842,"affiliation":63,"orcid":63},"Grosskopf",{"paper_id":3833,"author_seq":218,"given_name":3844,"surname":3845,"affiliation":63,"orcid":63},"Krzysztof","Marasek",{"paper_id":3833,"author_seq":203,"given_name":3780,"surname":3781,"affiliation":63,"orcid":63},{"paper_id":3833,"author_seq":188,"given_name":2129,"surname":3848,"affiliation":63,"orcid":63},"Diehl",{"paper_id":3833,"author_seq":172,"given_name":1431,"surname":3850,"affiliation":63,"orcid":63},"Kiessling","SPEECON (Speech-Driven Interfaces for Consumer Devices) is a project which aims to develop voice-driven 
interfaces for consumer applications. Led by an industrial consortium, the project’s goal is to collect speech data for at least 20 languages and 600 speakers per language (mostly adults but children as well). Recorded in different environments which are expected to be representative for the future applications, the database corpus comprises both spontaneous and read speech, the latter including phonetically rich material, a large number of application commands and isolated items such as digits, names, etc. In order to safeguard consistency and high quality of the databases, all of them are subject to validation. This paper describes in detail the specifications of the databases as well as the validation procedure.",{"paper_id":3853,"title":3854,"year":213,"month":855,"day":63,"doi":3855,"resource_url":3856,"first_page":63,"last_page":63,"pdf_url":3857,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3858,"paper_type":860,"authors":3859,"abstract":3878},"lrec2002-main-178","Technical Terminology as a Critical 
Resource","10.63317\u002F5kd5tv2t8g2a","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-178","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F178.pdf","dowdall-etal-2002-technical",[3860,3863,3865,3868,3871,3874,3876],{"paper_id":3853,"author_seq":247,"given_name":3861,"surname":3862,"affiliation":63,"orcid":63},"James","Dowdall",{"paper_id":3853,"author_seq":232,"given_name":2079,"surname":3864,"affiliation":63,"orcid":63},"Hess",{"paper_id":3853,"author_seq":218,"given_name":3866,"surname":3867,"affiliation":63,"orcid":63},"Neeme","Kahusk",{"paper_id":3853,"author_seq":203,"given_name":3869,"surname":3870,"affiliation":63,"orcid":63},"Kaarel","Kaljurand",{"paper_id":3853,"author_seq":188,"given_name":3872,"surname":3873,"affiliation":63,"orcid":63},"Mare","Koit",{"paper_id":3853,"author_seq":172,"given_name":1308,"surname":3875,"affiliation":63,"orcid":63},"Rinaldi",{"paper_id":3853,"author_seq":155,"given_name":3660,"surname":3877,"affiliation":63,"orcid":63},"Vider","Technical documentation  is  riddled with domain  specific terminology which   needs to be  detected and  properly organized  in  order to be meaningfully used.  
In this  paper we  describe how  we coped with  the problem  of terminology detection for a  specific type of document and how   the extracted terminology was   used  within the  context of our Answer Extraction System.",{"paper_id":3880,"title":3881,"year":213,"month":855,"day":63,"doi":3882,"resource_url":3883,"first_page":63,"last_page":63,"pdf_url":3884,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3885,"paper_type":860,"authors":3886,"abstract":3893},"lrec2002-main-179","Converting a Corpus into a Hypertext: An Approach Using XML Topic Maps and XSLT","10.63317\u002F4mtzdw4g2swg","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-179","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F179.pdf","lenz-storrer-2002-converting",[3887,3890],{"paper_id":3880,"author_seq":247,"given_name":3888,"surname":3889,"affiliation":63,"orcid":63},"Eva Anna","Lenz",{"paper_id":3880,"author_seq":232,"given_name":3891,"surname":3892,"affiliation":63,"orcid":63},"Angelika","Storrer","In the context of the HyTex project, our goal is to  convert a corpus into a hypertext, basing conversion strategies on  annotations which explicitly mark up the text-grammatical structures and  relations between text segments. Domain-specific knowledge is  represented in the form of a knowledge net, using topic maps. We use XML  as an interchange format. In this paper, we focus on a declarative rule  language designed to express conversion strategies in terms of  text-grammatical structures and hypertext results. 
The strategies can be  formulated in a concise formal syntax which is independend of the  markup, and which can be transformed automatically into executable  program code.",{"paper_id":3895,"title":3896,"year":213,"month":855,"day":63,"doi":3897,"resource_url":3898,"first_page":63,"last_page":63,"pdf_url":3899,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3900,"paper_type":860,"authors":3901,"abstract":3911},"lrec2002-main-180","Different Ways of Evaluating a Swedish Grammar Checker","10.63317\u002F5ox9fgwing4i","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-180","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F180.pdf","domeij-etal-2002-different",[3902,3905,3908],{"paper_id":3895,"author_seq":247,"given_name":3903,"surname":3904,"affiliation":63,"orcid":63},"Rickard","Domeij",{"paper_id":3895,"author_seq":232,"given_name":3906,"surname":3907,"affiliation":63,"orcid":63},"Ola","Knutsson",{"paper_id":3895,"author_seq":218,"given_name":3909,"surname":3910,"affiliation":63,"orcid":63},"Kerstin Severinson","Eklundh","Three different ways of evaluating a Swedish grammar checker are presented and discussed in this article. The first evaluation concerns measuring the program's detection capacity on five text genres. The measures (precision and recall) are often used in evaluating grammar checkers. However, in order to test and improve the usability of grammar checking software, they need to be complemented with user-oriented methods. Consequently, the second and the third evaluations presented in the article both involve users. The second evaluation focuses on user reactions to grammar error presentations, especially with regard to false alarms and erroneous error identification. The third and last evaluation focuses on problems in supporting users' cognitive revision processes. It also examines user motives behind choosing to correct or not to correct problems highlighted by the program. 
Advantages and disadvantages of the different evaluation methods are discussed.",{"paper_id":3913,"title":3914,"year":213,"month":855,"day":63,"doi":3915,"resource_url":3916,"first_page":63,"last_page":63,"pdf_url":3917,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3918,"paper_type":860,"authors":3919,"abstract":3928},"lrec2002-main-181","New Developments in Ontological Semantics","10.63317\u002F57hjqxtuiujk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-181","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F181.pdf","moreno-ortiz-etal-2002-new",[3920,3922,3925],{"paper_id":3913,"author_seq":247,"given_name":1692,"surname":3921,"affiliation":63,"orcid":63},"Moreno Ortiz",{"paper_id":3913,"author_seq":232,"given_name":3923,"surname":3924,"affiliation":63,"orcid":63},"Victor","Raskin",{"paper_id":3913,"author_seq":218,"given_name":3926,"surname":3927,"affiliation":63,"orcid":63},"Sergei","Nirenburg","In this paper we discuss ongoing activity within the  approach to natural language processing known as ontological semantics,  as defined in Nirenburg and Raskin (forthcoming). After a brief  discussion of the principal tenets on which this approach is built, and  a revision of extant implementations that have led toward its present  form, we concentrate on some specific aspects that are key to the  development of this approach, such as the acquisition of the semantics  of lexical items and, intimately connected with this, the ontology, the  central resource in this approach. 
Although we review the fundamentals  of the approach, the focus is on practical aspects of implementation,  such as the automation of static knowledge acquisition and the  acquisition of scripts to enrich the ontology further.",{"paper_id":3930,"title":3931,"year":213,"month":855,"day":63,"doi":3932,"resource_url":3933,"first_page":63,"last_page":63,"pdf_url":3934,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3935,"paper_type":860,"authors":3936,"abstract":3946},"lrec2002-main-182","Towards Reusable NLP Components","10.63317\u002F2negwqar4i76","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-182","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F182.pdf","todirascu-etal-2002-towards",[3937,3940,3943],{"paper_id":3930,"author_seq":247,"given_name":3938,"surname":3939,"affiliation":63,"orcid":63},"Amalia","Todirascu",{"paper_id":3930,"author_seq":232,"given_name":3941,"surname":3942,"affiliation":63,"orcid":63},"Eric","Kow",{"paper_id":3930,"author_seq":218,"given_name":3944,"surname":3945,"affiliation":63,"orcid":63},"Laurent","Romary","We propose a methodology for transforming NLP modules  into reusable components that can be integrated it into a distributed  and open architecture. 
We illustrate the methodology by showing the  adaptations needed to transform an LTAG parser into a bundle of parsing  and lexical services.",{"paper_id":3948,"title":3949,"year":213,"month":855,"day":63,"doi":3950,"resource_url":3951,"first_page":63,"last_page":63,"pdf_url":3952,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3953,"paper_type":860,"authors":3954,"abstract":3964},"lrec2002-main-183","Subcategorization Acquisition as an Evaluation Method for WSD","10.63317\u002F446xignog3vu","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-183","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F183.pdf","preiss-etal-2002-subcategorization",[3955,3958,3961],{"paper_id":3948,"author_seq":247,"given_name":3956,"surname":3957,"affiliation":63,"orcid":63},"Judita","Preiss",{"paper_id":3948,"author_seq":232,"given_name":3959,"surname":3960,"affiliation":63,"orcid":63},"Anna","Korhonen",{"paper_id":3948,"author_seq":218,"given_name":3962,"surname":3963,"affiliation":63,"orcid":63},"Ted","Briscoe","Evaluation of word sense disambiguation (WSD) systems is  often based on machine-readable dictionaries (MRDs). Such evaluation typically employs a set of fine-grained dictionary senses and  considers them all to be equally important. In this paper, we propose a novel evaluation method for WSD systems in the context of  automatic subcategorization acquisition. Building on an extant subcategorization  acquisition system, we show that the system would benefit from WSD and propose modifications which allow it to make use of WSD. 
The enhanced  subcategorization acquisition system can then be used as a task-based evaluation method for WSD systems where both the notion of sense and  the sense's relevance to the evaluation process is determined by the application itself.",{"paper_id":3966,"title":3967,"year":213,"month":855,"day":63,"doi":3968,"resource_url":3969,"first_page":63,"last_page":63,"pdf_url":3970,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3971,"paper_type":860,"authors":3972,"abstract":3979},"lrec2002-main-184","Resource Sharing System for Humanity Researches","10.63317\u002F2igwo27h5xcu","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-184","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F184.pdf","hara-yasunaga-2002-resource",[3973,3976],{"paper_id":3966,"author_seq":247,"given_name":3974,"surname":3975,"affiliation":63,"orcid":63},"Shoichiro","Hara",{"paper_id":3966,"author_seq":232,"given_name":3977,"surname":3978,"affiliation":63,"orcid":63},"Hisashi","Yasunaga","The NIJL has developed variety kinds of databases,  i.e., catalogue databases, image databases, movie databases, and full  text databases. As these systems have been developed under different  backgrounds, users have to learn different command for each database.  Furthermore, although some databases have similar contents, users cannot  access related information unless they understand NIJL database system  well. This paper  describes NIJL's new resource sharing system,  called \"NIJL Collaboration System,\" to solve above problems.  The \"NIJL Collaboration System\" is an ongoing project  involving  data conversion to XML and developing platform  independent data manipulation system for a distributed environment. The  essential of the project is to introduce XML as a common data  description, Dublin Core meta-data as a common access points to  databases, and Z39.50 as a common searching protocol. 
This system  enables users to access various sorts of multimedia data in distributed  databases on the WEB seamlessly by a single graphical user interface.",{"paper_id":3981,"title":3982,"year":213,"month":855,"day":63,"doi":3983,"resource_url":3984,"first_page":63,"last_page":63,"pdf_url":3985,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":3986,"paper_type":860,"authors":3987,"abstract":3994},"lrec2002-main-185","Study and quantification of the declination for the Arabic speech synthesis system PARADIS.","10.63317\u002F3xzefweshhrt","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-185","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F185.pdf","benabbou-etal-2002-study",[3988,3990,3992],{"paper_id":3981,"author_seq":247,"given_name":1983,"surname":3989,"affiliation":63,"orcid":63},"Benabbou",{"paper_id":3981,"author_seq":232,"given_name":3444,"surname":3991,"affiliation":63,"orcid":63},"Chenfour",{"paper_id":3981,"author_seq":218,"given_name":1983,"surname":3993,"affiliation":63,"orcid":63},"Mouradi","The modeling of the melody in a Text-To-Speech System  is indispensable to have a good quality of synthesis and to approach the  naturalness. The study of the melody generally includes the analysis of  the local melody events relating to the accent and the declination of  the global melody contour of an utterance. In this paper, we will  present an experimental study of the declination phenomenon concerning  the Arabic language. Our observations and results are of a great  contribution for the quality of synthesis in our Text-To-Speech system  PARADIS (Psola ARAbic DIsyllable Synthesizer). The melodic model that we  have integrated in PARADIS is based on the prediction of a declination  line on which local melodic events related to stressed syllables would  be superimposed. Our study will include the description and the  classification of the declination line in a  context of isolated  sentences. 
The classification will be established mainly according to   the modality and the number of syllables in the sentence. We will also  study the phenomenon related to the resetting of F0 value which often  affect the declination.",{"paper_id":3996,"title":3997,"year":213,"month":855,"day":63,"doi":3998,"resource_url":3999,"first_page":63,"last_page":63,"pdf_url":4000,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4001,"paper_type":860,"authors":4002,"abstract":4010},"lrec2002-main-186","Design and Implementation of the Slovenian Phonetic and Morphology Lexicons for the Use in Spoken Language Applications","10.63317\u002F4ywo9e4nrvmq","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-186","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F186.pdf","rojc-etal-2002-design",[4003,4006,4007],{"paper_id":3996,"author_seq":247,"given_name":4004,"surname":4005,"affiliation":63,"orcid":63},"Matej","Rojc",{"paper_id":3996,"author_seq":232,"given_name":2358,"surname":2359,"affiliation":63,"orcid":63},{"paper_id":3996,"author_seq":218,"given_name":4008,"surname":4009,"affiliation":63,"orcid":63},"Darinka","Verdonik","Phonetic and Morphology Lexicons that can be used in Spoken Language Applications are costly and time-consuming to build.  This paper reports on a project aiming at the semi-automatic development of large phonetic (SIflex) and morphology (SImlex)  lexicons for Slovenian language. The main goal of the project is to build the phonetic and  morphology lexicon for Slovenian language that will be used within the framework of  various applications in speech processing (e.g. speech synthesis and recognition),  natural language processing (e.g. spell checking) and for studying and assessing automatic  grapheme-to-phoneme transcription. In automatic speech recognition one of the major  problem is extremely high variability of pronunciations. 
One part of this variability  can be taken into account through a training of the acoustic-phonetic units from a large amount  of data. Another part of variability must be modeled in the lexicon as pronunciation  variants. In the case of text-to-speech systems it is also very usable to be able to detect  homographs and choose the correct pronunciation according to the context information.  All this was our motivation for developing both lexicons for Slovenian language.  Currently the created phonetic lexicon (SIflex) contains more than 130.000 items,  whereas the morphology lexicon (SImlex) consists of approximately 600.000 inflected  forms, including information on the orthography, pronunciation, stress and  morphosyntactic features, as defined in the framework of the Multext project.",{"paper_id":4012,"title":4013,"year":213,"month":855,"day":63,"doi":4014,"resource_url":4015,"first_page":63,"last_page":63,"pdf_url":4016,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4017,"paper_type":860,"authors":4018,"abstract":4025},"lrec2002-main-187","Webaffix: Discovering Morphological Links on the WWW","10.63317\u002F2o5b95p436ok","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-187","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F187.pdf","hathout-tanguy-2002-webaffix",[4019,4022],{"paper_id":4012,"author_seq":247,"given_name":4020,"surname":4021,"affiliation":63,"orcid":63},"Nabil","Hathout",{"paper_id":4012,"author_seq":232,"given_name":4023,"surname":4024,"affiliation":63,"orcid":63},"Ludovic","Tanguy","This paper presents a new language-independent method  for finding morphological links between newly appeared words (i.e.  absent from reference word lists). 
Using the WWW as a corpus, the  Webaffix tool detects the occurrences of new derived lexemes based on a  given suffix, proposes a base lexeme following a standard scheme (such  as noun-verb), and then performs a compatibility test on the word pairs  produced, using the Web again, but as a source of cooccurrences. The  resulting pairs of words are used to build generic morphological  databases useful for a number of NLP tasks. We develop and comment an  example use of Webaffix to find new noun\u002Fverb pairs in French.",{"paper_id":4027,"title":4028,"year":213,"month":855,"day":63,"doi":4029,"resource_url":4030,"first_page":63,"last_page":63,"pdf_url":4031,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4032,"paper_type":860,"authors":4033,"abstract":4040},"lrec2002-main-188","Evaluation of Thesaurus on Sociopolitical Life as Information-Retrieval Tool","10.63317\u002F3tojbmqmv275","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-188","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F188.pdf","loukachevitch-dobrov-2002-evaluation",[4034,4037],{"paper_id":4027,"author_seq":247,"given_name":4035,"surname":4036,"affiliation":63,"orcid":63},"Natalia V.","Loukachevitch",{"paper_id":4027,"author_seq":232,"given_name":4038,"surname":4039,"affiliation":63,"orcid":63},"Boris V.","Dobrov","In the paper we present description of Thesaurus on Sociopolitical life,  which was constructed as a tool for automatic text processing of large text collections. Specific features of the thesaurus in comparison  to conventional information-retrieval thesauri for manual indexing are described. 
Evaluation of thesaurus-based information retrieval  for short queries showed considerable improvement of the model in comparison to vector model.",{"paper_id":4042,"title":4043,"year":213,"month":855,"day":63,"doi":4044,"resource_url":4045,"first_page":63,"last_page":63,"pdf_url":4046,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4047,"paper_type":860,"authors":4048,"abstract":4050},"lrec2002-main-189","From WordNet to CELEX: acquiring morphological links from dictionaries of synonyms","10.63317\u002F48zumjrrprhy","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-189","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F189.pdf","hathout-2002-wordnet",[4049],{"paper_id":4042,"author_seq":247,"given_name":4020,"surname":4021,"affiliation":63,"orcid":63},"Morphological resources such as CELEX do not exist  for many languages. NLP and RI systems that operate on texts and  documents written in these languages have then to rely on morphological  resources acquired from lexica or corpora. These resources usually  suffer from a problem of precision because no a priori semantic  knowledge is used for their acquisition. The paper proposes a robust and  language independent technique to acquire morphological constructional  relations from dictionaries of synonyms. The idea is to explore  simultaneously synonymy and morphological relations in order to make  more accurate prediction. 
The paper presents an evaluation of the  technique and a comparison of the acquired morphological links with the  CELEX database.",{"paper_id":4052,"title":4053,"year":213,"month":855,"day":63,"doi":4054,"resource_url":4055,"first_page":63,"last_page":63,"pdf_url":4056,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4057,"paper_type":860,"authors":4058,"abstract":63},"lrec2002-main-190","Using Descriptive Generalisations in the Acquisition of Lexical Data for Word Formation","10.63317\u002F5ovz5t7t7p5q","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-190","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F190.pdf","heid-etal-2002-using",[4059,4061,4064],{"paper_id":4052,"author_seq":247,"given_name":1660,"surname":4060,"affiliation":63,"orcid":63},"Heid",{"paper_id":4052,"author_seq":232,"given_name":4062,"surname":4063,"affiliation":63,"orcid":63},"Bettina","Säuberlich",{"paper_id":4052,"author_seq":218,"given_name":4065,"surname":4066,"affiliation":63,"orcid":63},"Arne","Fitschen",{"paper_id":4068,"title":4069,"year":213,"month":855,"day":63,"doi":4070,"resource_url":4071,"first_page":63,"last_page":63,"pdf_url":4072,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4073,"paper_type":860,"authors":4074,"abstract":4089},"lrec2002-main-191","Use of XML and Relational Databases for Consistent Development and Maintenance of Lexicons and Annotated 
Corpora","10.63317\u002F3ouk5ojcvgbc","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-191","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F191.pdf","asahara-etal-2002-use",[4075,4078,4081,4084,4087],{"paper_id":4068,"author_seq":247,"given_name":4076,"surname":4077,"affiliation":63,"orcid":63},"Masayuki","Asahara",{"paper_id":4068,"author_seq":232,"given_name":4079,"surname":4080,"affiliation":63,"orcid":63},"Ryuichi","Yoneda",{"paper_id":4068,"author_seq":218,"given_name":4082,"surname":4083,"affiliation":63,"orcid":63},"Akiko","Yamashita",{"paper_id":4068,"author_seq":203,"given_name":4085,"surname":4086,"affiliation":63,"orcid":63},"Yasuharu","Den",{"paper_id":4068,"author_seq":188,"given_name":4088,"surname":2894,"affiliation":63,"orcid":63},"Yuji","In this paper, we present a use of XML and relational  database for developing and maintaining Japanese linguistic resources.  In languages that do not provide word delimitation in texts (e.g.  Chinese and Japanese), consistent delimitation definition of words in a  lexicon is a critical issue to build POS tagged corpora. When we change  the definition of word delimitation in the lexicon, we need to modify  the tagged corpora to make them consistent with the lexicon. We propose  a use of relational database to perform these modifications in tandem.  Hence, in the Japanese language, there are several standards for word  delimitation definition. To accommodate more than one definition of word  delimitation, we compose a compounding word lexicon in the database. 
The  compounding word lexicon includes dependency structures of compounding  words.",{"paper_id":4091,"title":4092,"year":213,"month":855,"day":63,"doi":4093,"resource_url":4094,"first_page":63,"last_page":63,"pdf_url":4095,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4096,"paper_type":860,"authors":4097,"abstract":4105},"lrec2002-main-192","Linguistic and Computational Problems for the Creation of an Italian Children’s Corpus of Spoken Language","10.63317\u002F29zd2jx7tk75","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-192","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F192.pdf","pecchia-etal-2002-linguistic",[4098,4100,4102],{"paper_id":4091,"author_seq":247,"given_name":1127,"surname":4099,"affiliation":63,"orcid":63},"Pecchia",{"paper_id":4091,"author_seq":232,"given_name":4101,"surname":1984,"affiliation":63,"orcid":63},"Giuseppe",{"paper_id":4091,"author_seq":218,"given_name":4103,"surname":4104,"affiliation":63,"orcid":63},"Elisabetta","Guazzini","In this paper we describe the criteria adopted for the creation of a corpus of spoken language produced by children of six to eleven years of age in different communicative situations, the methodology used for the collection of data, the transcription, coding and lemmatization phases. We also give some quantitative descriptions about nouns, verbs and adjectives present in the corpus. Qualitative analyses on the adjectives are underway.  
This work is to be included among the activities carried out within the framework of the \"Corpus di Linguaggio Infantile\" (C.L.I.), a special project of the Italian National Research Council (CNR).",{"paper_id":4107,"title":4108,"year":213,"month":855,"day":63,"doi":4109,"resource_url":4110,"first_page":63,"last_page":63,"pdf_url":4111,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4112,"paper_type":860,"authors":4113,"abstract":4122},"lrec2002-main-193","A System for Incremental and Interactive Word Linking","10.63317\u002F29gmv69qi7rw","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-193","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F193.pdf","ahrenberg-etal-2002-system",[4114,4116,4119],{"paper_id":4107,"author_seq":247,"given_name":1505,"surname":4115,"affiliation":63,"orcid":63},"Ahrenberg",{"paper_id":4107,"author_seq":232,"given_name":4117,"surname":4118,"affiliation":63,"orcid":63},"Mikael","Andersson",{"paper_id":4107,"author_seq":218,"given_name":4120,"surname":4121,"affiliation":63,"orcid":63},"Magnus","Merkel","Aligned parallel corpora constitute a critical information resource for a great number of linguistic and technological endeavours. Automatic sentence alignment has reached a level whereby large parallel documents can be fully aligned with the aid of interactive post-editing tools. Word alignment systems have not yet reached the same level of performance, but are good enough to support full word alignment if embedded in an interactive system. 
In this paper we describe a system for fast and accurate word alignment currently under development at our department, where the user can review and improve the output from an automatic system in an incremental fashion.",{"paper_id":4124,"title":4125,"year":213,"month":855,"day":63,"doi":4126,"resource_url":4127,"first_page":63,"last_page":63,"pdf_url":4128,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4129,"paper_type":860,"authors":4130,"abstract":4133},"lrec2002-main-194","Old Sources and Modern Procedures: Computer Processing of Old-Church Slavonic","10.63317\u002F4cq3e346uq4y","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-194","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F194.pdf","ribarov-2002-old",[4131],{"paper_id":4124,"author_seq":247,"given_name":2038,"surname":4132,"affiliation":63,"orcid":63},"Ribarov","A framework for computer processing of Old-Church  Slavonic including its specific features is presented. The corpus of  Old-Church Slavonic and its annotation is introduced. 
Incorporation of  manually pre-prepared card catalogues into a corpus is  proposed.",{"paper_id":4135,"title":4136,"year":213,"month":855,"day":63,"doi":4137,"resource_url":4138,"first_page":63,"last_page":63,"pdf_url":4139,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4140,"paper_type":860,"authors":4141,"abstract":4145},"lrec2002-main-195","Compiling an Interactive Literary Translation Web Site for Education Purposes","10.63317\u002F39e7xhv5nr4g","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-195","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F195.pdf","rio-2002-compiling",[4142],{"paper_id":4135,"author_seq":247,"given_name":4143,"surname":4144,"affiliation":63,"orcid":63},"José Miguel Aguilar","Río","The project under discussion represents an attempt to exploit the potential of web resources for higher education and, more  particularly, on a domain (that of literary translation) which is traditionally considered not very much in relation to technology and  computer science. Translation and Interpreting students at the Universidad de Málaga are offered the possibility to take an English-Spanish Literary Translation module, which  epitomises the need for debate in the field of Humanities. 
Sadly enough, implementation  of course methodology is rendered very difficult or impossible owing to time restrictions and overcrowded classrooms.",{"paper_id":4147,"title":4148,"year":213,"month":855,"day":63,"doi":4149,"resource_url":4150,"first_page":63,"last_page":63,"pdf_url":4151,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4152,"paper_type":860,"authors":4153,"abstract":4158},"lrec2002-main-196","How to evaluate necessary cooperative systems of terminology building?","10.63317\u002F4rgeeti8maqm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-196","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F196.pdf","hamon-hu-2002-evaluate",[4154,4156],{"paper_id":4147,"author_seq":247,"given_name":2371,"surname":4155,"affiliation":63,"orcid":63},"Hamon",{"paper_id":4147,"author_seq":232,"given_name":2416,"surname":4157,"affiliation":63,"orcid":63},"Hû","Terminology building cannot be considered as a full  automated process but rather as a cooperative task between  terminological tools and  terminologists. Identifying terms in a  technical domain is a matter of word usage and expert agreement. We  point out the problem of the evaluation of such tools: their quality and  their contribution to the terminology building is difficult to estimate  and cannot be fully evaluated with usual precision and recall measures.  We aim at evaluating more globally their technical aspects and their  usability. We give a non-exhaustive list of the features of such  evaluation. 
Then, we apply them on four terminological systems.",{"paper_id":4160,"title":4161,"year":213,"month":855,"day":63,"doi":4162,"resource_url":4163,"first_page":63,"last_page":63,"pdf_url":4164,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4165,"paper_type":860,"authors":4166,"abstract":4187},"lrec2002-main-197","CLIPS, a Multi-level Italian Computational Lexicon: a Glimpse to Data","10.63317\u002F4bmys5muzkzi","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-197","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F197.pdf","ruimy-etal-2002-clips",[4167,4170,4172,4175,4176,4179,4182,4185],{"paper_id":4160,"author_seq":247,"given_name":4168,"surname":4169,"affiliation":63,"orcid":63},"Nilda","Ruimy",{"paper_id":4160,"author_seq":232,"given_name":2690,"surname":4171,"affiliation":63,"orcid":63},"Monachini",{"paper_id":4160,"author_seq":218,"given_name":4173,"surname":4174,"affiliation":63,"orcid":63},"Raffaella","Distante",{"paper_id":4160,"author_seq":203,"given_name":4103,"surname":4104,"affiliation":63,"orcid":63},{"paper_id":4160,"author_seq":188,"given_name":4177,"surname":4178,"affiliation":63,"orcid":63},"Stefano","Molino",{"paper_id":4160,"author_seq":172,"given_name":4180,"surname":4181,"affiliation":63,"orcid":63},"Marisa","Ulivieri",{"paper_id":4160,"author_seq":155,"given_name":4183,"surname":4184,"affiliation":63,"orcid":63},"Nicoletta","Calzolari",{"paper_id":4160,"author_seq":138,"given_name":1692,"surname":4186,"affiliation":63,"orcid":63},"Zampolli","CLIPS is a multi-layered Italian computational lexicon based on the PAROLE-SIMPLE model. In this paper we briefly recall the main characteristics of the model and devote our attention to issues emerging from the encoding of large quantities of data, especially in relation to those types of syntactic and semantic information specific to our lexicon and that reflect innovative features of the underlying model. 
At syntactic level, we show how alternating structures may be encoded in a linguistically more elegant way by using framesets. We illustrate the connection between syntactic and semantic information, and show how the SIMPLE Italian lexicon approach to predicate selection has been refined in CLIPS. At semantic level, we illustrate the richness of information types encoded in a word sense description and the way such a wealth of data can be exploited. We stress in particular the expressive power of the Extended Qualia Structure yet mentioning some of its problematic aspects.  We show that queries on qualia relations allow to retrieve lexical collocates, to extract domain specific information, semantic networks, and help interpreting modifying PPs in complex nominals. Finally, we show that features, which cut across the type hierarchy, have a stronger expressive power with respect to semantic types in identifying selectional preferences.",{"paper_id":4189,"title":4190,"year":213,"month":855,"day":63,"doi":4191,"resource_url":4192,"first_page":63,"last_page":63,"pdf_url":4193,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4194,"paper_type":860,"authors":4195,"abstract":4199},"lrec2002-main-198","Nominal Expressions in Multilingual Corpora: Definites and Demonstratives","10.63317\u002F5e4scxmzok9i","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-198","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F198.pdf","salmon-alt-vieira-2002-nominal",[4196,4198],{"paper_id":4189,"author_seq":247,"given_name":3474,"surname":4197,"affiliation":63,"orcid":63},"Salmon-Alt",{"paper_id":4189,"author_seq":232,"given_name":2828,"surname":2829,"affiliation":63,"orcid":63},"This paper presents the results of a multilingual  corpus study on definite descriptions and demonstrative noun phrases.  
The analysis made on a parallel corpus (French and Portuguese)  reinforces previous findings regarding the predominance of non-anaphoric  uses of definite descriptions in English corpus. It is also shown that  the use of demonstrative noun phrases, on the other hand, is more  regularly based on discourse salient entities. The analysis involves  syntactic issues and is oriented to the design of natural language  processing tools.",{"paper_id":4201,"title":4202,"year":213,"month":855,"day":63,"doi":4203,"resource_url":4204,"first_page":63,"last_page":63,"pdf_url":4205,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4206,"paper_type":860,"authors":4207,"abstract":4216},"lrec2002-main-199","Lexical and Textual Resources for Sense Recognition and Description","10.63317\u002F5nzq2bs3yk7p","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-199","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F199.pdf","jarborg-etal-2002-lexical",[4208,4211,4213],{"paper_id":4201,"author_seq":247,"given_name":4209,"surname":4210,"affiliation":63,"orcid":63},"Jerker","Järborg",{"paper_id":4201,"author_seq":232,"given_name":4212,"surname":2969,"affiliation":63,"orcid":63},"Dimitrios",{"paper_id":4201,"author_seq":218,"given_name":4214,"surname":4215,"affiliation":63,"orcid":63},"Maria Toporowska","Gronostaj","It is common knowledge that the creation of language resources for Language   Engineering (LE) applications is a time-consuming, and hence expensive, enterprise.   From this knowledge stems the demand for the re-usability of resources, which  always remains essential. In this paper we will, however, concentrate on another, complementary,  aspect, namely that of combining and extending existing resources by a variety of means  and with a minimum of manual interaction. 
The resources to be discussed below consist  of (i) a large lexical database, (ii) a formalized computational lexicon, and  (iii) a sense-tagged corpus for Swedish. Some results concerning the semi-automatic  annotation of the corpus and examples of a variety of phenomena analysed, such as  compounding, will also be given. The annotation has been performed within the framework  of the SemTag project, while part of this material has been successfully used in  the SENSEVAL-2 exercise. In addition to these three resources, it can be added  the background material of the Swedish Language Bank (some hundred million words)  that forms the basis for the creation of (i) and partly (ii). Having been developed  at our department, the lexical resources can easily be accessed, and, more importantly,  can be systematically improved where necessary. It should be noted that this type  of work requires close cooperation between specialists in lexicography and language  technology.",{"paper_id":4218,"title":4219,"year":213,"month":855,"day":63,"doi":4220,"resource_url":4221,"first_page":63,"last_page":63,"pdf_url":4222,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4223,"paper_type":860,"authors":4224,"abstract":4237},"lrec2002-main-200","A Class Library for the Integration of NLP Tools: Definition and implementation of an Abstract Data Type Collection for the manipulation of SGML documents in a context of stand-off linguistic annotation","10.63317\u002F2d2kbjo2izb2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-200","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F200.pdf","artola-etal-2002-class",[4225,4227,4229,4231,4233,4235],{"paper_id":4218,"author_seq":247,"given_name":1614,"surname":4226,"affiliation":63,"orcid":63},"Artola",{"paper_id":4218,"author_seq":232,"given_name":1983,"surname":4228,"affiliation":63,"orcid":63},"Díaz de 
Ilarraza",{"paper_id":4218,"author_seq":218,"given_name":3444,"surname":4230,"affiliation":63,"orcid":63},"Ezeiza",{"paper_id":4218,"author_seq":203,"given_name":3450,"surname":4232,"affiliation":63,"orcid":63},"Gojenola",{"paper_id":4218,"author_seq":188,"given_name":4234,"surname":3255,"affiliation":63,"orcid":63},"G.",{"paper_id":4218,"author_seq":172,"given_name":1983,"surname":4236,"affiliation":63,"orcid":63},"Soroa","In this paper we present a program library conceived  and implemented to represent and manipulate the information exchanged in  the process of integration of NLP tools. It is currently used to  integrate the tools developed for Basque processing during the last ten  years at our research group. In our opinion, the program library is  general enough to be used in similar processes of integration of NLP  tools or in the design of new applications built on them. The program  library constitutes a class library that provides the programmer with  the elements s\u002Fhe needs when manipulating SGML documents in a context of  stand-off linguistic annotation, where linguistic analyses obtained at  different phases (morphology, lemmatization, processing of multiword  lexical units, surface syntax, and so on) are represented by  well-defined typed features structures. Due to the complexity of the  information to be exchanged among the different tools, feature  structures (FS) are used to represent it. Feature structures provide us  with a well-formalized basis for the exchange of linguistic information  among the different text analysis tools. Feature structures are coded in  SGML following the TEI’s DTD for Fs, and Feature-System Declarations  (FSD) have been thoroughly specified. So, TEI-P3 conformant feature  structures constitute the representation schema for the different  documents that convey the information from one linguistic tool to the  next in the language processing chain. 
The tools integrated so far are a  lexical database, a tokenizer, a wide-coverage morphosyntactic analyzer,  a general purpose tagger\u002Flemmatizer and a shallow syntactic parser. The  type of information contained in the documents exchanged among these  tools has been analyzed and characterized using a set of Abstract Data  Types.",{"paper_id":4239,"title":4240,"year":213,"month":855,"day":63,"doi":4241,"resource_url":4242,"first_page":63,"last_page":63,"pdf_url":4243,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4244,"paper_type":860,"authors":4245,"abstract":4252},"lrec2002-main-201","Efficient Stochastic Part-of-Speech Tagging for Hungarian","10.63317\u002F2rnbnd8t8z9s","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-201","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F201.pdf","oravecz-dienes-2002-efficient",[4246,4249],{"paper_id":4239,"author_seq":247,"given_name":4247,"surname":4248,"affiliation":63,"orcid":63},"Csaba","Oravecz",{"paper_id":4239,"author_seq":232,"given_name":4250,"surname":4251,"affiliation":63,"orcid":63},"Péter","Dienes","Many of the methods developed for Western European languages and used  widespread to produce annotated language resources cannot readily be applied  to Central and Eastern European languages, due to the large number of novel  phenomena exhibited in the syntax and morphology of these languages, which  these methods have to handle but have not been designed to cope with. The  process of morphological tagging when applied to Hungarian data to produce corpora annotated at least at the morphosyntactic level is  most indicative of this problem: several of the algorithms (either rule-based  or statistical) that have been used very successfully in other domains cannot  readily be applied to a language exhibiting such a varied morphology and huge  number of wordforms as Hungarian.  
The paper will describe a robust tagging  scenario for Hungarian using a relatively simple stochastic system augmented  with external morphological processing, which can overcome the two most conspicuous problems: the complexity of morphosyntactic descriptions and most  importantly the huge number of possible wordforms.",{"paper_id":4254,"title":4255,"year":213,"month":855,"day":63,"doi":4256,"resource_url":4257,"first_page":63,"last_page":63,"pdf_url":4258,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4259,"paper_type":860,"authors":4260,"abstract":4266},"lrec2002-main-202","YAC - A Recursive Chunker for Unrestricted German Text","10.63317\u002F4hut4qkhxfy9","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-202","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F202.pdf","kermes-evert-2002-yac",[4261,4264],{"paper_id":4254,"author_seq":247,"given_name":4262,"surname":4263,"affiliation":63,"orcid":63},"Hannah","Kermes",{"paper_id":4254,"author_seq":232,"given_name":1452,"surname":4265,"affiliation":63,"orcid":63},"Evert","YAC is a fully automatic recursive chunker for  unrestricted German text. It is especially designed to provide a useful  basis for the extraction of linguistic as well as lexicographic  information. Consequently, the grammar rules of YAC are implemented such  as to make the resulting analysis meet the needs of an ensuing  extraction process. The chunks provided by YAC are continuous parts of  intra-clausal constituents including recursion but no PP-attachment or  sentential elements. 
The chunks are additionally enriched with  information about head lemma, morpho-syntactic features and certain  lexical and structural properties.",{"paper_id":4268,"title":4269,"year":213,"month":855,"day":63,"doi":4270,"resource_url":4271,"first_page":63,"last_page":63,"pdf_url":4272,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4273,"paper_type":860,"authors":4274,"abstract":4284},"lrec2002-main-203","Sensitivity of IR systems Evaluation to Topic Difficulty","10.63317\u002F3h4g2yfpxzrh","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-203","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F203.pdf","eguchi-etal-2002-sensitivity",[4275,4278,4281],{"paper_id":4268,"author_seq":247,"given_name":4276,"surname":4277,"affiliation":63,"orcid":63},"Koji","Eguchi",{"paper_id":4268,"author_seq":232,"given_name":4279,"surname":4280,"affiliation":63,"orcid":63},"Kazuko","Kuriyama",{"paper_id":4268,"author_seq":218,"given_name":4282,"surname":4283,"affiliation":63,"orcid":63},"Noriko","Kando","The difficulty of the topics or queries is one of  important factors in evaluating information retrieval (IR) systems. This  paper analyzes the differences of system ranking affected by the topic  difficulty using a test collection ’NTCIR-1,’ which is constructed  for evaluating Japanese IR systems and composed of (1) the topics, (2)  the document database, and (3) the lists of relevant judgments.  
Furthermore, this paper defines measures for the various features on the  topics, and analyzes the correlation between them, in order to  investigate the predictability of the topic difficulty.",{"paper_id":4286,"title":4287,"year":213,"month":855,"day":63,"doi":4288,"resource_url":4289,"first_page":63,"last_page":63,"pdf_url":4290,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4291,"paper_type":860,"authors":4292,"abstract":4298},"lrec2002-main-204","A Comparison of Machine Learning Algorithms for Prepositional Phrase Attachment","10.63317\u002F4765jpyyj552","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-204","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F204.pdf","mitchell-gaizauskas-2002-comparison",[4293,4296],{"paper_id":4286,"author_seq":247,"given_name":4294,"surname":4295,"affiliation":63,"orcid":63},"Brian","Mitchell",{"paper_id":4286,"author_seq":232,"given_name":1181,"surname":4297,"affiliation":63,"orcid":63},"Gaizauskas","This paper presents work which extends previous  corpus-based work on training Machine Learning Algorithms to perform  Prepositional Phrase attachment. Besides  recreating others’  experiments to see how algorithms’ performance changes with the number  of training examples and using n-fold cross-validation to produce more  accurate error rates, we implemented our own vanilla Machine Learning  Algorithms as a  comparison. We also had people perform exactly the  same task as the Machine Learning Algorithms to indicate whether the way  forward lies in improving Machine Learning Algorithms or in improving  the data sets used to train Machine Learning Algorithms. 
The   results from all these experiments feed into our other work transforming  the Penn TreeBank into a more useful resource for training Machine  Learning Algorithms to do Prepositional Phrase attachment.",{"paper_id":4300,"title":4301,"year":213,"month":855,"day":63,"doi":4302,"resource_url":4303,"first_page":63,"last_page":63,"pdf_url":4304,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4305,"paper_type":860,"authors":4306,"abstract":4316},"lrec2002-main-205","AR-Engine - a framework for unrestricted co-reference resolution","10.63317\u002F4eyy7hv25atu","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-205","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F205.pdf","cristea-etal-2002-ar",[4307,4308,4311,4314],{"paper_id":4300,"author_seq":247,"given_name":1371,"surname":3355,"affiliation":63,"orcid":63},{"paper_id":4300,"author_seq":232,"given_name":4309,"surname":4310,"affiliation":63,"orcid":63},"Oana-Diana","Postolache",{"paper_id":4300,"author_seq":218,"given_name":4312,"surname":4313,"affiliation":63,"orcid":63},"Gabriela-Eugenia","Dima",{"paper_id":4300,"author_seq":203,"given_name":4315,"surname":1375,"affiliation":63,"orcid":63},"Cătălina","The paper presents a framework that allows the design, realisation and validation of different anaphora resolution models on real texts. The type of processing implemented by the engine is an incremental one, simulating the reading of texts by humans. Advanced behaviour like postponed resolution and accumulation of values for features of the discourse entities during reading is implemented. Four models are defined, plugged in the framework and tested on a small corpus. The approach is open to any type of anaphora resolution. However, the models reported deal only with co-reference anaphora, independent of the type of the anaphor. 
It is shown that the setting on of more and more features, generally results in an improvement of the analysis.",{"paper_id":4318,"title":4319,"year":213,"month":855,"day":63,"doi":4320,"resource_url":4321,"first_page":63,"last_page":63,"pdf_url":4322,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4323,"paper_type":860,"authors":4324,"abstract":63},"lrec2002-main-206","A corpus based investigation of morphological disagreement in anaphoric relations","10.63317\u002F47uy9veb6767","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-206","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F206.pdf","barbu-etal-2002-corpus",[4325,4326,4327],{"paper_id":4318,"author_seq":247,"given_name":4315,"surname":1375,"affiliation":63,"orcid":63},{"paper_id":4318,"author_seq":232,"given_name":1407,"surname":3088,"affiliation":63,"orcid":63},{"paper_id":4318,"author_seq":218,"given_name":4328,"surname":3629,"affiliation":63,"orcid":63},"Ruslan",{"paper_id":4330,"title":4331,"year":213,"month":855,"day":63,"doi":4332,"resource_url":4333,"first_page":63,"last_page":63,"pdf_url":4334,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4335,"paper_type":860,"authors":4336,"abstract":4338},"lrec2002-main-207","Error analysis in anaphora resolution","10.63317\u002F3p6uk637on4w","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-207","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F207.pdf","barbu-2002-error",[4337],{"paper_id":4330,"author_seq":247,"given_name":4315,"surname":1375,"affiliation":63,"orcid":63},"This paper deals with error analysis and their influence  in comparative and qualitative evaluation of systems performing anaphora resolution. 
It presents a corpus-based analysis of errors  reported by four anaphora resolution systems, leading to an investigation of the type and source of errors; as a direct  application of the investigation's results, a simple probabilistic hybrid method is described, that takes advantage of the strong  points of each of the methods analysed, while trying to avoid their weak points.",{"paper_id":4340,"title":4341,"year":213,"month":855,"day":63,"doi":4342,"resource_url":4343,"first_page":63,"last_page":63,"pdf_url":4344,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4345,"paper_type":860,"authors":4346,"abstract":4364},"lrec2002-main-208","Predictive and objective evaluation of speech understanding: the “challenge” evaluation campaign of the I3 speech workgroup of the French CNRS","10.63317\u002F5aowg9dq5vfe","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-208","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F208.pdf","antoine-etal-2002-predictive",[4347,4348,4350,4353,4356,4358,4361],{"paper_id":4340,"author_seq":247,"given_name":2740,"surname":2741,"affiliation":63,"orcid":63},{"paper_id":4340,"author_seq":232,"given_name":1115,"surname":4349,"affiliation":63,"orcid":63},"Bousquet-Vernhettes",{"paper_id":4340,"author_seq":218,"given_name":4351,"surname":4352,"affiliation":63,"orcid":63},"Jérôme","Goulian",{"paper_id":4340,"author_seq":203,"given_name":4354,"surname":4355,"affiliation":63,"orcid":63},"Mohamed Zakaria","Kurdi",{"paper_id":4340,"author_seq":188,"given_name":2377,"surname":4357,"affiliation":63,"orcid":63},"Rosset",{"paper_id":4340,"author_seq":172,"given_name":4359,"surname":4360,"affiliation":63,"orcid":63},"Nadine","Vigouroux",{"paper_id":4340,"author_seq":155,"given_name":4362,"surname":4363,"affiliation":63,"orcid":63},"Jeanne","Villaneau","This paper presents a new paradigm of  \"challenge\" evaluation of Spoken Language Understanding. 
This  methodology aims at a quantitative assessment with a high diagnostic  power, by opposition with standard ATIS-like frameworks. This paper  details the methodology as well as the results of an evaluation campaign  held by the French CNRS research agency. The benefits of this  methodology are also discussed.",{"paper_id":4366,"title":4367,"year":213,"month":855,"day":63,"doi":4368,"resource_url":4369,"first_page":63,"last_page":63,"pdf_url":4370,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4371,"paper_type":860,"authors":4372,"abstract":4376},"lrec2002-main-209","Using the Spoken Dutch Corpus for type-logical grammar induction","10.63317\u002F4g2pneqpfcmy","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-209","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F209.pdf","moortgat-moot-2002-using",[4373,4374],{"paper_id":4366,"author_seq":247,"given_name":2079,"surname":2080,"affiliation":63,"orcid":63},{"paper_id":4366,"author_seq":232,"given_name":1407,"surname":4375,"affiliation":63,"orcid":63},"Moot","The dependency-based annotation format employed  within the Spoken Dutch Corpus (CGN) project (van der Wouden et al.,  2002) has been designed in such a way as to enable a transparent mapping  to the derivational structures of current ‘lexicalized’ grammar  formalisms. Through such translations, the CGN tree bank can be used to  train and evaluate computational grammars within these frameworks. In  this paper we use the computational facilities of the Grail system (see  Moot, 2002) to extract type logical grammars from the CGN annotation  graphs. Grail is a general grammar development environment for  type-logical categorial grammars (TLG). 
The Grail parsing engine  combines proof net technology with structural rewriting.",{"paper_id":4378,"title":4379,"year":213,"month":855,"day":63,"doi":4380,"resource_url":4381,"first_page":63,"last_page":63,"pdf_url":4382,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4383,"paper_type":860,"authors":4384,"abstract":4391},"lrec2002-main-210","Semantic Lexical Resources Applied to Content-based Querying - the OntoQuery Project","10.63317\u002F5e4bj8a6zqi6","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-210","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F210.pdf","pedersen-paggio-2002-semantic",[4385,4388],{"paper_id":4378,"author_seq":247,"given_name":4386,"surname":4387,"affiliation":63,"orcid":63},"Bolette S.","Pedersen",{"paper_id":4378,"author_seq":232,"given_name":4389,"surname":4390,"affiliation":63,"orcid":63},"Patrizia","Paggio","This paper deals with the exploitation of the lexical and conceptual knowledge coded in the SIMPLE-DK lexicon in the methodology for content-based querying developed by the OntoQuery project. SIMPLE-DK has proven a rich and flexible lexical resource, which the project has taken advantage of in several ways. Firstly, the paper explains how the ontology provided by SIMPLE is used by the current project prototype to derive conceptual descriptors on which to base the matching of documents to user queries. 
Furthermore, it discusses how selectional restrictions and qualia roles,  both coded in SIMPLE, can be used to construct an ontological grammar to build more complex descriptors.",{"paper_id":4393,"title":4394,"year":213,"month":855,"day":63,"doi":4395,"resource_url":4396,"first_page":63,"last_page":63,"pdf_url":4397,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4398,"paper_type":860,"authors":4399,"abstract":4414},"lrec2002-main-211","Ellogon: A New Text Engineering Platform","10.63317\u002F53phshtzmz9o","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-211","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F211.pdf","petasis-etal-2002-ellogon",[4400,4403,4406,4408,4411],{"paper_id":4393,"author_seq":247,"given_name":4401,"surname":4402,"affiliation":63,"orcid":63},"Georgios","Petasis",{"paper_id":4393,"author_seq":232,"given_name":4404,"surname":4405,"affiliation":63,"orcid":63},"Vangelis","Karkaletsis",{"paper_id":4393,"author_seq":218,"given_name":4401,"surname":4407,"affiliation":63,"orcid":63},"Paliouras",{"paper_id":4393,"author_seq":203,"given_name":4409,"surname":4410,"affiliation":63,"orcid":63},"Ion","Androutsopoulos",{"paper_id":4393,"author_seq":188,"given_name":4412,"surname":4413,"affiliation":63,"orcid":63},"Constantine D.","Spyropoulos","This paper presents Ellogon, a multi-lingual, cross-platform, general-purpose text                     engineering environment. Ellogon was designed in order to aid both researchers in                     natural language processing, as well as companies that produce language engineering                     systems for the end-user. Ellogon provides a powerful TIPSTER-based infrastructure                     for managing, storing and exchanging textual data, embedding and managing text                     processing components as well as visualising textual data and their associated                     linguistic information. 
Among its key features are full Unicode support, an                     extensive multi-lingual graphical user interface, its modular architecture and the                     reduced hardware requirements.",{"paper_id":4416,"title":4417,"year":213,"month":855,"day":63,"doi":4418,"resource_url":4419,"first_page":63,"last_page":63,"pdf_url":4420,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4421,"paper_type":860,"authors":4422,"abstract":4430},"lrec2002-main-212","Multilingual Terminology Extraction and Validation","10.63317\u002F3hofemubra52","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-212","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F212.pdf","valderrabanos-etal-2002-multilingual",[4423,4426,4428],{"paper_id":4416,"author_seq":247,"given_name":4424,"surname":4425,"affiliation":63,"orcid":63},"Antonio S.","Valderrábanos",{"paper_id":4416,"author_seq":232,"given_name":2059,"surname":4427,"affiliation":63,"orcid":63},"Belskis",{"paper_id":4416,"author_seq":218,"given_name":4429,"surname":1702,"affiliation":63,"orcid":63},"Luis Iraola","This paper presents the automatic terminology  extraction approach developed within project LIQUID1. This project aims  at developing a cost-effective solution for the problem of  cross-language access to multilingual text databases in technical and  scientific domains. Cross-Language Information Retrieval faces a major  challenge: organizing unstructured textual information according to its  contents and regardless of its language. Our solution is based on two  main components, a terminology extraction tool and a domain-specific  ontology. The terminology extraction tool identifies the terminology  that describes the contents of a particular document. Then, these terms  are linked to a domain-specific ontology. 
This paper presents the  terminology extraction tool and the experimental results obtained in the  domain of Gastroenterology.",{"paper_id":4432,"title":4433,"year":213,"month":855,"day":63,"doi":4434,"resource_url":4435,"first_page":63,"last_page":63,"pdf_url":4436,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4437,"paper_type":860,"authors":4438,"abstract":4445},"lrec2002-main-213","Natural Interactivity Resources – Data, Annotation Schemes and Tools","10.63317\u002F2sutjsmcszk8","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-213","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F213.pdf","dybkjaer-bernsen-2002-natural",[4439,4442],{"paper_id":4432,"author_seq":247,"given_name":4440,"surname":4441,"affiliation":63,"orcid":63},"Laila","Dybkjær",{"paper_id":4432,"author_seq":232,"given_name":4443,"surname":4444,"affiliation":63,"orcid":63},"Niels Ole","Bernsen","This paper presents results of three surveys of  natural interactivity and multimodal resources carried out by a Working  Group in the ISLE project on International Standards for Language  Engineering. Information has been collected on a large number of  corpora, coding schemes and coding tools world-wide. The paper presents  the information collection process, the description and validation  methods used, the surveyed resources, and brief conclusions for each of  the three resource areas reviewed. Observations on user profiles, user  needs and best practices are briefly presented.",{"paper_id":4447,"title":4448,"year":213,"month":855,"day":63,"doi":4449,"resource_url":4450,"first_page":63,"last_page":63,"pdf_url":4451,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4452,"paper_type":860,"authors":4453,"abstract":4459},"lrec2002-main-214","THE NITE WORKBENCH. 
A Tool for Annotation of Natural Interactivity and Multimodal Data","10.63317\u002F5875wtfkkmgd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-214","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F214.pdf","bernsen-etal-2002-nite",[4454,4455,4456],{"paper_id":4447,"author_seq":247,"given_name":4443,"surname":4444,"affiliation":63,"orcid":63},{"paper_id":4447,"author_seq":232,"given_name":4440,"surname":4441,"affiliation":63,"orcid":63},{"paper_id":4447,"author_seq":218,"given_name":4457,"surname":4458,"affiliation":63,"orcid":63},"Mykola","Kolodnytsky","This paper describes ongoing work in the European  NITE project on the development of a tool in support of annotation of  natural interactive and multimodal data. The paper discusses the  resources required for pursuing the vision of natural interactivity and  provides an overview of existing natural interactivity data coding tools  and projects. After discussing the target user groups of a NITE tool,  the paper presents requirements to a visual coding tool interface  followed by an early draft of the visual interface for the NITE coding  tool.",{"paper_id":4461,"title":4462,"year":213,"month":855,"day":63,"doi":4463,"resource_url":4464,"first_page":63,"last_page":63,"pdf_url":4465,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4466,"paper_type":860,"authors":4467,"abstract":4482},"lrec2002-main-215","A Unicode-based Environment for Creation and Use of Language 
Resources","10.63317\u002F2onkmujgskxz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-215","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F215.pdf","tablan-etal-2002-unicode",[4468,4471,4473,4474,4475,4476,4477,4479,4480],{"paper_id":4461,"author_seq":247,"given_name":4469,"surname":4470,"affiliation":63,"orcid":63},"Valentin","Tablan",{"paper_id":4461,"author_seq":232,"given_name":4472,"surname":3501,"affiliation":63,"orcid":63},"Cristian",{"paper_id":4461,"author_seq":218,"given_name":3495,"surname":3496,"affiliation":63,"orcid":63},{"paper_id":4461,"author_seq":203,"given_name":3490,"surname":3491,"affiliation":63,"orcid":63},{"paper_id":4461,"author_seq":188,"given_name":872,"surname":3493,"affiliation":63,"orcid":63},{"paper_id":4461,"author_seq":172,"given_name":3498,"surname":3499,"affiliation":63,"orcid":63},{"paper_id":4461,"author_seq":155,"given_name":2224,"surname":4478,"affiliation":63,"orcid":63},"McEnery",{"paper_id":4461,"author_seq":138,"given_name":3674,"surname":3175,"affiliation":63,"orcid":63},{"paper_id":4461,"author_seq":121,"given_name":2227,"surname":4481,"affiliation":63,"orcid":63},"Leisher","GATE is a Unicode-aware architecture, development environment and framework  for building systems that process human language. It is often thought that  the character sets problem has been solved by the arrival of the Unicode  standard. This standard is an important advance, but in practice the ability  to process text in a large number of the World's languages is still limited.  This paper describes work done in the context of the GATE project that makes  use of Unicode and plugs some of the gaps for language processing R&D.  First we look at storing and decoding of Unicode compliant linguistic  resources. The new capabilities for processing textual data and taking  advantage of the Unicode standard are detailed next. 
Finally, the solutions  used to add Unicode displaying and editing capabilities for the graphical  interface are described.",{"paper_id":4484,"title":4485,"year":213,"month":855,"day":63,"doi":4486,"resource_url":4487,"first_page":63,"last_page":63,"pdf_url":4488,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4489,"paper_type":860,"authors":4490,"abstract":4500},"lrec2002-main-216","PatEdit: An Information Extraction Pattern Editor for Fast System Customization","10.63317\u002F2ww2mzvytjtt","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-216","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F216.pdf","farmakiotou-etal-2002-patedit",[4491,4494,4495,4498,4499],{"paper_id":4484,"author_seq":247,"given_name":4492,"surname":4493,"affiliation":63,"orcid":63},"Dimitra","Farmakiotou",{"paper_id":4484,"author_seq":232,"given_name":4404,"surname":4405,"affiliation":63,"orcid":63},{"paper_id":4484,"author_seq":218,"given_name":4496,"surname":4497,"affiliation":63,"orcid":63},"Ioannis","Koutsias",{"paper_id":4484,"author_seq":203,"given_name":2535,"surname":4402,"affiliation":63,"orcid":63},{"paper_id":4484,"author_seq":188,"given_name":4412,"surname":4413,"affiliation":63,"orcid":63},"This paper addresses the problem of Information Extraction (IE) system customization to new domains and extraction needs with the use of PatEdit, an IE Pattern Editor. PatEdit is a human-assisted knowledge engineering tool, that facilitates the production of IE  patterns. First, we present the problem of IE system customisation and the use of human assisted knowledge engineering tools. Then, we describe PatEdit with respect to the IE pattern language used and discuss its characteristics that facilitate rapid pattern writing. 
Finally, the exploitation of PatEdit in two information extraction projects is presented along with our plans for future work",{"paper_id":4502,"title":4503,"year":213,"month":855,"day":63,"doi":4504,"resource_url":4505,"first_page":63,"last_page":63,"pdf_url":4506,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4507,"paper_type":860,"authors":4508,"abstract":4512},"lrec2002-main-217","The Hungarian National Corpus","10.63317\u002F2ewxxry2r46t","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-217","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F217.pdf","varadi-2002-hungarian",[4509],{"paper_id":4502,"author_seq":247,"given_name":4510,"surname":4511,"affiliation":63,"orcid":63},"Tamás","Váradi","The paper reports on the development of the Hungarian  National Corpus, which was completed at the end of 2001 after four  years' effort. The HNC is designed to be a  balanced reference corpus  of current written Hungarian consisting of 150 million words.  The  paper first discusses basic design issues concerning the composition  of the corpus.  The HNC adopts a fairly pragmatic approach,  focusing on five major text types. 
The  second half of the paper  contains details of the annotation and tagging system used.",{"paper_id":4514,"title":4515,"year":213,"month":855,"day":63,"doi":4516,"resource_url":4517,"first_page":63,"last_page":63,"pdf_url":4518,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4519,"paper_type":860,"authors":4520,"abstract":4527},"lrec2002-main-218","Building and annotating a corpus for the study of journalistic text reuse","10.63317\u002F3dqyw7g68fnr","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-218","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F218.pdf","clough-etal-2002-building",[4521,4523,4524],{"paper_id":4514,"author_seq":247,"given_name":3674,"surname":4522,"affiliation":63,"orcid":63},"Clough",{"paper_id":4514,"author_seq":232,"given_name":1181,"surname":4297,"affiliation":63,"orcid":63},{"paper_id":4514,"author_seq":218,"given_name":4525,"surname":4526,"affiliation":63,"orcid":63},"S. L.","Piao","In this paper we present the METER Corpus, a novel resource  for the study and analysis of journalistic text reuse. The corpus consists of a set of news stories written by the Press Association  (PA), the major UK news agency, and a set of stories about the same news events, as published in various British newspapers. In some  cases the newspaper stories are rewritten from the PA source; in other cases they have been independently written by the newspapers'  own journalists. 
We discuss the motivation for creating the corpus, its contents, the annotation of certain attributes for analysis of  text reuse and finally the encoding of those annotations into a standardised corpus format: the Text Encoding Initiative (TEI).",{"paper_id":4529,"title":4530,"year":213,"month":855,"day":63,"doi":4531,"resource_url":4532,"first_page":63,"last_page":63,"pdf_url":4533,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4534,"paper_type":860,"authors":4535,"abstract":4547},"lrec2002-main-219","Multimedia Annotation with Multilingual Input Methods and Search Support","10.63317\u002F44zrsg6e7yde","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-219","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F219.pdf","brugman-etal-2002-multimedia",[4536,4539,4542,4545],{"paper_id":4529,"author_seq":247,"given_name":4537,"surname":4538,"affiliation":63,"orcid":63},"Hennie","Brugman",{"paper_id":4529,"author_seq":232,"given_name":4540,"surname":4541,"affiliation":63,"orcid":63},"Harriet","Spenke",{"paper_id":4529,"author_seq":218,"given_name":4543,"surname":4544,"affiliation":63,"orcid":63},"Markus","Kramer",{"paper_id":4529,"author_seq":203,"given_name":2059,"surname":4546,"affiliation":63,"orcid":63},"Klassmann","A tool set to create complex multimedia\u002Fmultimodal  annotations and to exploit them is described. Due to its possibility to  flexibly define tiers and associate languages\u002Fwriting systems with it  and to even mix characters from different writing systems it is a tool  which is especially suitable for work in multilingual environments. 
Also  the search interface supports the multilingual features allowing to  search for complex patterns in the annotations.",{"paper_id":4549,"title":4550,"year":213,"month":855,"day":63,"doi":4551,"resource_url":4552,"first_page":63,"last_page":63,"pdf_url":4553,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4554,"paper_type":860,"authors":4555,"abstract":4562},"lrec2002-main-220","Analysis of Lexical Structures from Field Linguistics and Language Engineering","10.63317\u002F2oybdd3d4kqn","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-220","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F220.pdf","wittenburg-etal-2002-analysis",[4556,4558,4560],{"paper_id":4549,"author_seq":247,"given_name":1989,"surname":4557,"affiliation":63,"orcid":63},"Wittenburg",{"paper_id":4549,"author_seq":232,"given_name":4559,"surname":3613,"affiliation":63,"orcid":63},"W.",{"paper_id":4549,"author_seq":218,"given_name":3455,"surname":4561,"affiliation":63,"orcid":63},"Drude","Lexica play an important role in every linguistic  discipline. We are confronted with many types of lexica. Depending on  the type of lexicon and the language we are currently faced with a large  variety of structures from very simple tables to complex graphs, as was  indicated by a recent overview of structures found in dictionaries from  field linguistics and language engineering. It is important to assess  these differences and aim at the integration of lexical resources in  order to improve lexicon creation, exchange and reuse. 
This paper  describes the first step towards the integration of existing structures  and standards into a flexible abstract model.",{"paper_id":4564,"title":4565,"year":213,"month":855,"day":63,"doi":4566,"resource_url":4567,"first_page":63,"last_page":63,"pdf_url":4568,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4569,"paper_type":860,"authors":4570,"abstract":4577},"lrec2002-main-221","Methods of Language Documentation in the DOBES project","10.63317\u002F4oedkq62mrsc","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-221","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F221.pdf","wittenburg-etal-2002-methods",[4571,4572,4575],{"paper_id":4564,"author_seq":247,"given_name":1989,"surname":4557,"affiliation":63,"orcid":63},{"paper_id":4564,"author_seq":232,"given_name":4573,"surname":4574,"affiliation":63,"orcid":63},"U.","Mosel",{"paper_id":4564,"author_seq":218,"given_name":1983,"surname":4576,"affiliation":63,"orcid":63},"Dwyer","The DOBES program for the documentation of endangered  languages, started in September 2000, has just completed its pilot  phase. Eight documentation teams and one archiving team worked out  agreements on formats, tools, naming conventions, and encoding,  especially the linguistic level of encoding. These standards will form  the basis for a five-year main phase, which will include about 20 teams.  In the pilot phase, strategies to set up an online archive incorporating  redundancy and regular backup were developed and implemented. Ethical  and legal aspects of the archiving process were discussed and amounted  to a number of documents to which all participants have to adhere to.  
Tools and converters developed within the pilot phase are available to  others.",{"paper_id":4579,"title":4580,"year":213,"month":855,"day":63,"doi":4581,"resource_url":4582,"first_page":63,"last_page":63,"pdf_url":4583,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4584,"paper_type":860,"authors":4585,"abstract":4591},"lrec2002-main-222","Metadata Proposals for Corpora and Lexica","10.63317\u002F3ngd6hnwnx8f","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-222","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F222.pdf","wittenburg-etal-2002-metadata",[4586,4587,4588],{"paper_id":4579,"author_seq":247,"given_name":1989,"surname":4557,"affiliation":63,"orcid":63},{"paper_id":4579,"author_seq":232,"given_name":4559,"surname":3613,"affiliation":63,"orcid":63},{"paper_id":4579,"author_seq":218,"given_name":4589,"surname":4590,"affiliation":63,"orcid":63},"D.","Broeder","A number of metadata proposals appear to be relevant  to establish a searchable and browsable domain of language resources so  that users can easily discover suitable resources on the Web. These  proposals differ in their approach, in their descriptive detail, in the  set of linguistic data types supported by specific elements and the  supporting tools. The IMDI initiative, in particular, has worked out not  only a set for (multimedia) corpora, but also for lexica. All  initiatives have declared their commitment towards interoperability  where Dublin Core will play a role in the near future. 
For the long term  we foresee much effort to make the metadata sets compliant with the  trends of the Semantic Web and to allow an increasing re-usage of  existing sub-schemas and data categories that will probably be  formulated with RDF.",{"paper_id":4593,"title":4594,"year":213,"month":855,"day":63,"doi":4595,"resource_url":4596,"first_page":63,"last_page":63,"pdf_url":4597,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4598,"paper_type":860,"authors":4599,"abstract":4608},"lrec2002-main-223","Multimodal Annotations in Gesture and Sign Language Studies","10.63317\u002F2iaqjhvw3fot","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-223","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F223.pdf","wittenburg-etal-2002-multimodal",[4600,4601,4604,4606],{"paper_id":4593,"author_seq":247,"given_name":1989,"surname":4557,"affiliation":63,"orcid":63},{"paper_id":4593,"author_seq":232,"given_name":4602,"surname":4603,"affiliation":63,"orcid":63},"St.","Levinson",{"paper_id":4593,"author_seq":218,"given_name":3455,"surname":4605,"affiliation":63,"orcid":63},"Kita",{"paper_id":4593,"author_seq":203,"given_name":4607,"surname":4538,"affiliation":63,"orcid":63},"H.","For multimodal annotations an exhaustive encoding  system for gestures was developed to facilitate research. The structural  requirements of multimodal annotations were analyzed to develop an  Abstract Corpus Model which is the basis for a powerful annotation and  exploitation tool for multimedia recordings and the definition of the  XML-based EUDICO Annotation Format. Finally, a metadata-based data  management environment has been setup to facilitate resource discovery  and especially corpus management. 
By means of an appropriate  digitization policy and their online availability researchers have been  able to build up a large corpus covering gesture and sign language data.",{"paper_id":4610,"title":4611,"year":213,"month":855,"day":63,"doi":4612,"resource_url":4613,"first_page":63,"last_page":63,"pdf_url":4614,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4615,"paper_type":860,"authors":4616,"abstract":4625},"lrec2002-main-224","Metadata Tools Supporting Controlled Vocabulary Services","10.63317\u002F2deki4igpafd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-224","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F224.pdf","broeder-etal-2002-metadata",[4617,4619,4622],{"paper_id":4610,"author_seq":247,"given_name":4618,"surname":4590,"affiliation":63,"orcid":63},"Daan",{"paper_id":4610,"author_seq":232,"given_name":4620,"surname":4621,"affiliation":63,"orcid":63},"Freddy","Offenga",{"paper_id":4610,"author_seq":218,"given_name":4623,"surname":4624,"affiliation":63,"orcid":63},"Don","Willems","Within the ISLE Metadata Initiative (IMDI) project a  user-friendly editor to enter metadata descriptions and a browser  operating on the linked metadata descriptions were developed. 
Both tools  support the usage of Controlled Vocabulary (CV) repositories by means of  the specification of an URL where the formal CV definition data is  available.",{"paper_id":4627,"title":4628,"year":213,"month":855,"day":63,"doi":4629,"resource_url":4630,"first_page":63,"last_page":63,"pdf_url":4631,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4632,"paper_type":860,"authors":4633,"abstract":4639},"lrec2002-main-225","LREP: A Language Repository Exchange Protocol","10.63317\u002F38vg5nczkt8t","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-225","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F225.pdf","broeder-etal-2002-lrep",[4634,4635,4636,4638],{"paper_id":4627,"author_seq":247,"given_name":4618,"surname":4590,"affiliation":63,"orcid":63},{"paper_id":4627,"author_seq":232,"given_name":2795,"surname":4557,"affiliation":63,"orcid":63},{"paper_id":4627,"author_seq":218,"given_name":2371,"surname":4637,"affiliation":63,"orcid":63},"Declerck",{"paper_id":4627,"author_seq":203,"given_name":3944,"surname":3945,"affiliation":63,"orcid":63},"The recent increase in the number and complexity of  the language resources available on the Internet is followed by a  similar increase of available tools for linguistic analysis. Ideally the  user does not need to be confronted with the question in how to match  tools with resources. 
If resource repositories and tool repositories  offer adequate metadata information and a suitable exchange protocol is  developed this matching process could be performed (semi-) automatically.",{"paper_id":4641,"title":4642,"year":213,"month":855,"day":63,"doi":4643,"resource_url":4644,"first_page":63,"last_page":63,"pdf_url":4645,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4646,"paper_type":860,"authors":4647,"abstract":4653},"lrec2002-main-226","A Robust and Flexible Platform for Dependency Extraction","10.63317\u002F2qpcdhw73i55","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-226","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F226.pdf","hagege-roux-2002-robust",[4648,4650],{"paper_id":4641,"author_seq":247,"given_name":1115,"surname":4649,"affiliation":63,"orcid":63},"Hagège",{"paper_id":4641,"author_seq":232,"given_name":4651,"surname":4652,"affiliation":63,"orcid":63},"Claude","Roux","This paper describes a linguistic platform, Xerox  Incremental Parser (XIP hereafter), to develop robust grammars. Most  robust parsers usually impose one specific strategy (constraint-based or  incremental) in the grammar writing, whereas XIP allows mixing both  types of analysis. The first part introduces XIP and its main  functionalities. The second part illustrates how a linguist can benefit  from merging different strategies in grammar writing. 
Finally, a first  evaluation of different grammars is given.",{"paper_id":4655,"title":4656,"year":213,"month":855,"day":63,"doi":4657,"resource_url":4658,"first_page":63,"last_page":63,"pdf_url":4659,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4660,"paper_type":860,"authors":4661,"abstract":4665},"lrec2002-main-227","Subject-field-specific Ontologies and Terminologies for the Web Community","10.63317\u002F3ed564bj7n5r","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-227","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F227.pdf","schmitz-2002-subject",[4662],{"paper_id":4655,"author_seq":247,"given_name":4663,"surname":4664,"affiliation":63,"orcid":63},"Klaus-Dirk","Schmitz","A terminological thesis written in the Department of  Modern Languages at the University of Applied Sciences Cologne contains  descriptive terminology of a limited domain. These systematic,  concept-oriented terminological data are available in electronic form  (MultiTerm database format). The WebTerm Project aims to consolidate and  convert the terminological data to a web-based system allowing efficient  and free access to these terminologies. Special attention is paid to the  dynamic representation of the system of concepts and the ontological  relations of these data collections. 
The paper describes the structure  and content of the terminological theses and the terminology contained,  as well as the web-based dynamic interface to the ontologies and  terminologies developed in the framework of the WebTerm Project.",{"paper_id":4667,"title":4668,"year":213,"month":855,"day":63,"doi":4669,"resource_url":4670,"first_page":63,"last_page":63,"pdf_url":4671,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4672,"paper_type":860,"authors":4673,"abstract":63},"lrec2002-main-228","Fish or Fowl:A Wizard of Oz Evaluation of Dialogue Strategies in the Restaurant Domain","10.63317\u002F2bd6amcoocw5","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-228","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F228.pdf","whittaker-etal-2002-fish",[4674,4676,4677],{"paper_id":4667,"author_seq":247,"given_name":2755,"surname":4675,"affiliation":63,"orcid":63},"Whittaker",{"paper_id":4667,"author_seq":232,"given_name":3240,"surname":3241,"affiliation":63,"orcid":63},{"paper_id":4667,"author_seq":218,"given_name":4678,"surname":4679,"affiliation":63,"orcid":63},"Johanna","Moore",{"paper_id":4681,"title":4682,"year":213,"month":855,"day":63,"doi":4683,"resource_url":4684,"first_page":63,"last_page":63,"pdf_url":4685,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4686,"paper_type":860,"authors":4687,"abstract":4693},"lrec2002-main-229","Integrating Two Semantic Lexicons, SIMPLE and ItalWordNet: What Can We 
Gain?","10.63317\u002F2r3gcwyvpgfd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-229","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F229.pdf","roventini-etal-2002-integrating",[4688,4691,4692],{"paper_id":4681,"author_seq":247,"given_name":4689,"surname":4690,"affiliation":63,"orcid":63},"Adriana","Roventini",{"paper_id":4681,"author_seq":232,"given_name":4180,"surname":4181,"affiliation":63,"orcid":63},{"paper_id":4681,"author_seq":218,"given_name":4183,"surname":4184,"affiliation":63,"orcid":63},"In the last years, at the Institute for Computational Linguistics in Pisa, a few lexical resources have been developed aiming at encoding complex lexical semantic information. ItalWordNet and SIMPLE are two of these resources which, tackling semantics in the lexicon from different points of view, and being at least partially complementary, could certainly profit from linking each other. These resources in fact evidence different aspects of the lexical information: in SIMPLE, which adds a semantic layer to the morphological and syntactic ones developed in PAROLE, the connections between semantics and syntax are preeminent; ItalWordNet (as the Princeton WordNet and then EuroWordNet) is built around the basic notion of a synset and various semantic relations are encoded between synsets while syntactic aspects are not taken into consideration. In the paper we describe an experiment we carried out, aimed at exploring the feasibility of linking these lexical resources, being convinced that a noteworthy gain could be achieved through this operation. 
As we will show in the following some problems came in the foreground but also considerable advantages concerning the coherence and the completeness of both of them.",{"paper_id":4695,"title":4696,"year":213,"month":855,"day":63,"doi":4697,"resource_url":4698,"first_page":63,"last_page":63,"pdf_url":4699,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4700,"paper_type":860,"authors":4701,"abstract":4706},"lrec2002-main-230","Proper Names In A Semantic Database","10.63317\u002F47cuzw56iy84","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-230","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F230.pdf","marinelli-roventini-2002-proper",[4702,4705],{"paper_id":4695,"author_seq":247,"given_name":4703,"surname":4704,"affiliation":63,"orcid":63},"Rita","Marinelli",{"paper_id":4695,"author_seq":232,"given_name":4689,"surname":4690,"affiliation":63,"orcid":63},"Among the resources developed in SI-TAL (Integrated Systems for the Automatic Treatment of Language), ItalWordNet (IWN)  were built as reference semantic database, enlarging the Italian WordNet developed in the framework of the European project  EuroWordNet (EWN). The Italian lexical database was increased, by introducing and codifying, besides the new grammatical  categories of the adjectives and adverbs, a subset of proper names. In the IWN context, the subset of proper names represents  a quantitatively limited portion, about 3600 synsets, but it may become a qualitatively important extension. The ever growing  amount of non-structured information, stored in natural language, requires the availability of computational instruments able to  manage this kind of information where proper names show a remarkable incidence in any types of texts. 
The work here presented  falls in this context, taking into account the proper names, and is focused on: i) encoding in the IWN database; ii) more typical uses  in either proper or metaphorical and methonymic ways such as textual corpora evidence; iii) possibility of a well reasoned and  structured enlarging of this data on the basis of the recent experience carried out in IWN.",{"paper_id":4708,"title":4709,"year":213,"month":855,"day":63,"doi":4710,"resource_url":4711,"first_page":63,"last_page":63,"pdf_url":4712,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4713,"paper_type":860,"authors":4714,"abstract":4721},"lrec2002-main-231","Transformed Subcategorization Frames in Chunk Parsing","10.63317\u002F3kwqmq4g4ot3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-231","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F231.pdf","lesmo-lombardo-2002-transformed",[4715,4718],{"paper_id":4708,"author_seq":247,"given_name":4716,"surname":4717,"affiliation":63,"orcid":63},"Leonardo","Lesmo",{"paper_id":4708,"author_seq":232,"given_name":4719,"surname":4720,"affiliation":63,"orcid":63},"Vincenzo","Lombardo","This paper describes an approach to treebank  development which relies on the manual development of annotation tools.  The overall process of tree annotation is described, and a special  emphasis is put on the description of the last tool which has been  built, i.e. a dependency-based robust chunk parser. The modularization  of the parser and the central role of verbal subcategorization is  presented. 
The first experimental results, carried out on a corpus of  645 sentences are reported and discussed.",{"paper_id":4723,"title":4724,"year":213,"month":855,"day":63,"doi":4725,"resource_url":4726,"first_page":63,"last_page":63,"pdf_url":4727,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4728,"paper_type":860,"authors":4729,"abstract":4733},"lrec2002-main-232","Measuring corpus homogeneity using a range of measures for inter-document distance","10.63317\u002F5avq3qndtjqm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-232","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F232.pdf","cavaglia-2002-measuring",[4730],{"paper_id":4723,"author_seq":247,"given_name":4731,"surname":4732,"affiliation":63,"orcid":63},"Gabriela","Cavaglià","With the ever more widespread use of corpora in language research, it is becoming increasingly important to be able to describe and compare corpora. The analysis of corpus homogeneity is preliminary to any quantitative approach to corpora comparison. We describe a method for text analysis based only on document-internal linguistic features, and a set of related homogeneity measures based on inter-document distance. 
We present a preliminary experiment to validate the hypothesis that in the presence of a homogeneous corpus the subcorpus that is necessary to train an NLP system is smaller than the one required if a heterogeneous corpus is used.Overhead projector",{"paper_id":4735,"title":4736,"year":213,"month":855,"day":63,"doi":4737,"resource_url":4738,"first_page":63,"last_page":63,"pdf_url":4739,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4740,"paper_type":860,"authors":4741,"abstract":4766},"lrec2002-main-233","Multilingual XML-Based Named Entity Recognition for E-Retail Domains","10.63317\u002F4s4rcjj6y6xy","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-233","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F233.pdf","grover-etal-2002-multilingual",[4742,4745,4747,4750,4751,4752,4754,4755,4758,4761,4764],{"paper_id":4735,"author_seq":247,"given_name":4743,"surname":4744,"affiliation":63,"orcid":63},"Claire","Grover",{"paper_id":4735,"author_seq":232,"given_name":1411,"surname":4746,"affiliation":63,"orcid":63},"McDonald",{"paper_id":4735,"author_seq":218,"given_name":4748,"surname":4749,"affiliation":63,"orcid":63},"Donnla Nic","Gearailt",{"paper_id":4735,"author_seq":203,"given_name":4404,"surname":4405,"affiliation":63,"orcid":63},{"paper_id":4735,"author_seq":188,"given_name":4492,"surname":4493,"affiliation":63,"orcid":63},{"paper_id":4735,"author_seq":172,"given_name":4401,"surname":4753,"affiliation":63,"orcid":63},"Samaritakis",{"paper_id":4735,"author_seq":155,"given_name":4401,"surname":4402,"affiliation":63,"orcid":63},{"paper_id":4735,"author_seq":138,"given_name":4756,"surname":4757,"affiliation":63,"orcid":63},"Maria 
Teresa","Pazienza",{"paper_id":4735,"author_seq":121,"given_name":4759,"surname":4760,"affiliation":63,"orcid":63},"Michele","Vindigni",{"paper_id":4735,"author_seq":104,"given_name":4762,"surname":4763,"affiliation":63,"orcid":63},"Frantz","Vichot",{"paper_id":4735,"author_seq":87,"given_name":2910,"surname":4765,"affiliation":63,"orcid":63},"Wolinski","We describe the multilingual Named Entity Recognition  and Classification (NERC) subpart of an e-retail product comparison  system which is currently under development as part of the EU-funded  project CROSSMARC. The system must be rapidly extensible, both to new  languages and new domains. To achieve this aim we use XML as our common  exchange format and the monolingual NERC components use a combination of  rule-based and machine-learning techniques. It has been challenging to  process web pages which contain heavily structured data where text is  intermingled with HTML and other code. Our preliminary evaluation  results demonstrate the viability of our approach.",{"paper_id":4768,"title":4769,"year":213,"month":855,"day":63,"doi":4770,"resource_url":4771,"first_page":63,"last_page":63,"pdf_url":4772,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4773,"paper_type":860,"authors":4774,"abstract":4789},"lrec2002-main-234","Usability Evaluation of a Dutch Multimodal System for Train Timetable 
Information","10.63317\u002F4u29cgcwqh9h","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-234","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F234.pdf","sturm-etal-2002-usability",[4775,4778,4781,4784,4786],{"paper_id":4768,"author_seq":247,"given_name":4776,"surname":4777,"affiliation":63,"orcid":63},"Janienke","Sturm",{"paper_id":4768,"author_seq":232,"given_name":4779,"surname":4780,"affiliation":63,"orcid":63},"Ilse","Bakx",{"paper_id":4768,"author_seq":218,"given_name":4782,"surname":4783,"affiliation":63,"orcid":63},"Bert","Cranen",{"paper_id":4768,"author_seq":203,"given_name":1021,"surname":4785,"affiliation":63,"orcid":63},"Terken",{"paper_id":4768,"author_seq":188,"given_name":4787,"surname":4788,"affiliation":63,"orcid":63},"Fusi","Wang","In the MATIS project a multimodal system has been developed for train timetable information. The aim of the project was to obtain guidelines for designing multimodal interfaces for information systems. The MATIS system accepts input both in spoken and in graphical mode (no keyboard input) and provides feedback in the same two modes. The user can choose at any time which of the input modalities (s)he prefers to use for a certain action. A user test was carried out in which 25 subjects were asked to evaluate the system. For comparison, users were also asked to test a GUI (Graphical User Interface) version of the train timetable information system as well as a speech-only version of the system. 
We measured the efficiency and the effectiveness of the interaction and the user satisfaction with all three systems.",{"paper_id":4791,"title":4792,"year":213,"month":855,"day":63,"doi":4793,"resource_url":4794,"first_page":63,"last_page":63,"pdf_url":4795,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4796,"paper_type":860,"authors":4797,"abstract":4804},"lrec2002-main-235","How feasible is the reuse of grammars for Named Entity Recognition?","10.63317\u002F4zm273mxomdq","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-235","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F235.pdf","pastra-etal-2002-feasible",[4798,4800,4801,4802,4803],{"paper_id":4791,"author_seq":247,"given_name":1675,"surname":4799,"affiliation":63,"orcid":63},"Pastra",{"paper_id":4791,"author_seq":232,"given_name":872,"surname":3493,"affiliation":63,"orcid":63},{"paper_id":4791,"author_seq":218,"given_name":3498,"surname":3499,"affiliation":63,"orcid":63},{"paper_id":4791,"author_seq":203,"given_name":3490,"surname":3491,"affiliation":63,"orcid":63},{"paper_id":4791,"author_seq":188,"given_name":3503,"surname":3504,"affiliation":63,"orcid":63},"In this paper, we argue that vital time is wasted in creating resources        from scratch, instead of reusing existing grammars for NE recognition.        
We discuss three possible reasons for this and present our corresponding empirical        results, that ground our argument and encourage more widespread  use of valuable existing resources.",{"paper_id":4806,"title":4807,"year":213,"month":855,"day":63,"doi":4808,"resource_url":4809,"first_page":63,"last_page":63,"pdf_url":4810,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4811,"paper_type":860,"authors":4812,"abstract":4844},"lrec2002-main-236","Advanced Tools for the Study of Natural Interactivity","10.63317\u002F42atbyqtnzu9","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-236","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F236.pdf","soria-etal-2002-advanced",[4813,4815,4816,4819,4821,4822,4823,4824,4827,4828,4830,4833,4836,4839,4842],{"paper_id":4806,"author_seq":247,"given_name":1428,"surname":4814,"affiliation":63,"orcid":63},"Soria",{"paper_id":4806,"author_seq":232,"given_name":4443,"surname":4444,"affiliation":63,"orcid":63},{"paper_id":4806,"author_seq":218,"given_name":4817,"surname":4818,"affiliation":63,"orcid":63},"Niels","Cadée",{"paper_id":4806,"author_seq":203,"given_name":2743,"surname":4820,"affiliation":63,"orcid":63},"Carletta",{"paper_id":4806,"author_seq":188,"given_name":4440,"surname":4441,"affiliation":63,"orcid":63},{"paper_id":4806,"author_seq":172,"given_name":1452,"surname":4265,"affiliation":63,"orcid":63},{"paper_id":4806,"author_seq":155,"given_name":1660,"surname":4060,"affiliation":63,"orcid":63},{"paper_id":4806,"author_seq":138,"given_name":4825,"surname":4826,"affiliation":63,"orcid":63},"Amy","Isard",{"paper_id":4806,"author_seq":121,"given_name":4457,"surname":4458,"affiliation":63,"orcid":63},{"paper_id":4806,"author_seq":104,"given_name":1195,"surname":4829,"affiliation":63,"orcid":63},"Lauer",{"paper_id":4806,"author_seq":87,"given_name":4831,"surname":4832,"affiliation":63,"orcid":63},"Wolfgang","Lezius",{"paper_id":4806,"author_seq":73,"given_name":48
34,"surname":4835,"affiliation":63,"orcid":63},"Lucas P.J.J.","Noldus",{"paper_id":4806,"author_seq":55,"given_name":4837,"surname":4838,"affiliation":63,"orcid":63},"Vito","Pirrelli",{"paper_id":4806,"author_seq":38,"given_name":4840,"surname":4841,"affiliation":63,"orcid":63},"Norbert","Reithinger",{"paper_id":4806,"author_seq":17,"given_name":1431,"surname":4843,"affiliation":63,"orcid":63},"Vögele","The NITE European project aims at building an integrated best practice  workbench for multi-level, cross-level and cross-modality annotation, retrieval and  exploitation of multi-party natural interactive human-human and human-machine dialogue data. In this paper we intend to broach the general lines of software development envisaged  in NITE, the four prototypes we intend to make available to the scientific community at  large and our approach to usability evaluation of the prototypes. Under the aegis of  LREC 2002 we plan to encourage conference participants to take active part in usability  evaluation and provide early feedback to our software design choices.",{"paper_id":4846,"title":4847,"year":213,"month":855,"day":63,"doi":4848,"resource_url":4849,"first_page":63,"last_page":63,"pdf_url":4850,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4851,"paper_type":860,"authors":4852,"abstract":4863},"lrec2002-main-237","ADAM: The SI-TAL Corpus of Annotated 
Dialogues","10.63317\u002F3ges3zwa78oj","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-237","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F237.pdf","cattoni-etal-2002-adam",[4853,4856,4859,4862],{"paper_id":4846,"author_seq":247,"given_name":4854,"surname":4855,"affiliation":63,"orcid":63},"Roldano","Cattoni",{"paper_id":4846,"author_seq":232,"given_name":4857,"surname":4858,"affiliation":63,"orcid":63},"Morena","Danieli",{"paper_id":4846,"author_seq":218,"given_name":4860,"surname":4861,"affiliation":63,"orcid":63},"Vanessa","Sandrini",{"paper_id":4846,"author_seq":203,"given_name":1428,"surname":4814,"affiliation":63,"orcid":63},"In this paper we describe the methodological assumptions, general architectural framework and annotation and encoding practices underlying the ADAM Corpus, which has been developed as part of the Italian national project SI-TAL. Each of the 450 dialogues is represented by an orthographic transcription and is annotated at five levels of linguistic information, namely prosody, pos tagging, syntax, semantics, and pragmatics.",{"paper_id":4865,"title":4866,"year":213,"month":855,"day":63,"doi":4867,"resource_url":4868,"first_page":63,"last_page":63,"pdf_url":4869,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4870,"paper_type":860,"authors":4871,"abstract":4879},"lrec2002-main-238","Leo: an Architecture for Sharing Resources for Unification-Based 
Grammars","10.63317\u002F5iicha2npopf","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-238","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F238.pdf","baldridge-etal-2002-leo",[4872,4875,4877],{"paper_id":4865,"author_seq":247,"given_name":4873,"surname":4874,"affiliation":63,"orcid":63},"Jason","Baldridge",{"paper_id":4865,"author_seq":232,"given_name":3020,"surname":4876,"affiliation":63,"orcid":63},"Dowding",{"paper_id":4865,"author_seq":218,"given_name":863,"surname":4878,"affiliation":63,"orcid":63},"Early","Many mature systems for parsing unification-based  grammars have been developed over the last two decades. They incorporate  a variety of design decisions both in implementation and in the  representations they use for grammatical information. The Leo project  aims to provide an architecture for automating the sharing of  grammatical resources among various systems so that one system can take  advantage of specialized algorithms and tools that are implemented for  the representations used by another. The project furthermore seeks to  learn about best practice in the design of these representations and  encode their principles in a new XML-based format. 
This paper describes  initial work toward creating the Leo architecture and tools that convert  between different representations.",{"paper_id":4881,"title":4882,"year":213,"month":855,"day":63,"doi":4883,"resource_url":4884,"first_page":63,"last_page":63,"pdf_url":4885,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4886,"paper_type":860,"authors":4887,"abstract":4897},"lrec2002-main-239","Tuning Context Features with Genetic Algorithms","10.63317\u002F3wakzgsrzbko","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-239","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F239.pdf","spasic-etal-2002-tuning",[4888,4891,4894],{"paper_id":4881,"author_seq":247,"given_name":4889,"surname":4890,"affiliation":63,"orcid":63},"Irena","Spasić",{"paper_id":4881,"author_seq":232,"given_name":4892,"surname":4893,"affiliation":63,"orcid":63},"Goran","Nenadić",{"paper_id":4881,"author_seq":218,"given_name":4895,"surname":4896,"affiliation":63,"orcid":63},"Sophia","Ananiadou","In this paper we present an approach to tuning of  context features acquired from corpora. The approach is based on the  idea of a genetic algorithm (GA). We analyse a whole population of  contexts surrounding related linguistic entities in order to find a  generic property characteristic of such contexts. Our goal is to tune  the context properties so as not to lose any correct feature values, but  also to minimise the presence of ambiguous values. The GA implements a  crossover operator based on dominant and recessive genes, where a gene  corresponds to a context feature. A dominant gene is the one that, when  combined with another gene of the same type, is inevitably reflected in  the offspring. Dominant genes denote the more suitable context features.  
In each iteration of the GA, the number of individuals in the population  is halved, finally resulting in a single individual that contains  context features tuned with respect to the information contained in the  training corpus. We illustrate the general method by using a case study  concerned with the identification of relationships between verbs and  terms complementing them. More precisely, we tune the classes of terms  that are typically selected as arguments for the considered verbs in  order to acquire their semantic features.",{"paper_id":4899,"title":4900,"year":213,"month":855,"day":63,"doi":4901,"resource_url":4902,"first_page":63,"last_page":63,"pdf_url":4903,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4904,"paper_type":860,"authors":4905,"abstract":4909},"lrec2002-main-240","Automatic Acronym Acquisition and Term Variation Management within Domain-Specific Texts","10.63317\u002F3efas92idk6g","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-240","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F240.pdf","nenadic-etal-2002-automatic",[4906,4907,4908],{"paper_id":4899,"author_seq":247,"given_name":4892,"surname":4893,"affiliation":63,"orcid":63},{"paper_id":4899,"author_seq":232,"given_name":4889,"surname":4890,"affiliation":63,"orcid":63},{"paper_id":4899,"author_seq":218,"given_name":4895,"surname":4896,"affiliation":63,"orcid":63},"In this paper we present a framework for the  effective management of terms and their variants that are automatically  acquired from domain-specific texts. In our approach, the term variant  recognition is incorporated in the automatic term retrieval process by  taking into account orthographical, morphological, syntactic,  lexico-semantic and pragmatic term variations. In particular, we address  acronyms as a common way of introducing term variants in scientific  papers. 
We describe a method for the automatic acquisition of newly  introduced acronyms and the mapping to their ‘meanings’, i.e. the  corresponding terms. The proposed three-step procedure is based on  morpho-syntactic constraints that are commonly used in acronym  definitions. First, acronym definitions containing an acronym and the  corresponding term are retrieved. These two elements are matched in the  second step by performing morphological analysis of words and combining  forms constituting the term. The problems of acronym variation and  acronym ambiguity are addressed in the third step by establishing  classes of term variants that correspond to specific concepts. We  present the results of the acronym acquisition in the domain of  molecular biology: the precision of the method ranged from 94% to 99%  depending on the size of the corpus used for evaluation, whilst the  recall was 73%.",{"paper_id":4911,"title":4912,"year":213,"month":855,"day":63,"doi":4913,"resource_url":4914,"first_page":63,"last_page":63,"pdf_url":4915,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4916,"paper_type":860,"authors":4917,"abstract":4921},"lrec2002-main-241","Adverbs in Semantic Lexica for NLP - The extension of the Danish SIMPLE lexicon with Time Adverbs","10.63317\u002F3f2jnmrym2sm","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-241","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F241.pdf","nimb-2002-adverbs",[4918],{"paper_id":4911,"author_seq":247,"given_name":4919,"surname":4920,"affiliation":63,"orcid":63},"Sanni","Nimb","In this paper we will discuss the treatment of  adverbs in semantic lexica for NLP. On the basis of a semantic  classification of Danish lexical time adverbs as well as a test carried  out wrt. their ability to combine with different tenses and types of  Aktionsart, an ontology on time adverbs is established. 
We will discuss  which semantic characteristics exposed by the test that should be  included in a computational lexicon, and propose how the ideas can be  incorporated in the SIMPLE lexicon model, partly by reusing already  implemented features from the model, partly by an extension of the set  of features. Furthermore we will show how some adverbs will inherit  information from several nodes in a SIMPLE ontology for adverbs, and how  semantic relations, e.g. synonymi and antonomy, is relevant also in the  case of adverbs. Finally we will give som examples on lexical entries of  adverbs. The result can easily be applied on other adverbials with a  time sense, and will therefore in fact also cover a large group of  lexicalised multiword entities.",{"paper_id":4923,"title":4924,"year":213,"month":855,"day":63,"doi":4925,"resource_url":4926,"first_page":63,"last_page":63,"pdf_url":4927,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4928,"paper_type":860,"authors":4929,"abstract":4949},"lrec2002-main-242","Scaling Up an MT Prototype for Industrial Use - Databases and Data Flow","10.63317\u002F2wnv2gqpmozs","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-242","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F242.pdf","hein-etal-2002-scaling",[4930,4933,4935,4937,4940,4943,4946],{"paper_id":4923,"author_seq":247,"given_name":4931,"surname":4932,"affiliation":63,"orcid":63},"Anna 
Sågvall","Hein",{"paper_id":4923,"author_seq":232,"given_name":1911,"surname":4934,"affiliation":63,"orcid":63},"Forsbom",{"paper_id":4923,"author_seq":218,"given_name":3680,"surname":4936,"affiliation":63,"orcid":63},"Tiedemann",{"paper_id":4923,"author_seq":203,"given_name":4938,"surname":4939,"affiliation":63,"orcid":63},"Per","Weijnitz",{"paper_id":4923,"author_seq":188,"given_name":4941,"surname":4942,"affiliation":63,"orcid":63},"Ingrid","Almqvist",{"paper_id":4923,"author_seq":172,"given_name":4944,"surname":4945,"affiliation":63,"orcid":63},"Leif-Jöran","Olsson",{"paper_id":4923,"author_seq":155,"given_name":4947,"surname":4948,"affiliation":63,"orcid":63},"Sten","Thaning","In a cooperative project between Uppsala University, the bus and truck manufacturing company Scania CV AB, and the translation company Explicon AB, issues of scaling up the transfer-based machine translation prototype MULTRA for industrial use is beeing investigated. The project is limited to one domain, automotive service literature, and one translation direction, Swedish to English, but issues concerning the change of domain, translation direction and language pair are also considered. Three focal points of the project work have been the design and implementation of the new MATS system, including the redesign, porting and integration of MULTRA, the redesign and implementation of the dictionaries of the language modules as a lexical database, and the scaling up of the dictionaries and the grammars. The system is currently trained on a corpus of aligned bitexts from the automotive service domain. The coverage of the lexical data is almost complete, and validated by professional translators, but the grammars are still limited. Despite the incomplete state of the grammars, the system already translates more than a third of the segments in the corpus. 
Preliminary evaluations of system performance and coverage have been made, and further development of evaluation methods and metrics are in progress.",{"paper_id":4951,"title":4952,"year":213,"month":855,"day":63,"doi":4953,"resource_url":4954,"first_page":63,"last_page":63,"pdf_url":4955,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4956,"paper_type":860,"authors":4957,"abstract":4964},"lrec2002-main-243","A Flexible Distributed Architecture for Natural Language Analyzers","10.63317\u002F2f7ermdp3fgr","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-243","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F243.pdf","carreras-padro-2002-flexible",[4958,4961],{"paper_id":4951,"author_seq":247,"given_name":4959,"surname":4960,"affiliation":63,"orcid":63},"Xavier","Carreras",{"paper_id":4951,"author_seq":232,"given_name":4962,"surname":4963,"affiliation":63,"orcid":63},"Lluís","Padró","Many modern NLP applications require basic language processors such as PoS taggers, parsers, etc. All these tools are usually pre-existing, and must be adapted to fit in the requirements of the application to be developed. This adaptation procedure is usually time consuming and increases the application development cost. Our proposal to minimize this effort is to use standard engineering solutions for software reusability. In that sense, we converted all our language processors to classes which may be instantiated and accessed from any application via a CORBA broker. 
Reusability is not the only advantatge, since the distributed CORBA approach also makes it possible to access the analyzers from any remote application, developed in any language, and running on any operating system.",{"paper_id":4966,"title":4967,"year":213,"month":855,"day":63,"doi":4968,"resource_url":4969,"first_page":63,"last_page":63,"pdf_url":4970,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4971,"paper_type":860,"authors":4972,"abstract":4987},"lrec2002-main-244","Italian arabic linguistic tools","10.63317\u002F3dr57cp8jj8c","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-244","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F244.pdf","picchi-etal-2002-italian",[4973,4976,4978,4981,4984],{"paper_id":4966,"author_seq":247,"given_name":4974,"surname":4975,"affiliation":63,"orcid":63},"Eugenio","Picchi",{"paper_id":4966,"author_seq":232,"given_name":1911,"surname":4977,"affiliation":63,"orcid":63},"Sassolini",{"paper_id":4966,"author_seq":218,"given_name":4979,"surname":4980,"affiliation":63,"orcid":63},"Ouafae","Nahli",{"paper_id":4966,"author_seq":203,"given_name":4982,"surname":4983,"affiliation":63,"orcid":63},"Sebastiana","Cucurullo",{"paper_id":4966,"author_seq":188,"given_name":4985,"surname":4986,"affiliation":63,"orcid":63},"M. Isabel","Vargas","This paper concerns our participation in the research project: 'Corpus bilingue Italiano - Arabo' (Bilingual Italian - Arabic corpus) funded by law 488\u002F92. The purpose of this project is to develop some linguistic tools and resources for bilingual Italian\u002FArabic corpora; its background and starting point are tools that have already been developed by the Computational Linguistics Institute. 
As far as IT tools are concerned, the project consists of four basic elements: a) morphological engine for the Arabic language; b) aligning system for Italian and Arabic parallel texts; c) automatic tagging system for Italian and Arabic texts; d) access tools (and relevant query systems) for the texts of the bilingual corpora at each text-processing step.",{"paper_id":4989,"title":4990,"year":213,"month":855,"day":63,"doi":4991,"resource_url":4992,"first_page":63,"last_page":63,"pdf_url":4993,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":4994,"paper_type":860,"authors":4995,"abstract":5001},"lrec2002-main-245","Language Resource Creation and Distribution at the Linguistic Data Consortium: A Progress Report","10.63317\u002F2mvcga28z5zq","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-245","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F245.pdf","cieri-liberman-2002-language",[4996,4999],{"paper_id":4989,"author_seq":247,"given_name":4997,"surname":4998,"affiliation":63,"orcid":63},"Christopher","Cieri",{"paper_id":4989,"author_seq":232,"given_name":2227,"surname":5000,"affiliation":63,"orcid":63},"Liberman","Changes in the supply of and demand for language  resources continues to affect the role of large data centers such as the  Linguistic Data Consortium (LDC) and European Language Resource Center  (ELRA) within the research communities they serve. The past few years  have seen increased demand for: intensively multi-modal resources,  larger data sets in high-density languages and new data in low density  languages; standards and tools for corpus development and re-useable  resources. The next few years will bring demand for extensive batteries  of coordinated language resources with sophisticated annotation in  several major languages. 
The DARPA program in Translingual Information  Detection Extraction and Summarization (TIDES) has already undertaken  such resource development; programs with similarly broad scope  addressing other technologies will surely follow. Data centers will be  well placed to address these needs if they integrate new resource  development with distribution of existing resources to fill known gaps  by creating or assisting the creation of new data. LDC has projects  ongoing to address all of these issues. This paper will provide an  overview of LDC activity in corpus creation, annotation and distribution  and describe new efforts to bring together communities of researchers, to  identify best practices and develop tools of general use.",{"paper_id":5003,"title":5004,"year":213,"month":855,"day":63,"doi":5005,"resource_url":5006,"first_page":63,"last_page":63,"pdf_url":5007,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5008,"paper_type":860,"authors":5009,"abstract":5013},"lrec2002-main-246","Enhanced Dialogue Markup for Crisis Talk Scenario Resources","10.63317\u002F3xpmu5uhda9m","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-246","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F246.pdf","sassen-gibbon-2002-enhanced",[5010,5012],{"paper_id":5003,"author_seq":247,"given_name":1428,"surname":5011,"affiliation":63,"orcid":63},"Sassen",{"paper_id":5003,"author_seq":232,"given_name":3755,"surname":3756,"affiliation":63,"orcid":63},"We present a method of enhancing dialogue markup by mapping  HPSG-based discourse category information into XML. The application scenario is crisis talk, specifically cockpit voice recording (CVR)  transcripts of aviation disasters. 
This approach is new both as a source of richly annotated spoken language corpus resources for a  little known scenario, and in grammatical theory and language documentation.",{"paper_id":5015,"title":5016,"year":213,"month":855,"day":63,"doi":5017,"resource_url":5018,"first_page":63,"last_page":63,"pdf_url":5019,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5020,"paper_type":860,"authors":5021,"abstract":5023},"lrec2002-main-247","MatsLex - a Multilingual Lexical Database for Machine Translation","10.63317\u002F48vnmh2bv8ex","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-247","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F247.pdf","tiedemann-2002-matslex",[5022],{"paper_id":5015,"author_seq":247,"given_name":3680,"surname":4936,"affiliation":63,"orcid":63},"MatsLex represents a relational database which can be used to store  multilingual lexical data in a central and coherent lexicon. Tools and interfaces have been implemented to maintain the database and to apply its  contents to different multilingual applications. MatsLex has been developed  to feed different modules of a machine translation system with appropriate  data, monolingual as well as bilingual. The database gives the user full  control of the lexicon. 
In the paper, features and interfaces of the database are discussed  as well as the connection to the machine translation engine.",{"paper_id":5025,"title":5026,"year":213,"month":855,"day":63,"doi":5027,"resource_url":5028,"first_page":63,"last_page":63,"pdf_url":5029,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5030,"paper_type":860,"authors":5031,"abstract":5035},"lrec2002-main-248","Terminology Resources in the Context of a Major Translation Project","10.63317\u002F2265jcz3haia","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-248","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F248.pdf","rzewuska-2002-terminology",[5032],{"paper_id":5025,"author_seq":247,"given_name":5033,"surname":5034,"affiliation":63,"orcid":63},"Maria","Rzewuska","In this paper the author is going to present terminology activity and resources in the context of a translation project - translation of European Community's legislation into Polish. The specificity of the project focuses on the fact that the project is run by a central organ of public administration in Poland (Office of the Committee for European Integration) and that it consists of a translation into another language of a significant number of pages in a narrow time frame. The amount of the texts covered by the project is assessed for ca. 60 thousand pages (as published in the Official Journal of the European Communities). The project has been launched in 1997 and it is due to end in mid 2003. 
Facing such a huge amount of legal texts implies a lot of side activities, which are indispensable to assume a high quality of translations.",{"paper_id":5037,"title":5038,"year":213,"month":855,"day":63,"doi":5039,"resource_url":5040,"first_page":63,"last_page":63,"pdf_url":5041,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5042,"paper_type":860,"authors":5043,"abstract":5047},"lrec2002-main-249","Reducing Segmental Duration Variation by Local Speech Rate Normalization of Large Spoken Language Resources","10.63317\u002F34gdttfc2zru","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-249","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F249.pdf","pfitzinger-2002-reducing",[5044],{"paper_id":5037,"author_seq":247,"given_name":5045,"surname":5046,"affiliation":63,"orcid":63},"Hartmut R.","Pfitzinger","We developed a time-domain normalization procedure which uses a speech signal  and its corresponding speech rate contour as an input, and produces the normalized speech signal. Then we normalized the speech rate of a large spoken  language resource of German read speech. We compared the resulting segment  durations with the original durations using several three-way ANOVAs with  phone type and speaker as independent variables, since we assume that segment  duration variation is determined by segment type (intrinsic duration), by the  speaker (speech rate, sociolect, ideolect, dialect, speech production variation), and by linguistic effects (context, syllable structure, accent,  and stress). One important result of the statistical analysis was, that the  influence of the speaker on segment duration variation decreased dramatically  (factor 0.54 for vowels, factor 0.29 for consonants) when normalizing speech  rate, despite the fact that sociolect, ideolect, and dialect remained almost  unchanged. 
Since the interaction between the independent variables speaker and  phone type remained constant, the hypothesis arises, that this interaction  contains most of the speaker-specific information.",{"paper_id":5049,"title":5050,"year":213,"month":855,"day":63,"doi":5051,"resource_url":5052,"first_page":63,"last_page":63,"pdf_url":5053,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5054,"paper_type":860,"authors":5055,"abstract":5059},"lrec2002-main-250","Robust Accurate Statistical Annotation of General Text","10.63317\u002F2aswu2tg29np","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-250","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F250.pdf","briscoe-carroll-2002-robust",[5056,5057],{"paper_id":5049,"author_seq":247,"given_name":3962,"surname":3963,"affiliation":63,"orcid":63},{"paper_id":5049,"author_seq":232,"given_name":3020,"surname":5058,"affiliation":63,"orcid":63},"Carroll","We describe a robust accurate domain-independent approach to  statistical parsing incorporated into the new release of the ANLT toolkit, and publicly available as a  research tool. The system has been used to parse many well known corpora in order to  produce data for lexical acquisition efforts; it has also been used as a component in an  open-domain question answering project. The performance of the system is  competitive with that of statistical parsers using highly lexicalised parse selection models. 
However, we plan to extend the system to improve  parse coverage, depth and accuracy.",{"paper_id":5061,"title":5062,"year":213,"month":855,"day":63,"doi":5063,"resource_url":5064,"first_page":63,"last_page":63,"pdf_url":5065,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5066,"paper_type":860,"authors":5067,"abstract":5077},"lrec2002-main-251","A Human Language Technologies Platform for the Dutch language: awareness, management maintenance and distribution","10.63317\u002F43jic8mm3kqt","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-251","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F251.pdf","cucchiarini-etal-2002-human",[5068,5071,5074],{"paper_id":5061,"author_seq":247,"given_name":5069,"surname":5070,"affiliation":63,"orcid":63},"Catia","Cucchiarini",{"paper_id":5061,"author_seq":232,"given_name":5072,"surname":5073,"affiliation":63,"orcid":63},"Elisabeth","D’Halleweyn",{"paper_id":5061,"author_seq":218,"given_name":5075,"surname":5076,"affiliation":63,"orcid":63},"Lisanne","Teunissen","In this paper we report on two of the four action  lines within the project \"Dutch Human Language Technologies  Platform\": Action line A, which was aimed at raising awareness of  the results of HLT research and promoting communication among interested  partners, and Action line D, which was concerned with management,  maintenance and distribution of HLT resources. Our overview of the  results obtained so far reveals that the goals of action lines A and D  have been achieved and that there are clear directions for how to  proceed in the near future. 
We hope that the experiences of the Dutch  speaking area may be useful to other countries that intend to start  similar initiatives.",{"paper_id":5079,"title":5080,"year":213,"month":855,"day":63,"doi":5081,"resource_url":5082,"first_page":63,"last_page":63,"pdf_url":5083,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5084,"paper_type":860,"authors":5085,"abstract":5094},"lrec2002-main-252","A Field Survey for Establishing Priorities in the Development of HLT Resources for Dutch","10.63317\u002F5q9ovq3yqs5h","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-252","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F252.pdf","binnenpoorte-etal-2002-field",[5086,5087,5089,5090,5091,5093],{"paper_id":5079,"author_seq":247,"given_name":4589,"surname":2492,"affiliation":63,"orcid":63},{"paper_id":5079,"author_seq":232,"given_name":1998,"surname":5088,"affiliation":63,"orcid":63},"De Vriend",{"paper_id":5079,"author_seq":218,"given_name":1607,"surname":4777,"affiliation":63,"orcid":63},{"paper_id":5079,"author_seq":203,"given_name":4559,"surname":2438,"affiliation":63,"orcid":63},{"paper_id":5079,"author_seq":188,"given_name":4607,"surname":5092,"affiliation":63,"orcid":63},"Strik",{"paper_id":5079,"author_seq":172,"given_name":3637,"surname":5070,"affiliation":63,"orcid":63},"In this paper we describe a survey of Dutch language  resources that has been carried out within the framework of a project  launched by the Dutch Language Union (Nederlandse Taalunie) with the aim  of strengthening the position of Dutch in Human Language Technologies  (HLT). In this paper we present a so-called BLARK (Basic LAnguage  Resources Kit). Based on the information collected in the survey, a  priority list has been drawn up for materials that need to be developed  to complete the BLARK specific for Dutch. 
The method employed and  reported in this paper is not specific for Dutch and can be adopted for  other languages.",{"paper_id":5096,"title":5097,"year":213,"month":855,"day":63,"doi":5098,"resource_url":5099,"first_page":63,"last_page":63,"pdf_url":5100,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5101,"paper_type":860,"authors":5102,"abstract":5109},"lrec2002-main-253","Natural Language Dialogue in a Virtual Assistant Interface","10.63317\u002F52ebwewisjet","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-253","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F253.pdf","garcia-serrano-etal-2002-natural",[5103,5105,5107],{"paper_id":5096,"author_seq":247,"given_name":5104,"surname":3056,"affiliation":63,"orcid":63},"Ana M.",{"paper_id":5096,"author_seq":232,"given_name":3254,"surname":5106,"affiliation":63,"orcid":63},"Rodrigo-Aguado",{"paper_id":5096,"author_seq":218,"given_name":1345,"surname":5108,"affiliation":63,"orcid":63},"Calle","Dialogue management and language processing appear as  key points in virtual assistants for e-shops, as their main goal is to  assist the user both in his navigation through the shop product pages  and in his search for the most appropriate product. 
In this paper we  present the details of the semantic and pragmatic approach and the  dialogue management done in the ADVICE project (IST-1999-11305) for a  bricolage tools shop, as well as how these modules have been evaluated.",{"paper_id":5111,"title":5112,"year":213,"month":855,"day":63,"doi":5113,"resource_url":5114,"first_page":63,"last_page":63,"pdf_url":5115,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5116,"paper_type":860,"authors":5117,"abstract":5127},"lrec2002-main-254","The UNL System","10.63317\u002F33gg6r5dwbpc","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-254","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F254.pdf","cardenosa-etal-2002-unl",[5118,5121,5124],{"paper_id":5111,"author_seq":247,"given_name":5119,"surname":5120,"affiliation":63,"orcid":63},"Jesús","Cardeñosa",{"paper_id":5111,"author_seq":232,"given_name":5122,"surname":5123,"affiliation":63,"orcid":63},"Edmundo","Tovar",{"paper_id":5111,"author_seq":218,"given_name":5125,"surname":5126,"affiliation":63,"orcid":63},"Carolina","Gallardo","The UNL System was conceived to support multilingual  services in Internet being an alternative to the classical machine  translation systems. The UNL System is based in the creation of  documents written in an unique computational language able to represent  concepts and their relations. The definition of this language has been  possible thanks to the collaboration of more than one hundred people,  prestigious researchers, and scientists of all around the world. The  purpose of the UNL is to break the linguistic barriers in Internet that  avoid the real global access to the knowledge and culture for all  people. Since the starting of the UNL project in 1996, the participants  in the project from initially 14 languages have made substantial  progress in the technical as well as the organizational aspects  involved. 
This demonstration will show to the academic and business  communities the current state and practical achievements of the UNL  system.",{"paper_id":5129,"title":5130,"year":213,"month":855,"day":63,"doi":5131,"resource_url":5132,"first_page":63,"last_page":63,"pdf_url":5133,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5134,"paper_type":860,"authors":5135,"abstract":5145},"lrec2002-main-255","Bilingual Indexing for Information Retrieval with AUTINDEX","10.63317\u002F29oh2esx2f5w","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-255","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F255.pdf","maas-etal-2002-bilingual",[5136,5138,5140,5143],{"paper_id":5129,"author_seq":247,"given_name":1434,"surname":5137,"affiliation":63,"orcid":63},"Maas",{"paper_id":5129,"author_seq":232,"given_name":4703,"surname":5139,"affiliation":63,"orcid":63},"Nuebel",{"paper_id":5129,"author_seq":218,"given_name":5141,"surname":5142,"affiliation":63,"orcid":63},"Catherine","Pease",{"paper_id":5129,"author_seq":203,"given_name":3674,"surname":5144,"affiliation":63,"orcid":63},"Schmidt","AUTINDEX is a bilingual automatic indexing system for the two languages German and English.  It is being developed within an EU-funded project called \"BINDEX\" (IST-1999-20028, November 2000 - April 2002). The aim of the system  is to automatically index large quantities of abstracts of scientific  and technical papers from several areas of engineering. These abstracts are  provided by project partners FIZ (Fachinformationszentrum Technik) in  Frankfurt (Germany), and IEE (Institution of Electrical Engineers) in Stevenage (England) - both are large information providers. 
Automatic indexing takes place using a controlled vocabulary (lists of  approved \"descriptors\") provided in monolingual and bilingual thesauri, which have been made available (and are also used for manual indexing) by FIZ and IEE.   The indexing process produces for a given abstract a list of descriptors as   well as a list of classification codes using these thesauri. The AUTINDEX system also allows for free indexing - indexing with an unrestricted vocabulary (delivering so called 'free descriptors´). These free descriptors are used to enhance and extend the thesauri",{"paper_id":5147,"title":5148,"year":213,"month":855,"day":63,"doi":5149,"resource_url":5150,"first_page":63,"last_page":63,"pdf_url":5151,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5152,"paper_type":860,"authors":5153,"abstract":5156},"lrec2002-main-256","The Future of Maltilex","10.63317\u002F3662vv2yiohz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-256","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F256.pdf","rosner-2002-future",[5154],{"paper_id":5147,"author_seq":247,"given_name":2079,"surname":5155,"affiliation":63,"orcid":63},"Rosner","The Maltilex project, supported by the University of  Malta, has now been running for approximately 3 years. Its aim is to  create a computational lexicon of Maltese to serve as the basic  infrastructure for the development of a wide variety of language-enabled  applications. The project is further described in Rosner et. al. (Rosner  et-al 1999, Rosner et al., 1998). This paper discusses the background,  achievements, and immediate future aims of the project. 
It concludes  with a discussion of some themes to be pursued in the medium term.",{"paper_id":5158,"title":5159,"year":213,"month":855,"day":63,"doi":5160,"resource_url":5161,"first_page":63,"last_page":63,"pdf_url":5162,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5163,"paper_type":860,"authors":5164,"abstract":63},"lrec2002-main-257","Standards & best practice for multilingual computational lexicons: ISLE MILE and more","10.63317\u002F5k5w9o8y6a9g","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-257","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F257.pdf","calzolari-etal-2002-standards",[5165,5166,5167],{"paper_id":5158,"author_seq":247,"given_name":4183,"surname":4184,"affiliation":63,"orcid":63},{"paper_id":5158,"author_seq":232,"given_name":1767,"surname":1768,"affiliation":63,"orcid":63},{"paper_id":5158,"author_seq":218,"given_name":3385,"surname":3386,"affiliation":63,"orcid":63},{"paper_id":5169,"title":5170,"year":213,"month":855,"day":63,"doi":5171,"resource_url":5172,"first_page":63,"last_page":63,"pdf_url":5173,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5174,"paper_type":860,"authors":5175,"abstract":5200},"lrec2002-main-258","From Resources to Applications. 
Designing the Multilingual ISLE Lexical Entry","10.63317\u002F3c9myjkwh5ud","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-258","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F258.pdf","atkins-etal-2002-resources",[5176,5179,5180,5183,5184,5185,5188,5189,5192,5194,5195,5198,5199],{"paper_id":5169,"author_seq":247,"given_name":5177,"surname":5178,"affiliation":63,"orcid":63},"Sue","Atkins",{"paper_id":5169,"author_seq":232,"given_name":1644,"surname":1645,"affiliation":63,"orcid":63},{"paper_id":5169,"author_seq":218,"given_name":5181,"surname":5182,"affiliation":63,"orcid":63},"Francesca","Bertagna",{"paper_id":5169,"author_seq":203,"given_name":2547,"surname":2548,"affiliation":63,"orcid":63},{"paper_id":5169,"author_seq":188,"given_name":4183,"surname":4184,"affiliation":63,"orcid":63},{"paper_id":5169,"author_seq":172,"given_name":5186,"surname":5187,"affiliation":63,"orcid":63},"Christiane","Fellbaum",{"paper_id":5169,"author_seq":155,"given_name":1767,"surname":1768,"affiliation":63,"orcid":63},{"paper_id":5169,"author_seq":138,"given_name":5190,"surname":5191,"affiliation":63,"orcid":63},"Alessandro","Lenci",{"paper_id":5169,"author_seq":121,"given_name":5141,"surname":5193,"affiliation":63,"orcid":63},"MacLeod",{"paper_id":5169,"author_seq":104,"given_name":3385,"surname":3386,"affiliation":63,"orcid":63},{"paper_id":5169,"author_seq":87,"given_name":5196,"surname":5197,"affiliation":63,"orcid":63},"Gregor","Thurmair",{"paper_id":5169,"author_seq":73,"given_name":1641,"surname":1642,"affiliation":63,"orcid":63},{"paper_id":5169,"author_seq":55,"given_name":1692,"surname":4186,"affiliation":63,"orcid":63},"The ISLE Computational Lexicon Working Group is committed to the consensual definition of a standardized infrastructure to develop multilingual resources for HLT applications. 
In particular, the ISLE-CLWG pursues this goal by designing MILE (Multilingual ISLE Lexical Entry), a general schema for the encoding of multilingual lexical information. This has to be intended as a meta-entry, acting as a common representational layer for multilingual lexical resources. We present the general architecture and features of MILE, as well as the methodology adopted for its definition. In particular, we focus on two essential ingredients for the MILE specification: the selection of the types of lexical information most relevant to establish multilingual correspondences, and the specification of a data structure which will provide the formal backbone of the MILE as a general representation language to develop multilingual resources. The ISLE recommendations will also consist of a first repository of shared lexical objects, including main syntactic constructions, basic operations and conditions to establish multilingual links, macro-semantic objects, etc., for the encoding of lexical units at a higher level of abstraction, as a step in the direction of simplifying and improving the usability of the MILE recommendations. 
We are also developing the ISLE Lexicographic tool.",{"paper_id":5202,"title":5203,"year":213,"month":855,"day":63,"doi":5204,"resource_url":5205,"first_page":63,"last_page":63,"pdf_url":5206,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5207,"paper_type":860,"authors":5208,"abstract":5218},"lrec2002-main-259","Towards Best Practice for Multiword Expressions in Computational Lexicons","10.63317\u002F3i5y7f2mpi4f","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-259","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F259.pdf","calzolari-etal-2002-towards",[5209,5210,5211,5212,5215,5216,5217],{"paper_id":5202,"author_seq":247,"given_name":4183,"surname":4184,"affiliation":63,"orcid":63},{"paper_id":5202,"author_seq":232,"given_name":3171,"surname":3172,"affiliation":63,"orcid":63},{"paper_id":5202,"author_seq":218,"given_name":1767,"surname":1768,"affiliation":63,"orcid":63},{"paper_id":5202,"author_seq":203,"given_name":5213,"surname":5214,"affiliation":63,"orcid":63},"Nancy","Ide",{"paper_id":5202,"author_seq":188,"given_name":5190,"surname":5191,"affiliation":63,"orcid":63},{"paper_id":5202,"author_seq":172,"given_name":5141,"surname":5193,"affiliation":63,"orcid":63},{"paper_id":5202,"author_seq":155,"given_name":1692,"surname":4186,"affiliation":63,"orcid":63},"The importance and role of multi-word expressions (MWE) in the description and processing of natural language has been long recognized. However, multi-word information has often been relegated to the marginal role of idiosyncratic lexical information. The need for MWE lexicons grows even more acute for multi-lingual applications, for which (sometimes complex) correspondences must be identified, classified, and recorded. Within the XMELLT and ISLE projects we have started to investigate the potential to develop multi-lingual, multi-word expression lexicons incorporating both syntactic and semantic information. 
We aim at specifying means to acquire and represent multi-word lexical entries for multiple languages, and establishing uniform (or inter-translatable) standards for describing  multi-word lexical entries. We explored theoretical approaches used in large lexicon-building projects, in particular FrameNet and SIMPLE. They constitute  interesting frameworks for the explicit syntactic and semantic representation of MWEs, due mainly to their ability to capture semantic multidimensionality, through frame elements and qualia relations respectively. We also developed an abstract data model for lexical information together with a representation in XML for it. Our goal is to define a set of minimal lexicon “objects”, which can serve not only as a model for MWEs but also for lexical data in general.",{"paper_id":5220,"title":5221,"year":213,"month":855,"day":63,"doi":5222,"resource_url":5223,"first_page":63,"last_page":63,"pdf_url":5224,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5225,"paper_type":860,"authors":5226,"abstract":5243},"lrec2002-main-260","Multilingual Summarization by Integrating Linguistic Resources in the MLIS-MUSI 
Project","10.63317\u002F3y4yd46gvvg8","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-260","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F260.pdf","lenci-etal-2002-multilingual",[5227,5228,5230,5231,5233,5236,5238,5241],{"paper_id":5220,"author_seq":247,"given_name":5190,"surname":5191,"affiliation":63,"orcid":63},{"paper_id":5220,"author_seq":232,"given_name":1626,"surname":5229,"affiliation":63,"orcid":63},"Bartolini",{"paper_id":5220,"author_seq":218,"given_name":4183,"surname":4184,"affiliation":63,"orcid":63},{"paper_id":5220,"author_seq":203,"given_name":3055,"surname":5232,"affiliation":63,"orcid":63},"Agua",{"paper_id":5220,"author_seq":188,"given_name":5234,"surname":5235,"affiliation":63,"orcid":63},"Stephan","Busemann",{"paper_id":5220,"author_seq":172,"given_name":1814,"surname":5237,"affiliation":63,"orcid":63},"Cartier",{"paper_id":5220,"author_seq":155,"given_name":5239,"surname":5240,"affiliation":63,"orcid":63},"Karine","Chevreau",{"paper_id":5220,"author_seq":138,"given_name":1893,"surname":5242,"affiliation":63,"orcid":63},"Coch","In this paper we will illustrate the approach to multilingual automatic abstract production adopted by the EU-sponsored project MLIS-MUSI. Although a small scale research project, MUSI has tried to tackle the challenges set by multilingual summarization by adopting an original approach based on the definition of a shared ontology and representation language, and on the reuse of existing linguistic resources. 
MUSI combines a linguistic-based module for relevant sentence extraction and a concept-based component to generate multilingual summaries.",{"paper_id":5245,"title":5246,"year":213,"month":855,"day":63,"doi":5247,"resource_url":5248,"first_page":63,"last_page":63,"pdf_url":5249,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5250,"paper_type":860,"authors":5251,"abstract":5254},"lrec2002-main-261","Current Developments of STO - the Danish Lexicon Project for NLP and HLT Applications","10.63317\u002F2yoi2psa6isz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-261","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F261.pdf","braasch-2002-current",[5252],{"paper_id":5245,"author_seq":247,"given_name":3959,"surname":5253,"affiliation":63,"orcid":63},"Braasch","The Centre for Language Technology (Center for Sprogteknologi, CST) is in charge of a national project developing a large-scale  Danish lexicon for HLT and NLP applications. The short name of the project is STO, which stands for SprogTeknologisk Ordbase   (Lexical Database for Language Technology). The project is inspired by principles and methods applied in the multilingual LE-PAROLE project (1996-98) the aim of which was to develop harmonised written language resources for 12 EU languages. The   Danish PAROLE lexicon was produced by CST and the STO project highly benefits from the  experience acquired from the work  mentioned. This paper deals with a few central tasks of the ongoing project. It discusses the development of a smaller lexical   resource produced in a multilingual environment into a large-scale, monolingual resource. Two  different methods of increasing the vocabulary will be presented in detail; the extension of the linguistic coverage and the refinement of the linguistic description by  including more detailed language-specific information. 
Finally, some exploitation perspectives and the development of an internet-based user-interface will be presented. The STO project gets funding from the Danish Ministry for Science, Technology and  Development for a period of three years (2001-2004).",{"paper_id":5256,"title":5257,"year":213,"month":855,"day":63,"doi":5258,"resource_url":5259,"first_page":63,"last_page":63,"pdf_url":5260,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5261,"paper_type":860,"authors":5262,"abstract":5273},"lrec2002-main-262","Field Testing the Tongues Speech-to-Speech Machine Translation System","10.63317\u002F37vog7o5uc4c","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-262","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F262.pdf","frederking-etal-2002-field",[5263,5266,5267,5269,5271],{"paper_id":5256,"author_seq":247,"given_name":5264,"surname":5265,"affiliation":63,"orcid":63},"Robert E.","Frederking",{"paper_id":5256,"author_seq":232,"given_name":2618,"surname":2619,"affiliation":63,"orcid":63},{"paper_id":5256,"author_seq":218,"given_name":5268,"surname":2287,"affiliation":63,"orcid":63},"Ralf D.",{"paper_id":5256,"author_seq":203,"given_name":3020,"surname":5270,"affiliation":63,"orcid":63},"Moody",{"paper_id":5256,"author_seq":188,"given_name":3941,"surname":5272,"affiliation":63,"orcid":63},"Steinbrecher","The Tongues portable, rapid-development, speech-to-speech machine  translation system was developed specifically to allow a realistic field-test of a deployable prototype.  In this paper we will describe  the system, its field-testing using regular US Army officers and naive Croatians, and the evaluation of these tests.  The evaluation includes  analysis of answers to a questionnaire, analysis of system transcript logs, and the authors' qualitative observations.  
The overall result of  the test was that while the system did successfully aid translation, it requires further development before it would be ready for regular  field use.",{"paper_id":5275,"title":5276,"year":213,"month":855,"day":63,"doi":5277,"resource_url":5278,"first_page":63,"last_page":63,"pdf_url":5279,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5280,"paper_type":860,"authors":5281,"abstract":5287},"lrec2002-main-263","Acquiring Compact Lexicalized Grammars from a Cleaner Treebank","10.63317\u002F3ws4qb8xz972","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-263","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F263.pdf","hockenmaier-steedman-2002-acquiring",[5282,5285],{"paper_id":5275,"author_seq":247,"given_name":5283,"surname":5284,"affiliation":63,"orcid":63},"Julia","Hockenmaier",{"paper_id":5275,"author_seq":232,"given_name":2227,"surname":5286,"affiliation":63,"orcid":63},"Steedman","We present an algorithm which translates the Penn  Treebank into a corpus of Combinatory Categorial Grammar (CCG)  derivations. To do this we have needed to make several systematic  changes to the Treebank which have the effect of cleaning up a number of  errors and inconsistencies. This process has yielded a cleaner treebank  that can potentially be used in any framework. We also show how unary  type-changing rules for certain types of modifiers can be introduced in  a CCG grammar to ensure a compact lexicon without augmenting the  generative power of the system. 
We demonstrate how the combination of  preprocessing and type-changing rules minimizes the lexical coverage  problem.",{"paper_id":5289,"title":5290,"year":213,"month":855,"day":63,"doi":5291,"resource_url":5292,"first_page":63,"last_page":63,"pdf_url":5293,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5294,"paper_type":860,"authors":5295,"abstract":5303},"lrec2002-main-264","Using Grammatical Description as a Metalanguage Resource","10.63317\u002F3mc2ji8mnm6u","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-264","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F264.pdf","de-vriend-etal-2002-using",[5296,5298,5301],{"paper_id":5289,"author_seq":247,"given_name":1998,"surname":5297,"affiliation":63,"orcid":63},"de Vriend",{"paper_id":5289,"author_seq":232,"given_name":5299,"surname":5300,"affiliation":63,"orcid":63},"P.A.","Coppen",{"paper_id":5289,"author_seq":218,"given_name":4559,"surname":5302,"affiliation":63,"orcid":63},"Haeseryn","The present paper is concerned with the advantages of  a digitised descriptive grammar over its traditional print version.  First we discuss the process of up-conversion of the ANS material and  the main advantages the E-ANS has for the editorial staff. Then from the  perspective of language resources, we discuss different applications of  the grammatical descriptions for both human and machine users. 
The  discussion is based on our experiences during the project ‘Elektronisering  van de ANS’, a project in progress that is aimed at developing a  digital version of the Dutch reference grammar",{"paper_id":5305,"title":5306,"year":213,"month":855,"day":63,"doi":5307,"resource_url":5308,"first_page":63,"last_page":63,"pdf_url":5309,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5310,"paper_type":860,"authors":5311,"abstract":5317},"lrec2002-main-265","The Greedy Algorithm and its Application to the Construction of a Continuous Speech Database","10.63317\u002F4ewtb496pq75","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-265","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F265.pdf","francois-boeffard-2002-greedy",[5312,5315],{"paper_id":5305,"author_seq":247,"given_name":5313,"surname":5314,"affiliation":63,"orcid":63},"Hélène","François",{"paper_id":5305,"author_seq":232,"given_name":2416,"surname":5316,"affiliation":63,"orcid":63},"Boëffard","Databases containing varied linguistic features can be built by condensing large corpora; in this work we need to cover a set of phonetic units with a minimal set of natural  phonetic sentences. With this aim in view we compare three set covering methods: the greedy method, its inverse which we call the spitting method, and the pair exchange method. Each method is defined with several criteria guiding the selection of sentences; they relate to the number of units of the sentences, to their length, and to the rareness of their units. A first experiment shows that pair exchange method doesn't guarantee a total covering. Greedy and spitting methods performances are comparable; nevertheless greedy is a bit better and above all less time-consuming. Applying spitting method to a greedy cover increases performance by removing about 10% redundancy. So does pair exchange method, but it is more time-consuming. 
Most of the criteria guiding selections are sensitive to the sentences length. Criteria performances obtained for a total covering are not necessarily transposable to a partial covering.",{"paper_id":5319,"title":5320,"year":213,"month":855,"day":63,"doi":5321,"resource_url":5322,"first_page":63,"last_page":63,"pdf_url":5323,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5324,"paper_type":860,"authors":5325,"abstract":5331},"lrec2002-main-266","Cooperation between black box and glass box approaches for the evaluation of a question answering system","10.63317\u002F28crn7ow6jbj","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-266","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F266.pdf","hurault-plantet-monceaux-2002-cooperation",[5326,5329],{"paper_id":5319,"author_seq":247,"given_name":5327,"surname":5328,"affiliation":63,"orcid":63},"Martine","Hurault-Plantet",{"paper_id":5319,"author_seq":232,"given_name":1127,"surname":5330,"affiliation":63,"orcid":63},"Monceaux","For the past three years, the question answering system QALC,  currently developed in our team, has been taking part in the Question Answering (QA) track of evaluation campaigns TREC (Text REtrieval  Conference).  In the QA track, each system is evaluated according to a black box approach: as input, a set of questions, and as output, for  each question, five answers ranked with regard to decreasing relevance.  A score is then computed with regard to the correctness of  the answers.  Such an evaluation is attractive for comparing systems to each other, as well as for comparing a system to itself after a  modification. However, the capacity for knowing how to improve the system requires another approach: the glass box approach. Indeed, in  complex modular systems such as question answering systems, we have to \"enter\" inside the system and evaluate each module in order to assess  if it reaches the goal that has been set for it, or not. 
Nevertheless, after modifying a module, we have to apply again the black box approach  on the whole system in order to judge the effect of the modifications on the overall result. In this paper, we thus present an evaluation of  our system, based both on black box and glass box approaches. We will describe the methods used as well as the results that we obtain.",{"paper_id":5333,"title":5334,"year":213,"month":855,"day":63,"doi":5335,"resource_url":5336,"first_page":63,"last_page":63,"pdf_url":5337,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5338,"paper_type":860,"authors":5339,"abstract":5358},"lrec2002-main-267","BDCon: A Spanish knowledge database","10.63317\u002F39qb4xbys2h5","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-267","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F267.pdf","cassan-etal-2002-bdcon",[5340,5343,5346,5349,5352,5354,5357],{"paper_id":5333,"author_seq":247,"given_name":5341,"surname":5342,"affiliation":63,"orcid":63},"Adán","Cassán",{"paper_id":5333,"author_seq":232,"given_name":5344,"surname":5345,"affiliation":63,"orcid":63},"Sergi","Cervell",{"paper_id":5333,"author_seq":218,"given_name":5347,"surname":5348,"affiliation":63,"orcid":63},"Mireia","Colom",{"paper_id":5333,"author_seq":203,"given_name":5350,"surname":5351,"affiliation":63,"orcid":63},"Rafael","Marín",{"paper_id":5333,"author_seq":188,"given_name":3263,"surname":5353,"affiliation":63,"orcid":63},"Merenciano",{"paper_id":5333,"author_seq":172,"given_name":5355,"surname":5356,"affiliation":63,"orcid":63},"Gema","Pérez",{"paper_id":5333,"author_seq":155,"given_name":4962,"surname":2668,"affiliation":63,"orcid":63},"In this paper we describe a knowledge base that has  been built using the partially structured knowledge from encyclopaedias.  
The BDCon (from the Spanish: Base de Datos de Conocimiento) is a general  ontology built around an extended Spanish lexicon extracted from two  encyclopaedias and a cartographic database. It is composed by a number  of interconnected knowledge structures, each of them covering a  different aspect of world knowledge. The purpose of the BDCon is to  classify the contents of various reference works publishing companies,  as well as to support a set of advanced linguistic tools.",{"paper_id":5360,"title":5361,"year":213,"month":855,"day":63,"doi":5362,"resource_url":5363,"first_page":63,"last_page":63,"pdf_url":5364,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5365,"paper_type":860,"authors":5366,"abstract":5374},"lrec2002-main-268","A step forward to hypertext","10.63317\u002F4gnogm9jhgfi","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-268","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F268.pdf","cassan-etal-2002-step",[5367,5368,5369,5370,5371,5372,5373],{"paper_id":5360,"author_seq":247,"given_name":5341,"surname":5342,"affiliation":63,"orcid":63},{"paper_id":5360,"author_seq":232,"given_name":5344,"surname":5345,"affiliation":63,"orcid":63},{"paper_id":5360,"author_seq":218,"given_name":5347,"surname":5348,"affiliation":63,"orcid":63},{"paper_id":5360,"author_seq":203,"given_name":5350,"surname":5351,"affiliation":63,"orcid":63},{"paper_id":5360,"author_seq":188,"given_name":3263,"surname":5353,"affiliation":63,"orcid":63},{"paper_id":5360,"author_seq":172,"given_name":5355,"surname":5356,"affiliation":63,"orcid":63},{"paper_id":5360,"author_seq":155,"given_name":4962,"surname":2668,"affiliation":63,"orcid":63},"In this paper, after a critical review of how  hypertext has been understood over the past few years, we claim against  the distinction between total and partial hypertext, and we provide a  brief description of a dynamic system that allows the automatic  highlighting of those textual 
elements related to a certain topic. The  outcome of our approach is ESQUITX, an automatic highlighter based on  different filters, particularly those referring to topic information.  The general process can be summarized as follows: once the text is  lemmatized, by means of our Spanish tagger, a collection of filters is  applied, and only the resulting lemma forms are highlighted.",{"paper_id":5376,"title":5377,"year":213,"month":855,"day":63,"doi":5378,"resource_url":5379,"first_page":63,"last_page":63,"pdf_url":5380,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5381,"paper_type":860,"authors":5382,"abstract":5401},"lrec2002-main-269","SpeechDat across all America: SALA II","10.63317\u002F425b8v2dhu7s","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-269","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F269.pdf","moreno-etal-2002-speechdat",[5383,5384,5385,5386,5387,5389,5391,5393,5395,5398],{"paper_id":5376,"author_seq":247,"given_name":3799,"surname":1702,"affiliation":63,"orcid":63},{"paper_id":5376,"author_seq":232,"given_name":1820,"surname":1821,"affiliation":63,"orcid":63},{"paper_id":5376,"author_seq":218,"given_name":3780,"surname":3781,"affiliation":63,"orcid":63},{"paper_id":5376,"author_seq":203,"given_name":2523,"surname":3562,"affiliation":63,"orcid":63},{"paper_id":5376,"author_seq":188,"given_name":1234,"surname":5388,"affiliation":63,"orcid":63},"Horbach",{"paper_id":5376,"author_seq":172,"given_name":5390,"surname":2179,"affiliation":63,"orcid":63},"Patricia",{"paper_id":5376,"author_seq":155,"given_name":5072,"surname":5392,"affiliation":63,"orcid":63},"Pinto",{"paper_id":5376,"author_seq":138,"given_name":1692,"surname":5394,"affiliation":63,"orcid":63},"Rincón",{"paper_id":5376,"author_seq":121,"given_name":5396,"surname":5397,"affiliation":63,"orcid":63},"Franco","Senia",{"paper_id":5376,"author_seq":104,"given_name":5399,"surname":5400,"affiliation":63,"orcid":63},"Rafid","S
ukkar","SALA II is a project co-sponsored by several companies that focuses on collecting linguistic data dedicated for training speaker independent speech recognizers for mobile\u002Fcellular  network telephone applications. The goal of the project is to produce SpeechDat-like databases in all the significant languages and dialects spoken across Latin America, US and Canada. Utterances will be recorded directly from calls made from cellular telephones and are composed by read text and answers to specific questions. The goal of  the project should be reached within the year 2003.",{"paper_id":5403,"title":5404,"year":213,"month":855,"day":63,"doi":5405,"resource_url":5406,"first_page":63,"last_page":63,"pdf_url":5407,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5408,"paper_type":860,"authors":5409,"abstract":5419},"lrec2002-main-270","Extraction of Associative Attributes from Nouns and Quantitative Expression of Prototype Concept","10.63317\u002F2dc38nkpuiky","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-270","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F270.pdf","ando-etal-2002-extraction",[5410,5413,5416],{"paper_id":5403,"author_seq":247,"given_name":5411,"surname":5412,"affiliation":63,"orcid":63},"Maya","Ando",{"paper_id":5403,"author_seq":232,"given_name":5414,"surname":5415,"affiliation":63,"orcid":63},"Jun","Okamoto",{"paper_id":5403,"author_seq":218,"given_name":5417,"surname":5418,"affiliation":63,"orcid":63},"Shun","Ishizaki","One of the purposes of this research is to formalize  similarity among nouns by using attributes associated from the nouns,  and then using the similarity, to formalize prototypes of categories.  The other purpose is to extract features of nouns by using adjectives or  adjective-like words obtained by the association experiments and to  formalize importance of the nouns with the words. 
We constructed an  associative concept dictionary using many kinds of attributes associated  from nouns. Similarity among nouns was calculated by using their  associated attributes with inner product methods, where the nouns were  organized in a hierarchical structure using generalized or specific  relations. This paper discusses similarity between nouns using their  attributes. We found that the similarity of nouns located at lower  levels has a high score in many cases. Then prototypes are  quantitatively formalized among Japanese noun concepts. It uses  similarities of part\u002Fmaterial concepts, features, and action concepts,  and distance values between the noun and its lower-level concepts. Such  formalized prototypes are compared with a result of human questionnaire  experiments to obtain a good correspondence among them.",{"paper_id":5421,"title":5422,"year":213,"month":855,"day":63,"doi":5423,"resource_url":5424,"first_page":63,"last_page":63,"pdf_url":5425,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5426,"paper_type":860,"authors":5427,"abstract":5430},"lrec2002-main-271","The Language Resource Archive of the 21st Century","10.63317\u002F3adr3dk6oxcw","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-271","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F271.pdf","wynne-2002-language",[5428],{"paper_id":5421,"author_seq":247,"given_name":2179,"surname":5429,"affiliation":63,"orcid":63},"Wynne","What will an archive of language resources look like in the future? It is to be expected that developments in computer technology will have an impact on the nature of language resources which will be created in the future. A projection of current trends into the future helps us to see that there will be more multimedia and multilingual resources. 
It is also likely that increasing internet bandwidth will lead to a more distributed architecture whereby resources are accessed remotely rather than held locally. This will also facilitate the development of virtual corpora, whereby temporary, ad hoc, collections of texts can be assembled for a specific analysis. Increasingly it will become the norm to extract information from resources held in the archive, rather than downloading the corpus, installing software to analyse it with and getting them to work together.  It can therefore be predicted that although archives will continue to have an important role in the preservation of resources, other roles will develop or grow in importance, as archives adapt to allow the creation of virtual corpora and online access to resources, and become centres of resource creation expertise, metadata validation and resource discovery. This paper discusses the new directions envisaged by the Oxford Text Archive (OTA), and in particular its current initiatives to improve the service provided for the community of academic linguistics researchers in the UK.",{"paper_id":5432,"title":5433,"year":213,"month":855,"day":63,"doi":5434,"resource_url":5435,"first_page":63,"last_page":63,"pdf_url":5436,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5437,"paper_type":860,"authors":5438,"abstract":5446},"lrec2002-main-272","Incremental Recognition and Referential Categorization of French Proper 
Names","10.63317\u002F47hkdzgsrzvg","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-272","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F272.pdf","fourour-etal-2002-incremental",[5439,5442,5444],{"paper_id":5432,"author_seq":247,"given_name":5440,"surname":5441,"affiliation":63,"orcid":63},"Nordine","Fourour",{"paper_id":5432,"author_seq":232,"given_name":1814,"surname":5443,"affiliation":63,"orcid":63},"Morin",{"paper_id":5432,"author_seq":218,"given_name":5445,"surname":3312,"affiliation":63,"orcid":63},"Béatrice","This paper presents Nemesis, a French proper name (PN) recognizer for Large-scale Information Extraction (IE), whose specifications have been elaborated through corpus investigation both in terms of referential categories and graphical structures. The graphical criteria are used to identify proper names and the referential classification to categorize them. The system is a classical one: it is rule-based and uses specialized lexicons without any linguistic preprocessing. Its originality consists on a modular architecture which includes a learning process. 
The system up to now recognizes anthroponyms and toponyms with performance achieving 95% of precision and 90% of recall.",{"paper_id":5448,"title":5449,"year":213,"month":855,"day":63,"doi":5450,"resource_url":5451,"first_page":63,"last_page":63,"pdf_url":5452,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5453,"paper_type":860,"authors":5454,"abstract":5467},"lrec2002-main-273","Bilingual Spoken Monologue Corpus for Simultaneous Machine Interpretation Research","10.63317\u002F2airahzn8k72","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-273","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F273.pdf","matsubara-etal-2002-bilingual",[5455,5458,5461,5464],{"paper_id":5448,"author_seq":247,"given_name":5456,"surname":5457,"affiliation":63,"orcid":63},"Shigeki","Matsubara",{"paper_id":5448,"author_seq":232,"given_name":5459,"surname":5460,"affiliation":63,"orcid":63},"Akira","Takagi",{"paper_id":5448,"author_seq":218,"given_name":5462,"surname":5463,"affiliation":63,"orcid":63},"Nobuo","Kawaguchi",{"paper_id":5448,"author_seq":203,"given_name":5465,"surname":5466,"affiliation":63,"orcid":63},"Yasuyoshi","Inagaki","This paper describes a large-scale bilingual corpus of spoken monologues  and their simultaneous interpretation, which has been constructed at CIAIR.  The corpus has the following characteristics: (1) English and Japanese speeches are recorded in parallel, (2) the data contains monologue speeches such as lecture and self-introduction, and (3) the exact beginning and ending  times are provided for each utterance. We have collected a total of about  70 hours of speech data and transcribed them into ASCII text files.   The corpus will be made publicly available in the near future. This paper  also provides an analysis of the professional interpreter's speeches using  the bilingual corpus. 
The following points have been investigated: (1) the interpreting unit of simultaneous  interpretation, (2) the difference between the beginning time of the lecturer's utterance  and that of the interpreter's utterance, and (3) the interpreter's speaking speed.  The characteristic features about the timing at which simultaneous interpreters start to speak is presented. The analysis will be available  for the development of a simultaneous machine interpreting system.",{"paper_id":5469,"title":5470,"year":213,"month":855,"day":63,"doi":5471,"resource_url":5472,"first_page":63,"last_page":63,"pdf_url":5473,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5474,"paper_type":860,"authors":5475,"abstract":5487},"lrec2002-main-274","A XML-based tool for evaluation of SLDS","10.63317\u002F4beksbbrqzve","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-274","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F274.pdf","charfuelan-etal-2002-xml",[5476,5479,5481,5484],{"paper_id":5469,"author_seq":247,"given_name":5477,"surname":5478,"affiliation":63,"orcid":63},"Marcela","Charfuelán",{"paper_id":5469,"author_seq":232,"given_name":5480,"surname":3223,"affiliation":63,"orcid":63},"Luis Hernández",{"paper_id":5469,"author_seq":218,"given_name":5482,"surname":5483,"affiliation":63,"orcid":63},"Cristina Esteban","López",{"paper_id":5469,"author_seq":203,"given_name":5485,"surname":5486,"affiliation":63,"orcid":63},"Holmer","Hemsen","This paper addresses two topics relevant to the  evaluation of Spoken Language Dialogue Systems (SLDSs): methodology and  tools. We present a methodology for evaluation of SLDSs which includes  formalising of procedures for annotation, representation and processing  of spoken dialogues for evaluation. 
Also we present a tool with which to  carry on most of the procedures usually applied in evaluation of SLDS  nowadays.",{"paper_id":5489,"title":5490,"year":213,"month":855,"day":63,"doi":5491,"resource_url":5492,"first_page":63,"last_page":63,"pdf_url":5493,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5494,"paper_type":860,"authors":5495,"abstract":5501},"lrec2002-main-275","Automatic Morphological Segmentation for Continuous Speech Recognition of Basque","10.63317\u002F26qoazinba5v","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-275","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F275.pdf","lopez-de-ipina-etal-2002-automatic",[5496,5498,5499],{"paper_id":5489,"author_seq":247,"given_name":3450,"surname":5497,"affiliation":63,"orcid":63},"López de Ipiña",{"paper_id":5489,"author_seq":232,"given_name":3444,"surname":4230,"affiliation":63,"orcid":63},{"paper_id":5489,"author_seq":218,"given_name":4234,"surname":5500,"affiliation":63,"orcid":63},"Bordel","The selection of appropriate Lexical Units (LUs) is an important issue in the development of Continuous Speech Recognition (CSR) systems. Word has been used classically as unit in most of them. However, proposals of non-word units have begun to arise. Since the subject of this study is the Basque language, which is an agglutinative language with a complex structure inside words, non-word units could be an appropriate choice. 
In this work an automatic morphological segmentation tool oriented to CSR tasks is presented.",{"paper_id":5503,"title":5504,"year":213,"month":855,"day":63,"doi":5505,"resource_url":5506,"first_page":63,"last_page":63,"pdf_url":5507,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5508,"paper_type":860,"authors":5509,"abstract":5516},"lrec2002-main-276","Searching via Keywords or Concept Hierarchies - Which is Better?","10.63317\u002F5b4zik7hbt7v","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-276","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F276.pdf","sutcliffe-white-2002-searching",[5510,5513],{"paper_id":5503,"author_seq":247,"given_name":5511,"surname":5512,"affiliation":63,"orcid":63},"Richard F. E.","Sutcliffe",{"paper_id":5503,"author_seq":232,"given_name":5514,"surname":5515,"affiliation":63,"orcid":63},"Kieran","White","We have carried out a comparison of interactive search in a homogenous information retrieval domain using a keyword search engine on the one hand and a concept ontology system on the other. The experimental design was that of the TREC Interactive Track. While the results showed that keyword search was superior on this occasion, we have identified the ideal characteristics of an ontology and shown that the one used for the study did not conform to these. 
Future work will include repeating the experiment with an optimal hierarchy and establishing numerical attributes of an ontology relative to a particular task domain.",{"paper_id":5518,"title":5519,"year":213,"month":855,"day":63,"doi":5520,"resource_url":5521,"first_page":63,"last_page":63,"pdf_url":5522,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5523,"paper_type":860,"authors":5524,"abstract":5533},"lrec2002-main-277","DIADORIM - A Lexical Database for Brazilian Portuguese","10.63317\u002F4f82kg63ijy7","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-277","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F277.pdf","greghi-etal-2002-diadorim",[5525,5528,5531],{"paper_id":5518,"author_seq":247,"given_name":5526,"surname":5527,"affiliation":63,"orcid":63},"Juliana Galvani","Greghi",{"paper_id":5518,"author_seq":232,"given_name":5529,"surname":5530,"affiliation":63,"orcid":63},"Ronaldo Teixeira","Martins",{"paper_id":5518,"author_seq":218,"given_name":5033,"surname":5532,"affiliation":63,"orcid":63},"das Graças Volpe Nunes","This paper aims at providing a general description  for DIADORIM, a lexical database for Brazilian Portuguese. DIADORIM is  said to successively merge two very different previous  application-oriented dictionaries, increasing their user-friendliness,  the reusability of their entries and their capability of incorporating  new features. 
Besides improving the structure of the previous databases,  DIADORIM also preserves their performance and functionality, as  indicated in the real use and simulation tests carried out during its  evaluation.",{"paper_id":5535,"title":5536,"year":213,"month":855,"day":63,"doi":5537,"resource_url":5538,"first_page":63,"last_page":63,"pdf_url":5539,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5540,"paper_type":860,"authors":5541,"abstract":5549},"lrec2002-main-278","Multidialectal Spanish Modeling for ASR","10.63317\u002F4txo78t67uu6","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-278","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F278.pdf","caballero-etal-2002-multidialectal",[5542,5545,5548],{"paper_id":5535,"author_seq":247,"given_name":5543,"surname":5544,"affiliation":63,"orcid":63},"Mónica","Caballero",{"paper_id":5535,"author_seq":232,"given_name":5546,"surname":5547,"affiliation":63,"orcid":63},"José B.","Mariño",{"paper_id":5535,"author_seq":218,"given_name":3799,"surname":1702,"affiliation":63,"orcid":63},"This paper describes the latest advances in our ongoing work in the area of Spanish multidialectal speech recognition. This work deals with the suitability of using a single multidialectal acoustic modeling for all the Spanish variants spoken in Europe and Latin America. The objective is twofold. First, it allows to use all the available databases to jointly train and improve the same system. It also allows to use a single system for all the Spanish speakers. Our latest experiments consist of the optimization of the acoustic models applying a top-down bottom-up hybrid clustering algorithm. 
Overall multidialectal acoustic modeling leads to maintain the performance of the recognition system even when it’s tested with an unseen dialect, that is, not seen in the training process.",{"paper_id":5551,"title":5552,"year":213,"month":855,"day":63,"doi":5553,"resource_url":5554,"first_page":63,"last_page":63,"pdf_url":5555,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5556,"paper_type":860,"authors":5557,"abstract":5568},"lrec2002-main-279","A unified system for accessing typological databases","10.63317\u002F5mq5xzbhjnr3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-279","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F279.pdf","monachesi-etal-2002-unified",[5558,5560,5562,5565],{"paper_id":5551,"author_seq":247,"given_name":1629,"surname":5559,"affiliation":63,"orcid":63},"Monachesi",{"paper_id":5551,"author_seq":232,"given_name":3547,"surname":5561,"affiliation":63,"orcid":63},"Dimitriadis",{"paper_id":5551,"author_seq":218,"given_name":5563,"surname":5564,"affiliation":63,"orcid":63},"Rob","Goedemans",{"paper_id":5551,"author_seq":203,"given_name":5566,"surname":5567,"affiliation":63,"orcid":63},"Anne-Marie","Mineur","We present the goals and architecture of the Typological  Database System, a project for the creation of a unified interface to numerous independently developed typological databases. The aim of the  project is to develop a software system that allows a user to simultaneously query different databases through a single interface.  The challenge of the project lies in the variability of the included data. In order to overcome the diversity, the system relies  on detailed formal descriptions (metadata), prepared in advance and describing in detail the structure and content of each component  database. 
The metadata is used to match a user's query against the capabilities of the component databases.",{"paper_id":5570,"title":5571,"year":213,"month":855,"day":63,"doi":5572,"resource_url":5573,"first_page":63,"last_page":63,"pdf_url":5574,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5575,"paper_type":860,"authors":5576,"abstract":5587},"lrec2002-main-280","A Procedure for Word Derivational Processes Concerning Lexicon Extension in Highly Inflected Languages","10.63317\u002F5dpdaqwwu2pt","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-280","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F280.pdf","osolsobe-etal-2002-procedure",[5577,5580,5581,5584],{"paper_id":5570,"author_seq":247,"given_name":5578,"surname":5579,"affiliation":63,"orcid":63},"Klára","Osolsobĕ",{"paper_id":5570,"author_seq":232,"given_name":2343,"surname":2344,"affiliation":63,"orcid":63},{"paper_id":5570,"author_seq":218,"given_name":5582,"surname":5583,"affiliation":63,"orcid":63},"Radek","Sedláček",{"paper_id":5570,"author_seq":203,"given_name":5585,"surname":5586,"affiliation":63,"orcid":63},"Marek","Veber","The aim of this paper is to describe an efficient  tool (I PAR) for a supervised and semi-automatic extension of a lexicon  or morphological database and its easy updating. We will present the  underlying algorithms and their implementation that are general enough  to capture the main word-forming processes (both inflectional and  derivational). They are designed for languages with a rich inflectional  morphology, such as Slavonic languages, particularly Czech. 
The  implementation is partly based on the ideas presented in the earlier  paper by Klímová and Pala (2000).",{"paper_id":5589,"title":5590,"year":213,"month":855,"day":63,"doi":5591,"resource_url":5592,"first_page":63,"last_page":63,"pdf_url":5593,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5594,"paper_type":860,"authors":5595,"abstract":5599},"lrec2002-main-281","Experimental Two-Level Morphology of Estonian","10.63317\u002F5drdwskb8xwi","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-281","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F281.pdf","uibo-2002-experimental",[5596],{"paper_id":5589,"author_seq":247,"given_name":5597,"surname":5598,"affiliation":63,"orcid":63},"Heli","Uibo","The experimental two-level morphology of Estonian is under development at  the University of Tartu. The language description, consisting of 45 two-level rules and over 200 lexicons has been implemented and tested  using Xerox finite-state tools twolc and lexc. The root lexicons cover 400  most frequent stems at the present stage of development. The software has  been designed to update the lexicon automatically with new stems, including the automatic generation of lexical representations of root  lexicon entries. The open problems by describing of word formation processes - derivation  and compounding are discussed. 
The advantages and  disadvantages of the two-level model with respect to Estonian morphology  are pointed out.",{"paper_id":5601,"title":5602,"year":213,"month":855,"day":63,"doi":5603,"resource_url":5604,"first_page":63,"last_page":63,"pdf_url":5605,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5606,"paper_type":860,"authors":5607,"abstract":5616},"lrec2002-main-282","Terminological Enrichment for non-Interactive MT Evaluation","10.63317\u002F3xa7k5fw6wq9","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-282","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F282.pdf","dabbadie-etal-2002-terminological",[5608,5610,5613],{"paper_id":5601,"author_seq":247,"given_name":1751,"surname":5609,"affiliation":63,"orcid":63},"Dabbadie",{"paper_id":5601,"author_seq":232,"given_name":5611,"surname":5612,"affiliation":63,"orcid":63},"Widad Mustafa El","Hadi",{"paper_id":5601,"author_seq":218,"given_name":5614,"surname":5615,"affiliation":63,"orcid":63},"Ismaïl","Timimi","In a previous study (Dabbadie, Mustafa, Timimi, 2001)  we set a methodology for non interactive machine translation evaluation  on big corpora, assuming that the goal of the translation was a simple  understanding of the original message. The source text, in French,  provided by INRA (Institut National pour la Recherche Agronomique i.e.  National Institute for Agronomic Research) deals with biotechnology and  animal reproduction. It has been translated into English by REVERSO. The  output of the system (i.e. the result of the assembling of several  components), as opposed to its individual modules or specific components  (i.e. analysis, generation, grammar, lexicon, core, etc.), has been  evaluated. In the present study we will recall the methodology and  results obtained in the case of simple translation by REVERSO with no  terminological enrichment and compare them to the results obtained after  terminological  enrichment. 
The aim of this study is to evaluate  the impact of specific terminology when integrated to an MT System and  after having run the system with a basic bilingual dictionary.",{"paper_id":5618,"title":5619,"year":213,"month":855,"day":63,"doi":5620,"resource_url":5621,"first_page":63,"last_page":63,"pdf_url":5622,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5623,"paper_type":860,"authors":5624,"abstract":5628},"lrec2002-main-283","From TreeBank to PropBank","10.63317\u002F3gh6v2hd4z55","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-283","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F283.pdf","kingsbury-palmer-2002-treebank",[5625,5627],{"paper_id":5618,"author_seq":247,"given_name":3674,"surname":5626,"affiliation":63,"orcid":63},"Kingsbury",{"paper_id":5618,"author_seq":232,"given_name":3385,"surname":3386,"affiliation":63,"orcid":63},"This paper describes our approach to the development  of a Proposition Bank, which involves the addition of semantic  information to the Penn English Treebank. 
Our primary goal is the  labeling of syntactic nodes with specific argument labels that preserve  the similarity of roles such as",{"paper_id":5630,"title":5631,"year":213,"month":855,"day":63,"doi":5632,"resource_url":5633,"first_page":63,"last_page":63,"pdf_url":5634,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5635,"paper_type":860,"authors":5636,"abstract":5654},"lrec2002-main-284","Combining statistics on n-grams for automatic term recognition","10.63317\u002F23skt9yh87yd","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-284","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F284.pdf","ballester-etal-2002-combining",[5637,5640,5643,5645,5648,5651],{"paper_id":5630,"author_seq":247,"given_name":5638,"surname":5639,"affiliation":63,"orcid":63},"Almudena","Ballester",{"paper_id":5630,"author_seq":232,"given_name":5641,"surname":5642,"affiliation":63,"orcid":63},"Ángel Martín","Municio",{"paper_id":5630,"author_seq":218,"given_name":1538,"surname":5644,"affiliation":63,"orcid":63},"Pardos",{"paper_id":5630,"author_seq":203,"given_name":5646,"surname":5647,"affiliation":63,"orcid":63},"Jordi Porta","Zamorano",{"paper_id":5630,"author_seq":188,"given_name":5649,"surname":5650,"affiliation":63,"orcid":63},"Rafael J. 
Ruiz","Ureña",{"paper_id":5630,"author_seq":172,"given_name":5652,"surname":5653,"affiliation":63,"orcid":63},"Fernando Sánchez","León","This paper presents the work-in-progress in the  development of an automatic term recognition (ATR) system built around  the",{"paper_id":5656,"title":5657,"year":213,"month":855,"day":63,"doi":5658,"resource_url":5659,"first_page":63,"last_page":63,"pdf_url":5660,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5661,"paper_type":860,"authors":5662,"abstract":5673},"lrec2002-main-285","TableTrans, MultiTrans, InterTrans and TreeTrans: Diverse Tools Built on the Annotation Graph Toolkit","10.63317\u002F4fvhg2zhhxbo","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-285","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F285.pdf","bird-etal-2002-tabletrans",[5663,5664,5665,5666,5667,5670],{"paper_id":5656,"author_seq":247,"given_name":1142,"surname":1143,"affiliation":63,"orcid":63},{"paper_id":5656,"author_seq":232,"given_name":3207,"surname":3208,"affiliation":63,"orcid":63},{"paper_id":5656,"author_seq":218,"given_name":3201,"surname":3202,"affiliation":63,"orcid":63},{"paper_id":5656,"author_seq":203,"given_name":3204,"surname":2924,"affiliation":63,"orcid":63},{"paper_id":5656,"author_seq":188,"given_name":5668,"surname":5669,"affiliation":63,"orcid":63},"Beth","Randall",{"paper_id":5656,"author_seq":172,"given_name":5671,"surname":5672,"affiliation":63,"orcid":63},"Salim","Zayat","Four diverse tools built on the Annotation Graph Toolkit are described.  Each tool associates linguistic codes and structures with time-series data.  All are based on the same software library and tool architecture. TableTrans is for observational coding, using a  spreadsheet whose rows are aligned to a signal.  MultiTrans is for transcribing multi-party  communicative interactions recorded using multi-channel signals. InterTrans is for creating interlinear text aligned to audio.  
TreeTrans is  for creating and manipulating syntactic trees.  This work demonstrates that  the development of diverse tools and re-use of software components is greatly facilitated by a common high-level application  programming interface for representing the data and managing input\u002Foutput, together  with a common architecture for managing the interaction of multiple components.",{"paper_id":5675,"title":5676,"year":213,"month":855,"day":63,"doi":5677,"resource_url":5678,"first_page":63,"last_page":63,"pdf_url":5679,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5680,"paper_type":860,"authors":5681,"abstract":5687},"lrec2002-main-286","Creating Annotation Tools with the Annotation Graph Toolkit","10.63317\u002F4wa4cnejpwjv","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-286","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F286.pdf","maeda-etal-2002-creating",[5682,5684,5685,5686],{"paper_id":5675,"author_seq":247,"given_name":5683,"surname":3208,"affiliation":63,"orcid":63},"Kazauki",{"paper_id":5675,"author_seq":232,"given_name":1142,"surname":1143,"affiliation":63,"orcid":63},{"paper_id":5675,"author_seq":218,"given_name":3201,"surname":3202,"affiliation":63,"orcid":63},{"paper_id":5675,"author_seq":203,"given_name":3204,"surname":2924,"affiliation":63,"orcid":63},"The Annotation Graph Toolkit is a collection of software supporting  the development of annotation tools based on the annotation graph model.  The toolkit includes application programming interfaces for  manipulating annotation graph data and for importing data from other formats.  There are interfaces for the scripting languages Tcl and  Python, a database interface, specialized graphical user interfaces for a variety of annotation tasks, and several sample applications.  
This paper describes all the toolkit components for the benefit of would-be application developers.",{"paper_id":5689,"title":5690,"year":213,"month":855,"day":63,"doi":5691,"resource_url":5692,"first_page":63,"last_page":63,"pdf_url":5693,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5694,"paper_type":860,"authors":5695,"abstract":5702},"lrec2002-main-287","Multi-Dimensional Data Acquisition for Integrated Acoustic Information Research","10.63317\u002F45itu589in53","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-287","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F287.pdf","kawaguchi-etal-2002-multi",[5696,5697,5698,5699],{"paper_id":5689,"author_seq":247,"given_name":5462,"surname":5463,"affiliation":63,"orcid":63},{"paper_id":5689,"author_seq":232,"given_name":5456,"surname":5457,"affiliation":63,"orcid":63},{"paper_id":5689,"author_seq":218,"given_name":1102,"surname":1103,"affiliation":63,"orcid":63},{"paper_id":5689,"author_seq":203,"given_name":5700,"surname":5701,"affiliation":63,"orcid":63},"Fumitada","Itakura","The Center for Integrated Acoustic Information Research (CIAIR) at  Nagoya University has been collecting various kinds of speech corpora for both of acoustic modeling and speech modeling. The corpora include  multi-media data collection in moving-car environment, collection of children's voice while video gaming, room acoustics at multiple  points, head related transfer functions of multiple subjects, and simultaneous interpretation of the speech between English and  Japanese. 
This paper introduces these multi-dimensional data acquisition activities in CIAIR, and gives the basic information of  the collected databases.",{"paper_id":5704,"title":5705,"year":213,"month":855,"day":63,"doi":5706,"resource_url":5707,"first_page":63,"last_page":63,"pdf_url":5708,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5709,"paper_type":860,"authors":5710,"abstract":5720},"lrec2002-main-288","Annotations for Dynamic Diagnosis of the Dialog State","10.63317\u002F4g8oqe8sdixz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-288","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F288.pdf","devillers-etal-2002-annotations",[5711,5714,5715,5717],{"paper_id":5704,"author_seq":247,"given_name":5712,"surname":5713,"affiliation":63,"orcid":63},"Laurence","Devillers",{"paper_id":5704,"author_seq":232,"given_name":2377,"surname":4357,"affiliation":63,"orcid":63},{"paper_id":5704,"author_seq":218,"given_name":5313,"surname":5716,"affiliation":63,"orcid":63},"Bonneau-Maynard",{"paper_id":5704,"author_seq":203,"given_name":5718,"surname":5719,"affiliation":63,"orcid":63},"Lori","Lamel","This paper describes recent work aimed at relating  multi-level dialog annotations with meta-data annotations for a corpus  of real human-human dialogs. This work is carried out in the context of  the AMITIES project in which spoken dialog systems for call center  services are being developed. A corpus of 100 agent-client dialogs has  been annotated with three types of annotations. The first are  utterance-level DAMSL-style dialogic labels. The second set of  annotations applies to exchanges and takes into account the dynamic  aspect of dialog progress. Finally, 5 emotion types are annotated at  the utterance level. Some of these multi-style annotations were used in  a multiple linear regression analysis to predict dialog quality. 
The  predictive factors are able to explain about 80% of the dialog  accidents.",{"paper_id":5722,"title":5723,"year":213,"month":855,"day":63,"doi":5724,"resource_url":5725,"first_page":63,"last_page":63,"pdf_url":5726,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5727,"paper_type":860,"authors":5728,"abstract":5733},"lrec2002-main-289","Annotating and Measuring Multimodal Behaviour – Tycoon Metrics in the Anvil Tool","10.63317\u002F3mkx3frgyt3r","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-289","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F289.pdf","martin-kipp-2002-annotating",[5729,5731],{"paper_id":5722,"author_seq":247,"given_name":5730,"surname":2179,"affiliation":63,"orcid":63},"Jean-Claude",{"paper_id":5722,"author_seq":232,"given_name":2079,"surname":5732,"affiliation":63,"orcid":63},"Kipp","We demonstrate how the Tycoon framework can be put to practice with the Anvil tool in a concrete case study. Tycoon offers a coding scheme and analysis metrics for multimodal communication scenarios. Anvil is a generic, extensible and ergonomically designed annotation tool for videos. In this paper, we describe the Anvil tool, the Tycoon scheme\u002Fmetrics, and their implementation in Anvil for a video sample. A new Anvil feature, motivated by the Tycoon scheme, is presented: non-temporal annotation objects - an important concept, we argue, of general interest. We also outline future plans for automatizing Tycoon metrics computation using Anvil plug-ins.",{"paper_id":5735,"title":5736,"year":213,"month":855,"day":63,"doi":5737,"resource_url":5738,"first_page":63,"last_page":63,"pdf_url":5739,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5740,"paper_type":860,"authors":5741,"abstract":5768},"lrec2002-main-290","The C-ORAL-ROM Project. 
New methods for spoken language archives in a multilingual romance corpus","10.63317\u002F54752suankfu","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-290","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F290.pdf","cresti-etal-2002-c",[5742,5745,5747,5750,5753,5755,5756,5758,5761,5764,5766],{"paper_id":5735,"author_seq":247,"given_name":5743,"surname":5744,"affiliation":63,"orcid":63},"Emanuela","Cresti",{"paper_id":5735,"author_seq":232,"given_name":2821,"surname":5746,"affiliation":63,"orcid":63},"Moneglia",{"paper_id":5735,"author_seq":218,"given_name":5748,"surname":5749,"affiliation":63,"orcid":63},"Fernanda Bacelar","do Nascimento",{"paper_id":5735,"author_seq":203,"given_name":5751,"surname":5752,"affiliation":63,"orcid":63},"Antonio Moreno","Sandoval",{"paper_id":5735,"author_seq":188,"given_name":2743,"surname":5754,"affiliation":63,"orcid":63},"Veronis",{"paper_id":5735,"author_seq":172,"given_name":1051,"surname":2179,"affiliation":63,"orcid":63},{"paper_id":5735,"author_seq":155,"given_name":5757,"surname":1809,"affiliation":63,"orcid":63},"Kalid",{"paper_id":5735,"author_seq":138,"given_name":5759,"surname":5760,"affiliation":63,"orcid":63},"Valerie","Mapelli",{"paper_id":5735,"author_seq":121,"given_name":5762,"surname":5763,"affiliation":63,"orcid":63},"Daniele","Falavigna",{"paper_id":5735,"author_seq":104,"given_name":1692,"surname":5765,"affiliation":63,"orcid":63},"Cid",{"paper_id":5735,"author_seq":87,"given_name":4651,"surname":5767,"affiliation":63,"orcid":63},"Blum","C-ORAL-ROM is a multilingual corpus of spontaneous speech of around 1.200.000 words representing the four main Romance  languages: French, Italian, Portuguese and Spanish.. The resource will be delivered in standard textual format, aligned to the audio  source in a multimedia edition. 
C-ORAL-ROM aims to ensure at the same time a  sufficient representation of spontaneous speech variation in each language resource and the comparability among the four resources with respect to a definite set of variation  parameters. The multimedia conception of C-ORAL-ROM allows simultaneously  alignment and full appreciation of the acoustic information through the speech software WINPITCHCORPUS. The storage of spoken language resources is based on the  identification of utterances in the four corpora through perceptively relevant prosodic properties. In C-ORAL-ROM all the textual  information is tagged simultaneously with respect to prosodic parsing and utterance limits. Each prosodic unit corresponding to an  utterance is easily and directly  aligned to its acoustic counterpart, thus ensuring a natural text - sound correspondence and the  definition of a data base of possible speech act in the four romance languages.",{"paper_id":5770,"title":5771,"year":213,"month":855,"day":63,"doi":5772,"resource_url":5773,"first_page":63,"last_page":63,"pdf_url":5774,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5775,"paper_type":860,"authors":5776,"abstract":5779},"lrec2002-main-291","TIDES Language Resources: A Resource Map for Translingual Information Access","10.63317\u002F2m72zucoe8yq","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-291","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F291.pdf","cieri-liberman-2002-tides",[5777,5778],{"paper_id":5770,"author_seq":247,"given_name":4997,"surname":4998,"affiliation":63,"orcid":63},{"paper_id":5770,"author_seq":232,"given_name":2227,"surname":5000,"affiliation":63,"orcid":63},"Continuing improvements in human language algorithms,  coupled with improvements in digital storage and processing, inspire  growing confidence in multilingual information access systems. 
Systems  exist to transcribe broadcast news, segment broadcasts into individual  stories and sort them by topic. These technologies, useful in isolation,  are now being combined to produce intelligent multilingual systems.  DARPA TIDES combines technologies in detection, extraction,  summarization and translation to create systems capable of searching a  wide range of streaming multilingual text and speech sources, in real  time, to provide effective access for English-speaking users. The broad  scope of tasks and languages in programs like TIDES demands close  coordination of research and shared resources. These resources include  large collections of raw text and speech; translations and summaries;  annotations of topics, named entities and relations, syntactic  structures and propositional content; lexicons; annotation  specifications and protocols; and distribution formats and standards.  The TIDES program has initiated ambitious attacks on difficult problems,  with linguistic resources matched to the needs of each piece of the  overall research enterprise. 
This paper will describe the coordinated  language resources being created under the TIDES aegis.",{"paper_id":5781,"title":5782,"year":213,"month":855,"day":63,"doi":5783,"resource_url":5784,"first_page":63,"last_page":63,"pdf_url":5785,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5786,"paper_type":860,"authors":5787,"abstract":5796},"lrec2002-main-292","Creation of an Annotated German Broadcast Speech Database for Spoken Document Retrieval","10.63317\u002F5i5wd2ujvqko","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-292","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F292.pdf","eickeler-etal-2002-creation",[5788,5790,5792,5795],{"paper_id":5781,"author_seq":247,"given_name":1452,"surname":5789,"affiliation":63,"orcid":63},"Eickeler",{"paper_id":5781,"author_seq":232,"given_name":3385,"surname":5791,"affiliation":63,"orcid":63},"Larson",{"paper_id":5781,"author_seq":218,"given_name":5793,"surname":5794,"affiliation":63,"orcid":63},"Wolff","Rüter",{"paper_id":5781,"author_seq":203,"given_name":3829,"surname":3830,"affiliation":63,"orcid":63},"In this paper we present a semi-automatic method for creating annotated data  sets from German-language broadcast resources for which audio files as well  as transcripts are available on the Internet. The transcripts are required  to be reasonably accurate, but not perfect. Our approach is implemented by an  integrated bundle of data processing tools, which support the human annotator in the creation of an annotated data set specialized for research  in the area of spoken document classification and retrieval.  Annotation  decisions that would require prohibitively large amounts of training data or  system development time to make automatically are taken over by the human  annotator. Annotation decisions which are easily automated and tedious for  humans are shouldered by the computer. 
Using our method we can process and  annotate the data approximately ten times faster than it was possible by  hand. The data is downloaded and the transcripts are normalized by a series  of filters as well as a semi-automatic digit to text conversion. Then, the system makes use of the Bayesian Information Criterion (BIC) to segment the  audio data and Automatic Speech Recognition (ASR) to forced-alignment of the  speech signal with written transcripts.  We demonstrate the method with the  concrete example of our Deutsche Welle database of programs from the Kalenderblatt radio series.",{"paper_id":5798,"title":5799,"year":213,"month":855,"day":63,"doi":5800,"resource_url":5801,"first_page":63,"last_page":63,"pdf_url":5802,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5803,"paper_type":860,"authors":5804,"abstract":5811},"lrec2002-main-293","The TASX-environment: an XML-based toolset for time aligned speech corpora","10.63317\u002F37ngft6vhhi2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-293","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F293.pdf","milde-gut-2002-tasx",[5805,5808],{"paper_id":5798,"author_seq":247,"given_name":5806,"surname":5807,"affiliation":63,"orcid":63},"Jan-Torsten","Milde",{"paper_id":5798,"author_seq":232,"given_name":5809,"surname":5810,"affiliation":63,"orcid":63},"Ulrike","Gut","This paper describes the design and implementation of an XML-based corpus environment for multi-tier  annotated speech data. The TASX-environment (TASX: Time Aligned Signal data  eXchange format) constitutes the technical basis for a corpus designed to  explore the acquisition of prosody by second language learners. 
It supports all aspects of the corpus setup procedure:  XML-based annotation of the speech data, all transformation of non XML-annotations, and the web-based analysis and dissemination of the data.",{"paper_id":5813,"title":5814,"year":213,"month":855,"day":63,"doi":5815,"resource_url":5816,"first_page":63,"last_page":63,"pdf_url":5817,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5818,"paper_type":860,"authors":5819,"abstract":5823},"lrec2002-main-294","An integrated framework for treebanks and multilayer annotations","10.63317\u002F2jxp2nuws2rk","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-294","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F294.pdf","cotton-bird-2002-integrated",[5820,5822],{"paper_id":5813,"author_seq":247,"given_name":1411,"surname":5821,"affiliation":63,"orcid":63},"Cotton",{"paper_id":5813,"author_seq":232,"given_name":1142,"surname":1143,"affiliation":63,"orcid":63},"Treebank formats and associated software tools are  proliferating rapidly, with little consideration for interoperability.  We survey a wide variety of treebank structures and operations, and show  how they can be mapped onto the annotation graph model, and leading to  an integrated framework encompassing tree and non-tree annotations  alike. 
This development opens up new possibilities for managing and  exploiting multilayer annotations.",{"paper_id":5825,"title":5826,"year":213,"month":855,"day":63,"doi":5827,"resource_url":5828,"first_page":63,"last_page":63,"pdf_url":5829,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5830,"paper_type":860,"authors":5831,"abstract":5839},"lrec2002-main-295","Using the Web as a Linguistic Resource for Learning Reformulations Automatically","10.63317\u002F5fm9e2w7yk7k","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-295","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F295.pdf","duclaye-etal-2002-using",[5832,5835,5837],{"paper_id":5825,"author_seq":247,"given_name":5833,"surname":5834,"affiliation":63,"orcid":63},"Florence","Duclaye",{"paper_id":5825,"author_seq":232,"given_name":5314,"surname":5836,"affiliation":63,"orcid":63},"Yvon",{"paper_id":5825,"author_seq":218,"given_name":2416,"surname":5838,"affiliation":63,"orcid":63},"Collin","The use of paraphrases as a potential way to improve question answering, machine translation or automatic text summarization systems has long attracted the interest of researchers in natural language processing. However, manually entering reformulations into a system is a tedious and time-consuming process, if not an endless one. In this paper, we introduce a learning machinery aimed at acquiring reformulations automatically. Our system uses the Web as a linguistic resource and takes advantage of the results of an existing question answering system. Starting with one single prototypical argument tuple of a given semantic relation, our system first searches for potential alternative formulations of the relation, then finds new potential argument tuples, and iterates this process to progressively validate the candidate formulations. 
This learning process combines an acquisition stage, whose goal is to retrieve new evidences from Web pages, and a validation stage, whose role is to filter out noise and discard invalid paraphrases. After justifying the use of the Web as a linguistic resource, we describe our system, and report on primary results on a series of test semantic relations.",{"paper_id":5841,"title":5842,"year":213,"month":855,"day":63,"doi":5843,"resource_url":5844,"first_page":63,"last_page":63,"pdf_url":5845,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5846,"paper_type":860,"authors":5847,"abstract":5850},"lrec2002-main-296","An API for Discourse-level Access to XML-encoded Corpora","10.63317\u002F2mt885sqpixa","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-296","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F296.pdf","muller-strube-2002-api",[5848,5849],{"paper_id":5841,"author_seq":247,"given_name":1195,"surname":1387,"affiliation":63,"orcid":63},{"paper_id":5841,"author_seq":232,"given_name":2079,"surname":3601,"affiliation":63,"orcid":63},"We describe a simple and efficient Java object model and application programming interface (API) for (possibly multi-modal) annotated natural language corpora. Corpora are represented as elements like Sentences, Turns, Utterances, Words, Gestures and Markables. The API allows linguists to access corpora in terms of these discourse-level elements, i.e. at a conceptual level they are familiar with, with the flexibility offered by a general purpose programming language. 
It is also a contribution to corpus standardization efforts because it is based on a straightforward and easily extensible data model which can serve as a target for conversion of different corpus formats.",{"paper_id":5852,"title":5853,"year":213,"month":855,"day":63,"doi":5854,"resource_url":5855,"first_page":63,"last_page":63,"pdf_url":5856,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5857,"paper_type":860,"authors":5858,"abstract":5863},"lrec2002-main-297","Some Examinations of Intrinsic Methods for Summary Evaluation Based on the Text Summarization Challenge (TSC)","10.63317\u002F4jxsjn6wkmm2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-297","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F297.pdf","nanba-okumura-2002-examinations",[5859,5862],{"paper_id":5852,"author_seq":247,"given_name":5860,"surname":5861,"affiliation":63,"orcid":63},"Hidetsugu","Nanba",{"paper_id":5852,"author_seq":232,"given_name":1571,"surname":1572,"affiliation":63,"orcid":63},"Computer-produced summaries have traditionally been evaluated by  comparing them with human-produced summaries using the F-measure. However, the F-measure is not  appropriate when alternative sentences are possible in a human-produced extract. In this paper, we  examine some evaluation methods devised to overcome the problem, including utility-based evaluation.  By giving scores for moderately  important sentences that do not appear in the human-produced extract, utility-based evaluation can resolve the  problem. However, the method requires much effort from humans to provide data for  evaluation. In this paper, we first propose a pseudo-utility-based evaluation that uses  human-produced extracts at different compression ratios. 
To evaluate the effectiveness of pseudo-utility-based  evaluation, we compare our method and the F-measure using the data of the Text Summarization Challenge (TSC), and show that  pseudo-utility-based evaluation can resolve this problem. Next, we focus on content-based evaluation. Instead of measuring the ratio of  sentences that match exactly in the extract, the method evaluates extracts by comparing their content words to those of human-produced  extracts. Although the method has been reported to be effective in resolving the problem, it has not been  examined in the context of comparing two extracts produced from different systems. We evaluated  computer-produced summaries by content-based evaluation, and compared the results with a subjective evaluation. We found that the evaluation  by content-based measure matched those by subjective evaluation in 93% of the cases, if the gap in  content-based scores between two summaries is more than 0.2.",{"paper_id":5865,"title":5866,"year":213,"month":855,"day":63,"doi":5867,"resource_url":5868,"first_page":63,"last_page":63,"pdf_url":5869,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5870,"paper_type":860,"authors":5871,"abstract":5877},"lrec2002-main-298","Toward an objective and generic Method for Spoken Language Understanding Systems Evaluation: an extension of the DCR method","10.63317\u002F48v2yuwr8av3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-298","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F298.pdf","kurdi-ahafhaf-2002-toward",[5872,5874],{"paper_id":5865,"author_seq":247,"given_name":5873,"surname":4355,"affiliation":63,"orcid":63},"Mohamed-Zakaria",{"paper_id":5865,"author_seq":232,"given_name":5875,"surname":5876,"affiliation":63,"orcid":63},"Mohamed","Ahafhaf","In this paper, we present an extension of the DCR method, which is a framework for the deep evaluation of Spoken Language Understanding (SLU) Systems. 
The key point of our contribution is the use of a linguistic typology in order to generate an evaluation corpus that covers a significant number of the linguistic phenomena we want to evaluate our system on. This allows to have more objective and deep evaluation of SLU systems.",{"paper_id":5879,"title":5880,"year":213,"month":855,"day":63,"doi":5881,"resource_url":5882,"first_page":63,"last_page":63,"pdf_url":5883,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5884,"paper_type":860,"authors":5885,"abstract":5892},"lrec2002-main-299","Information Extraction from Text Corpora: Using Filters on Collocation Sets","10.63317\u002F46r8n22mzjhq","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-299","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F299.pdf","heyer-etal-2002-information",[5886,5889,5891],{"paper_id":5879,"author_seq":247,"given_name":5887,"surname":5888,"affiliation":63,"orcid":63},"Gerhard","Heyer",{"paper_id":5879,"author_seq":232,"given_name":1929,"surname":5890,"affiliation":63,"orcid":63},"Quasthoff",{"paper_id":5879,"author_seq":218,"given_name":2419,"surname":5793,"affiliation":63,"orcid":63},"This paper describes the application of filtering  techniques to collocation sets calculated for very large text corpora.  Additional information like patterns, grammatical information, subject  areas and numerical values associated with the collocations are used to  identify collocations with given semantic structure. Various examples  and different techniques for applying such filters are described. 
We  also give several examples of practical applications for this type of  information extraction.",{"paper_id":5894,"title":5895,"year":213,"month":855,"day":63,"doi":5896,"resource_url":5897,"first_page":63,"last_page":63,"pdf_url":5898,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5899,"paper_type":860,"authors":5900,"abstract":5904},"lrec2002-main-300","The DASL Project: a Case Study in Data Re-Annotation and Re-Use","10.63317\u002F2gg87ik4hasz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-300","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F300.pdf","cieri-strassel-2002-dasl",[5901,5902],{"paper_id":5894,"author_seq":247,"given_name":4997,"surname":4998,"affiliation":63,"orcid":63},{"paper_id":5894,"author_seq":232,"given_name":5903,"surname":3524,"affiliation":63,"orcid":63},"Stephanie","It is well known and often repeated that publicly  available digital data encourages basic and collaborative research  including the comparison of results across studies, the measurement of  inter-annotator consistency and the use of stable data as a benchmark  with which to compare new models and methodologies. Instances of such  reuse abound. The reuse and re-annotation of the Switchboard and TDT  corpora was described in detail during LREC 2000 (Graff and Bird 2000).  Unfortunately, very few studies have actually focused on the issues  surrounding re-use and re-annotation of data. The LDC project to develop  Data and Annotations for Sociolinguists (DASL) encourages data sharing  and the re-annotation and reuse of published data as an important  complement to firsthand fieldwork. DASL annotators use a tool, developed  for the project, that gives linguists access to the four corpora via the  Internet and allows simultaneous annotation at multiple sites. 
In  addition to the empirical study of linguistic variation among the  speakers represented, this project will address methodological issues in  the corpus re-use and in team based annotation of linguistic data. The  paper will describe the tools, data and data formats developed for DASL,  outline the challenges we have faced in re-annotating the data using a  team approach and summarize the results to date.",{"paper_id":5906,"title":5907,"year":213,"month":855,"day":63,"doi":5908,"resource_url":5909,"first_page":63,"last_page":63,"pdf_url":5910,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5911,"paper_type":860,"authors":5912,"abstract":5923},"lrec2002-main-301","Evaluating Web-based Question Answering Systems","10.63317\u002F3jxm8nv5ks9u","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-301","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F301.pdf","radev-etal-2002-evaluating",[5913,5915,5918,5920],{"paper_id":5906,"author_seq":247,"given_name":5914,"surname":3517,"affiliation":63,"orcid":63},"Dragomir R.",{"paper_id":5906,"author_seq":232,"given_name":5916,"surname":5917,"affiliation":63,"orcid":63},"Hong","Qi",{"paper_id":5906,"author_seq":218,"given_name":3535,"surname":5919,"affiliation":63,"orcid":63},"Wu",{"paper_id":5906,"author_seq":203,"given_name":5921,"surname":5922,"affiliation":63,"orcid":63},"Weiguo","Fan","The official evaluation of TREC-style Q&A systems  is done manually, which is quite expensive and not scalable to web-based  Q&A systems. An automatic evaluation technique is needed for dynamic  Q&A systems. This paper presents a set of metrics that have been  implemented in our web-based Q&A system, namely NSIR. 
It also shows  the correlations between the different metrics.",{"paper_id":5925,"title":5926,"year":213,"month":855,"day":63,"doi":5927,"resource_url":5928,"first_page":63,"last_page":63,"pdf_url":5929,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5930,"paper_type":860,"authors":5931,"abstract":5941},"lrec2002-main-302","Construction of a Japanese Relevance-tagged Corpus","10.63317\u002F5hw484fiszwz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-302","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F302.pdf","kawahara-etal-2002-construction",[5932,5935,5938],{"paper_id":5925,"author_seq":247,"given_name":5933,"surname":5934,"affiliation":63,"orcid":63},"Daisuke","Kawahara",{"paper_id":5925,"author_seq":232,"given_name":5936,"surname":5937,"affiliation":63,"orcid":63},"Sadao","Kurohashi",{"paper_id":5925,"author_seq":218,"given_name":5939,"surname":5940,"affiliation":63,"orcid":63},"Kôiti","Hasida","This paper describes our corpus annotation project. The annotated corpus has relevance tags which consist of predicate-argument relations, relations between nouns, and coreferences. To construct this    relevance-tagged corpus, we investigated a large corpus and established the specification of the annotation. 
This paper shows the specification and difficult tagging problems which have emerged through the annotation so far.",{"paper_id":5943,"title":5944,"year":213,"month":855,"day":63,"doi":5945,"resource_url":5946,"first_page":63,"last_page":63,"pdf_url":5947,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5948,"paper_type":860,"authors":5949,"abstract":5957},"lrec2002-main-303","The American National Corpus: More Than the Web Can Provide","10.63317\u002F38kxz26sc2sv","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-303","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F303.pdf","ide-etal-2002-american",[5950,5951,5954],{"paper_id":5943,"author_seq":247,"given_name":5213,"surname":5214,"affiliation":63,"orcid":63},{"paper_id":5943,"author_seq":232,"given_name":5952,"surname":5953,"affiliation":63,"orcid":63},"Randi","Reppen",{"paper_id":5943,"author_seq":218,"given_name":5955,"surname":5956,"affiliation":63,"orcid":63},"Keith","Suderman","The American National Corpus (ANC) project is developing a corpus comparable to the British National Corpus (BNC), covering American English. Recent interest in the web as a source of corpus materials has caused some in the language processing community to suggest that the development of a corpus of American English is unnecessary. However, we argue that far from being rendered superfluous by the availability of web materials, the ANC is likely to provide a resource for developing web acquisition techniques to support tasks such as genre and language detection and automatic annotation. 
This paper presents a comparison of the ANC in terms of both content and format with a test corpus compiled from web data, and a discussion of points of intersection and divergence.",{"paper_id":5959,"title":5960,"year":213,"month":855,"day":63,"doi":5961,"resource_url":5962,"first_page":63,"last_page":63,"pdf_url":5963,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5964,"paper_type":860,"authors":5965,"abstract":5969},"lrec2002-main-304","FORM: An Extensible, Kinematically-based Gesture Annotation Scheme.","10.63317\u002F3zieqr6nhgn3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-304","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F304.pdf","martell-2002-form",[5966],{"paper_id":5959,"author_seq":247,"given_name":5967,"surname":5968,"affiliation":63,"orcid":63},"Craig","Martell","Annotated corpora have played a critical role in  speech and natural language research; and, there is an increasing  interest in corpora-based research in sign language and gesture as well.  As examples, consider the tools Anvil and MediaTagger. These are  excellent tools which allow for multi-track annotation of videos of  speakers or signers. With tools such as these, researchers can create  corpora containing, for example, grammatical information, discourse  structure, facial expression, and gesture. The issue, then, is not the  ability to create corpora containing gesture and speech information, but  the type of information captured when describing gestures. We present a  non-semantic, geometrically-based annotation scheme, FORM, which allows  an annotator to capture the kinematic information in a gesture just from  videos of speakers. 
In addition, FORM stores this gestural information  in Annotation Graph format—allowing for easy integration of gesture  information with other types of communication information, e.g.,  discourse structure, parts of speech, intonation information, etc.",{"paper_id":5971,"title":5972,"year":213,"month":855,"day":63,"doi":5973,"resource_url":5974,"first_page":63,"last_page":63,"pdf_url":5975,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5976,"paper_type":860,"authors":5977,"abstract":63},"lrec2002-main-305","Toward a Broad-coverage Bilingual Corpus for Speech Translation of Travel Conversations in the Real World","10.63317\u002F3s3iqvy9jh6g","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-305","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F305.pdf","takezawa-etal-2002-toward",[5978,5979,5980,5981,5983],{"paper_id":5971,"author_seq":247,"given_name":1096,"surname":1097,"affiliation":63,"orcid":63},{"paper_id":5971,"author_seq":232,"given_name":2316,"surname":2317,"affiliation":63,"orcid":63},{"paper_id":5971,"author_seq":218,"given_name":2983,"surname":2984,"affiliation":63,"orcid":63},{"paper_id":5971,"author_seq":203,"given_name":5982,"surname":1094,"affiliation":63,"orcid":63},"Hirofumi",{"paper_id":5971,"author_seq":188,"given_name":2987,"surname":1094,"affiliation":63,"orcid":63},{"paper_id":5985,"title":5986,"year":213,"month":855,"day":63,"doi":5987,"resource_url":5988,"first_page":63,"last_page":63,"pdf_url":5989,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":5990,"paper_type":860,"authors":5991,"abstract":5996},"lrec2002-main-306","Scaling the ISLE Framework: Use of Existing Corpus Resources for Validation of MT Evaluation Metrics across 
Languages","10.63317\u002F4cnkooo5u8cf","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-306","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F306.pdf","vanni-miller-2002-scaling",[5992,5995],{"paper_id":5985,"author_seq":247,"given_name":5993,"surname":5994,"affiliation":63,"orcid":63},"Michelle","Vanni",{"paper_id":5985,"author_seq":232,"given_name":5955,"surname":900,"affiliation":63,"orcid":63},"This paper describes the next step in a machine translation (MT)  evaluation (MTE) research program previously reported on at MT Summit 2001. The development of this evaluation methodology has  benefited from the availability of two collections of source language texts and the results of processing these texts with  several consumer off-the-shelf (COTS) MT engines (DARPA 1994, Doyon, Taylor, & White 1999).  The crucial characteristic of  this methodology is a systematic development of a predictive relationship between discrete, well-defined metrics (a set of  quality test scores) and specific information processing tasks that can be reliably performed with output of a given MT system.  
One might view the intended outcomes as (1) a system for classifying MT output in terms of the information processing  functions it can serve and (2) an indicator for research and development directions in MT designed to serve a specific  information processing function.",{"paper_id":5998,"title":5999,"year":213,"month":855,"day":63,"doi":6000,"resource_url":6001,"first_page":63,"last_page":63,"pdf_url":6002,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6003,"paper_type":860,"authors":6004,"abstract":6008},"lrec2002-main-307","Learning description of term patterns using glossary resources","10.63317\u002F2mmsma3f8gqf","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-307","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F307.pdf","ha-2002-learning",[6005],{"paper_id":5998,"author_seq":247,"given_name":6006,"surname":6007,"affiliation":63,"orcid":63},"Le An","Ha","In this paper, we describe a method of automatic extraction of knowledge patterns using in term descriptions from glossary, and using them to extract term description from technical texts. A brief introduction of the problem will be presented. After that, knowledge patterns and related works are discussed. By statistical analysis, we will show that these patterns can be learned, and we will propose a method to learn these pattern based on discover collocation of important verbs and nouns represent main concepts in the domain. 
Evaluation have been made showing that using the method, we can extract half of the correct descriptions, with a noise just one third.",{"paper_id":6010,"title":6011,"year":213,"month":855,"day":63,"doi":6012,"resource_url":6013,"first_page":63,"last_page":63,"pdf_url":6014,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6015,"paper_type":860,"authors":6016,"abstract":6023},"lrec2002-main-308","The binomial cumulative distribution function, or, is my system better than yours?","10.63317\u002F2oiwshzuc7w9","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-308","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F308.pdf","di-eugenio-etal-2002-binomial",[6017,6019,6021],{"paper_id":6010,"author_seq":247,"given_name":1805,"surname":6018,"affiliation":63,"orcid":63},"Di Eugenio",{"paper_id":6010,"author_seq":232,"given_name":2079,"surname":6020,"affiliation":63,"orcid":63},"Glass",{"paper_id":6010,"author_seq":218,"given_name":6022,"surname":1411,"affiliation":63,"orcid":63},"Michael J.","In human language technology, it is becoming more and more common to  run systematic evaluations in which two or more systems, or two or more versions of the same system, are pitted one against the other.  We propose the binomial cumulative distribution function as a way to assess the cumulative effect of the measures collected in such  evaluations. We present an application of this measure to the evaluation of the NL interface to an  Intelligent Tutoring System. 
We conclude by discussing a few issues pertaining to this  statistical measure.",{"paper_id":6025,"title":6026,"year":213,"month":855,"day":63,"doi":6027,"resource_url":6028,"first_page":63,"last_page":63,"pdf_url":6029,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6030,"paper_type":860,"authors":6031,"abstract":6039},"lrec2002-main-309","Proposal of a very-large-corpus acquisition method by cell-formed registration","10.63317\u002F39wds9erdb8m","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-309","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F309.pdf","suyaga-etal-2002-proposal",[6032,6034,6035,6038],{"paper_id":6025,"author_seq":247,"given_name":2983,"surname":6033,"affiliation":63,"orcid":63},"Suyaga",{"paper_id":6025,"author_seq":232,"given_name":1096,"surname":1097,"affiliation":63,"orcid":63},{"paper_id":6025,"author_seq":218,"given_name":6036,"surname":6037,"affiliation":63,"orcid":63},"Genichiro","Kikui",{"paper_id":6025,"author_seq":203,"given_name":2987,"surname":1094,"affiliation":63,"orcid":63},"One promising way to improve the performance of a speech translation system is to collect a large volume of data in the target tasks\u002Fdomains.  However, a naïve expansion of the traditional data collection scheme consumes valuable resources.  Advanced speech recognition technology can provide a highly accurate recognizer if a machine-friendly speech is permitted.  We propose a new data collection scheme that is supported by this speaking style.  
The preliminary results of data collection show that the proposed scheme has a three-digit efficiency.",{"paper_id":6041,"title":6042,"year":213,"month":855,"day":63,"doi":6043,"resource_url":6044,"first_page":63,"last_page":63,"pdf_url":6045,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6046,"paper_type":860,"authors":6047,"abstract":6051},"lrec2002-main-310","Bootstrapping Large Sense Tagged Corpora","10.63317\u002F2ei98jihmdkt","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-310","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F310.pdf","mihalcea-2002-bootstrapping",[6048],{"paper_id":6041,"author_seq":247,"given_name":6049,"surname":6050,"affiliation":63,"orcid":63},"Rada F.","Mihalcea","The performance of Word Sense Disambiguation systems  largely depends on the availability of sense tagged corpora. Since the  semantic annotations are usually done by humans, the size of such  corpora is limited to a handful of tagged texts. This paper proposes a  generation algorithm that may be used to automatically create large  sense tagged corpora. 
The approach is evaluated through comparative  sense disambiguation experiments performed on data provided during the  SENSEVAL-2",{"paper_id":6053,"title":6054,"year":213,"month":855,"day":63,"doi":6055,"resource_url":6056,"first_page":63,"last_page":63,"pdf_url":6057,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6058,"paper_type":860,"authors":6059,"abstract":6067},"lrec2002-main-311","The Valence Patterns of Japanese Verbs Extracted From The EDR Corpus","10.63317\u002F3wjidgeqx7fp","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-311","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F311.pdf","ogino-etal-2002-valence",[6060,6063,6064],{"paper_id":6053,"author_seq":247,"given_name":6061,"surname":6062,"affiliation":63,"orcid":63},"Takano","Ogino",{"paper_id":6053,"author_seq":232,"given_name":1296,"surname":1297,"affiliation":63,"orcid":63},{"paper_id":6053,"author_seq":218,"given_name":6065,"surname":6066,"affiliation":63,"orcid":63},"Kazuhiro","Kobayashi","This paper describes research on particular verb valences obtained from actual linguistic data.  We created verb valence data using data from the EDR Co-occurrence Dictionary as our source.  The EDR Co-occurrence Dictionary is coded with syntactic governing-dependent relation tags and semantic tags.The syntactic governing-dependent relations data in the EDR Co-occurrence Dictionary however, is expressed as individual constituent pairs.  In this study, we grouped each of the governing-dependent relation pairs according to their verb concepts and then unified them into a number of combinations based on case.  After the data was automatically unified from the source data, we manually corrected mistaken governing-dependent relations, and also made changes to case where necessary.  By following this procedure, we created basic valence data for each verb.  
Further, based on this valence data and the verb patterns created from it, we are currently looking into creating semantic groups for nouns on which semantic  restrictions are imposed by the verb.",{"paper_id":6069,"title":6070,"year":213,"month":855,"day":63,"doi":6071,"resource_url":6072,"first_page":63,"last_page":63,"pdf_url":6073,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6074,"paper_type":860,"authors":6075,"abstract":6083},"lrec2002-main-312","Multimodal and Adaptative Pedagogical Resources","10.63317\u002F2ombhewp9u38","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-312","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F312.pdf","martin-etal-2002-multimodal",[6076,6077,6080],{"paper_id":6069,"author_seq":247,"given_name":5730,"surname":2179,"affiliation":63,"orcid":63},{"paper_id":6069,"author_seq":232,"given_name":6078,"surname":6079,"affiliation":63,"orcid":63},"Jean-Hugues","Réty",{"paper_id":6069,"author_seq":218,"given_name":6081,"surname":6082,"affiliation":63,"orcid":63},"Nelly","Bensimon","When interacting with students, teachers usually combine several communication modalities (speech, hand gestures, gaze, posture, facial expression, graphics on a blackboard, slides…) and have to adapt their communication to the lecture settings (computer knowledge of the students, duration of the lecture…). Although educational resources and intelligent tutoring systems are developing, they are seldom being used as language resources per se, nor based on real-world pedagogical recording. Even in the field of pedagogical agents where a graphical persona is used as a complementary means of communication, the multimodal and adaptative behaviour of the graphical agent is often based on general knowledge about communication studies rather than on the annotation of pedagogical behaviour observed in video corpora. 
In this paper, we describe how an educational video corpus is being collected and how it is planned to be used for improving the existing on-line tutorial with multimodal and adaptative hypermedia features.",{"paper_id":6085,"title":6086,"year":213,"month":855,"day":63,"doi":6087,"resource_url":6088,"first_page":63,"last_page":63,"pdf_url":6089,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6090,"paper_type":860,"authors":6091,"abstract":6100},"lrec2002-main-313","Enhanced Japanese Electronic Dictionary Look-up","10.63317\u002F38q4x7vp3b6f","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-313","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F313.pdf","baldwin-etal-2002-enhanced",[6092,6093,6096,6098,6099],{"paper_id":6085,"author_seq":247,"given_name":3291,"surname":3292,"affiliation":63,"orcid":63},{"paper_id":6085,"author_seq":232,"given_name":6094,"surname":6095,"affiliation":63,"orcid":63},"Slaven","Bilac",{"paper_id":6085,"author_seq":218,"given_name":6097,"surname":1572,"affiliation":63,"orcid":63},"Ryo",{"paper_id":6085,"author_seq":203,"given_name":1568,"surname":1569,"affiliation":63,"orcid":63},{"paper_id":6085,"author_seq":188,"given_name":1577,"surname":1578,"affiliation":63,"orcid":63},"This paper describes the process of data preparation  and reading generation for an ongoing project aimed at improving the  accessibility of unknown words for learners of foreign languages,  focusing initially on Japanese. Rather then requiring absolute knowledge  of the readings of words in the foreign language, we allow look-up of  dictionary entries by readings which learners can predictably be  expected to associate with them. We automatically extract an exhaustive  set of phonemic readings for each grapheme segment and learn basic  morpho-phonological rules governing compound word formation, associating  a probability with each. 
Then we apply the naive Bayes model to generate  a set of readings and give each a likeliness score based on previously  extracted evidence and corpus frequencies.",{"paper_id":6102,"title":6103,"year":213,"month":855,"day":63,"doi":6104,"resource_url":6105,"first_page":63,"last_page":63,"pdf_url":6106,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6107,"paper_type":860,"authors":6108,"abstract":6113},"lrec2002-main-314","Evaluation of a Vector Space Similarity Measure in a Multilingual Framework","10.63317\u002F58uag4vqk8np","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-314","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F314.pdf","besancon-rajman-2002-evaluation",[6109,6112],{"paper_id":6102,"author_seq":247,"given_name":6110,"surname":6111,"affiliation":63,"orcid":63},"Romaric","Besançon",{"paper_id":6102,"author_seq":232,"given_name":2179,"surname":2180,"affiliation":63,"orcid":63},"In this contribution, we propose a method that uses a multilingual  framework to validate the relevance of the notion of vector based semantic similarity between texts. The goal is to verify that vector  based semantic similarities can be reliably transferred from one language to another. More precisely, the idea is to test whether the  relative positions of documents in a vector space associated with a given source language are close to the ones of their translations in  the vector space associated with the target language. The experiments, carried out with both the standard Vector Space model and the more  advanced DSIR model, have given very promising results.",{"paper_id":6115,"title":6116,"year":213,"month":855,"day":63,"doi":6117,"resource_url":6118,"first_page":63,"last_page":63,"pdf_url":6119,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6120,"paper_type":860,"authors":6121,"abstract":6125},"lrec2002-main-315","Corpora as Object-Oriented System. 
From UML-notation to Implementation","10.63317\u002F4q2taymocx52","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-315","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F315.pdf","yablonsky-2002-corpora",[6122],{"paper_id":6115,"author_seq":247,"given_name":6123,"surname":6124,"affiliation":63,"orcid":63},"Serge A.","Yablonsky","The paper discusses the complete process of building and managing a corpora warehouse,  including case study involving the development of UML-specifications and patterns, architecture  and examples of actual implementations of DBMS tools to support strategic corpora analysis.",{"paper_id":6127,"title":6128,"year":213,"month":855,"day":63,"doi":6129,"resource_url":6130,"first_page":63,"last_page":63,"pdf_url":6131,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6132,"paper_type":860,"authors":6133,"abstract":6140},"lrec2002-main-316","The Lexicon-Grammar Balance in Robust Parsing of Italian","10.63317\u002F36ccta6tprsc","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-316","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F316.pdf","bartolini-etal-2002-lexicon",[6134,6135,6136,6139],{"paper_id":6127,"author_seq":247,"given_name":1626,"surname":5229,"affiliation":63,"orcid":63},{"paper_id":6127,"author_seq":232,"given_name":5190,"surname":5191,"affiliation":63,"orcid":63},{"paper_id":6127,"author_seq":218,"given_name":6137,"surname":6138,"affiliation":63,"orcid":63},"Simonetta","Montemagni",{"paper_id":6127,"author_seq":203,"given_name":4837,"surname":4838,"affiliation":63,"orcid":63},"What is the role of lexical information in robust  parsing of unrestricted texts? 
In this paper we provide experimental  evidence showing that, in order to strike the balance between robustness  and coverage needed for practical NLP applications, judicious use of  positive lexical evidence given a text should be complemented with a  battery of dynamic parsing strategies aimed at solving local constraint  conflicts. Likewise, negative lexical evidence should not blindly  override grammatical information. Unlike fully lexicalised approaches to  parsing where cross-categorial constraints on lexicon usage apply  freely, optimal results can be obtained by modulating the way  subcategorisation information is brought to bear in identifying  dependency relations in context.",{"paper_id":6142,"title":6143,"year":213,"month":855,"day":63,"doi":6144,"resource_url":6145,"first_page":63,"last_page":63,"pdf_url":6146,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6147,"paper_type":860,"authors":6148,"abstract":6151},"lrec2002-main-317","Humans as Corpus - Language Learning Strategies in Virtually Mediated Authentic Environments","10.63317\u002F328ntfn2evev","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-317","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F317.pdf","jung-2002-humans",[6149],{"paper_id":6142,"author_seq":247,"given_name":1353,"surname":6150,"affiliation":63,"orcid":63},"Jung","This paper deals with the design and presuppositions  of a research project yet to be conducted. The notion of  \"linguistically relevant data\" is interpreted as the language  resource which is represented by other humans. 
In a MOO environment  (virtually mediated), students' language learning strategies are  observed and described, especially the use they make of the  communication with co-students and native speakers (authentic  environment).",{"paper_id":6153,"title":6154,"year":213,"month":855,"day":63,"doi":6155,"resource_url":6156,"first_page":63,"last_page":63,"pdf_url":6157,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6158,"paper_type":860,"authors":6159,"abstract":6169},"lrec2002-main-318","An Annotated Japanese Sign Language Corpus","10.63317\u002F33icn6fm4b8p","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-318","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F318.pdf","koizumi-etal-2002-annotated",[6160,6163,6166],{"paper_id":6153,"author_seq":247,"given_name":6161,"surname":6162,"affiliation":63,"orcid":63},"Atsuko","Koizumi",{"paper_id":6153,"author_seq":232,"given_name":6164,"surname":6165,"affiliation":63,"orcid":63},"Hirohiko","Sagawa",{"paper_id":6153,"author_seq":218,"given_name":6167,"surname":6168,"affiliation":63,"orcid":63},"Masaru","Takeuchi","Sign language is characterized by its interactivity and multimodality, which cause difficulties in data collection and annotation.  To address these difficulties, we have developed a video-based Japanese sign language (JSL) corpus and a corpus tool for annotation and linguistic analysis.  As the first step of linguistic annotation, we transcribed manual signs expressing lexical information as well as non-manual signs (NMSs) - including head movements, facial actions, and posture - that are used to express grammatical information.  Our purpose is to extract grammatical rules from this corpus for the sign-language translation system underdevelopment.  From this viewpoint, we will discuss methods for collecting elicited data, annotation required for grammatical analysis, as well as corpus tool required for annotation and grammatical analysis.  
As the result of annotating 2800 utterances, we confirmed that there are at least 50 kinds of NMSs in JSL, using head (seven kinds), jaw (six kinds), mouth (18 kinds), cheeks (one kind), eyebrows (four kinds), eyes (seven kinds), eye gaze (two kinds), body posture (five kinds).  We use this corpus for designing and testing an algorithm and grammatical rules for the  sign-language translation system underdevelopment.",{"paper_id":6171,"title":6172,"year":213,"month":855,"day":63,"doi":6173,"resource_url":6174,"first_page":63,"last_page":63,"pdf_url":6175,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6176,"paper_type":860,"authors":6177,"abstract":6185},"lrec2002-main-319","EMILLE, A 67-Million Word Corpus of Indic Languages: Data Collection, Mark-up and Harmonisation","10.63317\u002F24zr5nmbm3t3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-319","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F319.pdf","baker-etal-2002-emille",[6178,6179,6182,6183,6184],{"paper_id":6171,"author_seq":247,"given_name":3674,"surname":3175,"affiliation":63,"orcid":63},{"paper_id":6171,"author_seq":232,"given_name":6180,"surname":6181,"affiliation":63,"orcid":63},"Andrew","Hardie",{"paper_id":6171,"author_seq":218,"given_name":2224,"surname":4478,"affiliation":63,"orcid":63},{"paper_id":6171,"author_seq":203,"given_name":3490,"surname":3491,"affiliation":63,"orcid":63},{"paper_id":6171,"author_seq":188,"given_name":5563,"surname":4297,"affiliation":63,"orcid":63},"The paper describes developments to date on the  EMILLE Project (Enabling Minority Language Engineering) being carried  out at the Universities of Lancaster and Sheffield. EMILLE was  established to construct a 67 million word corpus of South Asian  languages. 
In addition to undertaking this corpus construction, the  project has had to address a number of related issues in the context of  establishing a language engineering (LE) environment for South Asian  language processing, such as translating 8-bit language data into  Unicode and producing a number of basic LE tools. The development of  tools on EMILLE has contributed to the on-going development of the LE  architecture GATE.",{"paper_id":6187,"title":6188,"year":213,"month":855,"day":63,"doi":6189,"resource_url":6190,"first_page":63,"last_page":63,"pdf_url":6191,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6192,"paper_type":860,"authors":6193,"abstract":63},"lrec2002-main-320","Multi-Modal Menus And Traffic Interaction. Timing As A Crucial Factor For User Driven Mode Decision","10.63317\u002F4dg6oubmdjef","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-320","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F320.pdf","salmen-2002-multi",[6194],{"paper_id":6187,"author_seq":247,"given_name":3891,"surname":6195,"affiliation":63,"orcid":63},"Salmen",{"paper_id":6197,"title":6198,"year":213,"month":855,"day":63,"doi":6199,"resource_url":6200,"first_page":63,"last_page":63,"pdf_url":6201,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6202,"paper_type":860,"authors":6203,"abstract":6207},"lrec2002-main-321","Assessing the difficulty of finding people in texts","10.63317\u002F3u4styu7it37","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-321","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F321.pdf","orasan-evans-2002-assessing",[6204,6206],{"paper_id":6197,"author_seq":247,"given_name":2767,"surname":6205,"affiliation":63,"orcid":63},"Orăsan",{"paper_id":6197,"author_seq":232,"given_name":1407,"surname":3088,"affiliation":63,"orcid":63},"In this paper several methods for animacy recognition  are evaluated. 
Each method has an increasing complexity over the previous one and involves more resources, and as a result, more  computation. When assessing the performance of these methods we consider three factors: the results of an intrinsic evaluation,  the results of an extrinsic evaluation, and the complexity of the method. For intrinsic evaluation the accuracy of the overall  classification is considered as well as the precision and recall for each type  classification. In the extrinsic evaluation, the animacy classifier is used to filter  candidates in a pronominal anaphora resolution system. Given the wide variety of texts used,  an anaphora resolution system could not be used for this evaluation because its performance depends upon the genre of the  text being processed. For this reason, the reduction of the number of candidates, the reduction of the number of antecedents, and the increase in the number of pronouns without any antecedents were  recorded and used to differentiate between the systems. 
Comparison between different systems showed that the best one is the system  which uses machine learning, and that the additional information brought by different modules does not lead to an increase in the success of the system due to the errors introduced by them.",{"paper_id":6209,"title":6210,"year":213,"month":855,"day":63,"doi":6211,"resource_url":6212,"first_page":63,"last_page":63,"pdf_url":6213,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6214,"paper_type":860,"authors":6215,"abstract":6219},"lrec2002-main-322","Lemma selection in domain specific computational lexica - some specific problems","10.63317\u002F42xk4mrb73pe","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-322","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F322.pdf","olsen-2002-lemma",[6216],{"paper_id":6209,"author_seq":247,"given_name":6217,"surname":6218,"affiliation":63,"orcid":63},"Sussi","Olsen","This paper describes the lemma selection process of a  Danish computational lexicon, the STO project, for domain specific  language and focuses on some specific problems encountered during the  lemma selection process. After a short introduction to the STO project  and an explanation of why the lemmas are selected from a corpus and not  chosen from existing dictionaries, the lemma selection process for  domain specific language is described in detail. The purpose is to make  the lemma selection process as automatic as possible but a manual  examination of the final candidate lemma lists is inevitable. The lemmas  found in the corpora are compared to a list of lemmas of general  language, sorting out lemmas already encoded in the database. Words that  have already been encoded as general language words but that are also  found with another meaning and perhaps another syntactic behaviour in a  specific domain should be kept on a list and the paper describes how  this is done. 
The recognition of borrowed words the spelling of which  have not been established constitutes a big problem to the automatic  lemma selection process. The paper gives some examples of this problem  and describes how the STO project tries to solve it.",{"paper_id":6221,"title":6222,"year":213,"month":855,"day":63,"doi":6223,"resource_url":6224,"first_page":63,"last_page":63,"pdf_url":6225,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6226,"paper_type":860,"authors":6227,"abstract":6234},"lrec2002-main-323","Automatism and User Interaction: Building a Hungarian WordNet","10.63317\u002F244crnwctbqr","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-323","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F323.pdf","proszeky-mihaltz-2002-automatism",[6228,6231],{"paper_id":6221,"author_seq":247,"given_name":6229,"surname":6230,"affiliation":63,"orcid":63},"Gábor","Prószéky",{"paper_id":6221,"author_seq":232,"given_name":6232,"surname":6233,"affiliation":63,"orcid":63},"Márton","Miháltz","This paper attempts to provide an account of an ongoing project on developing methods with software implementation for building multilingual lexical databases based on Princeton WordNet. The objectives of this project are not unique; several similar projects have been carried out to different stages. We have been implementing a combination of manual and automatic techniques. The result is an effective procedure of building lexical nets with acceptable precision. 
As the project has been in progress for several months now, our account is according to the partial results we achieved so far.",{"paper_id":6236,"title":6237,"year":213,"month":855,"day":63,"doi":6238,"resource_url":6239,"first_page":63,"last_page":63,"pdf_url":6240,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6241,"paper_type":860,"authors":6242,"abstract":63},"lrec2002-main-324","Comparative study of oral and written French automatically tagged with morpho-syntactic information","10.63317\u002F4qwvh4mgaeub","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-324","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F324.pdf","gendner-2002-comparative",[6243],{"paper_id":6236,"author_seq":247,"given_name":2440,"surname":6244,"affiliation":63,"orcid":63},"Gendner",{"paper_id":6246,"title":6247,"year":213,"month":855,"day":63,"doi":6248,"resource_url":6249,"first_page":63,"last_page":63,"pdf_url":6250,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6251,"paper_type":860,"authors":6252,"abstract":6265},"lrec2002-main-325","A Dependency Treebank for English","10.63317\u002F4a5i32vsa5rt","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-325","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F325.pdf","rambow-etal-2002-dependency",[6253,6256,6259,6262,6264],{"paper_id":6246,"author_seq":247,"given_name":6254,"surname":6255,"affiliation":63,"orcid":63},"Owen","Rambow",{"paper_id":6246,"author_seq":232,"given_name":6257,"surname":6258,"affiliation":63,"orcid":63},"Cassandre","Creswell",{"paper_id":6246,"author_seq":218,"given_name":6260,"surname":6261,"affiliation":63,"orcid":63},"Rachel","Szekely",{"paper_id":6246,"author_seq":203,"given_name":4540,"surname":6263,"affiliation":63,"orcid":63},"Taber",{"paper_id":6246,"author_seq":188,"given_name":3240,"surname":3241,"affiliation":63,"orcid":63},"This paper presents the syntactic annotation level of  
a project aimed at providing a small dialog corpus with multiple levels  of annotation. The syntactic annotation is based on dependency syntax.  We outline the reasons for choosing dependency, and show the syntactic  annotation for some constructions. We finish by describing the current  state of the project.",{"paper_id":6267,"title":6268,"year":213,"month":855,"day":63,"doi":6269,"resource_url":6270,"first_page":63,"last_page":63,"pdf_url":6271,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6272,"paper_type":860,"authors":6273,"abstract":63},"lrec2002-main-326","A Text-based for Detection and Filtering of Commercial Segments in Broadcast News","10.63317\u002F4s82c8y669yc","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-326","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F326.pdf","ramesh-bagga-2002-text",[6274,6276],{"paper_id":6267,"author_seq":247,"given_name":6275,"surname":2770,"affiliation":63,"orcid":63},"Ganesh",{"paper_id":6267,"author_seq":232,"given_name":6277,"surname":6278,"affiliation":63,"orcid":63},"Amit","Bagga",{"paper_id":6280,"title":6281,"year":213,"month":855,"day":63,"doi":6282,"resource_url":6283,"first_page":63,"last_page":63,"pdf_url":6284,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6285,"paper_type":860,"authors":6286,"abstract":6289},"lrec2002-main-327","Standards for Language Resources","10.63317\u002F2wronhkd4wch","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-327","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F327.pdf","ide-romary-2002-standards",[6287,6288],{"paper_id":6280,"author_seq":247,"given_name":5213,"surname":5214,"affiliation":63,"orcid":63},{"paper_id":6280,"author_seq":232,"given_name":3944,"surname":3945,"affiliation":63,"orcid":63},"This paper presents an abstract data model for  linguistic annotations and its implementation using XML, RDF and related  standards; and to outline 
the work of a newly formed committee of the  International Standards Organization (ISO), ISO\u002FTC 37\u002FSC 4 Language  Resource Management, which will use this work as its starting point. The  primary motive for presenting the latter is to solicit the participation  of members of the research community to contribute to the work of the  committee.",{"paper_id":6291,"title":6292,"year":213,"month":855,"day":63,"doi":6293,"resource_url":6294,"first_page":63,"last_page":63,"pdf_url":6295,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6296,"paper_type":860,"authors":6297,"abstract":6303},"lrec2002-main-328","PIA-Core: Semantic Annotation through Example-based Learning","10.63317\u002F26en2bfpohss","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-328","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F328.pdf","collier-takeuchi-2002-pia",[6298,6301],{"paper_id":6291,"author_seq":247,"given_name":6299,"surname":6300,"affiliation":63,"orcid":63},"Nigel","Collier",{"paper_id":6291,"author_seq":232,"given_name":6302,"surname":6168,"affiliation":63,"orcid":63},"Koichi","This paper summarizes the aims and scope of the PIA  (Portable Information Access) project’s PIA-Core system for automatic  annotation of documents on the Semantic Web, i.e. the next generation  World Wide Web. The focus of the project is to develop a portable  information extraction system that can be easily adapted to new domains.  PIA has its foundations on three resources: the PIA-Core information  extraction module, application modules and PIA guidelines for ensuring  consistent annotation. We are currently developing PIA-Core based on  advanced machine learning methods to automatically annotate documents  with terminology, names, temporal and quantity expressions etc. 
using  examples of annotated documents.",{"paper_id":6305,"title":6306,"year":213,"month":855,"day":63,"doi":6307,"resource_url":6308,"first_page":63,"last_page":63,"pdf_url":6309,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6310,"paper_type":860,"authors":6311,"abstract":63},"lrec2002-main-329","Progress on Multi-lingual Named Entity Annotation Guidelines using RDF (S)","10.63317\u002F42hg52d8car4","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-329","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F329.pdf","collier-etal-2002-progress",[6312,6313,6314,6315,6318],{"paper_id":6305,"author_seq":247,"given_name":6299,"surname":6300,"affiliation":63,"orcid":63},{"paper_id":6305,"author_seq":232,"given_name":6302,"surname":6168,"affiliation":63,"orcid":63},{"paper_id":6305,"author_seq":218,"given_name":2852,"surname":2853,"affiliation":63,"orcid":63},{"paper_id":6305,"author_seq":203,"given_name":6316,"surname":6317,"affiliation":63,"orcid":63},"Junichi","Fukumoto",{"paper_id":6305,"author_seq":188,"given_name":6319,"surname":6320,"affiliation":63,"orcid":63},"Norihiro","Ogata",{"paper_id":6322,"title":6323,"year":213,"month":855,"day":63,"doi":6324,"resource_url":6325,"first_page":63,"last_page":63,"pdf_url":6326,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6327,"paper_type":860,"authors":6328,"abstract":6332},"lrec2002-main-330","iLex - A tool for Sign Language Lexicography and Corpus Analysis","10.63317\u002F3nb76bqgx946","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-330","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F330.pdf","hanke-2002-ilex",[6329],{"paper_id":6322,"author_seq":247,"given_name":6330,"surname":6331,"affiliation":63,"orcid":63},"Thomas","Hanke","This paper describes a tool that combines features found in empirical  sign language lexicography and in sign language discourse transcription. 
It supports the user in lexicon building while working  on the transcription of a corpus. While it tries to reach a certain level of compatibility with upcoming multimedia annotation tools, it  offers a number of unique features considered essential due to the specific nature of sign languages.",{"paper_id":6334,"title":6335,"year":213,"month":855,"day":63,"doi":6336,"resource_url":6337,"first_page":63,"last_page":63,"pdf_url":6338,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6339,"paper_type":860,"authors":6340,"abstract":6360},"lrec2002-main-331","COLLATE: Competence Center in Speech and Language Technology","10.63317\u002F3o5xip7ypa7m","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-331","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F331.pdf","capstick-etal-2002-collate",[6341,6344,6345,6347,6348,6350,6352,6355,6357],{"paper_id":6334,"author_seq":247,"given_name":6342,"surname":6343,"affiliation":63,"orcid":63},"Joanne","Capstick",{"paper_id":6334,"author_seq":232,"given_name":1145,"surname":1146,"affiliation":63,"orcid":63},{"paper_id":6334,"author_seq":218,"given_name":4831,"surname":6346,"affiliation":63,"orcid":63},"Wahlster",{"paper_id":6334,"author_seq":203,"given_name":2371,"surname":4637,"affiliation":63,"orcid":63},{"paper_id":6334,"author_seq":188,"given_name":5196,"surname":6349,"affiliation":63,"orcid":63},"Erbach",{"paper_id":6334,"author_seq":172,"given_name":2182,"surname":6351,"affiliation":63,"orcid":63},"Jameson",{"paper_id":6334,"author_seq":155,"given_name":6353,"surname":6354,"affiliation":63,"orcid":63},"Brigitte","Jorg",{"paper_id":6334,"author_seq":138,"given_name":1864,"surname":6356,"affiliation":63,"orcid":63},"Karger",{"paper_id":6334,"author_seq":121,"given_name":6358,"surname":6359,"affiliation":63,"orcid":63},"Tillmann","Wegst","This paper presents the structure and activities of  the recently established Competence Center in Speech and Language  Technology 
in Saarbrücken. The objectives of the Competence Center are  to provide a comprehensive information service about speech and language  technologies, including live demonstrations of the most important  language technology (LT) systems, and to advance the state of the art in  the evaluation of LT systems for real-world applications. The Competence  Center comprises the following components: 1. the Virtual Information  Center \"Language Technology World\" (www.lt-world.org), the  world's most comprehensive information resource about speech and  language technology, 2. the Demonstration Center in Saarbrücken, which  offers interested parties the possibility to play and experiment with  different speech and language technologies, or to attend guided  demonstrations, 3. the Evaluation Center, which conducts evaluations of  the overall usability of language technology systems and advances  knowledge of relevant usability issues and evaluation methods. The work  presented in this paper was carried out by the German Research Center  for Artificial Intelligence in collaboration with Saarland University in  the context of the project COLLATE (COmputational Linguistics and  LAnguage TEchnology for Real Life Applications), funded by the German  Federal Ministry of Education and Research (www.bmbf.de).",{"paper_id":6362,"title":6363,"year":213,"month":855,"day":63,"doi":6364,"resource_url":6365,"first_page":63,"last_page":63,"pdf_url":6366,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6367,"paper_type":860,"authors":6368,"abstract":6375},"lrec2002-main-332","Japanese and American Sign Language Dictionary System for Japanese and English 
Users","10.63317\u002F2esxhxq7p4ei","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-332","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F332.pdf","suzuki-kakihana-2002-japanese",[6369,6372],{"paper_id":6362,"author_seq":247,"given_name":6370,"surname":6371,"affiliation":63,"orcid":63},"Emiko","Suzuki",{"paper_id":6362,"author_seq":232,"given_name":6373,"surname":6374,"affiliation":63,"orcid":63},"Kyoko","Kakihana","We discuss the basic ideas behind a Japanese and  American Sign Language Dictionary System for Japanese and English users.  Our discussion covers two main points. The first describes the necessity  of a bilingual dictionary. Since there is no \"universal sign  language\" or real \"international sign language,\" Deaf  people should learn at least three languages if they want to talk to  people whose mother tongue is different from their own: the mother sign  language, the mother spoken language as an intermediate language, and  the sign language in which they want to communicate. The second  describes the use of computer. As the use of computers becomes  widespread, it is increasingly convenient to study through computer  software or Internet facilities. Our dictionary system provides Deaf  people with an easy means of access using their mother-spoken language.  It also provides a way for people who are going to learn two sign  languages to look up new vocabulary. 
We are further planning to examine  how our system could be used to educate and assist Deaf people.",{"paper_id":6377,"title":6378,"year":213,"month":855,"day":63,"doi":6379,"resource_url":6380,"first_page":63,"last_page":63,"pdf_url":6381,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6382,"paper_type":860,"authors":6383,"abstract":6387},"lrec2002-main-333","The feasibility of a complete text corpus","10.63317\u002F2jddmumunbx5","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-333","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F333.pdf","jakopin-2002-feasibility",[6384],{"paper_id":6377,"author_seq":247,"given_name":6385,"surname":6386,"affiliation":63,"orcid":63},"Primož","Jakopin","In the paper the annual increase in size of a  complete text corpus of a single language, Slovenian, is estimated. It  comprises the serial publications in Slovenian, monographs and pages,  published on Internet. The estimate for the year 2000, based on 21,000  units of serial publications, 675,000 pages from 5,200 units of printed  monographs, 377,000 pages from 5,500 units of unpublished monographs  (mostly academic theses) and 300,000 pages on Internet is given at less  than 1.5 billion words. An extension of the Law of legal deposit, which  would also cover electronic versions of printed texts, is proposed. 
It  is suggested that to make the idea of a complete corpus viable, it  should be simple and profitable for the publishers to supply web  versions of their publications alongside with printed ones.",{"paper_id":6389,"title":6390,"year":213,"month":855,"day":63,"doi":6391,"resource_url":6392,"first_page":63,"last_page":63,"pdf_url":6393,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6394,"paper_type":860,"authors":6395,"abstract":6398},"lrec2002-main-334","Lexical Annotation for Multi-word Entries Containing Nominalizations","10.63317\u002F2zznn5ys2xxo","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-334","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F334.pdf","macleod-2002-lexical",[6396],{"paper_id":6389,"author_seq":247,"given_name":5141,"surname":6397,"affiliation":63,"orcid":63},"Macleod","New York University has produced a dictionary of  nominalizations (NOMLEX) whose entries capture the relationship of the  nominalization with its associated verb.  This dictionary indicates where the verbal arguments may be found in the noun phrase which  contains the nominalization. We have now made a study and produced some entries for nominalizations and their co-occurring verbs. These  entries are much more complex than NOMLEX entries. In order to express all the relationships between the nominalization and its co-occurring  verb, we made use of the terminology of Igor Mel'čuk, whose theories have been used to create dictionaries in French and Russian. His  categories were found to be very useful for this task. The verb + nominalization pairs were selected by frequency of co-occurrence and  thus do not strictly conform to what are considered support verbs. Support verbs are generally defined as having no semantic content,  serving only to carry tense and number which the nominalization cannot  express. A typical example of this is \"commit a murder\". 
The paper  below describes the NOMLEX entry which is the basis of this work and then demonstrates the additional information needed to describe the  verb + nominalization pair.",{"paper_id":6400,"title":6401,"year":213,"month":855,"day":63,"doi":6402,"resource_url":6403,"first_page":63,"last_page":63,"pdf_url":6404,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6405,"paper_type":860,"authors":6406,"abstract":6414},"lrec2002-main-335","Diversity of Scenarios in Information extraction","10.63317\u002F2zdcy73vby7t","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-335","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F335.pdf","huttunen-etal-2002-diversity",[6407,6410,6413],{"paper_id":6400,"author_seq":247,"given_name":6408,"surname":6409,"affiliation":63,"orcid":63},"Silja","Huttunen",{"paper_id":6400,"author_seq":232,"given_name":6411,"surname":6412,"affiliation":63,"orcid":63},"Roman","Yangarber",{"paper_id":6400,"author_seq":218,"given_name":1767,"surname":1768,"affiliation":63,"orcid":63},"This paper discusses\u002Fpresents problems of template structure for  Information Extraction.  We investigate these problems in the context of two  new Information Extraction scenarios which are linguistically and structurally  more challenging than the traditional MUC scenarios.  By a scenario we mean a  predefined set of facts to be extracted from text.  Traditional views on event  structure and template design are not adequate for the more complex scenarios.  We identify two structural factors that contribute to the complexity of a scenario: first, the scattering of events in text, and second,  inclusion relationship between events.  These factors cause difficulty in  representing the facts in an unambiguous way.  Traditional views on event  structure and template design are not adequate for the more complex scenarios.  
We propose that these kinds of event relationships can be better described  with a modular, hierarchical model.",{"paper_id":6416,"title":6417,"year":213,"month":855,"day":63,"doi":6418,"resource_url":6419,"first_page":63,"last_page":63,"pdf_url":6420,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6421,"paper_type":860,"authors":6422,"abstract":6426},"lrec2002-main-336","Multimodal Systems, Resources and Evaluation","10.63317\u002F3mcbfahhsb4t","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-336","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F336.pdf","maybury-2002-multimodal",[6423],{"paper_id":6416,"author_seq":247,"given_name":6424,"surname":6425,"affiliation":63,"orcid":63},"Mark T.","Maybury","This paper considers multimodal systems, resources,  and evaluation. We first motivate the value of multimodal information  access with a vision of multimodal question answering and an example of  content based access to broadcast news video. We next describe  intelligent multimodal interfaces, define terminology, and summarize a  range of applications, required corpora, and associated media. We then  introduce a jointly created roadmap for multimodality and show an  example of an open source multimodal spoken dialogue toolkit. We next  describe requirements for and an abstract architecture of multimodal  systems. 
We conclude discussing multimodal collaboration, multimodal  instrumentation, and multilevel evaluation.",{"paper_id":6428,"title":6429,"year":213,"month":855,"day":63,"doi":6430,"resource_url":6431,"first_page":63,"last_page":63,"pdf_url":6432,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6433,"paper_type":860,"authors":6434,"abstract":6447},"lrec2002-main-337","Designing speech database with prosodic variety for expressive TTS system","10.63317\u002F24fsfni4sch3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-337","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F337.pdf","kawanami-etal-2002-designing",[6435,6438,6441,6444],{"paper_id":6428,"author_seq":247,"given_name":6436,"surname":6437,"affiliation":63,"orcid":63},"Hiromichi","Kawanami",{"paper_id":6428,"author_seq":232,"given_name":6439,"surname":6440,"affiliation":63,"orcid":63},"Tsuyoshi","Masuda",{"paper_id":6428,"author_seq":218,"given_name":6442,"surname":6443,"affiliation":63,"orcid":63},"Tomoki","Toda",{"paper_id":6428,"author_seq":203,"given_name":6445,"surname":6446,"affiliation":63,"orcid":63},"Kiyohiro","Shikano","For the purpose of building speech synthesis system that can generate high-quality speech with wide range in prosody and realize fine  prosody control, we propose new speech database constructing method. As a speech synthesis method, we select a hybrid system which  consists of two parts: speech unit selection and prosody modification part by STRAIGHT (vocoder type high quality analysis-synthesis  method). Our viewpoint for designing database is to reduce amount of prosody modification, which causes quality deterioration. Hence,  to make it possible to generate arbitrary prosody within permissible range of prosody modification, we designed 9 sub-databases those  consist of same phonetic balanced text set with different prosody. In this paper, we report the designing method and general features of  obtained databases. 
Listening tests focused on durational feature were also conducted. The results show effectiveness of the method and  the necessity to change unit selection cost according to speech rate.",{"paper_id":6449,"title":6450,"year":213,"month":855,"day":63,"doi":6451,"resource_url":6452,"first_page":63,"last_page":63,"pdf_url":6453,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6454,"paper_type":860,"authors":6455,"abstract":6464},"lrec2002-main-338","Producing a Large-scale Encyclopedic Corpus over the Web","10.63317\u002F2hyengvpq66q","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-338","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F338.pdf","fujii-etal-2002-producing",[6456,6459,6462],{"paper_id":6449,"author_seq":247,"given_name":6457,"surname":6458,"affiliation":63,"orcid":63},"Atsushi","Fujii",{"paper_id":6449,"author_seq":232,"given_name":6460,"surname":6461,"affiliation":63,"orcid":63},"Katunobu","Itou",{"paper_id":6449,"author_seq":218,"given_name":6463,"surname":2825,"affiliation":63,"orcid":63},"Tetsuya","Encyclopedias, which describe general\u002Ftechnical terms, are valuable  language resources (LRs). As with other types of LRs relying on human introspection and supervision, constructing encyclopedias is quite  expensive. To resolve this problem, we automatically produced a large-scale encyclopedic corpus over the World Wide Web. We first  searched the Web for pages containing a term in question. Then we used linguistic patterns and HTML structures to extract text fragments  describing the term. Finally, we organized extracted term descriptions based on domains. The resultant corpus contains approximately 100,000  terms. 
We also evaluated the quality of 2,000 test terms, and found that correct descriptions were obtained for 65% of test terms.",{"paper_id":6466,"title":6467,"year":213,"month":855,"day":63,"doi":6468,"resource_url":6469,"first_page":63,"last_page":63,"pdf_url":6470,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6471,"paper_type":860,"authors":6472,"abstract":6489},"lrec2002-main-339","Continuous Speech Recognition Consortium an Open Repository for CSR Tools and Models","10.63317\u002F55rrgess3ys2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-339","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F339.pdf","lee-etal-2002-continuous",[6473,6475,6477,6478,6481,6483,6486,6488],{"paper_id":6466,"author_seq":247,"given_name":6474,"surname":2924,"affiliation":63,"orcid":63},"Akinobu",{"paper_id":6466,"author_seq":232,"given_name":6476,"surname":5934,"affiliation":63,"orcid":63},"Tatsuya",{"paper_id":6466,"author_seq":218,"given_name":1102,"surname":1103,"affiliation":63,"orcid":63},{"paper_id":6466,"author_seq":203,"given_name":6479,"surname":6480,"affiliation":63,"orcid":63},"Masato","Mimura",{"paper_id":6466,"author_seq":188,"given_name":6457,"surname":6482,"affiliation":63,"orcid":63},"Yamada",{"paper_id":6466,"author_seq":172,"given_name":6484,"surname":6485,"affiliation":63,"orcid":63},"Akinori","Ito",{"paper_id":6466,"author_seq":155,"given_name":6487,"surname":6461,"affiliation":63,"orcid":63},"Katsunobu",{"paper_id":6466,"author_seq":138,"given_name":6445,"surname":6446,"affiliation":63,"orcid":63},"Continuous Speech Recognition Consortium (CSRC) was  founded in 2000 to promote sharable high-quality platform for research  and development of speech recognition. It is a continued work of the  former Japanese Dictation Toolkit project from 1997 to 2000. An overview  of the software developed in the first year (Oct. 2000 - Sep. 2001) is  given in this paper. 
We have revised the LVCSR (large vocabulary  continuous speech recognition) engine Julius, and constructed new  acoustic models using very large speech corpora. Moreover, a variety of  acoustic and language models as well as tools are being set up.  Currently over 50 companies and academic institutes are joining. The  software is available by contacting the address",{"paper_id":6491,"title":6492,"year":213,"month":855,"day":63,"doi":6493,"resource_url":6494,"first_page":63,"last_page":63,"pdf_url":6495,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6496,"paper_type":860,"authors":6497,"abstract":6499},"lrec2002-main-340","Ethical and legal issues in corpus construction","10.63317\u002F2mhgu8jb7py3","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-340","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F340.pdf","mcenery-2002-ethical",[6498],{"paper_id":6491,"author_seq":247,"given_name":2224,"surname":4478,"affiliation":63,"orcid":63},"Not available",{"paper_id":6501,"title":6502,"year":213,"month":855,"day":63,"doi":6503,"resource_url":6504,"first_page":63,"last_page":63,"pdf_url":6505,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6506,"paper_type":860,"authors":6507,"abstract":6514},"lrec2002-main-341","Which way should we go? 
Metaphoric expressions in lexical resources","10.63317\u002F3bhj7rkcv3vs","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-341","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F341.pdf","alonge-castelli-2002-way",[6508,6511],{"paper_id":6501,"author_seq":247,"given_name":6509,"surname":6510,"affiliation":63,"orcid":63},"Antonietta","Alonge",{"paper_id":6501,"author_seq":232,"given_name":6512,"surname":6513,"affiliation":63,"orcid":63},"Margherita","Castelli","In this paper we address the issue of the encoding of  information on metaphoric expressions in a (multilingual)  lexical-semantic database for NLP applications. When analysing corpus  data we find a huge number of metaphoric expressions which can be hardly  dealt with by using as reference databases resources already developed.  In particular, we have compared information contained both in  dictionaries of Italian and in a WordNet-like resource – ItalWordNet  – with actual uses of words found in a corpus. 
We thus put forward  proposals to enrich a resource like IWN with relevant information.",{"paper_id":6516,"title":6517,"year":213,"month":855,"day":63,"doi":6518,"resource_url":6519,"first_page":63,"last_page":63,"pdf_url":6520,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6521,"paper_type":860,"authors":6522,"abstract":6535},"lrec2002-main-342","Phonetically Distributed Continuous Speech Corpus for Thai Language","10.63317\u002F4yhaeua7zzs2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-342","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F342.pdf","wutiwiwatchai-etal-2002-phonetically",[6523,6526,6529,6532],{"paper_id":6516,"author_seq":247,"given_name":6524,"surname":6525,"affiliation":63,"orcid":63},"Chai","Wutiwiwatchai",{"paper_id":6516,"author_seq":232,"given_name":6527,"surname":6528,"affiliation":63,"orcid":63},"Patcharika","Cotsomrong",{"paper_id":6516,"author_seq":218,"given_name":6530,"surname":6531,"affiliation":63,"orcid":63},"Sinaporn","Suebvisai",{"paper_id":6516,"author_seq":203,"given_name":6533,"surname":6534,"affiliation":63,"orcid":63},"Supphanat","Kanokphara","This paper proposes a work on phonetically balanced sentence (PB) and phonetically distributed sentence (PD) set, which are parts of the text prompt for speech recording in Large Vocabulary Continuous Speech Recognition (LVCSR) corpus for Thai language. Firstly, a protocol of Thai phonetic transcription and some essential rules of phonetic correction after grapheme-to-phoneme (G2P) process are described. An iterative procedure of PB and PD sentence selection is conducted in order to avoid tedious work of manual phone correction on all initial sentences. A standard text corpus, ORCHID, was chosen for the initial text. Analysis of several attributes such as the number of words, syllables, monophones and biphones, phone's distribution, etc., in both the PB and PD sets are reported. 
At the end, the final selected PB are partially compared to the American English TIMIT's PB set (MIT-450) and the Japanese ATR's 503 PB set.",{"paper_id":6537,"title":6538,"year":213,"month":855,"day":63,"doi":6539,"resource_url":6540,"first_page":63,"last_page":63,"pdf_url":6541,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6542,"paper_type":860,"authors":6543,"abstract":6547},"lrec2002-main-343","Signatures, Typed Feature Structures and RDFS","10.63317\u002F3k39sd4pjg4t","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-343","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F343.pdf","denecke-2002-signatures",[6544],{"paper_id":6537,"author_seq":247,"given_name":6545,"surname":6546,"affiliation":63,"orcid":63},"Matthias","Denecke","In this paper, we examine how attribute logic  signatures and typed feature structures can be serialized using emerging  semantic web standards RDF and RDFS. Inversely, we also consider to  which degree the logic of typed feature structure is capable of  representing and drawing inferences over RDF and RDFS documents.",{"paper_id":6549,"title":6550,"year":213,"month":855,"day":63,"doi":6551,"resource_url":6552,"first_page":63,"last_page":63,"pdf_url":6553,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6554,"paper_type":860,"authors":6555,"abstract":6562},"lrec2002-main-344","Report on the Revision of the Lexicographical Standard ISO 1951 Presentation\u002FRepresentation of Entries in Dictionaries","10.63317\u002F4m53o5bsu8g2","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-344","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F344.pdf","derouin-meur-2002-report",[6556,6559],{"paper_id":6549,"author_seq":247,"given_name":6557,"surname":6558,"affiliation":63,"orcid":63},"Marie-Jeanne","Derouin",{"paper_id":6549,"author_seq":232,"given_name":6560,"surname":6561,"affiliation":63,"orcid":63},"André 
Le","Meur","The two authors of this paper belong to the expert  commission of the standardization bodies in France (AFNOR) and in  Germany (DIN) and are, within the ISO\u002FTC37\u002FSC2, project leader and  expert for the revision of the ISO-standard. In this paper we will  report on the revision of the standard ISO 1951",{"paper_id":6564,"title":6565,"year":213,"month":855,"day":63,"doi":6566,"resource_url":6567,"first_page":63,"last_page":63,"pdf_url":6568,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6569,"paper_type":860,"authors":6570,"abstract":6586},"lrec2002-main-345","A Protocol for Evaluating Analyzers of Syntax (PEAS)","10.63317\u002F3tbshvu9orf4","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-345","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F345.pdf","gendner-etal-2002-protocol",[6571,6572,6575,6578,6579,6581,6584],{"paper_id":6564,"author_seq":247,"given_name":2440,"surname":6244,"affiliation":63,"orcid":63},{"paper_id":6564,"author_seq":232,"given_name":6573,"surname":6574,"affiliation":63,"orcid":63},"Gabriel","Illouz",{"paper_id":6564,"author_seq":218,"given_name":6576,"surname":6577,"affiliation":63,"orcid":63},"Michèle","Jardino",{"paper_id":6564,"author_seq":203,"given_name":1127,"surname":5330,"affiliation":63,"orcid":63},{"paper_id":6564,"author_seq":188,"given_name":1024,"surname":6580,"affiliation":63,"orcid":63},"Paroubek",{"paper_id":6564,"author_seq":172,"given_name":6582,"surname":6583,"affiliation":63,"orcid":63},"Isabelle","Robba",{"paper_id":6564,"author_seq":155,"given_name":905,"surname":6585,"affiliation":63,"orcid":63},"Vilnat","Providing a comparative framework for parsers is a  task that has already been tried in the past, e.g. 
(Abeillé, 1991),  (Atwell and Sutcliffe, 1997), (Black et al., 1991), and studied in the  literature (Black, 1993), (Black, 1994), (Carroll et al., 1998),  (Gaizauskas et al., 1998), (WEPS-98, ), (Mengel and Lezius, 2000), but  mainly for English. In this paper, we present PEAS: a Protocol for  Evaluating Analyzers of Syntax (in French: Protocole d’Evaluation pour  les Analyseurs Syntaxiques), based on an ongoing experiment at LIMSI  which aims at developing and testing a generic quantitative black-box  evaluation protocol for parsers of French. Two fully operational parsers  will be used to test the evaluation protocol; they are: the parser  (Giguet and Vergne, 1997) developed at GREYC (Caen University) and the  latest version of the parser developed at Rank Xerox Research Center in  Grenoble (Ait-Mokhtar and Chanod, 1997)",{"paper_id":6588,"title":6589,"year":213,"month":855,"day":63,"doi":6590,"resource_url":6591,"first_page":63,"last_page":63,"pdf_url":6592,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6593,"paper_type":860,"authors":6594,"abstract":6597},"lrec2002-main-346","Language Resources and Evaluation: International Strategy Panel","10.63317\u002F3z4oc2gaanbu","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-346","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F346.pdf","maybury-zampolli-2002-language",[6595,6596],{"paper_id":6588,"author_seq":247,"given_name":6424,"surname":6425,"affiliation":63,"orcid":63},{"paper_id":6588,"author_seq":232,"given_name":1692,"surname":4186,"affiliation":63,"orcid":63},"This panel aims to foster international understanding  and cooperation on the topic of language resources and evaluation.  
Through a series of short presentations and discussion focused on the  questions below, the participants will attempt to articulate and jointly  discover international strategies for language resources and evaluation.",{"paper_id":6599,"title":6600,"year":213,"month":855,"day":63,"doi":6601,"resource_url":6602,"first_page":63,"last_page":63,"pdf_url":6603,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6604,"paper_type":860,"authors":6605,"abstract":6609},"lrec2002-main-347","Machine Translation Evaluation: N-grams to the Rescue","10.63317\u002F2fjqfypf5jsh","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-347","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F347.pdf","papineni-2002-machine",[6606],{"paper_id":6599,"author_seq":247,"given_name":6607,"surname":6608,"affiliation":63,"orcid":63},"Kishore","Papineni","Human judges weigh many subtle aspects of translation  quality. But human evaluations are very expensive. Developers of Machine  Translation systems need to evaluate quality constantly. Automatic  methods that approximate human judgment are therefore very useful. The  main difficulty in automatic evaluation is that there are many correct  translations that differ in choice and order of words. There is no  single gold standard to compare a translation with. The closer a machine  translation is to professional human translations, the better it is. We  borrow precision and recall concepts from Information Retrieval to  measure closeness. The precision measure is used on variable-length  n-grams. Unigram matches between machine translation and the  professional reference translations account for adequacy. Longer n-gram  matches account for fluency. The n-gram precisions are aggregated  across sentences and averaged. A multiplicative brevity penalty prevents  cheating. The resulting metric correlates highly with human judgments of  translation quality. 
This method is tested for robustness across  language families and across the spectrum of translation quality. We  discuss BLEU, an automatic method to evaluate translation quality that  is cheap, fast, and good.",{"paper_id":6611,"title":6612,"year":213,"month":855,"day":63,"doi":6613,"resource_url":6614,"first_page":63,"last_page":63,"pdf_url":6615,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6616,"paper_type":860,"authors":6617,"abstract":6623},"lrec2002-main-348","Inside the Evaluation Process of the Cross-Language Evaluation Forum (CLEF): Issues of Multilingual Topic Creation and Multilingual Relevance Assessment","10.63317\u002F2p7pbsd7fib6","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-348","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F348.pdf","kluck-womser-hacker-2002-inside",[6618,6620],{"paper_id":6611,"author_seq":247,"given_name":2079,"surname":6619,"affiliation":63,"orcid":63},"Kluck",{"paper_id":6611,"author_seq":232,"given_name":6621,"surname":6622,"affiliation":63,"orcid":63},"Christa","Womser-Hacker","Topic creation and relevance assessment are  considered as crucial components of the evaluation process in  Information Retrieval (IR). In the context of the Cross-Language  Evaluation Forum (CLEF), the focus lies on evaluating multilingual  functions of IR systems. Therefore, topics are generated in various  languages and judging the documents delivered by the systems, requires  native speakers of the participating languages who are experts in the  topics’ domains. 
In this paper, the important issues of topic  generation and relevance assessment under multilingual conditions are  discussed.",{"paper_id":6625,"title":6626,"year":213,"month":855,"day":63,"doi":6627,"resource_url":6628,"first_page":63,"last_page":63,"pdf_url":6629,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6630,"paper_type":860,"authors":6631,"abstract":6639},"lrec2002-main-349","Beyond Tag Trigrams: New Local Features for Tagging","10.63317\u002F3awirx6886mh","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-349","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F349.pdf","finch-etal-2002-beyond",[6632,6634,6636],{"paper_id":6625,"author_seq":247,"given_name":6180,"surname":6633,"affiliation":63,"orcid":63},"Finch",{"paper_id":6625,"author_seq":232,"given_name":6635,"surname":2619,"affiliation":63,"orcid":63},"Ezra",{"paper_id":6625,"author_seq":218,"given_name":6637,"surname":6638,"affiliation":63,"orcid":63},"Ringo","Wathelet","The set of features used by any predictive model is  of pivotal importance to its performance. In this paper we show the  utility and quantify the effect of adding features consisting of  arrangements of words and tags (selected by an expert grammarian) in the  local context of a trigram tagger. We look in detail at the effect, on  tagging with a large syntactic and semantic tagset, of adding these  features. 
We show that the addition of a set of such features improves  the error rate of a trigram tagger by approximately 11%.",{"paper_id":6641,"title":6642,"year":213,"month":855,"day":63,"doi":6643,"resource_url":6644,"first_page":63,"last_page":63,"pdf_url":6645,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6646,"paper_type":860,"authors":6647,"abstract":6656},"lrec2002-main-350","Multidocument Summarization with GISTexter","10.63317\u002F3a6uoxmft49m","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-350","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F350.pdf","harabagiu-etal-2002-multidocument",[6648,6651,6654],{"paper_id":6641,"author_seq":247,"given_name":6649,"surname":6650,"affiliation":63,"orcid":63},"Sanda","Harabagiu",{"paper_id":6641,"author_seq":232,"given_name":6652,"surname":6653,"affiliation":63,"orcid":63},"Finley","Lacatusu",{"paper_id":6641,"author_seq":218,"given_name":3674,"surname":6655,"affiliation":63,"orcid":63},"Morarescu","This paper presents the architecture and the  multidocument summarization techniques implemented in the GISTEXTER  system. The paper presents an algorithm for producing incremental  multi-document summaries if extraction templates of good quality are  available. An empirical method of generating ad-hoc templates that can  be populated with  information extracted from texts by  automatically acquired extraction patterns is also presented. 
The  results of GISTEXTER in the DUC-2001 evaluations account for the  advantages of using the techniques presented in this paper.",{"paper_id":6658,"title":6659,"year":213,"month":855,"day":63,"doi":6660,"resource_url":6661,"first_page":63,"last_page":63,"pdf_url":6662,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6663,"paper_type":860,"authors":6664,"abstract":6672},"lrec2002-main-351","A Domain Adaptive Approach to Automatic Acquisition of Domain Relevant Terms and their Relations with Bootstrapping","10.63317\u002F28avqbskk5yj","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-351","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F351.pdf","xu-etal-2002-domain",[6665,6666,6668,6669],{"paper_id":6658,"author_seq":247,"given_name":1169,"surname":1170,"affiliation":63,"orcid":63},{"paper_id":6658,"author_seq":232,"given_name":1330,"surname":6667,"affiliation":63,"orcid":63},"Kurz",{"paper_id":6658,"author_seq":218,"given_name":1160,"surname":1161,"affiliation":63,"orcid":63},{"paper_id":6658,"author_seq":203,"given_name":6670,"surname":6671,"affiliation":63,"orcid":63},"Sven","Schmeier","In this paper, we present an unsupervised hybrid  text-mining approach to automatic acquisition of domain relevant terms  and their relations. We deploy the TFIDF-based term classification  method to acquire domain relevant single-word terms. Further, we apply  two strategies in order to learn lexico-syntactic patterns which indicate  paradigmatic and domain relevant syntagmatic relations between the  extracted terms. The first one uses an existing ontology as initial  knowledge for learning lexico-syntactic patterns, while the second is  based on different collocation acquisition methods to deal with the  free-word order languages like German. This domain-adaptive method  yields good results even when trained on relatively small training  corpora. 
It can be applied to different real-world applications, which  need domain-relevant ontology, for example, information extraction,  information retrieval or text classification.",{"paper_id":6674,"title":6675,"year":213,"month":855,"day":63,"doi":6676,"resource_url":6677,"first_page":63,"last_page":63,"pdf_url":6678,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6679,"paper_type":860,"authors":6680,"abstract":63},"lrec2002-main-352","User-State Labeling Procedures For The Multimodal Data Collection Of SmartKom","10.63317\u002F4geskmy32r3o","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-352","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F352.pdf","steininger-etal-2002-user",[6681,6682,6683],{"paper_id":6674,"author_seq":247,"given_name":1657,"surname":1658,"affiliation":63,"orcid":63},{"paper_id":6674,"author_seq":232,"given_name":1198,"surname":1199,"affiliation":63,"orcid":63},{"paper_id":6674,"author_seq":218,"given_name":3891,"surname":6684,"affiliation":63,"orcid":63},"Glesner",{"paper_id":6686,"title":6687,"year":213,"month":855,"day":63,"doi":6688,"resource_url":6689,"first_page":63,"last_page":63,"pdf_url":6690,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6691,"paper_type":860,"authors":6692,"abstract":6499},"lrec2002-main-353","Creating Domain-specific Information 
Servers","10.63317\u002F549vksj92q5p","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-353","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F353.pdf","pustejovsky-2002-creating",[6693],{"paper_id":6686,"author_seq":247,"given_name":3861,"surname":6694,"affiliation":63,"orcid":63},"Pustejovsky",{"paper_id":6696,"title":6697,"year":213,"month":855,"day":63,"doi":6698,"resource_url":6699,"first_page":63,"last_page":63,"pdf_url":6700,"poster_url":63,"slide_url":63,"video_url":63,"supplementary_url":63,"bibkey":6701,"paper_type":860,"authors":6702,"abstract":6708},"lrec2002-main-354","UNL Lexical Selection with Conceptual Vectors","10.63317\u002F3tg7ge7oagsz","https:\u002F\u002Flrec.elra.info\u002Flrec2002-main-354","http:\u002F\u002Fwww.lrec-conf.org\u002Fproceedings\u002Flrec2002\u002Fpdf\u002F354.pdf","lafourcade-boitet-2002-unl",[6703,6706],{"paper_id":6696,"author_seq":247,"given_name":6704,"surname":6705,"affiliation":63,"orcid":63},"Mathieu","Lafourcade",{"paper_id":6696,"author_seq":232,"given_name":2419,"surname":6707,"affiliation":63,"orcid":63},"Boitet","When deconverting a UNL graph into some natural  language LG, we often encounter lexical items (called UWs) made of an  English headword and formalized semantic restrictions, such as  \"look for (icl>do, agt>person)\", which are not yet  connected to lemmas, so that it is necessary to find a  \"nearest\" UW in the UNL-LG dictionary, such as \"look for  (icl>action, agt>human, obj>thing)\". Then, this UW may be  connected to several lemmas of LG. In order to solve these problems of  incompleteness and polysemy, we are applying a method based on the  computation of \"conceptual vectors\", previously used  successfully in the context of thematic indexing of French and English  documents."]