{"dcterms:modified":"2024-01-18","dcterms:creator":"heiDATA","@type":"ore:ResourceMap","schema:additionalType":"Dataverse OREMap Format v1.0.0","dvcore:generatedBy":{"@type":"schema:SoftwareApplication","schema:name":"Dataverse","schema:version":"6.1 build 1590-f5d1299","schema:url":"https://github.com/iqss/dataverse"},"@id":"https://heidata.uni-heidelberg.de/api/datasets/export?exporter=OAI_ORE&persistentId=https://doi.org/10.11588/data/10001","ore:describes":{"citation:producer":[{"citation:producerName":"Jehl, Laura","citation:producerAffiliation":"Department of Computational Linguistics"},{"citation:producerName":"Sokolov, Artem","citation:producerAffiliation":"Department of Computational Linguistics"},{"citation:producerName":"Ruppert, Eugen","citation:producerAffiliation":"Department of Computational Linguistics"}],"author":[{"citation:authorName":"Sokolov, Artem","citation:authorAffiliation":"Department of Computational Linguistics"},{"citation:authorName":"Jehl, Laura","citation:authorAffiliation":"Department of Computational Linguistics"},{"citation:authorName":"Hieber, Felix","citation:authorAffiliation":"Department of Computational Linguistics"},{"citation:authorName":"Ruppert, Eugen","citation:authorAffiliation":"Department of Computational Linguistics"},{"citation:authorName":"Riezler, Stefan","citation:authorAffiliation":"Department of Computational Linguistics"}],"publication":{"publicationCitation":"Artem Sokolov, Laura Jehl, Felix Hieber, Stefan Riezler. \"Boosting Cross-Language Retrieval by Learning Bilingual Phrase Associations from Relevance Rankings\". In Proceedings of the Conference on Empirical Methods in Natural Language Processing (EMNLP), Seattle, USA, 2013","publicationURL":"http://aclweb.org/anthology//D/D13/D13-1175.pdf"},"citation:datasetContact":{"citation:datasetContactName":"Prof. Dr. 
Stefan Riezler","citation:datasetContactAffiliation":"Department of Computational Linguistics","citation:datasetContactEmail":"riezler@cl.uni-heidelberg.de"},"citation:dsDescription":{"citation:dsDescriptionValue":"BoostCLIR is a bilingual (Japanese-English) corpus of patent abstracts, extracted from the MAREC patent data, and the data from the NTCIR PatentMT workshop collections, accompanied by relevance judgements for the task of patent prior-art search.
Important: The English side of the corpus contains patent IDs as well as the text of the abstracts. The Japanese side only contains patent IDs because of NTCIR copyright restrictions. The Japanese patent abstracts can be extracted from full text Japanese patent documents, which are available from the organizers of the NTCIR workshop.\r\n
The corpus contains training, development and testing subsets sampled from non-intersecting time periods.
Relevance judgements for patent retrieval are constructed from patent citations by assigning three integer levels to three categories of relationships, with highest relevance (3) for family patents, lower relevance for patents cited in search reports by patent examiners (2), and lowest relevance level (1) for applicants’ citations.
For a detailed description of the corpus construction process, please see the above publication."},"title":"BoostCLIR: JP-EN Relevance Marked Patent Corpus","dateOfDeposit":"2014-05-21","citation:productionPlace":"Heidelberg, Germany","citation:productionDate":"2013","subject":"Computer and Information Science","citation:relatedMaterial":"