{"id":"https:\/\/openalex.org\/W2122423589","doi":"https:\/\/doi.org\/10.1093\/llc\/19.4.509","title":"Corpus Linguistics and South Asian Languages: Corpus Creation and Tool Development","display_name":"Corpus Linguistics and South Asian Languages: Corpus Creation and Tool Development","publication_year":2004,"publication_date":"2004-11-01","ids":{"openalex":"https:\/\/openalex.org\/W2122423589","doi":"https:\/\/doi.org\/10.1093\/llc\/19.4.509","mag":"2122423589"},"language":"en","primary_location":{"id":"doi:10.1093\/llc\/19.4.509","is_oa":false,"landing_page_url":"https:\/\/doi.org\/10.1093\/llc\/19.4.509","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S84784070","display_name":"Literary and Linguistic Computing","issn_l":"0268-1145","issn":["0268-1145","1477-4615"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https:\/\/openalex.org\/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https:\/\/openalex.org\/P4310311648","https:\/\/openalex.org\/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Literary and Linguistic Computing","raw_type":"journal-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5110220695","display_name":"Paul Baker","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"P. Baker","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":1,"corresponding_author_ids":["https:\/\/openalex.org\/A5110220695"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":3.697,"has_fulltext":false,"cited_by_count":74,"citation_normalized_percentile":{"value":0.93311773,"is_in_top_1_percent":false,"is_in_top_10_percent":true},"cited_by_percentile_year":{"min":89,"max":100},"biblio":{"volume":"19","issue":"4","first_page":"509","last_page":"524"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10181","display_name":"Natural Language Processing Techniques","score":0.9998000264167786,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10759","display_name":"Translation Studies and Practices","score":0.9807999730110168,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1203","display_name":"Language and Linguistics"},"field":{"id":"https:\/\/openalex.org\/fields\/12","display_name":"Arts and Humanities"},"domain":{"id":"https:\/\/openalex.org\/domains\/2","display_name":"Social Sciences"}},{"id":"https:\/\/openalex.org\/T13629","display_name":"Text Readability and Simplification","score":0.9309999942779541,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/unicode","display_name":"Unicode","score":0.8821314573287964},{"id":"https:\/\/openalex.org\/keywords\/computer-science","display_name":"Computer science","score":0.7785553932189941},{"id":"https:\/\/openalex.org\/keywords\/lemmatisation","display_name":"Lemmatisation","score":0.6428880095481873},{"id":"https:\/\/openalex.org\/keywords\/hindi","display_name":"Hindi","score":0.6299315690994263},{"id":"https:\/\/openalex.org\/keywords\/natural-language-processing","display_name":"Natural language processing","score":0.567756175994873},{"id":"https:\/\/openalex.org\/keywords\/demonstrative","display_name":"Demonstrative","score":0.554269015789032},{"id":"https:\/\/openalex.org\/keywords\/corpus-linguistics","display_name":"Corpus linguistics","score":0.5331090688705444},{"id":"https:\/\/openalex.org\/keywords\/urdu","display_name":"Urdu","score":0.4887521266937256},{"id":"https:\/\/openalex.org\/keywords\/artificial-intelligence","display_name":"Artificial intelligence","score":0.47352051734924316},{"id":"https:\/\/openalex.org\/keywords\/linguistics","display_name":"Linguistics","score":0.47226300835609436},{"id":"https:\/\/openalex.org\/keywords\/text-corpus","display_name":"Text corpus","score":0.4396578371524811}],"concepts":[{"id":"https:\/\/openalex.org\/C500551929","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q8819","display_name":"Unicode","level":2,"score":0.8821314573287964},{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.7785553932189941},{"id":"https:\/\/openalex.org\/C161831844","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2554325","display_name":"Lemmatisation","level":2,"score":0.6428880095481873},{"id":"https:\/\/openalex.org\/C519982507","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1568","display_name":"Hindi","level":2,"score":0.6299315690994263},{"id":"https:\/\/openalex.org\/C204321447","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q30642","display_name":"Natural language processing","level":1,"score":0.567756175994873},{"id":"https:\/\/openalex.org\/C2780277889","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q282301","display_name":"Demonstrative","level":2,"score":0.554269015789032},{"id":"https:\/\/openalex.org\/C532629269","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q865083","display_name":"Corpus linguistics","level":2,"score":0.5331090688705444},{"id":"https:\/\/openalex.org\/C2777350258","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1617","display_name":"Urdu","level":2,"score":0.4887521266937256},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.47352051734924316},{"id":"https:\/\/openalex.org\/C41895202","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q8162","display_name":"Linguistics","level":1,"score":0.47226300835609436},{"id":"https:\/\/openalex.org\/C2474386","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q461183","display_name":"Text corpus","level":2,"score":0.4396578371524811},{"id":"https:\/\/openalex.org\/C138885662","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q5891","display_name":"Philosophy","level":0,"score":0}],"mesh":[],"locations_count":3,"locations":[{"id":"doi:10.1093\/llc\/19.4.509","is_oa":false,"landing_page_url":"https:\/\/doi.org\/10.1093\/llc\/19.4.509","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S84784070","display_name":"Literary and Linguistic Computing","issn_l":"0268-1145","issn":["0268-1145","1477-4615"],"is_oa":false,"is_in_doaj":false,"is_core":true,"host_organization":"https:\/\/openalex.org\/P4310311648","host_organization_name":"Oxford University Press","host_organization_lineage":["https:\/\/openalex.org\/P4310311648","https:\/\/openalex.org\/P4310311647"],"host_organization_lineage_names":["Oxford University Press","University of Oxford"],"type":"journal"},"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Literary and Linguistic Computing","raw_type":"journal-article"},{"id":"pmh:oai:eprints.lancs.ac.uk:1069","is_oa":false,"landing_page_url":"https:\/\/eprints.lancs.ac.uk\/id\/eprint\/1069\/","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306401916","display_name":"Lancaster EPrints (Lancaster University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I67415387","host_organization_name":"Lancaster University","host_organization_lineage":["https:\/\/openalex.org\/I67415387"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Journal Article"},{"id":"pmh:oai:pure.atira.dk:publications\/ad6dc1da-af82-4e2c-90e5-723c11eb6983","is_oa":false,"landing_page_url":"https:\/\/research.edgehill.ac.uk\/en\/publications\/ad6dc1da-af82-4e2c-90e5-723c11eb6983","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306402462","display_name":"Edge Hill University Research Information Repository (Edge Hill University)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I165525304","host_organization_name":"Edge Hill University","host_organization_lineage":["https:\/\/openalex.org\/I165525304"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"Baker, P, Hardie, A, McEnery, T, Xiao, R, Bontcheva, K, Cunningham, H, Gaizauskas, R, Hamza, O, Maynard, D, Tablan, V, Ursu, C, Jayaram, B & Leisher, M 2004, 'Corpus Linguistics and South Asian Languages: Corpus Creation and Tool Development', Literary and Linguistic Computing, vol. 19, no. 4, pp. 509-524. https:\/\/doi.org\/10.1093\/llc\/19.4.509","raw_type":"info:eu-repo\/semantics\/publishedVersion"}],"best_oa_location":null,"sustainable_development_goals":[{"score":0.6600000262260437,"display_name":"Quality Education","id":"https:\/\/metadata.un.org\/sdg\/4"}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":13,"referenced_works":["https:\/\/openalex.org\/W667520","https:\/\/openalex.org\/W37575355","https:\/\/openalex.org\/W143309896","https:\/\/openalex.org\/W1495378783","https:\/\/openalex.org\/W1528859715","https:\/\/openalex.org\/W1567304211","https:\/\/openalex.org\/W1606813020","https:\/\/openalex.org\/W1607983314","https:\/\/openalex.org\/W1971218376","https:\/\/openalex.org\/W1998347217","https:\/\/openalex.org\/W2016134015","https:\/\/openalex.org\/W2078758416","https:\/\/openalex.org\/W2101844505"],"related_works":["https:\/\/openalex.org\/W4200604753","https:\/\/openalex.org\/W4206291365","https:\/\/openalex.org\/W3175433403","https:\/\/openalex.org\/W2581717984","https:\/\/openalex.org\/W2997552349","https:\/\/openalex.org\/W2251452258","https:\/\/openalex.org\/W2525330506","https:\/\/openalex.org\/W3196684728","https:\/\/openalex.org\/W1666921087","https:\/\/openalex.org\/W4385896431"],"abstract_inverted_index":{"This":[0],"paper":[1],"describes":[2],"the":[3,8,19,30,33,89,98,146,150,168,176,179],"work":[4],"carried":[5],"out":[6],"on":[7],"EMILLE":[9,34,67,142,180],"Project":[10],"(Enabling":[11],"Minority":[12],"Language":[13],"Engineering),":[14],"which":[15,36,154],"was":[16],"undertaken":[17],"by":[18,29],"Universities":[20],"of":[21,38,41,59,63,91,106,133,139,149,161,167,178],"Lancaster":[22],"and":[23,55,61,129],"Sheffield.":[24],"The":[25,66,137],"primary":[26],"resource":[27],"developed":[28],"project":[31,99],"is":[32],"Corpus,":[35],"consists":[37],"a":[39,56,104,111,131],"series":[40],"monolingual":[42],"corpora":[43],"for":[44,116,141,170,175],"fourteen":[45],"South":[46,117],"Asian":[47,118],"languages,":[48],"totalling":[49],"more":[50],"than":[51],"96":[52],"million":[53],"words,":[54],"parallel":[57],"corpus":[58,84],"English":[60],"five":[62],"these":[64],"languages.":[65],"Corpus":[68],"also":[69],"includes":[70],"an":[71],"annotated":[72,86],"component,":[73],"namely,":[74],"part-of-speech":[75],"tagged":[76],"Urdu":[77],"data,":[78],"together":[79],"with":[80],"twenty":[81],"written":[82],"Hindi":[83],"files":[85],"to":[87,102,109,145,158],"show":[88],"nature":[90],"demonstrative":[92],"use":[93,160],"in":[94],"Hindi.":[95],"In":[96],"addition,":[97],"has":[100,143,155],"had":[101],"address":[103],"number":[105,132],"issues":[107],"related":[108],"establishing":[110],"language":[112,119,125,171],"engineering":[113],"(LE)":[114],"environment":[115],"processing,":[120],"such":[121],"as":[122],"translating":[123],"8-bit":[124],"data":[126],"into":[127],"Unicode":[128],"producing":[130],"basic":[134],"LE":[135,151],"tools.":[136],"development":[138,148],"tools":[140],"contributed":[144],"ongoing":[147],"architecture":[152],"GATE,":[153],"been":[156],"extended":[157],"make":[159],"Unicode.":[162],"GATE":[163],"thus":[164],"plugs":[165],"some":[166],"gaps":[169],"processing":[172],"R&amp;D":[173],"necessary":[174],"exploitation":[177],"corpora.":[181]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":2},{"year":2021,"cited_by_count":1},{"year":2020,"cited_by_count":1},{"year":2019,"cited_by_count":3},{"year":2018,"cited_by_count":1},{"year":2017,"cited_by_count":1},{"year":2016,"cited_by_count":1},{"year":2015,"cited_by_count":2},{"year":2014,"cited_by_count":6},{"year":2013,"cited_by_count":40},{"year":2012,"cited_by_count":2}],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}