{"id":"https:\/\/openalex.org\/W2808195542","doi":"https:\/\/doi.org\/10.21437\/interspeech.2018-1943","title":"Deep Lip Reading: A Comparison of Models and an Online Application","display_name":"Deep Lip Reading: A Comparison of Models and an Online Application","publication_year":2018,"publication_date":"2018-08-28","ids":{"openalex":"https:\/\/openalex.org\/W2808195542","doi":"https:\/\/doi.org\/10.21437\/interspeech.2018-1943","mag":"2808195542"},"language":"en","primary_location":{"id":"doi:10.21437\/interspeech.2018-1943","is_oa":false,"landing_page_url":"https:\/\/doi.org\/10.21437\/interspeech.2018-1943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},"type":"preprint","indexed_in":["arxiv","crossref","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/1806.06053","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5018690028","display_name":"Triantafyllos Afouras","orcid":"https:\/\/orcid.org\/0000-0002-3935-9681"},"institutions":[{"id":"https:\/\/openalex.org\/I40120149","display_name":"University of Oxford","ror":"https:\/\/ror.org\/052gg0110","country_code":"GB","type":"education","lineage":["https:\/\/openalex.org\/I40120149"]}],"countries":["GB"],"is_corresponding":true,"raw_author_name":"Triantafyllos Afouras","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https:\/\/openalex.org\/I40120149"]}]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5038723822","display_name":"Joon Son Chung","orcid":"https:\/\/orcid.org\/0000-0001-7741-7275"},"institutions":[{"id":"https:\/\/openalex.org\/I40120149","display_name":"University of Oxford","ror":"https:\/\/ror.org\/052gg0110","country_code":"GB","type":"education","lineage":["https:\/\/openalex.org\/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Joon Son Chung","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https:\/\/openalex.org\/I40120149"]}]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5057678172","display_name":"Andrew Zisserman","orcid":"https:\/\/orcid.org\/0000-0002-8945-8573"},"institutions":[{"id":"https:\/\/openalex.org\/I40120149","display_name":"University of Oxford","ror":"https:\/\/ror.org\/052gg0110","country_code":"GB","type":"education","lineage":["https:\/\/openalex.org\/I40120149"]}],"countries":["GB"],"is_corresponding":false,"raw_author_name":"Andrew Zisserman","raw_affiliation_strings":["University of Oxford, Oxford, United Kingdom"],"affiliations":[{"raw_affiliation_string":"University of Oxford, Oxford, United Kingdom","institution_ids":["https:\/\/openalex.org\/I40120149"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":3,"corresponding_author_ids":["https:\/\/openalex.org\/A5018690028"],"corresponding_institution_ids":["https:\/\/openalex.org\/I40120149"],"apc_list":null,"apc_paid":null,"fwci":2.3214,"has_fulltext":true,"cited_by_count":17,"citation_normalized_percentile":{"value":0.89354923,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":98},"biblio":{"volume":null,"issue":null,"first_page":"3514","last_page":"3518"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1711","display_name":"Signal Processing"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10860","display_name":"Speech and Audio Processing","score":0.9997000098228455,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1711","display_name":"Signal Processing"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T11448","display_name":"Face recognition and analysis","score":0.9609000086784363,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10326","display_name":"Indoor and Outdoor Localization Technologies","score":0.9556999802589417,"subfield":{"id":"https:\/\/openalex.org\/subfields\/2208","display_name":"Electrical and Electronic Engineering"},"field":{"id":"https:\/\/openalex.org\/fields\/22","display_name":"Engineering"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/computer-science","display_name":"Computer science","score":0.8201686143875122},{"id":"https:\/\/openalex.org\/keywords\/transformer","display_name":"Transformer","score":0.7233390212059021},{"id":"https:\/\/openalex.org\/keywords\/connectionism","display_name":"Connectionism","score":0.7104721665382385},{"id":"https:\/\/openalex.org\/keywords\/speech-recognition","display_name":"Speech recognition","score":0.6575788259506226},{"id":"https:\/\/openalex.org\/keywords\/language-model","display_name":"Language model","score":0.598694920539856},{"id":"https:\/\/openalex.org\/keywords\/artificial-intelligence","display_name":"Artificial intelligence","score":0.5331900119781494},{"id":"https:\/\/openalex.org\/keywords\/benchmark","display_name":"Benchmark (surveying)","score":0.5046015977859497},{"id":"https:\/\/openalex.org\/keywords\/decoding-methods","display_name":"Decoding methods","score":0.501328706741333},{"id":"https:\/\/openalex.org\/keywords\/recurrent-neural-network","display_name":"Recurrent neural network","score":0.48337286710739136},{"id":"https:\/\/openalex.org\/keywords\/latency","display_name":"Latency (audio)","score":0.47665077447891235},{"id":"https:\/\/openalex.org\/keywords\/word-error-rate","display_name":"Word error rate","score":0.43701502680778503},{"id":"https:\/\/openalex.org\/keywords\/natural-language-processing","display_name":"Natural language processing","score":0.41765716671943665},{"id":"https:\/\/openalex.org\/keywords\/artificial-neural-network","display_name":"Artificial neural network","score":0.11500552296638489},{"id":"https:\/\/openalex.org\/keywords\/algorithm","display_name":"Algorithm","score":0.10313895344734192}],"concepts":[{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.8201686143875122},{"id":"https:\/\/openalex.org\/C66322947","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11658","display_name":"Transformer","level":3,"score":0.7233390212059021},{"id":"https:\/\/openalex.org\/C8521452","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q203790","display_name":"Connectionism","level":3,"score":0.7104721665382385},{"id":"https:\/\/openalex.org\/C28490314","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q189436","display_name":"Speech recognition","level":1,"score":0.6575788259506226},{"id":"https:\/\/openalex.org\/C137293760","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3621696","display_name":"Language model","level":2,"score":0.598694920539856},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5331900119781494},{"id":"https:\/\/openalex.org\/C185798385","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.5046015977859497},{"id":"https:\/\/openalex.org\/C57273362","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q576722","display_name":"Decoding methods","level":2,"score":0.501328706741333},{"id":"https:\/\/openalex.org\/C147168706","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1457734","display_name":"Recurrent neural network","level":3,"score":0.48337286710739136},{"id":"https:\/\/openalex.org\/C82876162","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q17096504","display_name":"Latency (audio)","level":2,"score":0.47665077447891235},{"id":"https:\/\/openalex.org\/C40969351","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3516228","display_name":"Word error rate","level":2,"score":0.43701502680778503},{"id":"https:\/\/openalex.org\/C204321447","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q30642","display_name":"Natural language processing","level":1,"score":0.41765716671943665},{"id":"https:\/\/openalex.org\/C50644808","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q192776","display_name":"Artificial neural network","level":2,"score":0.11500552296638489},{"id":"https:\/\/openalex.org\/C11413529","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q8366","display_name":"Algorithm","level":1,"score":0.10313895344734192},{"id":"https:\/\/openalex.org\/C76155785","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q418","display_name":"Telecommunications","level":1,"score":0},{"id":"https:\/\/openalex.org\/C205649164","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1071","display_name":"Geography","level":0,"score":0},{"id":"https:\/\/openalex.org\/C62520636","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q944","display_name":"Quantum mechanics","level":1,"score":0},{"id":"https:\/\/openalex.org\/C13280743","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q131089","display_name":"Geodesy","level":1,"score":0},{"id":"https:\/\/openalex.org\/C121332964","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q413","display_name":"Physics","level":0,"score":0},{"id":"https:\/\/openalex.org\/C165801399","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q25428","display_name":"Voltage","level":2,"score":0}],"mesh":[],"locations_count":5,"locations":[{"id":"doi:10.21437\/interspeech.2018-1943","is_oa":false,"landing_page_url":"https:\/\/doi.org\/10.21437\/interspeech.2018-1943","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"Interspeech 2018","raw_type":"proceedings-article"},{"id":"pmh:oai:arXiv.org:1806.06053","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/1806.06053","pdf_url":"https:\/\/arxiv.org\/pdf\/1806.06053","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"mag:2808195542","is_oa":true,"landing_page_url":"https:\/\/www.arxiv.org\/pdf\/1806.06053","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"arXiv (Cornell University)","raw_type":null},{"id":"pmh:oai:ora.ox.ac.uk:uuid:0243e187-4559-4007-ac9a-edfc0cebf69c","is_oa":false,"landing_page_url":"https:\/\/ora.ox.ac.uk\/objects\/uuid:0243e187-4559-4007-ac9a-edfc0cebf69c","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306402636","display_name":"Oxford University Research Archive (ORA) (University of Oxford)","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I40120149","host_organization_name":"University of Oxford","host_organization_lineage":["https:\/\/openalex.org\/I40120149"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"Conference item"},{"id":"doi:10.48550\/arxiv.1806.06053","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.1806.06053","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:1806.06053","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/1806.06053","pdf_url":"https:\/\/arxiv.org\/pdf\/1806.06053","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"score":0.8700000047683716,"id":"https:\/\/metadata.un.org\/sdg\/4","display_name":"Quality Education"}],"awards":[{"id":"https:\/\/openalex.org\/G1277543710","display_name":null,"funder_award_id":"EP\/M013774\/1","funder_id":"https:\/\/openalex.org\/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https:\/\/openalex.org\/G2018133609","display_name":"Seebibyte: Visual Search for the Era of Big Data","funder_award_id":"EP\/M013774\/1","funder_id":"https:\/\/openalex.org\/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"},{"id":"https:\/\/openalex.org\/G8416736120","display_name":null,"funder_award_id":"Seebibyte EP\/M013774\/1","funder_id":"https:\/\/openalex.org\/F4320334627","funder_display_name":"Engineering and Physical Sciences Research Council"}],"funders":[{"id":"https:\/\/openalex.org\/F4320317153","display_name":"DeepMind","ror":"https:\/\/ror.org\/00971b260"},{"id":"https:\/\/openalex.org\/F4320334627","display_name":"Engineering and Physical Sciences Research Council","ror":"https:\/\/ror.org\/0439y7842"}],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https:\/\/content.openalex.org\/works\/W2808195542.pdf","grobid_xml":"https:\/\/content.openalex.org\/works\/W2808195542.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https:\/\/openalex.org\/W2964283370","https:\/\/openalex.org\/W2952746495","https:\/\/openalex.org\/W2594690981","https:\/\/openalex.org\/W2194775991","https:\/\/openalex.org\/W2127141656","https:\/\/openalex.org\/W2015143272","https:\/\/openalex.org\/W3011959832","https:\/\/openalex.org\/W2964121744","https:\/\/openalex.org\/W2963403868","https:\/\/openalex.org\/W2891205112","https:\/\/openalex.org\/W2883383043","https:\/\/openalex.org\/W2596627958","https:\/\/openalex.org\/W2578229578","https:\/\/openalex.org\/W2267805933","https:\/\/openalex.org\/W2753094133","https:\/\/openalex.org\/W2964330921","https:\/\/openalex.org\/W3045372535","https:\/\/openalex.org\/W2806872492","https:\/\/openalex.org\/W3095173472","https:\/\/openalex.org\/W2782397232"],"abstract_inverted_index":{"The":[0,46],"goal":[1],"of":[2,117],"this":[3],"paper":[4],"is":[5,70],"to":[6],"develop":[7,18],"state-of-the-art":[8,80],"models":[9,51],"for":[10,66,111],"lip":[11,115],"reading":[12,116],"--":[13],"visual":[14],"speech":[15],"recognition.":[16],"We":[17],"three":[19],"architectures":[20],"and":[21,25,39,48,60,120],"compare":[22],"their":[23],"accuracy":[24],"training":[26],"times:":[27],"(i)":[28],"a":[29,35,55,71,100],"recurrent":[30,47],"model":[31,65,77,108],"using":[32],"LSTMs;":[33],"(ii)":[34],"fully":[36,49,106],"convolutional":[37,50,107],"model;":[38],"(iii)":[40],"the":[41,68,79,85,105],"recently":[42],"proposed":[43],"transformer":[44,69],"model.":[45,73],"are":[52],"trained":[53],"with":[54,127],"Connectionist":[56],"Temporal":[57],"Classification":[58],"loss":[59],"use":[61],"an":[62],"explicit":[63],"language":[64],"decoding,":[67],"sequence-to-sequence":[72],"Our":[74],"best":[75],"performing":[76],"improves":[78],"word":[81],"error":[82],"rate":[83],"on":[84],"challenging":[86],"BBC-Oxford":[87],"Lip":[88],"Reading":[89],"Sentences":[90],"2":[91],"(LRS2)":[92],"benchmark":[93],"dataset":[94],"by":[95],"over":[96],"20":[97],"percent.":[98],"As":[99],"further":[101],"contribution":[102],"we":[103],"investigate":[104],"when":[109],"used":[110],"online":[112],"(real":[113],"time)":[114],"continuous":[118],"speech,":[119],"show":[121],"that":[122],"it":[123],"achieves":[124],"high":[125],"performance":[126],"low":[128],"latency.":[129]},"counts_by_year":[{"year":2024,"cited_by_count":2},{"year":2022,"cited_by_count":1},{"year":2021,"cited_by_count":4},{"year":2020,"cited_by_count":6},{"year":2019,"cited_by_count":2},{"year":2018,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}