{"id":"https:\/\/openalex.org\/W4364385701","doi":"https:\/\/doi.org\/10.48550\/arxiv.2304.04487","title":"Inference with Reference: Lossless Acceleration of Large Language Models","display_name":"Inference with Reference: Lossless Acceleration of Large Language Models","publication_year":2023,"publication_date":"2023-04-10","ids":{"openalex":"https:\/\/openalex.org\/W4364385701","doi":"https:\/\/doi.org\/10.48550\/arxiv.2304.04487"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2304.04487","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2304.04487","pdf_url":"https:\/\/arxiv.org\/pdf\/2304.04487","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/2304.04487","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5100680993","display_name":"Nan Yang","orcid":"https:\/\/orcid.org\/0000-0002-9373-5289"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Yang, Nan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100826716","display_name":"Tao Ge","orcid":"https:\/\/orcid.org\/0000-0002-6630-634X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ge, Tao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100456616","display_name":"Liang Wang","orcid":"https:\/\/orcid.org\/0009-0008-6088-8589"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Liang","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5045811313","display_name":"Binxing Jiao","orcid":"https:\/\/orcid.org\/0000-0003-4710-0095"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiao, Binxing","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5060116992","display_name":"Daxin Jiang","orcid":"https:\/\/orcid.org\/0000-0002-6657-5806"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jiang, Daxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100334272","display_name":"Linjun Yang","orcid":"https:\/\/orcid.org\/0000-0002-6208-0582"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Yang, Linjun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5089546303","display_name":"Rangan Majumder","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Majumder, Rangan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5014662947","display_name":"Furu Wei","orcid":"https:\/\/orcid.org\/0000-0002-7810-5852"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wei, Furu","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":8,"corresponding_author_ids":["https:\/\/openalex.org\/A5100680993"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":14,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.9984999895095825,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.9984999895095825,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10181","display_name":"Natural Language Processing Techniques","score":0.9973000288009644,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10201","display_name":"Speech Recognition and Synthesis","score":0.9634000062942505,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/decoding-methods","display_name":"Decoding methods","score":0.8802762031555176},{"id":"https:\/\/openalex.org\/keywords\/computer-science","display_name":"Computer science","score":0.7937394380569458},{"id":"https:\/\/openalex.org\/keywords\/context","display_name":"Context (archaeology)","score":0.647318959236145},{"id":"https:\/\/openalex.org\/keywords\/lossless-compression","display_name":"Lossless compression","score":0.6305858492851257},{"id":"https:\/\/openalex.org\/keywords\/inference","display_name":"Inference","score":0.5703649520874023},{"id":"https:\/\/openalex.org\/keywords\/parallelism","display_name":"Parallelism (grammar)","score":0.4519578516483307},{"id":"https:\/\/openalex.org\/keywords\/span","display_name":"Span (engineering)","score":0.42234525084495544},{"id":"https:\/\/openalex.org\/keywords\/algorithm","display_name":"Algorithm","score":0.3299253582954407},{"id":"https:\/\/openalex.org\/keywords\/natural-language-processing","display_name":"Natural language processing","score":0.3204399347305298},{"id":"https:\/\/openalex.org\/keywords\/artificial-intelligence","display_name":"Artificial intelligence","score":0.24455007910728455},{"id":"https:\/\/openalex.org\/keywords\/parallel-computing","display_name":"Parallel computing","score":0.18587619066238403},{"id":"https:\/\/openalex.org\/keywords\/data-compression","display_name":"Data compression","score":0.1678503155708313}],"concepts":[{"id":"https:\/\/openalex.org\/C57273362","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q576722","display_name":"Decoding methods","level":2,"score":0.8802762031555176},{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.7937394380569458},{"id":"https:\/\/openalex.org\/C2779343474","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3109175","display_name":"Context (archaeology)","level":2,"score":0.647318959236145},{"id":"https:\/\/openalex.org\/C81081738","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q55542","display_name":"Lossless compression","level":3,"score":0.6305858492851257},{"id":"https:\/\/openalex.org\/C2776214188","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q408386","display_name":"Inference","level":2,"score":0.5703649520874023},{"id":"https:\/\/openalex.org\/C2781172179","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q853109","display_name":"Parallelism (grammar)","level":2,"score":0.4519578516483307},{"id":"https:\/\/openalex.org\/C2778753569","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1960395","display_name":"Span (engineering)","level":2,"score":0.42234525084495544},{"id":"https:\/\/openalex.org\/C11413529","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q8366","display_name":"Algorithm","level":1,"score":0.3299253582954407},{"id":"https:\/\/openalex.org\/C204321447","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q30642","display_name":"Natural language processing","level":1,"score":0.3204399347305298},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.24455007910728455},{"id":"https:\/\/openalex.org\/C173608175","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q232661","display_name":"Parallel computing","level":1,"score":0.18587619066238403},{"id":"https:\/\/openalex.org\/C78548338","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2493","display_name":"Data compression","level":2,"score":0.1678503155708313},{"id":"https:\/\/openalex.org\/C86803240","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q420","display_name":"Biology","level":0,"score":0},{"id":"https:\/\/openalex.org\/C147176958","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q77590","display_name":"Civil engineering","level":1,"score":0},{"id":"https:\/\/openalex.org\/C127413603","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11023","display_name":"Engineering","level":0,"score":0},{"id":"https:\/\/openalex.org\/C151730666","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q7205","display_name":"Paleontology","level":1,"score":0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2304.04487","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2304.04487","pdf_url":"https:\/\/arxiv.org\/pdf\/2304.04487","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550\/arxiv.2304.04487","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.2304.04487","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2304.04487","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2304.04487","pdf_url":"https:\/\/arxiv.org\/pdf\/2304.04487","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"id":"https:\/\/metadata.un.org\/sdg\/4","display_name":"Quality Education","score":0.5099999904632568}],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https:\/\/content.openalex.org\/works\/W4364385701.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https:\/\/openalex.org\/W3106969033","https:\/\/openalex.org\/W2186939576","https:\/\/openalex.org\/W2357988910","https:\/\/openalex.org\/W2948148442","https:\/\/openalex.org\/W2377158164","https:\/\/openalex.org\/W2187600494","https:\/\/openalex.org\/W2114030128","https:\/\/openalex.org\/W2135707701","https:\/\/openalex.org\/W2095678457","https:\/\/openalex.org\/W2352493357"],"abstract_inverted_index":{"We":[0],"propose":[1],"LLMA,":[2],"an":[3,35],"LLM":[4,36],"accelerator":[5],"to":[6,64,90],"losslessly":[7],"speed":[8],"up":[9],"Large":[10],"Language":[11],"Model":[12],"(LLM)":[13],"inference":[14],"with":[15,97],"references.":[16],"LLMA":[17,51,89],"is":[18,41],"motivated":[19],"by":[20,34],"the":[21,31,38,58,65,71,75],"observation":[22],"that":[23,40],"there":[24],"are":[25],"abundant":[26],"identical":[27,98],"text":[28,55],"spans":[29],"between":[30,112],"decoding":[32,76,82,103],"result":[33,77],"and":[37,60,67,115,121],"reference":[39,59,114],"available":[42],"in":[43,78,104],"many":[44,105],"real":[45],"world":[46],"scenarios":[47,108],"(e.g.,":[48,118],"retrieved":[49],"documents).":[50],"first":[52],"selects":[53],"a":[54],"span":[56],"from":[57],"copies":[61],"its":[62],"tokens":[63],"decoder":[66],"then":[68],"efficiently":[69],"checks":[70],"tokens'":[72],"appropriateness":[73],"as":[74,101],"parallel":[79],"within":[80],"one":[81],"step.":[83],"The":[84],"improved":[85],"computational":[86],"parallelism":[87],"allows":[88],"achieve":[91],"over":[92],"2x":[93],"speed-up":[94],"for":[95],"LLMs":[96],"generation":[99,107],"results":[100],"greedy":[102],"practical":[106],"where":[109],"significant":[110],"overlap":[111],"in-context":[113],"outputs":[116],"exists":[117],"search":[119],"engines":[120],"multi-turn":[122],"conversations).":[123]},"counts_by_year":[{"year":2025,"cited_by_count":4},{"year":2024,"cited_by_count":3},{"year":2023,"cited_by_count":7}],"updated_date":"2026-03-14T08:43:22.919905","created_date":"2025-10-10T00:00:00"}