{"id":"https:\/\/openalex.org\/W4378509386","doi":"https:\/\/doi.org\/10.48550\/arxiv.2305.14292","title":"WikiChat: Stopping the Hallucination of Large Language Model Chatbots by Few-Shot Grounding on Wikipedia","display_name":"WikiChat: Stopping the Hallucination of Large Language Model Chatbots by Few-Shot Grounding on Wikipedia","publication_year":2023,"publication_date":"2023-05-23","ids":{"openalex":"https:\/\/openalex.org\/W4378509386","doi":"https:\/\/doi.org\/10.48550\/arxiv.2305.14292"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2305.14292","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2305.14292","pdf_url":"https:\/\/arxiv.org\/pdf\/2305.14292","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/2305.14292","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5031309131","display_name":"Sina J. Semnani","orcid":null},"institutions":[{"id":"https:\/\/openalex.org\/I97018004","display_name":"Stanford University","ror":"https:\/\/ror.org\/00f54p054","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I97018004"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Semnani, Sina J.","raw_affiliation_strings":["Computer Science Department Stanford University Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department Stanford University Stanford, CA","institution_ids":["https:\/\/openalex.org\/I97018004"]}]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5092110854","display_name":"Violet Z. Yao","orcid":"https:\/\/orcid.org\/0009-0008-0237-9356"},"institutions":[{"id":"https:\/\/openalex.org\/I97018004","display_name":"Stanford University","ror":"https:\/\/ror.org\/00f54p054","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Yao, Violet Z.","raw_affiliation_strings":["Computer Science Department Stanford University Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department Stanford University Stanford, CA","institution_ids":["https:\/\/openalex.org\/I97018004"]}]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5107242554","display_name":"Heidi C. Zhang","orcid":null},"institutions":[{"id":"https:\/\/openalex.org\/I97018004","display_name":"Stanford University","ror":"https:\/\/ror.org\/00f54p054","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Zhang, Heidi C.","raw_affiliation_strings":["Computer Science Department Stanford University Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department Stanford University Stanford, CA","institution_ids":["https:\/\/openalex.org\/I97018004"]}]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5078375725","display_name":"Monica S. Lam","orcid":"https:\/\/orcid.org\/0000-0002-7626-6468"},"institutions":[{"id":"https:\/\/openalex.org\/I97018004","display_name":"Stanford University","ror":"https:\/\/ror.org\/00f54p054","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I97018004"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Lam, Monica S.","raw_affiliation_strings":["Computer Science Department Stanford University Stanford, CA"],"affiliations":[{"raw_affiliation_string":"Computer Science Department Stanford University Stanford, CA","institution_ids":["https:\/\/openalex.org\/I97018004"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https:\/\/openalex.org\/A5031309131"],"corresponding_institution_ids":["https:\/\/openalex.org\/I97018004"],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":9,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.9993000030517578,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T11147","display_name":"Misinformation and Its Impacts","score":0.9886999726295471,"subfield":{"id":"https:\/\/openalex.org\/subfields\/3312","display_name":"Sociology and Political Science"},"field":{"id":"https:\/\/openalex.org\/fields\/33","display_name":"Social Sciences"},"domain":{"id":"https:\/\/openalex.org\/domains\/2","display_name":"Social Sciences"}},{"id":"https:\/\/openalex.org\/T12128","display_name":"AI in Service Interactions","score":0.9761999845504761,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/computer-science","display_name":"Computer science","score":0.702552318572998},{"id":"https:\/\/openalex.org\/keywords\/chatbot","display_name":"Chatbot","score":0.6642277240753174},{"id":"https:\/\/openalex.org\/keywords\/latency","display_name":"Latency (audio)","score":0.6392384767532349},{"id":"https:\/\/openalex.org\/keywords\/information-retrieval","display_name":"Information retrieval","score":0.48437461256980896},{"id":"https:\/\/openalex.org\/keywords\/natural-language-processing","display_name":"Natural language processing","score":0.45979732275009155},{"id":"https:\/\/openalex.org\/keywords\/ground-truth","display_name":"Ground truth","score":0.445133775472641},{"id":"https:\/\/openalex.org\/keywords\/artificial-intelligence","display_name":"Artificial intelligence","score":0.43217384815216064}],"concepts":[{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.702552318572998},{"id":"https:\/\/openalex.org\/C2779041454","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q870780","display_name":"Chatbot","level":2,"score":0.6642277240753174},{"id":"https:\/\/openalex.org\/C82876162","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q17096504","display_name":"Latency (audio)","level":2,"score":0.6392384767532349},{"id":"https:\/\/openalex.org\/C23123220","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q816826","display_name":"Information retrieval","level":1,"score":0.48437461256980896},{"id":"https:\/\/openalex.org\/C204321447","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q30642","display_name":"Natural language processing","level":1,"score":0.45979732275009155},{"id":"https:\/\/openalex.org\/C146849305","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q370766","display_name":"Ground truth","level":2,"score":0.445133775472641},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.43217384815216064},{"id":"https:\/\/openalex.org\/C76155785","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q418","display_name":"Telecommunications","level":1,"score":0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2305.14292","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2305.14292","pdf_url":"https:\/\/arxiv.org\/pdf\/2305.14292","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550\/arxiv.2305.14292","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.2305.14292","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article-journal"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2305.14292","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2305.14292","pdf_url":"https:\/\/arxiv.org\/pdf\/2305.14292","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.7900000214576721,"display_name":"Quality Education","id":"https:\/\/metadata.un.org\/sdg\/4"}],"awards":[],"funders":[{"id":"https:\/\/openalex.org\/F4320306076","display_name":"National Science Foundation","ror":"https:\/\/ror.org\/021nxhr62"},{"id":"https:\/\/openalex.org\/F4320306151","display_name":"Alfred P. Sloan Foundation","ror":"https:\/\/ror.org\/052csg198"}],"has_content":{"grobid_xml":false,"pdf":true},"content_urls":{"pdf":"https:\/\/content.openalex.org\/works\/W4378509386.pdf"},"referenced_works_count":43,"referenced_works":["https:\/\/openalex.org\/W2101105183","https:\/\/openalex.org\/W2898875342","https:\/\/openalex.org\/W2938704169","https:\/\/openalex.org\/W2963961878","https:\/\/openalex.org\/W3007672467","https:\/\/openalex.org\/W3027879771","https:\/\/openalex.org\/W3034828027","https:\/\/openalex.org\/W3100258764","https:\/\/openalex.org\/W3153947101","https:\/\/openalex.org\/W3186138538","https:\/\/openalex.org\/W3194769714","https:\/\/openalex.org\/W3207604732","https:\/\/openalex.org\/W4206118214","https:\/\/openalex.org\/W4221143046","https:\/\/openalex.org\/W4226069413","https:\/\/openalex.org\/W4226112939","https:\/\/openalex.org\/W4229005866","https:\/\/openalex.org\/W4281657280","https:\/\/openalex.org\/W4281758439","https:\/\/openalex.org\/W4282980384","https:\/\/openalex.org\/W4288113479","https:\/\/openalex.org\/W4292779060","https:\/\/openalex.org\/W4296711106","https:\/\/openalex.org\/W4301243929","https:\/\/openalex.org\/W4319793302","https:\/\/openalex.org\/W4322718191","https:\/\/openalex.org\/W4322760121","https:\/\/openalex.org\/W4361807267","https:\/\/openalex.org\/W4362508231","https:\/\/openalex.org\/W4385570481","https:\/\/openalex.org\/W4385571232","https:\/\/openalex.org\/W4385571271","https:\/\/openalex.org\/W4385571680","https:\/\/openalex.org\/W4385572601","https:\/\/openalex.org\/W4385572752","https:\/\/openalex.org\/W4385572901","https:\/\/openalex.org\/W4385574338","https:\/\/openalex.org\/W4389010541","https:\/\/openalex.org\/W4389518954","https:\/\/openalex.org\/W4389519118","https:\/\/openalex.org\/W4389519254","https:\/\/openalex.org\/W4389519598","https:\/\/openalex.org\/W4389520806"],"related_works":["https:\/\/openalex.org\/W4383501580","https:\/\/openalex.org\/W4214931137","https:\/\/openalex.org\/W4387007686","https:\/\/openalex.org\/W4313813117","https:\/\/openalex.org\/W4382052417","https:\/\/openalex.org\/W3192088754","https:\/\/openalex.org\/W3084631705","https:\/\/openalex.org\/W3176146353","https:\/\/openalex.org\/W4293646425","https:\/\/openalex.org\/W4379781060"],"abstract_inverted_index":{"This":[0],"paper":[1],"presents":[2],"the":[3,23,26,40,52],"first":[4],"few-shot":[5],"LLM-based":[6,115],"chatbot":[7],"that":[8,98],"almost":[9],"never":[10],"hallucinates":[11],"and":[12,16,43,57,82,84,87,114,117,121,126,144,173],"has":[13],"high":[14],"conversationality":[15],"low":[17],"latency.":[18],"WikiChat":[19,31,62,138,150],"is":[20,139],"grounded":[21,41],"on":[22,64,123],"English":[24],"Wikipedia,":[25],"largest":[27],"curated":[28],"free-text":[29],"corpus.":[30],"generates":[32],"a":[33,67,90],"response":[34],"from":[35,51],"an":[36,148],"LLM,":[37],"retains":[38],"only":[39],"facts,":[42],"combines":[44],"them":[45],"with":[46,71,157],"additional":[47],"information":[48],"it":[49],"retrieves":[50],"corpus":[53],"to":[54,76,130,133],"form":[55],"factual":[56,104,153],"engaging":[58],"responses.":[59],"We":[60],"distill":[61],"based":[63],"GPT-4":[65],"into":[66],"7B-parameter":[68],"LLaMA":[69],"model":[70],"minimal":[72],"loss":[73],"of":[74],"quality,":[75],"significantly":[77,110,141,169],"improve":[78],"its":[79],"latency,":[80],"cost":[81],"privacy,":[83],"facilitate":[85],"research":[86],"deployment.":[88],"Using":[89],"novel":[91],"hybrid":[92],"human-and-LLM":[93],"evaluation":[94],"methodology,":[95],"we":[96],"show":[97],"our":[99],"best":[100],"system":[101],"achieves":[102,151],"97.3%":[103],"accuracy":[105,154],"in":[106,155],"simulated":[107],"conversations.":[108],"It":[109],"outperforms":[111],"all":[112],"retrieval-based":[113,136],"baselines,":[116],"by":[118],"3.9%,":[119],"38.6%":[120],"51.0%":[122],"head,":[124],"tail":[125],"recent":[127,161],"knowledge":[128],"compared":[129],"GPT-4.":[131],"Compared":[132],"previous":[134],"state-of-the-art":[135],"chatbots,":[137],"also":[140],"more":[142,174],"informative":[143],"engaging,":[145],"just":[146],"like":[147],"LLM.":[149],"97.9%":[152],"conversations":[156],"human":[158],"users":[159],"about":[160],"topics,":[162],"55.0%":[163],"better":[164],"than":[165],"GPT-4,":[166],"while":[167],"receiving":[168],"higher":[170],"user":[171],"ratings":[172],"favorable":[175],"comments.":[176]},"counts_by_year":[{"year":2025,"cited_by_count":3},{"year":2024,"cited_by_count":4},{"year":2023,"cited_by_count":2}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}