{"id":"https:\/\/openalex.org\/W4416937930","doi":"https:\/\/doi.org\/10.48550\/arxiv.2503.19482","title":"KSHSeek: Data-Driven Approaches to Mitigating and Detecting Knowledge-Shortcut Hallucinations in Generative Models","display_name":"KSHSeek: Data-Driven Approaches to Mitigating and Detecting Knowledge-Shortcut Hallucinations in Generative Models","publication_year":2025,"publication_date":"2025-03-25","ids":{"openalex":"https:\/\/openalex.org\/W4416937930","doi":"https:\/\/doi.org\/10.48550\/arxiv.2503.19482"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2503.19482","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2503.19482","pdf_url":"https:\/\/arxiv.org\/pdf\/2503.19482","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/2503.19482","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5019147450","display_name":"Zhongxin Liu","orcid":"https:\/\/orcid.org\/0000-0002-3565-4800"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Liu, Zhongxin","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100449969","display_name":"Zhiwei Wang","orcid":"https:\/\/orcid.org\/0000-0002-9402-4729"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhiwei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5101705598","display_name":"Jun Niu","orcid":"https:\/\/orcid.org\/0000-0001-8332-0269"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Niu, Jun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100414164","display_name":"Ying Li","orcid":"https:\/\/orcid.org\/0000-0002-1800-0901"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Li, Ying","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5015774114","display_name":"Hongyu Sun","orcid":"https:\/\/orcid.org\/0000-0002-9182-4827"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Sun, Hongyu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5074806890","display_name":"Meng Xu","orcid":"https:\/\/orcid.org\/0000-0002-2218-6737"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Xu, Meng","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100351674","display_name":"He Wang","orcid":"https:\/\/orcid.org\/0000-0002-5269-0029"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, He","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5005525514","display_name":"Gaofei Wu","orcid":"https:\/\/orcid.org\/0000-0002-7843-6520"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wu, Gaofei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5100401900","display_name":"Yuqing Zhang","orcid":"https:\/\/orcid.org\/0000-0003-0451-4597"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Yuqing","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":9,"corresponding_author_ids":["https:\/\/openalex.org\/A5019147450"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.6351000070571899,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.6351000070571899,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T11273","display_name":"Advanced Graph Neural Networks","score":0.04989999905228615,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T11714","display_name":"Multimodal Machine Learning Applications","score":0.03350000083446503,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/spurious-relationship","display_name":"Spurious relationship","score":0.7128999829292297},{"id":"https:\/\/openalex.org\/keywords\/generative-grammar","display_name":"Generative grammar","score":0.652899980545044},{"id":"https:\/\/openalex.org\/keywords\/robustness","display_name":"Robustness (evolution)","score":0.6247000098228455},{"id":"https:\/\/openalex.org\/keywords\/generative-model","display_name":"Generative model","score":0.5598000288009644},{"id":"https:\/\/openalex.org\/keywords\/perspective","display_name":"Perspective (graphical)","score":0.4918999969959259},{"id":"https:\/\/openalex.org\/keywords\/preprocessor","display_name":"Preprocessor","score":0.4781000018119812},{"id":"https:\/\/openalex.org\/keywords\/reliability","display_name":"Reliability (semiconductor)","score":0.4424000084400177},{"id":"https:\/\/openalex.org\/keywords\/language-model","display_name":"Language model","score":0.4406000077724457},{"id":"https:\/\/openalex.org\/keywords\/natural-language-understanding","display_name":"Natural language understanding","score":0.39399999380111694}],"concepts":[{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.7181000113487244},{"id":"https:\/\/openalex.org\/C97256817","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1462316","display_name":"Spurious relationship","level":2,"score":0.7128999829292297},{"id":"https:\/\/openalex.org\/C39890363","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q36108","display_name":"Generative grammar","level":2,"score":0.652899980545044},{"id":"https:\/\/openalex.org\/C63479239","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.6247000098228455},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5910000205039978},{"id":"https:\/\/openalex.org\/C119857082","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2539","display_name":"Machine learning","level":1,"score":0.5598000288009644},{"id":"https:\/\/openalex.org\/C167966045","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q5532625","display_name":"Generative model","level":3,"score":0.5598000288009644},{"id":"https:\/\/openalex.org\/C12713177","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1900281","display_name":"Perspective (graphical)","level":2,"score":0.4918999969959259},{"id":"https:\/\/openalex.org\/C34736171","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q918333","display_name":"Preprocessor","level":2,"score":0.4781000018119812},{"id":"https:\/\/openalex.org\/C43214815","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q7310987","display_name":"Reliability (semiconductor)","level":3,"score":0.4424000084400177},{"id":"https:\/\/openalex.org\/C137293760","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3621696","display_name":"Language model","level":2,"score":0.4406000077724457},{"id":"https:\/\/openalex.org\/C2779439875","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1078276","display_name":"Natural language understanding","level":3,"score":0.39399999380111694},{"id":"https:\/\/openalex.org\/C195324797","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q33742","display_name":"Natural language","level":2,"score":0.3783999979496002},{"id":"https:\/\/openalex.org\/C204321447","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q30642","display_name":"Natural language processing","level":1,"score":0.375900000333786},{"id":"https:\/\/openalex.org\/C103278499","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q254465","display_name":"Similarity (geometry)","level":3,"score":0.3652999997138977},{"id":"https:\/\/openalex.org\/C108010975","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q500094","display_name":"Pruning","level":2,"score":0.3124000132083893},{"id":"https:\/\/openalex.org\/C2988773926","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q25104379","display_name":"Generative adversarial network","level":3,"score":0.29750001430511475},{"id":"https:\/\/openalex.org\/C116834253","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2039217","display_name":"Identification (biology)","level":2,"score":0.2759999930858612},{"id":"https:\/\/openalex.org\/C159423971","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q177251","display_name":"Associative property","level":2,"score":0.2736999988555908},{"id":"https:\/\/openalex.org\/C160920958","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q7662746","display_name":"Synthetic data","level":2,"score":0.2700999975204468},{"id":"https:\/\/openalex.org\/C26517878","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q228039","display_name":"Key (lock)","level":2,"score":0.26910001039505005},{"id":"https:\/\/openalex.org\/C15744967","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q9418","display_name":"Psychology","level":0,"score":0.2687999904155731},{"id":"https:\/\/openalex.org\/C67186912","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q367664","display_name":"Data modeling","level":2,"score":0.26269999146461487},{"id":"https:\/\/openalex.org\/C184337299","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1437428","display_name":"Semantics (computer science)","level":2,"score":0.26159998774528503},{"id":"https:\/\/openalex.org\/C2776608160","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q4785462","display_name":"Natural (archaeology)","level":2,"score":0.2581000030040741},{"id":"https:\/\/openalex.org\/C129792486","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1050419","display_name":"Language identification","level":3,"score":0.2540999948978424}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2503.19482","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2503.19482","pdf_url":"https:\/\/arxiv.org\/pdf\/2503.19482","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550\/arxiv.2503.19482","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.2503.19482","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2503.19482","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2503.19482","pdf_url":"https:\/\/arxiv.org\/pdf\/2503.19482","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"The":[0],"emergence":[1],"of":[2,12,49,55,113],"large":[3],"language":[4,14,34],"models":[5],"(LLMs)":[6],"has":[7],"significantly":[8],"advanced":[9],"the":[10,47,53,88,97,111],"development":[11],"natural":[13,33],"processing":[15],"(NLP),":[16],"especially":[17],"in":[18,32,73,96,128,136,150,158],"text":[19],"generation":[20,35],"tasks":[21,37],"like":[22],"question":[23,137],"answering.":[24,138],"However,":[25],"model":[26,134],"hallucinations":[27,51,59,70,108],"remain":[28],"a":[29,82,102,142],"major":[30],"challenge":[31],"(NLG)":[36],"due":[38],"to":[39,92,109],"their":[40,154],"complex":[41],"causes.":[42],"We":[43],"systematically":[44],"expand":[45],"on":[46],"causes":[48],"factual":[50],"from":[52,61],"perspective":[54],"knowledge":[56],"shortcuts,":[57],"analyzing":[58],"arising":[60],"correct":[62],"and":[63,66,156],"defect-free":[64],"data":[65,89],"demonstrating":[67],"that":[68,120],"knowledge-shortcut":[69,107,125],"are":[71],"prevalent":[72],"generative":[74,151],"models.":[75],"To":[76],"mitigate":[77],"this":[78],"issue,":[79],"we":[80,100],"propose":[81],"high":[83],"similarity":[84],"pruning":[85],"algorithm":[86],"at":[87],"preprocessing":[90],"level":[91],"reduce":[93],"spurious":[94],"correlations":[95],"data.":[98],"Additionally,":[99],"design":[101],"specific":[103,147],"detection":[104],"method":[105],"for":[106,145],"evaluate":[110],"effectiveness":[112],"our":[114,121],"mitigation":[115],"strategy.":[116],"Experimental":[117],"results":[118],"show":[119],"approach":[122],"effectively":[123],"reduces":[124],"hallucinations,":[126],"particularly":[127],"fine-tuning":[129],"tasks,":[130],"without":[131],"negatively":[132],"impacting":[133],"performance":[135],"This":[139],"work":[140],"introduces":[141],"new":[143],"paradigm":[144],"mitigating":[146],"hallucination":[148],"issues":[149],"models,":[152],"enhancing":[153],"robustness":[155],"reliability":[157],"real-world":[159],"applications.":[160]},"counts_by_year":[],"updated_date":"2026-04-20T07:46:08.049788","created_date":"2025-10-10T00:00:00"}