{"id":"https:\/\/openalex.org\/W4377090033","doi":"https:\/\/doi.org\/10.48550\/arxiv.2305.09859","title":"Smaller Language Models are Better Black-box Machine-Generated Text Detectors","display_name":"Smaller Language Models are Better Black-box Machine-Generated Text Detectors","publication_year":2023,"publication_date":"2023-05-17","ids":{"openalex":"https:\/\/openalex.org\/W4377090033","doi":"https:\/\/doi.org\/10.48550\/arxiv.2305.09859"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2305.09859","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2305.09859","pdf_url":"https:\/\/arxiv.org\/pdf\/2305.09859","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/2305.09859","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":null,"display_name":"Mireshghallah, Niloofar","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Mireshghallah, Niloofar","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5017592905","display_name":"Justus Mattern","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Mattern, Justus","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5081085178","display_name":"Sicun Gao","orcid":"https:\/\/orcid.org\/0000-0003-2524-4960"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Gao, Sicun","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5084892128","display_name":"Reza Shokri","orcid":"https:\/\/orcid.org\/0000-0001-9816-0173"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Shokri, Reza","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5017455302","display_name":"Taylor Berg-Kirkpatrick","orcid":"https:\/\/orcid.org\/0000-0002-1283-4075"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Berg-Kirkpatrick, Taylor","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":[],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":5,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.995199978351593,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.995199978351593,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10181","display_name":"Natural Language Processing Techniques","score":0.95660001039505,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T12535","display_name":"Machine Learning and Data Classification","score":0.940500020980835,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/computer-science","display_name":"Computer science","score":0.7232968807220459},{"id":"https:\/\/openalex.org\/keywords\/generator","display_name":"Generator (circuit theory)","score":0.612407922744751},{"id":"https:\/\/openalex.org\/keywords\/language-model","display_name":"Language model","score":0.5960632562637329},{"id":"https:\/\/openalex.org\/keywords\/artificial-intelligence","display_name":"Artificial intelligence","score":0.5683963894844055},{"id":"https:\/\/openalex.org\/keywords\/detector","display_name":"Detector","score":0.5465863943099976},{"id":"https:\/\/openalex.org\/keywords\/generative-model","display_name":"Generative model","score":0.5223430395126343},{"id":"https:\/\/openalex.org\/keywords\/function","display_name":"Function (biology)","score":0.5022616386413574},{"id":"https:\/\/openalex.org\/keywords\/natural-language-processing","display_name":"Natural language processing","score":0.48364198207855225},{"id":"https:\/\/openalex.org\/keywords\/generative-grammar","display_name":"Generative grammar","score":0.46881675720214844},{"id":"https:\/\/openalex.org\/keywords\/machine-learning","display_name":"Machine learning","score":0.41410404443740845},{"id":"https:\/\/openalex.org\/keywords\/text-messaging","display_name":"Text messaging","score":0.411589115858078},{"id":"https:\/\/openalex.org\/keywords\/physics","display_name":"Physics","score":0.08766612410545349},{"id":"https:\/\/openalex.org\/keywords\/world-wide-web","display_name":"World Wide Web","score":0.08476349711418152}],"concepts":[{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.7232968807220459},{"id":"https:\/\/openalex.org\/C2780992000","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q17016113","display_name":"Generator (circuit theory)","level":3,"score":0.612407922744751},{"id":"https:\/\/openalex.org\/C137293760","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3621696","display_name":"Language model","level":2,"score":0.5960632562637329},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5683963894844055},{"id":"https:\/\/openalex.org\/C94915269","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1834857","display_name":"Detector","level":2,"score":0.5465863943099976},{"id":"https:\/\/openalex.org\/C167966045","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q5532625","display_name":"Generative model","level":3,"score":0.5223430395126343},{"id":"https:\/\/openalex.org\/C14036430","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3736076","display_name":"Function (biology)","level":2,"score":0.5022616386413574},{"id":"https:\/\/openalex.org\/C204321447","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q30642","display_name":"Natural language processing","level":1,"score":0.48364198207855225},{"id":"https:\/\/openalex.org\/C39890363","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q36108","display_name":"Generative grammar","level":2,"score":0.46881675720214844},{"id":"https:\/\/openalex.org\/C119857082","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2539","display_name":"Machine learning","level":1,"score":0.41410404443740845},{"id":"https:\/\/openalex.org\/C3018949938","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q17166101","display_name":"Text messaging","level":2,"score":0.411589115858078},{"id":"https:\/\/openalex.org\/C121332964","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q413","display_name":"Physics","level":0,"score":0.08766612410545349},{"id":"https:\/\/openalex.org\/C136764020","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q466","display_name":"World Wide Web","level":1,"score":0.08476349711418152},{"id":"https:\/\/openalex.org\/C163258240","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q25342","display_name":"Power (physics)","level":2,"score":0},{"id":"https:\/\/openalex.org\/C86803240","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q420","display_name":"Biology","level":0,"score":0},{"id":"https:\/\/openalex.org\/C78458016","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q840400","display_name":"Evolutionary biology","level":1,"score":0},{"id":"https:\/\/openalex.org\/C62520636","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q944","display_name":"Quantum mechanics","level":1,"score":0},{"id":"https:\/\/openalex.org\/C76155785","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q418","display_name":"Telecommunications","level":1,"score":0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2305.09859","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2305.09859","pdf_url":"https:\/\/arxiv.org\/pdf\/2305.09859","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550\/arxiv.2305.09859","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.2305.09859","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2305.09859","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2305.09859","pdf_url":"https:\/\/arxiv.org\/pdf\/2305.09859","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[{"score":0.8600000143051147,"display_name":"Quality Education","id":"https:\/\/metadata.un.org\/sdg\/4"}],"awards":[{"id":"https:\/\/openalex.org\/G4376535288","display_name":null,"funder_award_id":"HR00112020054","funder_id":"https:\/\/openalex.org\/F4320332180","funder_display_name":"Defense Advanced Research Projects Agency"}],"funders":[{"id":"https:\/\/openalex.org\/F4320332180","display_name":"Defense Advanced Research Projects Agency","ror":"https:\/\/ror.org\/02caytj08"},{"id":"https:\/\/openalex.org\/F4320332603","display_name":"University of California, San Diego","ror":"https:\/\/ror.org\/0168r3w48"}],"has_content":{"pdf":true,"grobid_xml":false},"content_urls":{"pdf":"https:\/\/content.openalex.org\/works\/W4377090033.pdf"},"referenced_works_count":0,"referenced_works":[],"related_works":["https:\/\/openalex.org\/W4365211920","https:\/\/openalex.org\/W3005996785","https:\/\/openalex.org\/W3014948380","https:\/\/openalex.org\/W4386984417","https:\/\/openalex.org\/W4380551139","https:\/\/openalex.org\/W4317695495","https:\/\/openalex.org\/W2280377497","https:\/\/openalex.org\/W4387506531","https:\/\/openalex.org\/W4238433571","https:\/\/openalex.org\/W3174044702"],"abstract_inverted_index":{"With":[0],"the":[1,78,81,86,91,100,109,113,151,158,166,171,188],"advent":[2],"of":[3,24,64,72,80,112,177,194],"fluent":[4],"generative":[5],"language":[6],"models":[7,38,127],"that":[8,102,122,149],"can":[9,134],"produce":[10],"convincing":[11],"utterances":[12],"very":[13],"similar":[14],"to":[15,42,50,67,77,88,165],"those":[16],"written":[17],"by":[18],"humans,":[19],"distinguishing":[20],"whether":[21,150],"a":[22,62,184],"piece":[23],"text":[25,104,117,131,138],"is":[26,105,118,161],"machine-generated":[27,69],"or":[28,84],"human-written":[29,116],"becomes":[30],"more":[31,34,135],"challenging":[32],"and":[33,49,54,125,143,153],"important,":[35],"as":[36],"such":[37,94],"could":[39],"be":[40],"used":[41],"spread":[43],"misinformation,":[44],"fake":[45,47],"news,":[46],"reviews":[48],"mimic":[51],"certain":[52],"authors":[53],"figures.":[55],"To":[56],"this":[57],"end,":[58],"there":[59],"have":[60],"been":[61],"slew":[63],"methods":[65,74],"proposed":[66],"detect":[68,137],"text.":[70],"Most":[71],"these":[73],"need":[75,85],"access":[76],"logits":[79],"target":[82],"model":[83,173,186],"ability":[87],"sample":[89],"from":[90,140,187],"target.":[92],"One":[93],"black-box":[95],"detection":[96,167],"method":[97],"relies":[98],"on":[99,157],"observation":[101],"generated":[103,139],"locally":[106],"optimal":[107],"under":[108],"likelihood":[110],"function":[111],"generator,":[114],"while":[115],"not.":[119],"We":[120],"find":[121,148],"overall,":[123],"smaller":[124],"partially-trained":[126],"are":[128],"better":[129],"universal":[130],"detectors:":[132],"they":[133],"precisely":[136],"both":[141],"small":[142],"larger":[144,185],"models.":[145],"Interestingly,":[146],"we":[147],"detector":[152],"generator":[154],"were":[155],"trained":[156],"same":[159],"data":[160],"not":[162],"critically":[163],"important":[164],"success.":[168],"For":[169],"instance":[170],"OPT-125M":[172],"has":[174,192],"an":[175],"AUC":[176,193],"0.81":[178],"in":[179],"detecting":[180],"ChatGPT":[181],"generations,":[182],"whereas":[183],"GPT":[189],"family,":[190],"GPTJ-6B,":[191],"0.45.":[195]},"counts_by_year":[{"year":2025,"cited_by_count":1},{"year":2023,"cited_by_count":4}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2023-05-20T00:00:00"}