{"id":"https:\/\/openalex.org\/W4416257382","doi":"https:\/\/doi.org\/10.48550\/arxiv.2509.18119","title":"MobileRL: Online Agentic Reinforcement Learning for Mobile GUI Agents","display_name":"MobileRL: Online Agentic Reinforcement Learning for Mobile GUI Agents","publication_year":2025,"publication_date":"2025-09-10","ids":{"openalex":"https:\/\/openalex.org\/W4416257382","doi":"https:\/\/doi.org\/10.48550\/arxiv.2509.18119"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2509.18119","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2509.18119","pdf_url":"https:\/\/arxiv.org\/pdf\/2509.18119","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/2509.18119","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5100633493","display_name":"Yifan Xu","orcid":null},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Xu, Yifan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5075936732","display_name":"Xiao Liu","orcid":"https:\/\/orcid.org\/0000-0001-8400-5754"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xiao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5025550930","display_name":"Xinghan Liu","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Liu, Xinghan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5001130000","display_name":"Jiaqi Fu","orcid":"https:\/\/orcid.org\/0000-0003-0081-6133"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Fu, Jiaqi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5001724504","display_name":"Hanchen Zhang","orcid":"https:\/\/orcid.org\/0000-0003-0498-429X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Hanchen","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5017283302","display_name":"Bohao Jing","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Jing, Bohao","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5104152821","display_name":"Shudan Zhang","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhang, Shudan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5100384219","display_name":"Yuting Wang","orcid":"https:\/\/orcid.org\/0000-0003-3990-2418"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Yuting","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5113163352","display_name":"Wenyi Zhao","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Zhao, Wenyi","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5026632701","display_name":"Yuxiao Dong","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Dong, Yuxiao","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":10,"corresponding_author_ids":["https:\/\/openalex.org\/A5100633493"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7436000108718872,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T11714","display_name":"Multimodal Machine Learning Applications","score":0.7436000108718872,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10462","display_name":"Reinforcement Learning in Robotics","score":0.08229999989271164,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10709","display_name":"Social Robot Interaction and HRI","score":0.027400000020861626,"subfield":{"id":"https:\/\/openalex.org\/subfields\/3207","display_name":"Social Psychology"},"field":{"id":"https:\/\/openalex.org\/fields\/32","display_name":"Psychology"},"domain":{"id":"https:\/\/openalex.org\/domains\/2","display_name":"Social Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/reinforcement-learning","display_name":"Reinforcement learning","score":0.8414000272750854},{"id":"https:\/\/openalex.org\/keywords\/task","display_name":"Task (project management)","score":0.6553000211715698},{"id":"https:\/\/openalex.org\/keywords\/inefficiency","display_name":"Inefficiency","score":0.6502000093460083},{"id":"https:\/\/openalex.org\/keywords\/component","display_name":"Component (thermodynamics)","score":0.5367000102996826},{"id":"https:\/\/openalex.org\/keywords\/interface","display_name":"Interface (matter)","score":0.3702999949455261},{"id":"https:\/\/openalex.org\/keywords\/graphical-user-interface","display_name":"Graphical user interface","score":0.34290000796318054},{"id":"https:\/\/openalex.org\/keywords\/variety","display_name":"Variety (cybernetics)","score":0.33550000190734863},{"id":"https:\/\/openalex.org\/keywords\/task-analysis","display_name":"Task analysis","score":0.3269999921321869}],"concepts":[{"id":"https:\/\/openalex.org\/C97541855","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q830687","display_name":"Reinforcement learning","level":2,"score":0.8414000272750854},{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.7594000101089478},{"id":"https:\/\/openalex.org\/C2780451532","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q759676","display_name":"Task (project management)","level":2,"score":0.6553000211715698},{"id":"https:\/\/openalex.org\/C2778869765","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q6028363","display_name":"Inefficiency","level":2,"score":0.6502000093460083},{"id":"https:\/\/openalex.org\/C107457646","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q207434","display_name":"Human\u2013computer interaction","level":1,"score":0.6240000128746033},{"id":"https:\/\/openalex.org\/C168167062","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1117970","display_name":"Component (thermodynamics)","level":2,"score":0.5367000102996826},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.3822000026702881},{"id":"https:\/\/openalex.org\/C113843644","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q901882","display_name":"Interface (matter)","level":4,"score":0.3702999949455261},{"id":"https:\/\/openalex.org\/C37789001","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q782543","display_name":"Graphical user interface","level":2,"score":0.34290000796318054},{"id":"https:\/\/openalex.org\/C136197465","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1729295","display_name":"Variety (cybernetics)","level":2,"score":0.33550000190734863},{"id":"https:\/\/openalex.org\/C175154964","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q380077","display_name":"Task analysis","level":3,"score":0.3269999921321869},{"id":"https:\/\/openalex.org\/C89505385","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q47146","display_name":"User interface","level":2,"score":0.326200008392334},{"id":"https:\/\/openalex.org\/C47932503","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q5395689","display_name":"Error-driven learning","level":3,"score":0.3012999892234802},{"id":"https:\/\/openalex.org\/C186967261","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q5082128","display_name":"Mobile device","level":2,"score":0.29760000109672546},{"id":"https:\/\/openalex.org\/C67203356","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1321905","display_name":"Reinforcement","level":2,"score":0.29269999265670776},{"id":"https:\/\/openalex.org\/C124304363","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q673661","display_name":"Abstraction","level":2,"score":0.2694999873638153},{"id":"https:\/\/openalex.org\/C192209626","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q190909","display_name":"Focus (optics)","level":2,"score":0.2694999873638153},{"id":"https:\/\/openalex.org\/C2164484","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q5170150","display_name":"Core (optical fiber)","level":2,"score":0.2678000032901764},{"id":"https:\/\/openalex.org\/C2986087404","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q15946010","display_name":"Online learning","level":2,"score":0.2671999931335449},{"id":"https:\/\/openalex.org\/C198531522","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q485146","display_name":"Sample (material)","level":2,"score":0.26179999113082886},{"id":"https:\/\/openalex.org\/C119857082","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2539","display_name":"Machine learning","level":1,"score":0.25380000472068787}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2509.18119","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2509.18119","pdf_url":"https:\/\/arxiv.org\/pdf\/2509.18119","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"doi:10.48550\/arxiv.2509.18119","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.2509.18119","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2509.18119","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2509.18119","pdf_url":"https:\/\/arxiv.org\/pdf\/2509.18119","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"pdf":false,"grobid_xml":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Building":[0],"general-purpose":[1],"graphical":[2],"user":[3],"interface":[4],"(GUI)":[5],"agents":[6,23,57],"has":[7],"become":[8],"increasingly":[9],"promising":[10],"with":[11,24],"the":[12,32,39,65,83,91,100],"progress":[13],"in":[14,58,103,143],"vision":[15],"language":[16],"models.":[17],"However,":[18],"developing":[19],"effective":[20],"mobile":[21,59,122],"GUI":[22,56],"reinforcement":[25,50],"learning":[26,51],"(RL)":[27],"remains":[28],"challenging":[29],"due":[30],"to":[31,54,81,85,96,129],"heavy-tailed":[33],"distribution":[34],"of":[35,41,145],"task":[36,87,101],"difficulty":[37],"and":[38,77,116,124,134,152],"inefficiency":[40],"large-scale":[42],"environment":[43],"sampling.":[44],"We":[45,89,126],"present":[46],"an":[47],"online":[48],"agentic":[49,105],"framework":[52,157],"MobileRL":[53],"enhance":[55],"environments.":[60],"Its":[61],"core":[62],"component":[63],"is":[64,158],"Difficulty-ADAptive":[66],"GRPO":[67],"(ADAGRPO)":[68],"algorithm.":[69],"In":[70],"ADAGRPO,":[71],"we":[72],"design":[73],"difficulty-adaptive":[74],"positive":[75],"replay":[76],"failure":[78],"curriculum":[79],"filtering":[80],"adapt":[82],"model":[84,139],"different":[86],"difficulties.":[88],"introduce":[90],"shortest-path":[92],"reward":[93],"adjustment":[94],"strategy":[95],"reshape":[97],"rewards":[98],"concerning":[99],"length":[102],"multi-turn":[104],"tasks.":[106,125],"Those":[107],"strategies":[108],"jointly":[109],"stabilize":[110],"RL":[111],"training,":[112],"improve":[113],"sample":[114],"efficiency,":[115],"generate":[117],"strong":[118],"performance":[119],"across":[120],"diverse":[121],"apps":[123],"apply":[127],"MOBILERL":[128,156],"two":[130],"open":[131],"models":[132],"(Qwen2.5-VL-7B-Instruct":[133],"GLM-4.1V-9B-Base).":[135],"The":[136,155],"resultant":[137],"MOBILERL-9B":[138],"achieves":[140],"state-of-the-art":[141],"results":[142],"terms":[144],"success":[146],"rates":[147],"on":[148],"both":[149],"AndroidWorld":[150],"(80.2%)":[151],"AndroidLab":[153],"(53.6%).":[154],"open-sourced":[159],"at:":[160],"https:\/\/github.com\/THUDM\/MobileRL.":[161]},"counts_by_year":[],"updated_date":"2026-03-07T16:01:11.037858","created_date":"2025-10-10T00:00:00"}