{"id":"https:\/\/openalex.org\/W4414585944","doi":"https:\/\/doi.org\/10.48550\/arxiv.2505.19238","title":"Efficient Policy Optimization in Robust Constrained MDPs with Iteration Complexity Guarantees","display_name":"Efficient Policy Optimization in Robust Constrained MDPs with Iteration Complexity Guarantees","publication_year":2025,"publication_date":"2025-05-25","ids":{"openalex":"https:\/\/openalex.org\/W4414585944","doi":"https:\/\/doi.org\/10.48550\/arxiv.2505.19238"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2505.19238","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2505.19238","pdf_url":"https:\/\/arxiv.org\/pdf\/2505.19238","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/2505.19238","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5070748487","display_name":"S. Ganguly","orcid":"https:\/\/orcid.org\/0000-0002-1243-4987"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Ganguly, Sourav","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5005536479","display_name":"Kishan Panaganti","orcid":"https:\/\/orcid.org\/0000-0001-9746-698X"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Panaganti, Kishan","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5022713299","display_name":"Arnob Ghosh","orcid":"https:\/\/orcid.org\/0000-0003-0793-7536"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Ghosh, Arnob","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5062565732","display_name":"Adam Wierman","orcid":"https:\/\/orcid.org\/0000-0002-5923-0199"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wierman, Adam","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":4,"corresponding_author_ids":["https:\/\/openalex.org\/A5070748487"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":false,"cited_by_count":0,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T11182","display_name":"Auction Theory and Applications","score":0.984000027179718,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https:\/\/openalex.org\/fields\/18","display_name":"Decision Sciences"},"domain":{"id":"https:\/\/openalex.org\/domains\/2","display_name":"Social Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T11182","display_name":"Auction Theory and Applications","score":0.984000027179718,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1803","display_name":"Management Science and Operations Research"},"field":{"id":"https:\/\/openalex.org\/fields\/18","display_name":"Decision Sciences"},"domain":{"id":"https:\/\/openalex.org\/domains\/2","display_name":"Social Sciences"}},{"id":"https:\/\/openalex.org\/T12288","display_name":"Optimization and Search Problems","score":0.9010000228881836,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1705","display_name":"Computer Networks and Communications"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/markov-decision-process","display_name":"Markov decision process","score":0.7123000025749207},{"id":"https:\/\/openalex.org\/keywords\/constraint","display_name":"Constraint (computer-aided design)","score":0.5809000134468079},{"id":"https:\/\/openalex.org\/keywords\/bellman-equation","display_name":"Bellman equation","score":0.5144000053405762},{"id":"https:\/\/openalex.org\/keywords\/set","display_name":"Set (abstract data type)","score":0.512499988079071},{"id":"https:\/\/openalex.org\/keywords\/value","display_name":"Value (mathematics)","score":0.4790000021457672},{"id":"https:\/\/openalex.org\/keywords\/function","display_name":"Function (biology)","score":0.461899995803833},{"id":"https:\/\/openalex.org\/keywords\/robustness","display_name":"Robustness (evolution)","score":0.4293999969959259},{"id":"https:\/\/openalex.org\/keywords\/computation","display_name":"Computation","score":0.41999998688697815},{"id":"https:\/\/openalex.org\/keywords\/constrained-optimization","display_name":"Constrained optimization","score":0.3474999964237213}],"concepts":[{"id":"https:\/\/openalex.org\/C106189395","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q176789","display_name":"Markov decision process","level":3,"score":0.7123000025749207},{"id":"https:\/\/openalex.org\/C126255220","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q141495","display_name":"Mathematical optimization","level":1,"score":0.6995000243186951},{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.6381000280380249},{"id":"https:\/\/openalex.org\/C2776036281","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q48769818","display_name":"Constraint (computer-aided design)","level":2,"score":0.5809000134468079},{"id":"https:\/\/openalex.org\/C14646407","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1430750","display_name":"Bellman equation","level":2,"score":0.5144000053405762},{"id":"https:\/\/openalex.org\/C177264268","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1514741","display_name":"Set (abstract data type)","level":2,"score":0.512499988079071},{"id":"https:\/\/openalex.org\/C2776291640","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2912517","display_name":"Value (mathematics)","level":2,"score":0.4790000021457672},{"id":"https:\/\/openalex.org\/C14036430","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3736076","display_name":"Function (biology)","level":2,"score":0.461899995803833},{"id":"https:\/\/openalex.org\/C63479239","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q7353546","display_name":"Robustness (evolution)","level":3,"score":0.4293999969959259},{"id":"https:\/\/openalex.org\/C45374587","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q12525525","display_name":"Computation","level":2,"score":0.41999998688697815},{"id":"https:\/\/openalex.org\/C55660270","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q5164377","display_name":"Constrained optimization","level":2,"score":0.3474999964237213},{"id":"https:\/\/openalex.org\/C77618280","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1155772","display_name":"Scheme (mathematics)","level":2,"score":0.33480000495910645},{"id":"https:\/\/openalex.org\/C159886148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q176645","display_name":"Markov process","level":2,"score":0.33070001006126404},{"id":"https:\/\/openalex.org\/C2778023678","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q554403","display_name":"Duality (order theory)","level":2,"score":0.3264000117778778},{"id":"https:\/\/openalex.org\/C3309909","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q864155","display_name":"Binary decision diagram","level":2,"score":0.3068999946117401},{"id":"https:\/\/openalex.org\/C137836250","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q984063","display_name":"Optimization problem","level":2,"score":0.30140000581741333},{"id":"https:\/\/openalex.org\/C48372109","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3913","display_name":"Binary number","level":2,"score":0.29829999804496765},{"id":"https:\/\/openalex.org\/C41045048","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q202843","display_name":"Linear programming","level":2,"score":0.29269999265670776},{"id":"https:\/\/openalex.org\/C98763669","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q176645","display_name":"Markov chain","level":2,"score":0.28529998660087585},{"id":"https:\/\/openalex.org\/C2780980858","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q110022","display_name":"Dual (grammatical number)","level":2,"score":0.28380000591278076},{"id":"https:\/\/openalex.org\/C177067428","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q17013331","display_name":"Feasible region","level":2,"score":0.2833000123500824},{"id":"https:\/\/openalex.org\/C2776502983","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q690182","display_name":"Contrast (vision)","level":2,"score":0.2831999957561493},{"id":"https:\/\/openalex.org\/C31531917","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q915157","display_name":"Robust control","level":3,"score":0.2809000015258789},{"id":"https:\/\/openalex.org\/C193254401","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2160088","display_name":"Robust optimization","level":2,"score":0.26829999685287476},{"id":"https:\/\/openalex.org\/C2780598303","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q65921492","display_name":"Flexibility (engineering)","level":2,"score":0.2676999866962433},{"id":"https:\/\/openalex.org\/C61797465","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1188986","display_name":"Term (time)","level":2,"score":0.2660999894142151},{"id":"https:\/\/openalex.org\/C2775924081","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q55608371","display_name":"Control (management)","level":2,"score":0.2655999958515167},{"id":"https:\/\/openalex.org\/C44616089","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q30158686","display_name":"Constraint satisfaction","level":3,"score":0.26429998874664307}],"mesh":[],"locations_count":3,"locations":[{"id":"pmh:oai:arXiv.org:2505.19238","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2505.19238","pdf_url":"https:\/\/arxiv.org\/pdf\/2505.19238","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},{"id":"pmh:doi:10.48550\/arxiv.2505.19238","is_oa":true,"landing_page_url":null,"pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4406922384","display_name":"Open MIND","issn_l":null,"issn":null,"is_oa":false,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"Article"},{"id":"doi:10.48550\/arxiv.2505.19238","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.2505.19238","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2505.19238","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2505.19238","pdf_url":"https:\/\/arxiv.org\/pdf\/2505.19238","source":{"id":"https:\/\/openalex.org\/S4393918464","display_name":"ArXiv.org","issn_l":"2331-8422","issn":["2331-8422"],"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":null,"host_organization_name":null,"host_organization_lineage":[],"host_organization_lineage_names":[],"type":"repository"},"license":null,"license_id":null,"version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":null,"raw_type":"text"},"sustainable_development_goals":[],"awards":[],"funders":[],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":0,"referenced_works":[],"related_works":[],"abstract_inverted_index":{"Constrained":[0],"decision-making":[1],"is":[2,42],"essential":[3],"for":[4,96,138,227,239],"designing":[5],"safe":[6],"policies":[7],"in":[8],"real-world":[9,19],"control":[10],"systems,":[11],"yet":[12],"simulated":[13],"environments":[14],"often":[15],"fail":[16],"to":[17,69,204,212],"capture":[18],"adversities.":[20],"We":[21,148,182],"consider":[22,57],"the":[23,32,46,58,71,75,78,84,107,110,118,125,131,139,144,156,161,164,169,177,205,220],"problem":[24,63],"of":[25,106,109,230,242],"learning":[26],"a":[27,37,43,150,189,214],"policy":[28,190,198],"that":[29,153,184],"will":[30],"maximize":[31,70,176],"cumulative":[33],"reward":[34,72,140,179],"while":[35],"satisfying":[36],"constraint,":[38],"even":[39],"when":[40,167],"there":[41],"mismatch":[44],"between":[45],"real":[47],"model":[48,82],"and":[49,73,143,196,234],"an":[50,66,89,186],"accessible":[51],"simulator\/nominal":[52],"model.":[53,92],"In":[54,202],"particular,":[55],"we":[56,208,218],"robust":[59,120,178],"constrained":[60,98],"Markov":[61],"decision":[62],"(RCMDP)":[64],"where":[65],"agent":[67],"needs":[68],"satisfy":[74,160],"constraint":[76,145,157],"against":[77],"worst":[79,132],"possible":[80],"stochastic":[81],"under":[83],"uncertainty":[85],"set":[86],"centered":[87],"around":[88],"unknown":[90],"nominal":[91],"Primal-dual":[93],"methods,":[94],"effective":[95],"standard":[97,119],"MDP":[99],"(CMDP),":[100],"are":[101,171],"not":[102,210],"applicable":[103],"here":[104],"because":[105],"lack":[108],"strong":[111],"duality":[112],"property.":[113],"Further,":[114],"one":[115],"cannot":[116],"apply":[117],"value-iteration":[121],"based":[122],"approach":[123],"on":[124,163],"composite":[126],"value":[127,141,146,158,180,229,241],"function":[128,142],"either":[129],"as":[130],"case":[133],"models":[134],"may":[135],"be":[136],"different":[137],"function.":[147,181],"propose":[149],"novel":[151],"technique":[152],"effectively":[154],"minimizes":[155],"function--to":[159],"constraints;":[162],"other":[165],"hand,":[166],"all":[168],"constraints":[170],"satisfied,":[172],"it":[173],"can":[174],"simply":[175],"prove":[183],"such":[185],"algorithm":[187],"finds":[188],"with":[191],"at":[192,224,236],"most":[193],"$\u03b5$":[194],"sub-optimality":[195],"feasible":[197],"after":[199],"$O(\u03b5^{-2})$":[200],"iterations.":[201],"contrast":[203],"state-of-the-art":[206],"method,":[207],"do":[209],"need":[211],"employ":[213],"binary":[215],"search,":[216],"thus,":[217],"reduce":[219],"computation":[221],"time":[222],"by":[223,235],"least":[225,237],"4x":[226],"smaller":[228],"discount":[231],"factor":[232],"($\u03b3$)":[233],"6x":[238],"larger":[240],"$\u03b3$.":[243]},"counts_by_year":[],"updated_date":"2026-04-04T16:13:02.066488","created_date":"2025-10-10T00:00:00"}