{"id":"https:\/\/openalex.org\/W4387294486","doi":"https:\/\/doi.org\/10.48550\/arxiv.2309.17384","title":"Toward Universal Speech Enhancement for Diverse Input Conditions","display_name":"Toward Universal Speech Enhancement for Diverse Input Conditions","publication_year":2023,"publication_date":"2023-09-29","ids":{"openalex":"https:\/\/openalex.org\/W4387294486","doi":"https:\/\/doi.org\/10.48550\/arxiv.2309.17384"},"language":"en","primary_location":{"id":"pmh:oai:arXiv.org:2309.17384","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2309.17384","pdf_url":"https:\/\/arxiv.org\/pdf\/2309.17384","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"type":"preprint","indexed_in":["arxiv","datacite"],"open_access":{"is_oa":true,"oa_status":"green","oa_url":"https:\/\/arxiv.org\/pdf\/2309.17384","any_repository_has_fulltext":true},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5071937621","display_name":"Wangyou Zhang","orcid":"https:\/\/orcid.org\/0000-0003-4500-3515"},"institutions":[],"countries":[],"is_corresponding":true,"raw_author_name":"Zhang, Wangyou","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5013385059","display_name":"Kohei Saijo","orcid":null},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Saijo, Kohei","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5101607498","display_name":"Zhong-Qiu Wang","orcid":"https:\/\/orcid.org\/0000-0002-4204-9430"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Wang, Zhong-Qiu","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5001291873","display_name":"Shinji Watanabe","orcid":"https:\/\/orcid.org\/0000-0002-5970-8631"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Watanabe, Shinji","raw_affiliation_strings":[],"affiliations":[]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5100341993","display_name":"Yanmin Qian","orcid":"https:\/\/orcid.org\/0000-0002-0314-3790"},"institutions":[],"countries":[],"is_corresponding":false,"raw_author_name":"Qian, Yanmin","raw_affiliation_strings":[],"affiliations":[]}],"institutions":[],"countries_distinct_count":0,"institutions_distinct_count":5,"corresponding_author_ids":["https:\/\/openalex.org\/A5071937621"],"corresponding_institution_ids":[],"apc_list":null,"apc_paid":null,"fwci":null,"has_fulltext":true,"cited_by_count":1,"citation_normalized_percentile":null,"cited_by_percentile_year":null,"biblio":{"volume":null,"issue":null,"first_page":null,"last_page":null},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1711","display_name":"Signal Processing"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10860","display_name":"Speech and Audio Processing","score":0.9998000264167786,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1711","display_name":"Signal Processing"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10201","display_name":"Speech Recognition and Synthesis","score":0.9595000147819519,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T13289","display_name":"Infant Health and Development","score":0.9455000162124634,"subfield":{"id":"https:\/\/openalex.org\/subfields\/3611","display_name":"Pharmacy"},"field":{"id":"https:\/\/openalex.org\/fields\/36","display_name":"Health Professions"},"domain":{"id":"https:\/\/openalex.org\/domains\/4","display_name":"Health Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/computer-science","display_name":"Computer science","score":0.7607690095901489},{"id":"https:\/\/openalex.org\/keywords\/benchmark","display_name":"Benchmark (surveying)","score":0.6553301811218262},{"id":"https:\/\/openalex.org\/keywords\/microphone","display_name":"Microphone","score":0.6352190971374512},{"id":"https:\/\/openalex.org\/keywords\/speech-enhancement","display_name":"Speech enhancement","score":0.5888574719429016},{"id":"https:\/\/openalex.org\/keywords\/task","display_name":"Task (project management)","score":0.578300952911377},{"id":"https:\/\/openalex.org\/keywords\/range","display_name":"Range (aeronautics)","score":0.563788652420044},{"id":"https:\/\/openalex.org\/keywords\/channel","display_name":"Channel (broadcasting)","score":0.5547771453857422},{"id":"https:\/\/openalex.org\/keywords\/speech-recognition","display_name":"Speech recognition","score":0.5426663756370544},{"id":"https:\/\/openalex.org\/keywords\/sampling","display_name":"Sampling (signal processing)","score":0.531532347202301},{"id":"https:\/\/openalex.org\/keywords\/noise-reduction","display_name":"Noise reduction","score":0.5091127753257751},{"id":"https:\/\/openalex.org\/keywords\/signal","display_name":"SIGNAL (programming language)","score":0.4408051669597626},{"id":"https:\/\/openalex.org\/keywords\/artificial-intelligence","display_name":"Artificial intelligence","score":0.38800302147865295},{"id":"https:\/\/openalex.org\/keywords\/machine-learning","display_name":"Machine learning","score":0.32680201530456543},{"id":"https:\/\/openalex.org\/keywords\/telecommunications","display_name":"Telecommunications","score":0.1279517412185669}],"concepts":[{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.7607690095901489},{"id":"https:\/\/openalex.org\/C185798385","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1161707","display_name":"Benchmark (surveying)","level":2,"score":0.6553301811218262},{"id":"https:\/\/openalex.org\/C2778263558","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q46384","display_name":"Microphone","level":3,"score":0.6352190971374512},{"id":"https:\/\/openalex.org\/C2776182073","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q7575395","display_name":"Speech enhancement","level":3,"score":0.5888574719429016},{"id":"https:\/\/openalex.org\/C2780451532","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q759676","display_name":"Task (project management)","level":2,"score":0.578300952911377},{"id":"https:\/\/openalex.org\/C204323151","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q905424","display_name":"Range (aeronautics)","level":2,"score":0.563788652420044},{"id":"https:\/\/openalex.org\/C127162648","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q16858953","display_name":"Channel (broadcasting)","level":2,"score":0.5547771453857422},{"id":"https:\/\/openalex.org\/C28490314","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q189436","display_name":"Speech recognition","level":1,"score":0.5426663756370544},{"id":"https:\/\/openalex.org\/C140779682","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q210868","display_name":"Sampling (signal processing)","level":3,"score":0.531532347202301},{"id":"https:\/\/openalex.org\/C163294075","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q581861","display_name":"Noise reduction","level":2,"score":0.5091127753257751},{"id":"https:\/\/openalex.org\/C2779843651","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q7390335","display_name":"SIGNAL (programming language)","level":2,"score":0.4408051669597626},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.38800302147865295},{"id":"https:\/\/openalex.org\/C119857082","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2539","display_name":"Machine learning","level":1,"score":0.32680201530456543},{"id":"https:\/\/openalex.org\/C76155785","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q418","display_name":"Telecommunications","level":1,"score":0.1279517412185669},{"id":"https:\/\/openalex.org\/C68115822","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1068172","display_name":"Sound pressure","level":2,"score":0},{"id":"https:\/\/openalex.org\/C13280743","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q131089","display_name":"Geodesy","level":1,"score":0},{"id":"https:\/\/openalex.org\/C159985019","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q181790","display_name":"Composite material","level":1,"score":0},{"id":"https:\/\/openalex.org\/C199360897","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q9143","display_name":"Programming language","level":1,"score":0},{"id":"https:\/\/openalex.org\/C192562407","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q228736","display_name":"Materials science","level":0,"score":0},{"id":"https:\/\/openalex.org\/C187736073","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2920921","display_name":"Management","level":1,"score":0},{"id":"https:\/\/openalex.org\/C205649164","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1071","display_name":"Geography","level":0,"score":0},{"id":"https:\/\/openalex.org\/C162324750","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q8134","display_name":"Economics","level":0,"score":0},{"id":"https:\/\/openalex.org\/C94915269","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1834857","display_name":"Detector","level":2,"score":0}],"mesh":[],"locations_count":2,"locations":[{"id":"pmh:oai:arXiv.org:2309.17384","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2309.17384","pdf_url":"https:\/\/arxiv.org\/pdf\/2309.17384","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},{"id":"doi:10.48550\/arxiv.2309.17384","is_oa":true,"landing_page_url":"https:\/\/doi.org\/10.48550\/arxiv.2309.17384","pdf_url":null,"source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":null,"is_accepted":false,"is_published":null,"raw_source_name":null,"raw_type":"article"}],"best_oa_location":{"id":"pmh:oai:arXiv.org:2309.17384","is_oa":true,"landing_page_url":"http:\/\/arxiv.org\/abs\/2309.17384","pdf_url":"https:\/\/arxiv.org\/pdf\/2309.17384","source":{"id":"https:\/\/openalex.org\/S4306400194","display_name":"arXiv (Cornell University)","issn_l":null,"issn":null,"is_oa":true,"is_in_doaj":false,"is_core":false,"host_organization":"https:\/\/openalex.org\/I205783295","host_organization_name":"Cornell University","host_organization_lineage":["https:\/\/openalex.org\/I205783295"],"host_organization_lineage_names":[],"type":"repository"},"license":"cc-by","license_id":"https:\/\/openalex.org\/licenses\/cc-by","version":"submittedVersion","is_accepted":false,"is_published":false,"raw_source_name":"","raw_type":"text"},"sustainable_development_goals":[{"display_name":"Peace, Justice and strong institutions","id":"https:\/\/metadata.un.org\/sdg\/16","score":0.6600000262260437}],"awards":[],"funders":[],"has_content":{"grobid_xml":true,"pdf":true},"content_urls":{"pdf":"https:\/\/content.openalex.org\/works\/W4387294486.pdf","grobid_xml":"https:\/\/content.openalex.org\/works\/W4387294486.grobid-xml"},"referenced_works_count":0,"referenced_works":[],"related_works":["https:\/\/openalex.org\/W2378211422","https:\/\/openalex.org\/W2745001401","https:\/\/openalex.org\/W4321353415","https:\/\/openalex.org\/W2130974462","https:\/\/openalex.org\/W2028665553","https:\/\/openalex.org\/W2086519370","https:\/\/openalex.org\/W972276598","https:\/\/openalex.org\/W2396048001","https:\/\/openalex.org\/W2905188205","https:\/\/openalex.org\/W1559044324"],"abstract_inverted_index":{"The":[0],"past":[1],"decade":[2],"has":[3],"witnessed":[4],"substantial":[5],"growth":[6],"of":[7,29,86,98,127],"data-driven":[8],"speech":[9],"enhancement":[10],"(SE)":[11],"techniques":[12],"thanks":[13],"to":[14,82],"deep":[15],"learning.":[16],"While":[17],"existing":[18,115],"approaches":[19],"have":[20],"shown":[21],"impressive":[22],"performance":[23],"in":[24],"some":[25],"common":[26],"datasets,":[27],"most":[28],"them":[30],"are":[31],"designed":[32],"only":[33,47],"for":[34],"a":[35,42,49,71,91,109,124],"single":[36,50,72,92,133],"condition":[37],"(e.g.,":[38,52],"single-channel,":[39],"multi-channel,":[40],"or":[41,46,54],"fixed":[43],"sampling":[44,104],"frequency)":[45],"consider":[48],"task":[51],"denoising":[53],"dereverberation).":[55],"Currently,":[56],"there":[57],"is":[58,96],"no":[59],"universal":[60,110],"SE":[61,93,111],"approach":[62],"that":[63,95,130],"can":[64,135],"effectively":[65],"handle":[66,137],"diverse":[67,138],"input":[68],"conditions":[69,139],"with":[70,118,140],"model.":[73],"In":[74],"this":[75,84],"paper,":[76],"we":[77,89,107],"make":[78],"the":[79,131],"first":[80],"attempt":[81],"investigate":[83],"line":[85],"research.":[87],"First,":[88],"devise":[90],"model":[94,134],"independent":[97],"microphone":[99],"channels,":[100],"signal":[101],"lengths,":[102],"and":[103],"frequencies.":[105],"Second,":[106],"design":[108],"benchmark":[112],"by":[113],"combining":[114],"public":[116],"corpora":[117],"multiple":[119],"conditions.":[120],"Our":[121],"experiments":[122],"on":[123],"wide":[125],"range":[126],"datasets":[128],"show":[129],"proposed":[132],"successfully":[136],"strong":[141],"performance.":[142]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-03-10T16:38:18.471706","created_date":"2025-10-10T00:00:00"}