{"id":"https:\/\/openalex.org\/W4392904807","doi":"https:\/\/doi.org\/10.1109\/icassp48485.2024.10447328","title":"AugSumm: Towards Generalizable Speech Summarization Using Synthetic Labels from Large Language Models","display_name":"AugSumm: Towards Generalizable Speech Summarization Using Synthetic Labels from Large Language Models","publication_year":2024,"publication_date":"2024-03-18","ids":{"openalex":"https:\/\/openalex.org\/W4392904807","doi":"https:\/\/doi.org\/10.1109\/icassp48485.2024.10447328"},"language":"en","primary_location":{"id":"doi:10.1109\/icassp48485.2024.10447328","is_oa":false,"landing_page_url":"http:\/\/dx.doi.org\/10.1109\/icassp48485.2024.10447328","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5091834525","display_name":"Jee-weon Jung","orcid":"https:\/\/orcid.org\/0000-0003-0505-2988"},"institutions":[{"id":"https:\/\/openalex.org\/I74973139","display_name":"Carnegie Mellon University","ror":"https:\/\/ror.org\/05x2bcf33","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I74973139"]}],"countries":["US"],"is_corresponding":true,"raw_author_name":"Jee-Weon Jung","raw_affiliation_strings":["Carnegie Mellon University,USA","Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https:\/\/openalex.org\/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https:\/\/openalex.org\/I74973139"]}]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5058649002","display_name":"Roshan Sharma","orcid":"https:\/\/orcid.org\/0000-0003-3760-9955"},"institutions":[{"id":"https:\/\/openalex.org\/I74973139","display_name":"Carnegie Mellon University","ror":"https:\/\/ror.org\/05x2bcf33","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Roshan Sharma","raw_affiliation_strings":["Carnegie Mellon University,USA","Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https:\/\/openalex.org\/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https:\/\/openalex.org\/I74973139"]}]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5055644612","display_name":"William Chen","orcid":"https:\/\/orcid.org\/0000-0001-8924-5853"},"institutions":[{"id":"https:\/\/openalex.org\/I74973139","display_name":"Carnegie Mellon University","ror":"https:\/\/ror.org\/05x2bcf33","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"William Chen","raw_affiliation_strings":["Carnegie Mellon University,USA","Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https:\/\/openalex.org\/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https:\/\/openalex.org\/I74973139"]}]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5113017615","display_name":"Bhiksha Raj","orcid":null},"institutions":[{"id":"https:\/\/openalex.org\/I4210113480","display_name":"Mohamed bin Zayed University of Artificial Intelligence","ror":"https:\/\/ror.org\/0258gkt32","country_code":"AE","type":"education","lineage":["https:\/\/openalex.org\/I4210113480"]},{"id":"https:\/\/openalex.org\/I74973139","display_name":"Carnegie Mellon University","ror":"https:\/\/ror.org\/05x2bcf33","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I74973139"]}],"countries":["AE","US"],"is_corresponding":false,"raw_author_name":"Bhiksha Raj","raw_affiliation_strings":["Carnegie Mellon University,USA","Mohamed bin Zayed University of AI, Abu Dhabi","Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https:\/\/openalex.org\/I74973139"]},{"raw_affiliation_string":"Mohamed bin Zayed University of AI, Abu Dhabi","institution_ids":["https:\/\/openalex.org\/I4210113480"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https:\/\/openalex.org\/I74973139"]}]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5001291873","display_name":"Shinji Watanabe","orcid":"https:\/\/orcid.org\/0000-0002-5970-8631"},"institutions":[{"id":"https:\/\/openalex.org\/I74973139","display_name":"Carnegie Mellon University","ror":"https:\/\/ror.org\/05x2bcf33","country_code":"US","type":"education","lineage":["https:\/\/openalex.org\/I74973139"]}],"countries":["US"],"is_corresponding":false,"raw_author_name":"Shinji Watanabe","raw_affiliation_strings":["Carnegie Mellon University,USA","Carnegie Mellon University, USA"],"affiliations":[{"raw_affiliation_string":"Carnegie Mellon University,USA","institution_ids":["https:\/\/openalex.org\/I74973139"]},{"raw_affiliation_string":"Carnegie Mellon University, USA","institution_ids":["https:\/\/openalex.org\/I74973139"]}]}],"institutions":[],"countries_distinct_count":2,"institutions_distinct_count":5,"corresponding_author_ids":["https:\/\/openalex.org\/A5091834525"],"corresponding_institution_ids":["https:\/\/openalex.org\/I74973139"],"apc_list":null,"apc_paid":null,"fwci":0.3345,"has_fulltext":false,"cited_by_count":1,"citation_normalized_percentile":{"value":0.61165668,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":90,"max":94},"biblio":{"volume":null,"issue":null,"first_page":"12071","last_page":"12075"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10028","display_name":"Topic Modeling","score":0.9998999834060669,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10181","display_name":"Natural Language Processing Techniques","score":0.9998999834060669,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10201","display_name":"Speech Recognition and Synthesis","score":0.9994000196456909,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1702","display_name":"Artificial Intelligence"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/automatic-summarization","display_name":"Automatic summarization","score":0.875741720199585},{"id":"https:\/\/openalex.org\/keywords\/computer-science","display_name":"Computer science","score":0.8586477041244507},{"id":"https:\/\/openalex.org\/keywords\/leverage","display_name":"Leverage (statistics)","score":0.7291048169136047},{"id":"https:\/\/openalex.org\/keywords\/ground-truth","display_name":"Ground truth","score":0.6140227317810059},{"id":"https:\/\/openalex.org\/keywords\/artificial-intelligence","display_name":"Artificial intelligence","score":0.5809388756752014},{"id":"https:\/\/openalex.org\/keywords\/natural-language-processing","display_name":"Natural language processing","score":0.535087525844574},{"id":"https:\/\/openalex.org\/keywords\/language-model","display_name":"Language model","score":0.5166957378387451},{"id":"https:\/\/openalex.org\/keywords\/probabilistic-logic","display_name":"Probabilistic logic","score":0.5024421215057373},{"id":"https:\/\/openalex.org\/keywords\/annotation","display_name":"Annotation","score":0.47128644585609436},{"id":"https:\/\/openalex.org\/keywords\/point","display_name":"Point (geometry)","score":0.42596641182899475},{"id":"https:\/\/openalex.org\/keywords\/machine-learning","display_name":"Machine learning","score":0.39009028673171997}],"concepts":[{"id":"https:\/\/openalex.org\/C170858558","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1394144","display_name":"Automatic summarization","level":2,"score":0.875741720199585},{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.8586477041244507},{"id":"https:\/\/openalex.org\/C153083717","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q6535263","display_name":"Leverage (statistics)","level":2,"score":0.7291048169136047},{"id":"https:\/\/openalex.org\/C146849305","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q370766","display_name":"Ground truth","level":2,"score":0.6140227317810059},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.5809388756752014},{"id":"https:\/\/openalex.org\/C204321447","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q30642","display_name":"Natural language processing","level":1,"score":0.535087525844574},{"id":"https:\/\/openalex.org\/C137293760","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q3621696","display_name":"Language model","level":2,"score":0.5166957378387451},{"id":"https:\/\/openalex.org\/C49937458","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2599292","display_name":"Probabilistic logic","level":2,"score":0.5024421215057373},{"id":"https:\/\/openalex.org\/C2776321320","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q857525","display_name":"Annotation","level":2,"score":0.47128644585609436},{"id":"https:\/\/openalex.org\/C28719098","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q44946","display_name":"Point (geometry)","level":2,"score":0.42596641182899475},{"id":"https:\/\/openalex.org\/C119857082","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q2539","display_name":"Machine learning","level":1,"score":0.39009028673171997},{"id":"https:\/\/openalex.org\/C33923547","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q395","display_name":"Mathematics","level":0,"score":0},{"id":"https:\/\/openalex.org\/C2524010","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q8087","display_name":"Geometry","level":1,"score":0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109\/icassp48485.2024.10447328","is_oa":false,"landing_page_url":"http:\/\/dx.doi.org\/10.1109\/icassp48485.2024.10447328","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"ICASSP 2024 - 2024 IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[{"id":"https:\/\/openalex.org\/G1353381076","display_name":null,"funder_award_id":"2138296","funder_id":"https:\/\/openalex.org\/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https:\/\/openalex.org\/G1912064254","display_name":null,"funder_award_id":"2137603","funder_id":"https:\/\/openalex.org\/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https:\/\/openalex.org\/G2507572247","display_name":null,"funder_award_id":"2138286","funder_id":"https:\/\/openalex.org\/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https:\/\/openalex.org\/G4006316252","display_name":null,"funder_award_id":"2138259","funder_id":"https:\/\/openalex.org\/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https:\/\/openalex.org\/G4010157413","display_name":null,"funder_award_id":"2138259","funder_id":"https:\/\/openalex.org\/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https:\/\/openalex.org\/G4968279142","display_name":null,"funder_award_id":"2138307","funder_id":"https:\/\/openalex.org\/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https:\/\/openalex.org\/G509285653","display_name":null,"funder_award_id":"2138307","funder_id":"https:\/\/openalex.org\/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https:\/\/openalex.org\/G5173294414","display_name":null,"funder_award_id":"2138286","funder_id":"https:\/\/openalex.org\/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https:\/\/openalex.org\/G5464919783","display_name":null,"funder_award_id":"2137603","funder_id":"https:\/\/openalex.org\/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https:\/\/openalex.org\/G6664589177","display_name":null,"funder_award_id":"2138307","funder_id":"https:\/\/openalex.org\/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"},{"id":"https:\/\/openalex.org\/G7622549653","display_name":null,"funder_award_id":"213829","funder_id":"https:\/\/openalex.org\/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https:\/\/openalex.org\/G8165146668","display_name":null,"funder_award_id":"2138259","funder_id":"https:\/\/openalex.org\/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https:\/\/openalex.org\/G8300691027","display_name":null,"funder_award_id":"2138286","funder_id":"https:\/\/openalex.org\/F4320306076","funder_display_name":"National Science Foundation"},{"id":"https:\/\/openalex.org\/G8819975937","display_name":null,"funder_award_id":"2137603","funder_id":"https:\/\/openalex.org\/F4320312143","funder_display_name":"National Centre for Supercomputing Applications"},{"id":"https:\/\/openalex.org\/G8897435675","display_name":null,"funder_award_id":"2138296","funder_id":"https:\/\/openalex.org\/F4320337377","funder_display_name":"Office of Advanced Cyberinfrastructure"}],"funders":[{"id":"https:\/\/openalex.org\/F4320306076","display_name":"National Science Foundation","ror":"https:\/\/ror.org\/021nxhr62"},{"id":"https:\/\/openalex.org\/F4320307110","display_name":"Delta","ror":"https:\/\/ror.org\/03g9c1e75"},{"id":"https:\/\/openalex.org\/F4320312143","display_name":"National Centre for Supercomputing Applications","ror":"https:\/\/ror.org\/03r10zj06"},{"id":"https:\/\/openalex.org\/F4320337377","display_name":"Office of Advanced Cyberinfrastructure","ror":"https:\/\/ror.org\/04nh1dc89"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":28,"referenced_works":["https:\/\/openalex.org\/W2149593800","https:\/\/openalex.org\/W2766219058","https:\/\/openalex.org\/W2809191921","https:\/\/openalex.org\/W2936774411","https:\/\/openalex.org\/W2953104586","https:\/\/openalex.org\/W2962704246","https:\/\/openalex.org\/W3097777922","https:\/\/openalex.org\/W3162090017","https:\/\/openalex.org\/W3163793923","https:\/\/openalex.org\/W3198084221","https:\/\/openalex.org\/W4224925623","https:\/\/openalex.org\/W4285129823","https:\/\/openalex.org\/W4307929142","https:\/\/openalex.org\/W4321455981","https:\/\/openalex.org\/W4321649710","https:\/\/openalex.org\/W4372183461","https:\/\/openalex.org\/W4372269220","https:\/\/openalex.org\/W4385571440","https:\/\/openalex.org\/W4385572754","https:\/\/openalex.org\/W4385822678","https:\/\/openalex.org\/W6631362777","https:\/\/openalex.org\/W6638910383","https:\/\/openalex.org\/W6682631176","https:\/\/openalex.org\/W6755559483","https:\/\/openalex.org\/W6761205521","https:\/\/openalex.org\/W6842080761","https:\/\/openalex.org\/W6846274923","https:\/\/openalex.org\/W6850202480"],"related_works":["https:\/\/openalex.org\/W2366403280","https:\/\/openalex.org\/W1495108544","https:\/\/openalex.org\/W2091301346","https:\/\/openalex.org\/W3148229873","https:\/\/openalex.org\/W4389760904","https:\/\/openalex.org\/W2150160875","https:\/\/openalex.org\/W4242223894","https:\/\/openalex.org\/W3046127589","https:\/\/openalex.org\/W4287704324","https:\/\/openalex.org\/W3093409497"],"abstract_inverted_index":{"Abstractive":[0],"speech":[1],"summarization":[2],"(SSUM)":[3],"aims":[4],"to":[5,30,71,93,105,118,150,156],"generate":[6,106,119],"humanlike":[7],"summaries":[8,38,108,121,130,141,159,172,177,191],"from":[9,122],"speech.":[10],"Given":[11],"variations":[12],"in":[13,22,160],"information":[14],"captured":[15],"and":[16,51,111,162,173,186],"phrasing,":[17],"recordings":[18],"can":[19],"be":[20,69],"summarized":[21],"multiple":[23,65,132],"ways.":[24],"Therefore,":[25],"it":[26],"is":[27,78,82],"more":[28,148],"reasonable":[29],"consider":[31],"a":[32,41,54,91,100],"probabilistic":[33],"distribution":[34,75],"of":[35,128],"all":[36],"potential":[37],"rather":[39],"than":[40],"single":[42,55],"summary.":[43],"However,":[44],"conventional":[45],"SSUM":[46],"models":[47,97],"are":[48,145,192],"mostly":[49],"trained":[50],"evaluated":[52],"with":[53],"ground-truth":[56],"(GT)":[57],"human-annotated":[58],"deterministic":[59],"summary":[60],"for":[61,102,109],"every":[62],"recording.":[63],"Generating":[64],"human":[66,103,135],"references":[67],"would":[68],"ideal":[70],"better":[72],"represent":[73],"the":[74,126],"statistically,":[76],"but":[77],"impractical":[79],"because":[80],"annotation":[81],"expensive.":[83],"We":[84,124],"tackle":[85],"this":[86],"challenge":[87],"by":[88,180],"proposing":[89],"AugSumm,":[90],"method":[92],"leverage":[94],"large":[95],"language":[96],"(LLMs)":[98],"as":[99,147],"proxy":[101],"annotators":[104],"augmented":[107],"training":[110,161],"evaluation.":[112,163],"First,":[113],"we":[114,138,153],"explore":[115],"prompting":[116],"strategies":[117],"synthetic":[120,129,158,171],"ChatGPT.":[123],"validate":[125],"quality":[127],"using":[131,143],"metrics":[133],"including":[134],"evaluation,":[136],"where":[137],"find":[139],"that":[140,168],"generated":[142],"AugSumm":[144,190],"perceived":[146],"valid":[149],"humans.":[151],"Second,":[152],"develop":[154],"methods":[155],"utilize":[157],"Experiments":[164],"on":[165,170,175,183],"How2":[166],"demonstrate":[167],"pre-training":[169],"fine-tuning":[174],"GT":[176,185],"improves":[178],"ROUGE-L":[179],"1":[181],"point":[182],"both":[184],"AugSumm-based":[187],"test":[188],"sets.":[189],"available":[193],"at":[194],"https:\/\/github.com\/Jungjee\/AugSumm.":[195]},"counts_by_year":[{"year":2024,"cited_by_count":1}],"updated_date":"2026-04-21T08:09:41.155169","created_date":"2025-10-10T00:00:00"}