{"id":"https:\/\/openalex.org\/W2798508334","doi":"https:\/\/doi.org\/10.1109\/samos.2017.8344617","title":"Adaptive runtime exploiting sparsity in tensor of deep learning neural network on heterogeneous systems","display_name":"Adaptive runtime exploiting sparsity in tensor of deep learning neural network on heterogeneous systems","publication_year":2017,"publication_date":"2017-07-01","ids":{"openalex":"https:\/\/openalex.org\/W2798508334","doi":"https:\/\/doi.org\/10.1109\/samos.2017.8344617","mag":"2798508334"},"language":"en","primary_location":{"id":"doi:10.1109\/samos.2017.8344617","is_oa":false,"landing_page_url":"https:\/\/doi.org\/10.1109\/samos.2017.8344617","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)","raw_type":"proceedings-article"},"type":"article","indexed_in":["crossref"],"open_access":{"is_oa":false,"oa_status":"closed","oa_url":null,"any_repository_has_fulltext":false},"authorships":[{"author_position":"first","author":{"id":"https:\/\/openalex.org\/A5078187915","display_name":"Kuo-You Peng","orcid":null},"institutions":[{"id":"https:\/\/openalex.org\/I16733864","display_name":"National Taiwan University","ror":"https:\/\/ror.org\/05bqach95","country_code":"TW","type":"education","lineage":["https:\/\/openalex.org\/I16733864"]}],"countries":["TW"],"is_corresponding":true,"raw_author_name":"Kuo-You Peng","raw_affiliation_strings":["National Taiwan University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https:\/\/openalex.org\/I16733864"]}]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5031659418","display_name":"Sheng\u2010Yu Fu","orcid":null},"institutions":[{"id":"https:\/\/openalex.org\/I16733864","display_name":"National Taiwan University","ror":"https:\/\/ror.org\/05bqach95","country_code":"TW","type":"education","lineage":["https:\/\/openalex.org\/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Sheng-Yu Fu","raw_affiliation_strings":["National Taiwan University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https:\/\/openalex.org\/I16733864"]}]},{"author_position":"middle","author":{"id":"https:\/\/openalex.org\/A5101965293","display_name":"Liu Yu-ping","orcid":"https:\/\/orcid.org\/0000-0001-5923-6868"},"institutions":[{"id":"https:\/\/openalex.org\/I16733864","display_name":"National Taiwan University","ror":"https:\/\/ror.org\/05bqach95","country_code":"TW","type":"education","lineage":["https:\/\/openalex.org\/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Yu-Ping Liu","raw_affiliation_strings":["National Taiwan University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https:\/\/openalex.org\/I16733864"]}]},{"author_position":"last","author":{"id":"https:\/\/openalex.org\/A5017207899","display_name":"Wei\u2010Chung Hsu","orcid":"https:\/\/orcid.org\/0000-0002-0833-7981"},"institutions":[{"id":"https:\/\/openalex.org\/I16733864","display_name":"National Taiwan University","ror":"https:\/\/ror.org\/05bqach95","country_code":"TW","type":"education","lineage":["https:\/\/openalex.org\/I16733864"]}],"countries":["TW"],"is_corresponding":false,"raw_author_name":"Wei-Chung Hsu","raw_affiliation_strings":["National Taiwan University, Taiwan"],"affiliations":[{"raw_affiliation_string":"National Taiwan University, Taiwan","institution_ids":["https:\/\/openalex.org\/I16733864"]}]}],"institutions":[],"countries_distinct_count":1,"institutions_distinct_count":4,"corresponding_author_ids":["https:\/\/openalex.org\/A5078187915"],"corresponding_institution_ids":["https:\/\/openalex.org\/I16733864"],"apc_list":null,"apc_paid":null,"fwci":0.3641,"has_fulltext":false,"cited_by_count":7,"citation_normalized_percentile":{"value":0.70766492,"is_in_top_1_percent":false,"is_in_top_10_percent":false},"cited_by_percentile_year":{"min":89,"max":97},"biblio":{"volume":"30","issue":null,"first_page":"105","last_page":"112"},"is_retracted":false,"is_paratext":false,"is_xpac":false,"primary_topic":{"id":"https:\/\/openalex.org\/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},"topics":[{"id":"https:\/\/openalex.org\/T10036","display_name":"Advanced Neural Network Applications","score":0.9994000196456909,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1707","display_name":"Computer Vision and Pattern Recognition"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T10054","display_name":"Parallel Computing and Optimization Techniques","score":0.9984999895095825,"subfield":{"id":"https:\/\/openalex.org\/subfields\/1708","display_name":"Hardware and Architecture"},"field":{"id":"https:\/\/openalex.org\/fields\/17","display_name":"Computer Science"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}},{"id":"https:\/\/openalex.org\/T12303","display_name":"Tensor decomposition and applications","score":0.9965000152587891,"subfield":{"id":"https:\/\/openalex.org\/subfields\/2605","display_name":"Computational Mathematics"},"field":{"id":"https:\/\/openalex.org\/fields\/26","display_name":"Mathematics"},"domain":{"id":"https:\/\/openalex.org\/domains\/3","display_name":"Physical Sciences"}}],"keywords":[{"id":"https:\/\/openalex.org\/keywords\/computer-science","display_name":"Computer science","score":0.8835570812225342},{"id":"https:\/\/openalex.org\/keywords\/cache","display_name":"Cache","score":0.725741446018219},{"id":"https:\/\/openalex.org\/keywords\/speedup","display_name":"Speedup","score":0.6446459889411926},{"id":"https:\/\/openalex.org\/keywords\/deep-learning","display_name":"Deep learning","score":0.5825826525688171},{"id":"https:\/\/openalex.org\/keywords\/computation","display_name":"Computation","score":0.5763955116271973},{"id":"https:\/\/openalex.org\/keywords\/pruning","display_name":"Pruning","score":0.563534677028656},{"id":"https:\/\/openalex.org\/keywords\/parallel-computing","display_name":"Parallel computing","score":0.5577526092529297},{"id":"https:\/\/openalex.org\/keywords\/partition","display_name":"Partition (number theory)","score":0.5340344905853271},{"id":"https:\/\/openalex.org\/keywords\/artificial-neural-network","display_name":"Artificial neural network","score":0.44852936267852783},{"id":"https:\/\/openalex.org\/keywords\/in-memory-processing","display_name":"In-Memory Processing","score":0.43615972995758057},{"id":"https:\/\/openalex.org\/keywords\/cache-algorithms","display_name":"Cache algorithms","score":0.43437460064888},{"id":"https:\/\/openalex.org\/keywords\/cpu-cache","display_name":"CPU cache","score":0.4264552593231201},{"id":"https:\/\/openalex.org\/keywords\/artificial-intelligence","display_name":"Artificial intelligence","score":0.37283986806869507},{"id":"https:\/\/openalex.org\/keywords\/algorithm","display_name":"Algorithm","score":0.13691866397857666},{"id":"https:\/\/openalex.org\/keywords\/search-engine","display_name":"Search engine","score":0.09721949696540833}],"concepts":[{"id":"https:\/\/openalex.org\/C41008148","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q21198","display_name":"Computer science","level":0,"score":0.8835570812225342},{"id":"https:\/\/openalex.org\/C115537543","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q165596","display_name":"Cache","level":2,"score":0.725741446018219},{"id":"https:\/\/openalex.org\/C68339613","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1549489","display_name":"Speedup","level":2,"score":0.6446459889411926},{"id":"https:\/\/openalex.org\/C108583219","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q197536","display_name":"Deep learning","level":2,"score":0.5825826525688171},{"id":"https:\/\/openalex.org\/C45374587","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q12525525","display_name":"Computation","level":2,"score":0.5763955116271973},{"id":"https:\/\/openalex.org\/C108010975","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q500094","display_name":"Pruning","level":2,"score":0.563534677028656},{"id":"https:\/\/openalex.org\/C173608175","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q232661","display_name":"Parallel computing","level":1,"score":0.5577526092529297},{"id":"https:\/\/openalex.org\/C42812","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q1082910","display_name":"Partition (number theory)","level":2,"score":0.5340344905853271},{"id":"https:\/\/openalex.org\/C50644808","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q192776","display_name":"Artificial neural network","level":2,"score":0.44852936267852783},{"id":"https:\/\/openalex.org\/C123593499","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q6008583","display_name":"In-Memory Processing","level":5,"score":0.43615972995758057},{"id":"https:\/\/openalex.org\/C38556500","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q13404475","display_name":"Cache algorithms","level":4,"score":0.43437460064888},{"id":"https:\/\/openalex.org\/C189783530","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q352090","display_name":"CPU cache","level":3,"score":0.4264552593231201},{"id":"https:\/\/openalex.org\/C154945302","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q11660","display_name":"Artificial intelligence","level":1,"score":0.37283986806869507},{"id":"https:\/\/openalex.org\/C11413529","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q8366","display_name":"Algorithm","level":1,"score":0.13691866397857666},{"id":"https:\/\/openalex.org\/C97854310","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q19541","display_name":"Search engine","level":2,"score":0.09721949696540833},{"id":"https:\/\/openalex.org\/C194222762","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q114486","display_name":"Query by Example","level":4,"score":0},{"id":"https:\/\/openalex.org\/C114614502","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q76592","display_name":"Combinatorics","level":1,"score":0},{"id":"https:\/\/openalex.org\/C164120249","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q995982","display_name":"Web search query","level":3,"score":0},{"id":"https:\/\/openalex.org\/C6557445","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q173113","display_name":"Agronomy","level":1,"score":0},{"id":"https:\/\/openalex.org\/C86803240","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q420","display_name":"Biology","level":0,"score":0},{"id":"https:\/\/openalex.org\/C23123220","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q816826","display_name":"Information retrieval","level":1,"score":0},{"id":"https:\/\/openalex.org\/C33923547","wikidata":"https:\/\/www.wikidata.org\/wiki\/Q395","display_name":"Mathematics","level":0,"score":0}],"mesh":[],"locations_count":1,"locations":[{"id":"doi:10.1109\/samos.2017.8344617","is_oa":false,"landing_page_url":"https:\/\/doi.org\/10.1109\/samos.2017.8344617","pdf_url":null,"source":null,"license":null,"license_id":null,"version":"publishedVersion","is_accepted":true,"is_published":true,"raw_source_name":"2017 International Conference on Embedded Computer Systems: Architectures, Modeling, and Simulation (SAMOS)","raw_type":"proceedings-article"}],"best_oa_location":null,"sustainable_development_goals":[],"awards":[],"funders":[{"id":"https:\/\/openalex.org\/F4320322795","display_name":"Ministry of Science and Technology, Taiwan","ror":"https:\/\/ror.org\/02kv4zf79"}],"has_content":{"grobid_xml":false,"pdf":false},"content_urls":null,"referenced_works_count":18,"referenced_works":["https:\/\/openalex.org\/W780656674","https:\/\/openalex.org\/W1935978687","https:\/\/openalex.org\/W2025768430","https:\/\/openalex.org\/W2025890876","https:\/\/openalex.org\/W2112796928","https:\/\/openalex.org\/W2163605009","https:\/\/openalex.org\/W2257979135","https:\/\/openalex.org\/W2271840356","https:\/\/openalex.org\/W2285660444","https:\/\/openalex.org\/W2289252105","https:\/\/openalex.org\/W2513554817","https:\/\/openalex.org\/W2557283755","https:\/\/openalex.org\/W2963000224","https:\/\/openalex.org\/W2963674932","https:\/\/openalex.org\/W6640289440","https:\/\/openalex.org\/W6684191040","https:\/\/openalex.org\/W6694517276","https:\/\/openalex.org\/W6725543821"],"related_works":["https:\/\/openalex.org\/W2350686196","https:\/\/openalex.org\/W2061075966","https:\/\/openalex.org\/W2147122795","https:\/\/openalex.org\/W3147501184","https:\/\/openalex.org\/W2167303720","https:\/\/openalex.org\/W2046128376","https:\/\/openalex.org\/W2268996566","https:\/\/openalex.org\/W2363672756","https:\/\/openalex.org\/W2109715593","https:\/\/openalex.org\/W3161817247"],"abstract_inverted_index":{"Deep":[0],"neural":[1],"networks":[2],"have":[3,32],"been":[4,34],"widely":[5],"applied":[6],"in":[7,36,46,101],"many":[8],"areas,":[9],"such":[10],"as":[11],"computer":[12],"vision,":[13],"natural":[14],"language":[15],"processing":[16],"and":[17,26,53,78,88,91,104,128,144,151],"information":[18],"retrieval.":[19],"However,":[20],"due":[21],"to":[22,48,82,85,181],"the":[23,44,50,70,80,123,134,137,148,152,184,187],"high":[24],"computation":[25,51,87,124],"memory":[27,54,89,158],"demands,":[28],"deep":[29],"learning":[30,138],"applications":[31],"not":[33],"adopted":[35],"edge":[37],"learning.":[38],"In":[39],"this":[40],"paper,":[41],"we":[42,75],"exploit":[43],"sparsity":[45],"tensors":[47],"reduce":[49,86],"overheads":[52],"demands.":[55],"Unlike":[56],"other":[57],"approaches":[58],"which":[59,111],"rely":[60],"on":[61,106],"hardware":[62],"accelerator":[63],"designs":[64],"or":[65],"sacrifice":[66],"model":[67,150],"accuracy":[68,135],"for":[69,147],"performance":[71],"by":[72],"pruning":[73],"parameters,":[74],"adaptively":[76],"partition":[77],"deploy":[79],"workload":[81],"heterogeneous":[83,115],"devices":[84],"requirements":[90],"increase":[92],"computing":[93,116],"efficiency.":[94],"We":[95],"had":[96],"implemented":[97],"our":[98,160],"partitioning":[99],"algorithms":[100],"Google's":[102],"TensorFlow":[103],"evaluated":[105],"an":[107,113],"AMD":[108],"Kaveri":[109],"system,":[110],"is":[112],"HSA-based":[114],"system.":[117],"Our":[118,140,171],"method":[119],"has":[120,173],"effectively":[121],"reduced":[122],"time,":[125],"cache":[126,129,165,169,176],"accesses,":[127],"miss":[130,177],"rates,":[131],"without":[132],"impacting":[133],"of":[136,186],"models.":[139],"approach":[141,161],"achieves":[142],"66%":[143],"88%":[145],"speedup":[146],"lenet-5":[149],"lenet-1024-1024":[153,188],"model,":[154],"respectively.":[155],"For":[156],"reducing":[157],"traffic,":[159],"reduces":[162],"71%":[163],"instruction":[164],"references,":[166],"32%":[167],"data":[168],"references.":[170],"system":[172],"also":[174],"improved":[175],"rate":[178],"from":[179],"1.6%":[180],"0.5%":[182],"during":[183],"training":[185],"model.":[189]},"counts_by_year":[{"year":2023,"cited_by_count":1},{"year":2021,"cited_by_count":2},{"year":2020,"cited_by_count":3},{"year":2019,"cited_by_count":1}],"updated_date":"2025-11-06T03:46:38.306776","created_date":"2025-10-10T00:00:00"}