{"status":"ok","message-type":"work","message-version":"1.0.0","message":{"indexed":{"date-parts":[[2025,5,31]],"date-time":"2025-05-31T05:07:02Z","timestamp":1748668022847,"version":"3.28.0"},"reference-count":31,"publisher":"IEEE","content-domain":{"domain":[],"crossmark-restriction":false},"short-container-title":[],"published-print":{"date-parts":[[2015,5]]},"DOI":"10.1109\/ipdps.2015.76","type":"proceedings-article","created":{"date-parts":[[2015,7,22]],"date-time":"2015-07-22T16:11:10Z","timestamp":1437581470000},"page":"417-426","source":"Crossref","is-referenced-by-count":13,"title":["Performance Engineering of the Kernel Polynomal Method on Large-Scale CPU-GPU Systems"],"prefix":"10.1109","author":[{"given":"Moritz","family":"Kreutzer","sequence":"first","affiliation":[]},{"given":"Andreas","family":"Pieper","sequence":"additional","affiliation":[]},{"given":"Georg","family":"Hager","sequence":"additional","affiliation":[]},{"given":"Gerhard","family":"Wellein","sequence":"additional","affiliation":[]},{"given":"Andreas","family":"Alvermann","sequence":"additional","affiliation":[]},{"given":"Holger","family":"Fehske","sequence":"additional","affiliation":[]}],"member":"263","reference":[{"key":"ref31","doi-asserted-by":"publisher","DOI":"10.1177\/1094342004041296"},{"journal-title":"Kepler Tuning Guide NVIDIA","year":"0","key":"ref30"},{"journal-title":"GHOST General Hybrid and Optimized Sparse Toolkit","year":"0","key":"ref10"},{"key":"ref11","doi-asserted-by":"publisher","DOI":"10.1137\/1.9781611971538"},{"key":"ref12","doi-asserted-by":"publisher","DOI":"10.1145\/1654059.1654078"},{"key":"ref13","doi-asserted-by":"publisher","DOI":"10.1137\/130930352"},{"article-title":"Automatic performance tuning of sparse matrix kernels","year":"2003","author":"vuduc","key":"ref14"},{"key":"ref15","article-title":"Accelerating the LOBPCG method on GPU s using a blocked sparse matrix vector product","author":"anzt","year":"2014","journal-title":"University of Tennessee Innovative Computing Laboratory Technical Report UT-CS-14-731"},{"journal-title":"CUDA sparse matrix library (cuSPARSE) NVIDIA","year":"0","key":"ref16"},{"key":"ref17","doi-asserted-by":"publisher","DOI":"10.1109\/HiPC.2012.6507483"},{"key":"ref18","doi-asserted-by":"publisher","DOI":"10.1007\/s10766-012-0204-y"},{"key":"ref19","doi-asserted-by":"publisher","DOI":"10.1103\/RevModPhys.82.3045"},{"journal-title":"NVIDIA Kepler GK110 Architecture Whitepaper","year":"0","key":"ref28"},{"key":"ref4","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2012.14"},{"key":"ref27","doi-asserted-by":"publisher","DOI":"10.1109\/SC.2008.5214359"},{"key":"ref3","first-page":"233","article-title":"Towards realistic performance bounds for implicit CFD codes","author":"gropp","year":"1999","journal-title":"Proceedings of Parallel CFD"},{"key":"ref6","doi-asserted-by":"publisher","DOI":"10.1006\/jcph.1996.0048"},{"journal-title":"CUB NVIDIA Research","year":"0","key":"ref29"},{"key":"ref5","doi-asserted-by":"publisher","DOI":"10.1109\/IPDPS.2014.125"},{"article-title":"Efficient estimation of eigenvalue counts in an interval","year":"2013","author":"di napoli","key":"ref8"},{"key":"ref7","doi-asserted-by":"crossref","first-page":"275","DOI":"10.1103\/RevModPhys.78.275","article-title":"The kernel polynomial method","volume":"78","author":"wei\u00dfe","year":"2006","journal-title":"Rev Mod Phys"},{"journal-title":"MAGMA Matrix Algebra on GPU and Multicore Architectures","year":"0","key":"ref2"},{"key":"ref9","article-title":"Highly scalable linear time estimation of spectrograms-a tool for very large scale data analysis","author":"bhardwaj","year":"0","journal-title":"2013 poster at 2013 ACM\/IEEE International Conference on High Performance Computing Networking Storage and Analysis"},{"journal-title":"Top500 Supercomputer Sites","year":"2014","key":"ref1"},{"key":"ref20","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevB.85.201105"},{"key":"ref22","doi-asserted-by":"publisher","DOI":"10.1002\/pamm.201410391"},{"key":"ref21","doi-asserted-by":"publisher","DOI":"10.1103\/PhysRevB.89.165121"},{"journal-title":"NVIDIA Profiler NVIDIA","year":"0","key":"ref24"},{"key":"ref23","doi-asserted-by":"publisher","DOI":"10.1109\/ICPPW.2010.38"},{"key":"ref26","article-title":"Increasing the performance of the Jacobi-Davidson method by blocking","author":"r\u00f6hrig-z\u00f6llner","year":"0","journal-title":"2014 submitted to SIAM J Sci Comput"},{"key":"ref25","doi-asserted-by":"publisher","DOI":"10.1145\/1498765.1498785"}],"event":{"name":"2015 IEEE International Parallel and Distributed Processing Symposium (IPDPS)","start":{"date-parts":[[2015,5,25]]},"location":"Hyderabad, India","end":{"date-parts":[[2015,5,29]]}},"container-title":["2015 IEEE International Parallel and Distributed Processing Symposium"],"original-title":[],"link":[{"URL":"http:\/\/xplorestaging.ieee.org\/ielx7\/7159926\/7161257\/07161530.pdf?arnumber=7161530","content-type":"unspecified","content-version":"vor","intended-application":"similarity-checking"}],"deposited":{"date-parts":[[2017,6,23]],"date-time":"2017-06-23T12:12:17Z","timestamp":1498219937000},"score":1,"resource":{"primary":{"URL":"http:\/\/ieeexplore.ieee.org\/document\/7161530\/"}},"subtitle":[],"short-title":[],"issued":{"date-parts":[[2015,5]]},"references-count":31,"URL":"https:\/\/doi.org\/10.1109\/ipdps.2015.76","relation":{},"subject":[],"published":{"date-parts":[[2015,5]]}}}