Keywords (tags) and Publication List
Hayes, Ari B; Hua, Fei; Huang, Jin; Chen, Yanhao; Zhang, Eddy Z: Decoding CUDA Binary. Conference paper, in: Proceedings of the 2019 IEEE/ACM International Symposium on Code Generation and Optimization (CGO 2019), IEEE Press, Washington, DC, USA, 2019, ISBN: 9781728114361. Abstract | BibTeX | Tags: Code generation, Code translation and transformation, CUDA, GPU, Instruction set architecture (ISA)
Shen, Xipeng; Liu, Yixun; Zhang, Eddy Z; Bhamidipati, Poornima: An Infrastructure for Tackling Input-Sensitivity of GPU Program Optimizations. Journal article, in: Int. J. Parallel Program., 41 (6), pp. 855–869, 2013, ISSN: 0885-7458. Abstract | Links | BibTeX | Tags: Cross-input adaptation, CUDA, Empirical search, G-ADAPT, GPU, Program optimizations
2019
@inproceedings{hayes2019decoding,
  title     = {Decoding {CUDA} Binary},
  author    = {Hayes, Ari B. and Hua, Fei and Huang, Jin and Chen, Yanhao and Zhang, Eddy Z.},
  booktitle = {Proceedings of the 2019 {IEEE/ACM} International Symposium on Code Generation and Optimization ({CGO} 2019)},
  year      = {2019},
  pages     = {229--241},
  publisher = {IEEE Press},
  address   = {Washington, DC, USA},
  isbn      = {9781728114361},
  abstract  = {NVIDIA's software does not offer translation of assembly code to binary for their GPUs, since the specifications are closed-source. This work fills that gap. We develop a systematic method of decoding the Instruction Set Architectures (ISAs) of NVIDIA's GPUs, and generating assemblers for different generations of GPUs. Our framework enables cross-architecture binary analysis and transformation. Making the ISA accessible in this manner opens up a world of opportunities for developers and researchers, enabling numerous optimizations and explorations that are unachievable at the source-code level. Our infrastructure has already benefited and been adopted in important applications including performance tuning, binary instrumentation, resource allocation, and memory protection.},
  keywords  = {Code generation, Code translation and transformation, CUDA, GPU, Instruction set architecture (ISA)},
  pubstate  = {published},
  tppubtype = {conference},
}
2013
@article{shen2013infrastructure,
  title     = {An Infrastructure for Tackling Input-Sensitivity of {GPU} Program Optimizations},
  author    = {Shen, Xipeng and Liu, Yixun and Zhang, Eddy Z. and Bhamidipati, Poornima},
  journal   = {International Journal of Parallel Programming},
  volume    = {41},
  number    = {6},
  pages     = {855--869},
  year      = {2013},
  publisher = {Kluwer Academic Publishers},
  address   = {USA},
  doi       = {10.1007/s10766-012-0236-3},
  issn      = {0885-7458},
  abstract  = {Graphic processing units (GPU) have become increasingly adopted for the enhancement of computing throughput. However, the development of a high-quality GPU application is challenging, due to the large optimization space and complex unpredictable effects of optimizations on GPU program performance. Many recent efforts have been employing empirical search-based auto-tuners to tackle the problem, but few of them have concentrated on the influence of program inputs on the optimizations. In this paper, based on a set of CUDA and OpenCL kernels, we report some evidences on the importance for auto-tuners to adapt to program input changes, and present a framework, G-ADAPT+, to address the influence by constructing cross-input predictive models for automatically predicting the (near-)optimal configurations for an arbitrary input to a GPU program. G-ADAPT+ is based on source-to-source compilers, specifically, Cetus and ROSE. It supports the optimizations of both CUDA and OpenCL programs.},
  keywords  = {Cross-input adaptation, CUDA, Empirical search, G-ADAPT, GPU, Program optimizations},
  pubstate  = {published},
  tppubtype = {article},
}