% crandall:iochar -- Supercomputing '95 paper characterizing the I/O behavior
% of three scalable parallel applications (see abstract/comment fields below).
% The `doi` field carries the same identifier that is embedded in `url`.
@InProceedings{crandall:iochar,
  author    = {Crandall, Phyllis E. and Aydt, Ruth A. and Chien, Andrew A.
               and Reed, Daniel A.},
  title     = {Input/Output Characteristics of Scalable Parallel Applications},
  booktitle = {Proceedings of Supercomputing '95},
  year      = {1995},
  month     = dec,
  publisher = {IEEE Computer Society Press},
  address   = {San Diego, CA},
  doi       = {10.1145/224170.224396},
  url       = {http://doi.acm.org/10.1145/224170.224396},
  keywords  = {file access pattern, file system workload, workload
               characterization, parallel I/O, pario-bib},
  abstract  = {Rapid increases in computing and communication performance are
               exacerbating the long-standing problem of performance-limited
               input/output. Indeed, for many otherwise scalable parallel
               applications, input/output is emerging as a major performance
               bottleneck. The design of scalable input/output systems depends
               critically on the input/output requirements and access patterns
               for this emerging class of large-scale parallel applications.
               However, hard data on the behavior of such applications is only
               now becoming available. In this paper, we describe the
               input/output requirements of three scalable parallel
               applications (electron scattering, terrain rendering, and
               quantum chemistry) on the Intel Paragon XP/S. As part of an
               ongoing parallel input/output characterization effort, we used
               instrumented versions of the application codes to capture and
               analyze input/output volume, request size distributions, and
               temporal request structure. Because complete traces of
               individual application input/output requests were captured,
               in-depth, off-line analyses were possible. In addition, we
               conducted informal interviews of the application developers to
               understand the relation between the codes' current and desired
               input/output structure.
               The results of our studies show a wide variety of temporal and
               spatial access patterns, including highly read-intensive and
               write-intensive phases, extremely large and extremely small
               request sizes, and both sequential and highly irregular access
               patterns. We conclude with a discussion of the broad spectrum
               of access patterns and their profound implications for parallel
               file caching and prefetching schemes.},
  comment   = {They use the Pablo instrumentation and analysis tools to
               instrument three scalable applications that use heavy I/O:
               electron scattering, terrain rendering, and quantum chemistry.
               They look at the volume of data moved, the timing of I/O, and
               the periodic nature of I/O. They do a little bit with the
               access patterns of data within each file. They found a HUGE
               variation in request sizes, amount of I/O, number of files, and
               so forth. Their primary conclusion is thus that file systems
               should be adaptable to different access patterns, preferably
               under control of the application. Note proceedings only
               available on CD-ROM or WWW.}
}