% Poole (1994), Scalable I/O Initiative technical report CCSF-38: survey of I/O-intensive applications.
@techreport{poole:sio-survey,
  author      = {Poole, James T.},
  title       = {Preliminary Survey of {I/O} Intensive Applications},
  institution = {Scalable I/O Initiative},
  number      = {CCSF-38},
  year        = {1994},
  address     = {Caltech Concurrent Supercomputing Facilities, Caltech},
  url         = {http://www.cacr.caltech.edu/SIO/pubs/SIO_apps.ps},
  keywords    = {parallel I/O, pario-bib, multiprocessor file system, file access pattern, checkpoint},
  comment     = {Goal is to collect a set of representative applications
                 from biology, chemistry, earth science, engineering,
                 graphics, and physics, use performance-monitoring tools to
                 analyze them, create templates and benchmarks that
                 represent them, and then later to evaluate the performance
                 of new I/O tools created by rest of the SIO initiative.
                 Seem to be four categories of I/O needs: input, output,
                 checkpoint, and virtual memory (``out-of-core'' scratch
                 space). Not all types are significant in all applications.
                 (Two groups mention databases and the need to perform
                 computationally complex queries.) Large input is typically
                 raw data (seismic soundings, astronomical observations,
                 satellite remote sensing, weather information). Sometimes
                 there are real-time constraints. Output is often periodic,
                 e.g., the state of the system every few timesteps;
                 typically the volume would increase along with I/O
                 capacity and bandwidth. Checkpointing is a common request;
                 preferably allowing application to choose what and when to
                 checkpoint, and definitely including the state of files.
                 Many kinds of out-of-core: 1) temp files between passes
                 (often written and read sequentially), 2) regular patterns
                 like FFT, matrix transpose, solvers, and single-pass
                 read/compute/write, 3) random access, e.g., to precomputed
                 tables of integrals. Distinct differences in the ways
                 people choose to divide data into files; sometimes all in
                 one huge file, sometimes many ``small'' files (e.g., one
                 per processor, one per timestep, one per region, etc.).
                 Important: overlap of computation and I/O, independent
                 access by individual processors. Not always important:
                 ordering of records read or written by different
                 processors, exposing the I/O model to the application
                 writer. Units of I/O seem to be either (sub)matrices
                 (1--5 dimensions) or items in a collection of objects
                 (100--10000 bytes each). Data sets varied up to 1~TB;
                 bandwidth needs varied up to 1~GB/s. See also
                 bagrodia:sio-character, choudhary:sio-language,
                 bershad:sio-os.},
}