@InProceedings{wang:workload,
  author    = {Feng Wang and Qin Xin and Bo Hong and Scott A. Brandt and Ethan L. Miller and Darrell D. E. Long and Tyce T. McLarty},
  title     = {File system workload analysis for large scale scientific computing applications},
  booktitle = {Proceedings of the Twentieth IEEE/Eleventh NASA Goddard Conference on Mass Storage Systems and Technologies},
  year      = {2004},
  month     = {April},
  publisher = {IEEE Computer Society Press},
  address   = {College Park, MD},
  URL       = {http://ssrc.cse.ucsc.edu/Papers/wang-mss04.pdf},
  keywords  = {file system workload, workload characterization, ASCI, lustre, scientific applications, pario-app, pario-bib},
  abstract  = {Parallel scientific applications require high-performance I/O support from underlying file systems. A comprehensive understanding of the expected workload is therefore essential for the design of high-performance parallel file systems. We re-examine the workload characteristics in parallel computing environments in light of recent technology advances and new applications. \par We analyze application traces from a cluster with hundreds of nodes. On average, each application has only one or two typical request sizes. Large requests, from several hundred kilobytes to several megabytes, are very common. Although in some applications small requests account for more than 90\% of all requests, almost all of the I/O data are transferred by large requests. All of these applications show bursty access patterns. More than 65\% of write requests have inter-arrival times within one millisecond in most applications. By running the same benchmark on different file models, we also find that the write throughput of using an individual output file for each node exceeds that of using a shared file for all nodes by a factor of 5. This indicates that current file systems are not well optimized for file sharing.},
  comment   = {An I/O workload study of three applications on a 960-node (dual-processor) cluster at LLNL running the lustre-light parallel file system. The applications include an I/O benchmarking code (ior2) and two physics simulations: one that ran on 343 processors and one that ran on 1620 processors.}
}