@InProceedings{wang:workload,
  author    = {Feng Wang and Qin Xin and Bo Hong and Scott A. Brandt and Ethan L. Miller and Darrell D. E. Long and Tyce T. McLarty},
  title     = {File system workload analysis for large scale scientific computing applications},
  booktitle = {Proceedings of the Twentieth IEEE/Eleventh NASA Goddard Conference on Mass Storage Systems and Technologies},
  year      = {2004},
  month     = {April},
  publisher = {IEEE Computer Society Press},
  address   = {College Park, MD},
  URL       = {http://ssrc.cse.ucsc.edu/Papers/wang-mss04.pdf},
  keywords  = {file system workload, workload characterization, ASCI, lustre, scientific applications, pario-app, pario-bib},
  abstract  = {Parallel scientific applications require high-performance I/O support from underlying file systems. A comprehensive understanding of the expected workload is therefore essential for the design of high-performance parallel file systems. We re-examine the workload characteristics in parallel computing environments in light of recent technology advances and new applications. \par We analyze application traces from a cluster with hundreds of nodes. On average, each application has only one or two typical request sizes. Large requests, from several hundred kilobytes to several megabytes, are very common. Although in some applications small requests account for more than 90\% of all requests, almost all of the I/O data are transferred by large requests. All of these applications show bursty access patterns. More than 65\% of write requests have inter-arrival times within one millisecond in most applications. By running the same benchmark on different file models, we also find that the write throughput of using an individual output file for each node exceeds that of using a shared file for all nodes by a factor of 5. This indicates that current file systems are not well optimized for file sharing.},
  comment   = {An I/O workload study of three applications on a 960-node (dual-processor) cluster at LLNL running the lustre-light parallel file system. The applications include an I/O benchmarking code (ior2) and two physics simulations: one that ran on 343 processors and one that ran on 1620 processors.}
}