% ap:thesis -- Purakayastha's 1996 Duke PhD thesis: CM-5 file-system workload study plus the ENWRICH write-caching scheme.
@PhdThesis{ap:thesis,
  author = {Purakayastha, Apratim},
  title = {Characterizing and Optimizing Parallel File Systems},
  year = {1996},
  month = jun,
  school = {Dept. of Computer Science, Duke University},
  address = {Durham, NC},
  note = {Also available as technical report CS-1996-10},
  URL = {ftp://ftp.cs.duke.edu/dist/techreport/1996/1996-10.ps.gz},
  keywords = {parallel I/O, multiprocessor file system, file access patterns,
  workload characterization, file caching, disk-directed I/O, pario-bib},
  abstract = {High-performance parallel file systems are needed to satisfy
  tremendous I/O requirements of parallel scientific applications. The design
  of such parallel file systems depends on a comprehensive understanding of the
  expected workload, but so far there have been very few usage studies of
  multiprocessor file systems. In the first part of this dissertation, we
  attempt to fill this void by measuring a real file-system workload on a
  production parallel machine, namely the CM-5 at the National Center for
  Supercomputing Applications. We collect information about nearly every
  individual I/O request from the mix of jobs running on the machine. Analysis
  of the traces leads to various recommendations for design of future parallel
  file systems. Our usage study showed that writes to write-only files are a
  dominant part of the workload. Therefore, optimizing writes could have a
  significant impact on overall performance. In the second part of this
  dissertation, we propose ENWRICH, a compute-processor write-caching scheme
  for write-only files in parallel file systems. Within its framework, ENWRICH
  uses a recently proposed high performance implementation of collective I/O
  operations called disk-directed I/O, but it eliminates a number of
  limitations of disk-directed I/O. ENWRICH combines low-overhead write caching
  at the compute processors with high performance disk-directed I/O at the I/O
  processors to achieve both low latency and high bandwidth. This combination
  facilitates the use of the powerful disk-directed I/O technique independent
  of any particular choice of interface, and without the requirement for
  mapping libraries at the I/O processors. By collecting writes over many files
  and applications, ENWRICH lets the I/O processors optimize disk I/O over a
  large pool of requests. We evaluate our design of ENWRICH using simulated
  implementation and extensive experimentation. We show that ENWRICH achieves
  high performance for various configurations and workloads. We pinpoint the
  reasons for ENWRICH's failure to perform well for certain workloads, and
  suggest possible enhancements. Finally, we discuss the nuances of
  implementing ENWRICH on a real platform and speculate about possible
  adaptations of ENWRICH for emerging multiprocessing platforms.},
  comment = {See also ap:enwrich, ap:workload, and nieuwejaar:workload}
}