@InProceedings{cho:local,
  author = {Cho, Yong and Winslett, Marianne and Subramaniam, Mahesh
  and Chen, Ying and Kuo, Szu-wen and Seamons, Kent E.},
  title = {Exploiting Local Data in Parallel Array {I/O} on a Practical Network
  of Workstations},
  booktitle = {Proceedings of the Fifth Workshop on Input/Output in Parallel
  and Distributed Systems},
  year = {1997},
  month = nov,
  pages = {1--13},
  publisher = {ACM Press},
  address = {San Jose, CA},
  doi = {10.1145/266220.266221},
  url = {http://doi.acm.org/10.1145/266220.266221},
  keywords = {parallel I/O, distributed system, pario-bib},
  abstract = {A cost-effective way to run a parallel application is to use
  existing workstations connected by a local area network such as Ethernet or
  FDDI. In this paper, we present an approach for parallel I/O of
  multidimensional arrays on small networks of workstations with a shared-media
  interconnect, using the Panda I/O library. \par In such an environment, the
  message passing throughput per node is lower than the throughput obtainable
  from a fast disk and it is not easy for users to determine the configuration
  which will yield the best I/O performance. \par We introduce an I/O strategy
  that exploits local data to reduce the amount of data that must be shipped
  across the network, present experimental results, and analyze the results
  using an analytical performance model and predict the best choice of I/O
  parameters. \par Our experiments show that the new strategy results in a
  factor of 1.2--2.1 speedup in response time compared to the Panda version
  originally developed for the IBM SP2, depending on the array sizes,
  distributions and compute and I/O node meshes. Further, the performance model
  predicts the results within a 13\% margin of error.},
  comment = {They examine a system that supports nodes that are both compute
  and I/O nodes. The assumption is that the application is writing data to a
  new file, and does not care to which disks the data goes. They are trying to
  decide which nodes should be used for I/O, given the distribution of data on
  compute nodes and the distribution desired across disks. They use a Hungarian
  algorithm to solve a weighted optimization problem on a bipartite graph
  connecting I/O nodes to compute nodes, in an attempt to minimize the data
  flow across the network. But there is no attempt to make a decision that
  might be sensible for a future read operation that may want to read in a
  different pattern.}
}