@InProceedings{baylor:vulcan-perf,
  author = {Sandra Johnson Baylor and Caroline Benveniste and Yarsun Hsu},
  title = {Performance Evaluation of a Massively Parallel {I/O} Subsystem},
  booktitle = {Proceedings of the IPPS~'94 Workshop on Input/Output in Parallel Computer Systems},
  year = {1994},
  pages = {1--15},
  organization = {IBM Watson Research Center},
  note = {Also appeared in Computer Architecture News 22(4)},
  later = {baylor:vulcan-perf-book},
  keywords = {parallel I/O, parallel architecture, performance analysis, pario-bib},
  comment = {See the polished version baylor:vulcan-perf-book. Simulation of the I/O architecture of the Vulcan MPP at IBM T.J. Watson, a distributed-memory MIMD system with a bidirectional omega-type interconnection network and separate compute and I/O nodes. They use a stochastic workload to evaluate average I/O-node performance in a few situations, then feed that average performance, again driven by a stochastic workload, into a detailed simulation of the interconnection network. (What would be the effect of adding variance to the I/O-node performance?) A key point is that an I/O node accepts no further requests until the current write request has finished processing, that is, been copied into the write-back cache. With many writes this can back up the network (would a different write-request protocol help?). It is not clear how read concurrency is modeled. Results show that the network saturates for high request rates and small numbers of I/O nodes; as the request rate decreases or the number of I/O nodes increases, performance levels off to a reasonable value. Placement of the I/O nodes made little difference, and neither did extra non-I/O traffic. Given their parameters, and for reasonable loads, one I/O node per four compute nodes was a reasonable balance, and was scalable.}
}
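
The blocking write protocol noted in the comment (an I/O node refuses new requests until the current write is copied into its write-back cache) can be illustrated with a minimal discrete-event sketch. This is not the paper's simulator; all parameters (NUM_IO_NODES, COPY_TIME, REQ_PROB, CYCLES) are assumed values chosen only to show how queues back up once the aggregate request rate exceeds the I/O nodes' service capacity.

import random

# Minimal sketch, not the paper's model: compute nodes issue write
# requests at a fixed per-cycle probability, and each I/O node accepts
# its next request only after the current write has finished copying
# into its write-back cache. All parameters below are assumptions.

random.seed(0)

NUM_IO_NODES = 4       # assumed; the paper varies this
COPY_TIME = 10         # cycles to copy one write into the cache (assumed)
REQ_PROB = 0.5         # per-cycle chance a new write arrives (assumed)
CYCLES = 100_000

busy_until = [0] * NUM_IO_NODES   # cycle at which each node frees up
queue = [0] * NUM_IO_NODES        # requests backed up per node
served = 0

for cycle in range(CYCLES):
    # A new write request is routed to a random I/O node.
    if random.random() < REQ_PROB:
        queue[random.randrange(NUM_IO_NODES)] += 1
    for n in range(NUM_IO_NODES):
        # Blocking protocol: the node starts its next request only
        # once the previous copy into the write-back cache is done.
        if queue[n] and cycle >= busy_until[n]:
            queue[n] -= 1
            busy_until[n] = cycle + COPY_TIME
            served += 1

print(f"served {served}, still queued {sum(queue)}")

With these numbers the aggregate service capacity is 4/10 = 0.4 requests per cycle against an arrival rate of 0.5, so the queues grow without bound, a crude analogue of the saturation the paper reports; raising NUM_IO_NODES to 8 pushes capacity above the arrival rate and the queues stay short, loosely mirroring the observation that performance levels off as I/O nodes are added.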