@InProceedings{tierney:cache,
  author    = {Brian L. Tierney and Jason Lee and Brian Crowley and Mason Holding and Jeremy Hylton and Fred L. {Drake, Jr.}},
  title     = {A Network-Aware Distributed Storage Cache for Data-Intensive Environments},
  booktitle = {Proceedings of the Eighth IEEE International Symposium on High Performance Distributed Computing},
  year      = {1999},
  month     = {August},
  pages     = {185--193},
  publisher = {IEEE Computer Society Press},
  address   = {Redondo Beach, CA},
  URL       = {http://computer.org/conferen/proceed/hpdc/0287/02870033abs.htm},
  keywords  = {distributed cache, distributed computing, grid, input/output, network-aware, parallel I/O, pario-bib},
  abstract  = {Modern scientific computing involves organizing, moving, visualizing, and analyzing massive amounts of data at multiple sites around the world. The technologies, the middleware services, and the architectures that are used to build useful high-speed, wide area distributed systems, constitute the field of data intensive computing. In this paper we will describe an architecture for data intensive applications where we use a high-speed distributed data cache as a common element for all of the sources and sinks of data. This cache-based approach provides standard interfaces to a large, application-oriented, distributed, on-line, transient storage system. We describe our implementation of this cache, how we have made it "network aware," and how we do dynamic load balancing based on the current network conditions. We also show large increases in application throughput by access to knowledge of the network conditions.},
  comment   = {They discuss their implementation of a "network-aware" data cache (the Distributed Parallel Storage System, DPSS) that adapts to changing network conditions. The system itself looks much like the Galley File System. The client library is multi-threaded, with a client thread for each DPSS server. A DPSS server is composed of a block request thread, a block writer thread, a shared disk cache, and a reader thread for each disk. Requested blocks move from the disks into the shared cache. A DPSS master directs client requests to an appropriate DPSS server. They use Java agents to monitor network performance and use data replication for load balancing. A minimum-cost flow algorithm is run each time a client request arrives to determine the best place to retrieve the data block. They argue that since the algorithm is fast (< 1 ms), its overhead is not significant.}
}