% Workshop paper (1997) describing the RIO remote I/O library.
% The `abstract` field carries the paper's abstract; the `comment` field
% appears to be the bibliographer's own annotation (written in third person).
@InProceedings{foster:remote-io,
  author = {Ian Foster and Kohr, Jr., David and Rakesh Krishnaiyer and Jace
  Mogill},
  title = {Remote {I/O}: Fast Access to Distant Storage},
  booktitle = {Proceedings of the Fifth Workshop on Input/Output in Parallel
  and Distributed Systems},
  year = {1997},
  month = nov,
  pages = {14--25},
  publisher = {ACM Press},
  address = {San Jose, CA},
  URL = {http://doi.acm.org/10.1145/266220.266222},
  doi = {10.1145/266220.266222},
  keywords = {parallel I/O, distributed file system, pario-bib},
  abstract = {As high-speed networks make it easier to use distributed
  resources, it becomes increasingly common that applications and their data
  are not colocated. Users have traditionally addressed this problem by
  manually staging data to and from remote computers. We argue instead for a
  remote I/O paradigm in which programs use familiar parallel I/O interfaces to
  access remote filesystems. In addition to simplifying remote execution,
  remote I/O can improve performance relative to staging by overlapping
  computation and data transfer or by reducing communication requirements.
  However, remote I/O also introduces new technical challenges in the areas of
  portability, performance, and integration with distributed computing systems.
  We propose techniques designed to address these challenges and describe a
  remote I/O library called RIO that we are developing to evaluate the
  effectiveness of these techniques. RIO addresses issues of portability by
  adopting the quasi-standard MPI-IO interface and by defining a RIO device and
  RIO server within the ADIO abstract I/O device architecture. It addresses
  performance issues by providing traditional I/O optimizations such as
  asynchronous operations and through implementation techniques such as
  buffering and message forwarding to offload communication overheads.
  Microbenchmarks and application experiments demonstrate that our techniques
  can improve turnaround time relative to staging.},
  comment = {They want to support users that have datasets at different
  locations in the Internet, but need to access the data at supercomputer
  parallel machines. Rather than staging data in and out, they want to provide
  remote access. Issues: naming, dynamic loads, heterogeneity, security,
  fault-tolerance. All traffic goes through a 'forwarder node' that funnels all
  the traffic into the network. They use URLs for pathnames (e.g.,
  "x-rio://..."). They find that non-blocking ops are important, as is
  collective I/O. They think that buffering will be important. Limited
  experiments.}
}