% choudhary:management -- conference paper describing a data management
% environment built around an active meta-data management system (MDMS).
% The non-standard `later` field presumably names a follow-up entry for the
% same work (verify that choudhary:jmanagement exists in this database).
% NOTE(review): the phrase "information about the acc data" near the end of
% the comment field looks truncated; confirm against the original annotation.
@InProceedings{choudhary:management,
  author = {Choudhary, A. and Kandemir, M. and Nagesh, H. and No, J. and
  Shen, X. and Taylor, V. and More, S. and Thakur, R.},
  title = {Data Management for Large-Scale Scientific Computations in High
  Performance Distributed Systems},
  booktitle = {Proceedings of the Eighth {IEEE} International Symposium on
  High Performance Distributed Computing},
  year = {1999},
  month = aug,
  pages = {263--272},
  publisher = {IEEE Computer Society Press},
  address = {Redondo Beach, CA},
  later = {choudhary:jmanagement},
  URL = {http://computer.org/conferen/proceed/hpdc/0287/02870042abs.htm},
  keywords = {cluster computing, scientific computing, parallel I/O, data
  management, pario-bib},
  abstract = {With the increasing number of scientific applications
  manipulating huge amounts of data, effective data management is an
  increasingly important problem. Unfortunately, so far the solutions to this
  data management problem either require deep understanding of specific storage
  architectures and file layouts (as in high-performance file systems) or
  produce unsatisfactory I/O performance in exchange for ease-of-use and
  portability (as in relational DBMSs).\par In this paper we present a new
  environment which is built around an active meta-data management system
  (MDMS). The key components of our three-tiered architecture are user
  application, the MDMS, and a hierarchical storage system (HSS). Our
  environment overcomes the performance problems of pure database-oriented
  solutions, while maintaining their advantages in terms of ease-of-use and
  portability.\par The high levels of performance are achieved by the MDMS,
  with the aid of user-specified directives. Our environment supports a simple,
  easy-to-use yet powerful user interface, leaving the task of choosing
  appropriate I/O techniques to the MDMS. We discuss the importance of an
  active MDMS and show how the three components, namely application, the MDMS,
  and the HSS, fit together. We also report performance numbers from our
  initial implementation and illustrate that significant improvements are made
  possible without undue programming effort.},
  comment = {They argue that existing parallel file systems are too low-level,
  they have their own set of I/O calls (non-portable), and policies are
  generally hard-coded into the system. Databases provide a portable layer on
  top of the file system, but they cannot provide high performance. They
  propose to ``combine the advantages of file systems and databases, while
  avoiding their respective disadvantages.'' Their system is composed of a user
  program, a meta-data management system (MDMS), and a hierarchical storage
  system (HSS). The user program will query the MDMS to learn where in the HSS
  their data reside, what the performance of the storage system is, information
  about the acc data from the storage system, etc.}
}