SC2000 conference paper on the Scientific Data Manager (SDM). The month is
given as the standard BibTeX macro (nov) so that the bibliography style
controls its abbreviation and language; author names use "Last, First" form.

@InProceedings{no:file-db,
  author = {No, Jaechun and Thakur, Rajeev and Choudhary, Alok},
  title = {Integrating Parallel File {I/O} and Database Support for
  High-Performance Scientific Data Management},
  booktitle = {Proceedings of SC2000: High Performance Networking and
  Computing},
  year = {2000},
  month = nov,
  publisher = {IEEE Computer Society Press},
  address = {Dallas, TX},
  note = {To appear},
  URL = {http://www.mcs.anl.gov/~thakur/papers/sdm.ps},
  keywords = {scientific computing, database, parallel I/O, pario-bib},
  abstract = {Many scientific applications have large I/O requirements, in
  terms of both the size of data and the number of files or data sets.
  Management, storage, efficient access, and analysis of this data present an
  extremely challenging task. Traditionally, two different solutions are used
  for this problem: file I/O or databases. File I/O can provide high
  performance but is tedious to use with large numbers of files and large and
  complex data sets. Databases can be convenient, flexible, and powerful but do
  not perform and scale well for parallel supercomputing applications. We have
  developed a software system, called Scientific Data Manager (SDM), that aims
  to combine the good features of both file I/O and databases. SDM provides a
  high-level API to the user and, internally, uses a parallel file system to
  store real data and a database to store application-related metadata. SDM
  takes advantage of various I/O optimizations available in MPI-IO, such as
  collective I/O and noncontiguous requests, in a manner that is transparent to
  the user. As a result, users can write and retrieve data with the performance
  of parallel file I/O, without having to bother with the details of actually
  performing file I/O. \par In this paper, we describe the design and
  implementation of SDM. With the help of two parallel application templates,
  ASTRO3D and an Euler solver, we illustrate how some of the design criteria
  affect performance.}
}