% Ghemawat, Gobioff, and Leung, "The Google File System" (conference paper, 2003).
% - `month` uses the predefined three-letter macro so the bibliography style
%   decides how the month is rendered (full, abbreviated, localised).
% - `doi` is the persistent identifier; `url` is kept as a secondary locator.
% - NOTE(review): `address` appears to hold the conference venue rather than the
%   publisher's city; left unchanged to match the surrounding database --
%   confirm against the file's convention.
@InProceedings{ghemawat:googlefs,
  author    = {Ghemawat, Sanjay and Gobioff, Howard and Leung, Shun-Tak},
  title     = {The {Google} File System},
  booktitle = {Proceedings of the Nineteenth {ACM} Symposium on Operating
               Systems Principles},
  year      = {2003},
  month     = oct,
  pages     = {29--43},
  publisher = {ACM Press},
  address   = {Bolton Landing, NY},
  doi       = {10.1145/945445.945450},
  url       = {http://www.cs.rochester.edu/sosp2003/papers/p125-ghemawat.pdf},
  keywords  = {distributed file system, pario-bib},
  abstract  = {We have designed and implemented the Google File System, a
  scalable distributed file system for large distributed data-intensive
  applications. It provides fault tolerance while running on inexpensive
  commodity hardware, and it delivers high aggregate performance to a large
  number of clients. While sharing many of the same goals as previous
  distributed file systems, our design has been driven by observations of our
  application workloads and technological environment, both current and
  anticipated, that reflect a marked departure from some earlier file system
  assumptions. This has led us to re-examine traditional choices and explore
  radically different design points. \par The file system has successfully met
  our storage needs. It is widely deployed within Google as the storage
  platform for the generation and processing of data used by our service as
  well as research and development efforts that require large data sets. The
  largest cluster to date provides hundreds of terabytes of storage across
  thousands of disks on over a thousand machines, and it is concurrently
  accessed by hundreds of clients. \par In this paper, we present file system
  interface extensions designed to support distributed applications, discuss
  many aspects of our design, and report measurements from both
  micro-benchmarks and real world use.},
}