BibTeX for papers by David Kotz; for complete/updated list see https://www.cs.dartmouth.edu/~kotz/research/papers.html

@Article{nieuwejaar:workload,
  author = {Nils Nieuwejaar and David Kotz and Apratim Purakayastha and Carla Schlatter Ellis and Michael Best},
  title = {{File-Access Characteristics of Parallel Scientific Workloads}},
  journal = {IEEE Transactions on Parallel and Distributed Systems},
  year = 1996,
  month = {October},
  volume = 7,
  number = 10,
  pages = {1075--1089},
  publisher = {IEEE},
  copyright = {IEEE},
  DOI = {10.1109/71.539739},
  URL = {https://www.cs.dartmouth.edu/~kotz/research/nieuwejaar-workload/index.html},
  abstract = {Phenomenal improvements in the computational performance of multiprocessors have not been matched by comparable gains in I/O system performance. This imbalance has resulted in I/O becoming a significant bottleneck for many scientific applications. One key to overcoming this bottleneck is improving the performance of multiprocessor file systems. \par The design of a high-performance multiprocessor file system requires a comprehensive understanding of the expected workload. Unfortunately, until recently, no general workload studies of multiprocessor file systems have been conducted. The goal of the CHARISMA project was to remedy this problem by characterizing the behavior of several production workloads, on different machines, at the level of individual reads and writes. The first set of results from the CHARISMA project describe the workloads observed on an Intel iPSC/860 and a Thinking Machines CM-5. This paper is intended to compare and contrast these two workloads for an understanding of their essential similarities and differences, isolating common trends and platform-dependent variances. Using this comparison, we are able to gain more insight into the general principles that should guide multiprocessor file-system design.},
}

@InProceedings{ap:workload,
  author = {Apratim Purakayastha and Carla Schlatter Ellis and David Kotz and Nils Nieuwejaar and Michael Best},
  title = {{Characterizing Parallel File-Access Patterns on a Large-Scale Multiprocessor}},
  booktitle = {{Proceedings of the International Parallel Processing Symposium (IPPS)}},
  year = 1995,
  month = {April},
  pages = {165--172},
  publisher = {IEEE},
  copyright = {IEEE},
  DOI = {10.1109/IPPS.1995.395928},
  URL = {https://www.cs.dartmouth.edu/~kotz/research/ap-workload/index.html},
  abstract = {High-performance parallel file systems are needed to satisfy tremendous I/O requirements of parallel scientific applications. The design of such high-performance parallel file systems depends on a comprehensive understanding of the expected workload, but so far there have been very few usage studies of multiprocessor file systems. This paper is part of the CHARISMA project, which intends to fill this void by measuring real file-system workloads on various production parallel machines. In particular, here we present results from the CM-5 at the National Center for Supercomputing Applications. Our results are unique because we collect information about nearly every individual I/O request from the mix of jobs running on the machine. Analysis of the traces leads to various recommendations for parallel file-system design.},
}

@TechReport{nieuwejaar:workload-tr,
  author = {Nils Nieuwejaar and David Kotz and Apratim Purakayastha and Carla Schlatter Ellis and Michael Best},
  title = {{File-Access Characteristics of Parallel Scientific Workloads}},
  institution = {Dartmouth Computer Science},
  year = 1995,
  month = {August},
  number = {PCS-TR95-263},
  copyright = {the authors},
  URL = {https://www.cs.dartmouth.edu/~kotz/research/nieuwejaar-workload-tr/index.html},
  abstract = {Phenomenal improvements in the computational performance of multiprocessors have not been matched by comparable gains in I/O system performance. This imbalance has resulted in I/O becoming a significant bottleneck for many scientific applications. One key to overcoming this bottleneck is improving the performance of parallel file systems. \par The design of a high-performance parallel file system requires a comprehensive understanding of the expected workload. Unfortunately, until recently, no general workload studies of parallel file systems have been conducted. The goal of the CHARISMA project was to remedy this problem by characterizing the behavior of several production workloads, on different machines, at the level of individual reads and writes. The first set of results from the CHARISMA project describe the workloads observed on an Intel iPSC/860 and a Thinking Machines CM-5. This paper is intended to compare and contrast these two workloads for an understanding of their essential similarities and differences, isolating common trends and platform-dependent variances. Using this comparison, we are able to gain more insight into the general principles that should guide parallel file-system design.},
}

@Article{kotz:jworkload,
  author = {David Kotz and Nils Nieuwejaar},
  title = {{File-System Workload on a Scientific Multiprocessor}},
  journal = {IEEE Parallel and Distributed Technology},
  year = 1995,
  month = {Spring},
  volume = 3,
  number = 1,
  pages = {51--60},
  publisher = {IEEE},
  copyright = {IEEE},
  DOI = {10.1109/88.384584},
  URL = {https://www.cs.dartmouth.edu/~kotz/research/kotz-jworkload/index.html},
  abstract = {The Charisma project records individual read and write requests in live, multiprogramming parallel workloads. This information can be used to design more efficient multiprocessor systems. We present the first results from the project: a characterization of the file-system workload on an iPSC/860 multiprocessor running production, parallel scientific applications at NASA Ames Research Center. We use the resulting information to address the following questions: What did the job mix look like (that is, how many jobs ran concurrently?) How many files were read and written? Which were temporary files? What were their sizes? What were typical read and write request sizes, and how were they spaced in the file? Were the accesses sequential? What forms of locality were there? How might caching be useful? What are the implications for file-system design?},
}

@TechReport{ap:workload-tr,
  author = {Apratim Purakayastha and Carla Schlatter Ellis and David Kotz and Nils Nieuwejaar and Michael Best},
  title = {{Characterizing Parallel File-Access Patterns on a Large-Scale Multiprocessor}},
  institution = {Dept. of Computer Science, Duke University},
  year = 1994,
  month = {October},
  number = {CS-1994-33},
  copyright = {the authors},
  URL = {https://www.cs.dartmouth.edu/~kotz/research/ap-workload-tr/index.html},
  abstract = {Rapid increases in the computational speeds of multiprocessors have not been matched by corresponding performance enhancements in the I/O subsystem. To satisfy the large and growing I/O requirements of some parallel scientific applications, we need parallel file systems that can provide high-bandwidth and high-volume data transfer between the I/O subsystem and thousands of processors. \par Design of such high-performance parallel file systems depends on a thorough grasp of the expected workload. So far there have been no comprehensive usage studies of multiprocessor file systems. Our CHARISMA project intends to fill this void. The first results from our study involve an iPSC/860 at NASA Ames. This paper presents results from a different platform, the CM-5 at the National Center for Supercomputing Applications. The CHARISMA studies are unique because we collect information about every individual read and write request and about the entire mix of applications running on the machines. \par The results of our trace analysis lead to recommendations for parallel file system design. First, the file system should support efficient concurrent access to many files, and I/O requests from many jobs under varying load conditions. Second, it must efficiently manage large files kept open for long periods. Third, it should expect to see small requests, predominantly sequential access patterns, application-wide synchronous access, no concurrent file-sharing between jobs, appreciable byte and block sharing between processes within jobs, and strong interprocess locality. Finally, the trace data suggest that node-level write caches and collective I/O request interfaces may be useful in certain environments.},
}

@TechReport{kotz:workload-tr,
  author = {David Kotz and Nils Nieuwejaar},
  title = {{Dynamic File-Access Characteristics of a Production Parallel Scientific Workload}},
  institution = {Dept. of Math and Computer Science, Dartmouth College},
  year = 1994,
  month = {April},
  number = {PCS-TR94-211},
  copyright = {the authors},
  URL = {https://www.cs.dartmouth.edu/~kotz/research/kotz-workload-tr/index.html},
  note = {Revised May 11, 1994},
  abstract = {Multiprocessors have permitted astounding increases in computational performance, but many cannot meet the intense I/O requirements of some scientific applications. An important component of any solution to this I/O bottleneck is a parallel file system that can provide high-bandwidth access to tremendous amounts of data \emph{in parallel} to hundreds or thousands of processors. \par Most successful systems are based on a solid understanding of the characteristics of the expected workload, but until now there have been no comprehensive workload characterizations of multiprocessor file systems. We began the CHARISMA project in an attempt to fill that gap. We instrumented the common node library on the iPSC/860 at NASA Ames to record all file-related activity over a two-week period. Our instrumentation is different from previous efforts in that it collects information about every read and write request and about the \emph{mix} of jobs running in the machine (rather than from selected applications). \par The trace analysis in this paper leads to many recommendations for designers of multiprocessor file systems. First, the file system should support simultaneous access to many different files by many jobs. Second, it should expect to see many small requests, predominantly sequential and regular access patterns (although of a different form than in uniprocessors), little or no concurrent file-sharing between jobs, significant byte- and block-sharing between processes within jobs, and strong interprocess locality. Third, our trace-driven simulations showed that these characteristics led to great success in caching, both at the compute nodes and at the I/O nodes. Finally, we recommend supporting strided I/O requests in the file-system interface, to reduce overhead and allow more performance optimization by the file system.},
}

@InProceedings{kotz:workload,
  author = {David Kotz and Nils Nieuwejaar},
  title = {{Dynamic File-Access Characteristics of a Production Parallel Scientific Workload}},
  booktitle = {{Proceedings of Supercomputing}},
  year = 1994,
  month = {November},
  pages = {640--649},
  publisher = {IEEE},
  copyright = {IEEE},
  address = {Washington, DC},
  DOI = {10.1109/SUPERC.1994.344328},
  URL = {https://www.cs.dartmouth.edu/~kotz/research/kotz-workload/index.html},
  abstract = {Multiprocessors have permitted astounding increases in computational performance, but many cannot meet the intense I/O requirements of some scientific applications. An important component of any solution to this I/O bottleneck is a parallel file system that can provide high-bandwidth access to tremendous amounts of data \emph{in parallel} to hundreds or thousands of processors. \par Most successful systems are based on a solid understanding of the characteristics of the expected workload, but until now there have been no comprehensive workload characterizations of multiprocessor file systems. We began the CHARISMA project in an attempt to fill that gap. We instrumented the common node library on the iPSC/860 at NASA Ames to record all file-related activity over a two-week period. Our instrumentation is different from previous efforts in that it collects information about every read and write request and about the \emph{mix} of jobs running in the machine (rather than from selected applications). \par The trace analysis in this paper leads to many recommendations for designers of multiprocessor file systems. First, the file system should support simultaneous access to many different files by many jobs. Second, it should expect to see many small requests, predominantly sequential and regular access patterns (although of a different form than in uniprocessors), little or no concurrent file-sharing between jobs, significant byte- and block-sharing between processes within jobs, and strong interprocess locality. Third, our trace-driven simulations showed that these characteristics led to great success in caching, both at the compute nodes and at the I/O nodes. Finally, we recommend supporting strided I/O requests in the file-system interface, to reduce overhead and allow more performance optimization by the file system.},
}