@PhdThesis{nitzberg:thesis,
  author   = {William J. Nitzberg},
  title    = {Collective Parallel {I/O}},
  year     = {1995},
  month    = {December},
  school   = {Department of Computer and Information Science, University of Oregon},
  keywords = {parallel I/O, parallel algorithm, file system interface, pario-bib},
  abstract = {Parallel I/O, the process of transferring a global data structure distributed among compute nodes to a file striped across storage devices, can be quite complicated and involve a significant amount of data movement. Optimizing parallel I/O with respect to data distribution, file layout, and machine architecture is critical for performance. In this work, we propose a solution to both the performance and portability problems plaguing the wide acceptance of distributed memory parallel computers for scientific computing: a collective parallel I/O interface and efficient algorithms to implement it. A collective interface allows the programmer to specify a file access as a high-level global operation rather than as a series of seeks and writes. This not only provides a more natural interface for the programmer, but also provides the system with both the opportunity and the semantic information necessary to optimize the file operation. \par We attack this problem in three steps: we evaluate an early parallel I/O system, the Intel iPSC/860 Concurrent File System; we design and analyze the performance of two classes of algorithms taking advantage of collective parallel I/O; and we design MPI-IO, a collective parallel I/O interface likely to become the standard for portable parallel I/O. \par The collective I/O algorithms fall into two broad categories: data block scheduling and collective buffering. Data block scheduling algorithms attempt to schedule the individual data transfers to minimize resource contention and to optimize for particular hardware characteristics. We develop and evaluate three data block scheduling algorithms: Grouping, Random, and Sliding Window. The data block scheduling algorithms improved performance by as much as a factor of eight. The collective buffering algorithms permute the data before writing or after reading in order to combine small file accesses into large blocks. We design and test a series of four collective buffering algorithms and demonstrate improvement in performance by two orders of magnitude over naive file I/O for the hardest, three-dimensional distributions.},
  comment  = {See also nitzberg:cfs and corbett:mpi-overview.}
}