%% BibTeX bibliography for the contents of the book
%%   Input/Output in Parallel and Distributed Computer Systems
%%   Ravi Jain, John Werth, and James C. Browne, editors
%%   Kluwer Academic Publishers, 1996
%%
%% All @InCollection entries cross-reference the @Book entry `iopads-book',
%% which therefore appears LAST in this file (classic BibTeX requires the
%% crossref parent to follow all of its children, and to carry `booktitle').

@InCollection{jain:pario-intro,
  author   = {Jain, Ravi and Werth, John and Browne, James C.},
  title    = {{I/O} in Parallel and Distributed Systems: An Introduction},
  chapter  = 1,
  pages    = {3--30},
  crossref = {iopads-book},
  keyword  = {parallel I/O, out-of-core, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book.},
  abstract = {We sketch the reasons for the I/O bottleneck in parallel and
    distributed systems, pointing out that it can be viewed as a special
    case of a general bottleneck that arises at all levels of the memory
    hierarchy. We argue that because of its severity, the I/O bottleneck
    deserves systematic attention at all levels of system design. We then
    present a survey of the issues raised by the I/O bottleneck in five key
    areas of parallel and distributed systems: applications, algorithms,
    compilers, operating systems and architecture. Finally, we address some
    of the trends we observe emerging in new paradigms of parallel and
    distributed computing: the convergence of networking and I/O, I/O for
    massively distributed ``global information systems'' such as the World
    Wide Web, and I/O for mobile computing and wireless communications.
    These considerations suggest exciting new research directions in I/O
    for parallel and distributed systems in the years to come.},
}

@InCollection{shriver:models-algs,
  author   = {Shriver, Elizabeth and Nodine, Mark},
  title    = {An Introduction to Parallel {I/O} Models and Algorithms},
  chapter  = 2,
  pages    = {31--68},
  crossref = {iopads-book},
  keyword  = {parallel I/O algorithms, out-of-core, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book.},
  abstract = {Problems whose data are too large to fit into main memory are
    called {\it out-of-core} problems. Out-of-core parallel-I/O algorithms
    can handle much larger problems than in-memory variants and have much
    better performance than single-device variants. However, they are not
    commonly used---partly because the understanding of them is not
    widespread. Yet such algorithms ought to be growing in importance
    because they address the needs of users with ever-growing problem sizes
    and ever-increasing performance needs. \par This paper addresses this
    lack of understanding by presenting an introduction to the
    data-transfer models on which most of the out-of-core parallel-I/O
    algorithms are based, with particular emphasis on the Parallel Disk
    Model. Sample algorithms are discussed to demonstrate the paradigms
    (algorithmic techniques) used with these models. \par Our aim is to
    provide insight into both the paradigms and the particular algorithms
    described, thereby also providing a background for understanding a
    range of related solutions. It is hoped that this background would
    enable the appropriate selection of existing algorithms and the
    development of new ones for current and future out-of-core problems.},
}

@InCollection{bordawekar:compiling,
  author   = {Bordawekar, Rajesh and Choudhary, Alok},
  title    = {Issues in Compiling {I/O} Intensive Problems},
  chapter  = 3,
  pages    = {69--96},
  crossref = {iopads-book},
  keyword  = {parallel I/O, compiler, out-of-core, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book.},
  abstract = {None.},
}

@InCollection{kotz:pioarch,
  author   = {Kotz, David},
  title    = {Introduction to Multiprocessor {I/O} Architecture},
  chapter  = 4,
  pages    = {97--124},
  crossref = {iopads-book},
  keyword  = {parallel I/O architecture, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book.},
  abstract = {The computational performance of multiprocessors continues to
    improve by leaps and bounds, fueled in part by rapid improvements in
    processor and interconnection technology. I/O performance thus becomes
    ever more critical, to avoid becoming the bottleneck of system
    performance. In this paper we provide an introduction to I/O
    architectural issues in multiprocessors, with a focus on disk
    subsystems. While we discuss examples from actual architectures and
    provide pointers to interesting research in the literature, we do not
    attempt to provide a comprehensive survey. We concentrate on a study of
    the architectural design issues, and the effects of different design
    alternatives.},
}

@InCollection{corbett:mpi-overview-book,
  author   = {Corbett, Peter and Feitelson, Dror and Fineberg, Sam and
              Hsu, Yarsun and Nitzberg, Bill and Prost, Jean-Pierre and
              Snir, Marc and Traversat, Bernard and Wong, Parkson},
  title    = {Overview of the {MPI-IO} Parallel {I/O} Interface},
  chapter  = 5,
  pages    = {127--146},
  crossref = {iopads-book},
  keyword  = {parallel I/O, file system interface, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    corbett:mpi-overview.},
  abstract = {Thanks to MPI, writing portable message passing parallel
    programs is almost a reality. One of the remaining problems is file
    I/O. Although parallel file systems support similar interfaces, the
    lack of a standard makes developing a truly portable program
    impossible. It is not feasible to develop large scientific applications
    from scratch for each generation of parallel machine, and, in the
    scientific world, a program is not considered truly portable unless it
    not only compiles, but also runs efficiently. \par The MPI-IO interface
    is being proposed as an extension to the MPI standard to fill this
    need. MPI-IO supports a high-level interface to describe the
    partitioning of file data among processes, a collective interface
    describing complete transfers of global data structures between process
    memories and files, asynchronous I/O operations, allowing computation
    to be overlapped with I/O, and optimization of physical file layout on
    storage devices (disks).},
}

@InCollection{thakur:out-of-core-book,
  author   = {Thakur, Rajeev and Choudhary, Alok},
  title    = {Runtime Support for Out-of-Core Parallel Programs},
  chapter  = 6,
  pages    = {147--165},
  crossref = {iopads-book},
  keyword  = {parallel I/O, out-of-core, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    thakur:out-of-core.},
  abstract = {In parallel programs with large out-of-core arrays stored in
    files, it is necessary to read/write smaller sections of the arrays
    from/to files. We describe a runtime method for accessing sections of
    out-of-core arrays efficiently. This method, called the {\em extended
    two-phase method}, uses collective I/O in which processors cooperate to
    read/write out-of-core data in an efficient manner. The I/O workload is
    divided among processors dynamically, depending on the access requests.
    Performance results on the Intel Touchstone Delta show that the
    extended two-phase method performs considerably better than a direct
    method for different access patterns, array sizes, and number of
    processors. We have used the extended two-phase method in the PASSION
    runtime library for parallel I/O.},
}

@InCollection{baylor:workload-book,
  author   = {Baylor, Sandra Johnson and Wu, C. Eric},
  title    = {Parallel {I/O} Workload Characteristics Using {Vesta}},
  chapter  = 7,
  pages    = {167--185},
  crossref = {iopads-book},
  keyword  = {parallel I/O, file access pattern, workload characterization,
    file system workload, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    baylor:workload.},
  abstract = {To develop optimal parallel I/O subsystems, one must have a
    thorough understanding of the workload characteristics of parallel I/O
    and its exploitation of the associated parallel file system. Presented
    are the results of a study conducted to analyze the parallel I/O
    workloads of several applications on a parallel processor using the
    Vesta parallel file system. Traces of the applications are obtained to
    collect system events, communication events, and parallel I/O events.
    The traces are then analyzed to determine workload characteristics. The
    results show I/O request rates on the order of hundreds of requests per
    second, a large majority of requests are for small amounts of data
    (less than 1500 bytes), a few requests are for large amounts of data
    (on the order of megabytes), significant file sharing among processes
    within a job, and strong temporal, traditional spatial, and
    interprocess spatial locality.},
}

@InCollection{kalns:video-book,
  author   = {Kalns, Edgar T. and Hsu, Yarsun},
  title    = {Video on Demand Using the {Vesta} Parallel File System},
  chapter  = 8,
  pages    = {187--204},
  crossref = {iopads-book},
  keyword  = {parallel I/O, parallel file system, video on demand,
    multimedia, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    kalns:video.},
  abstract = {Video on Demand (VoD) servers are expected to serve hundreds
    of customers with as many, or more, movie videos. Such an environment
    requires large storage capacity and real-time, high-bandwidth
    transmission capabilities. Massive striping of videos across disk
    arrays is a viable means to store large amounts of video data and,
    through parallelism of file access, achieve the needed bandwidth. The
    Vesta Parallel File System facilitates parallel access from an
    application to files distributed across a set of I/O processors, each
    with a set of attached disks. Given Vesta's parallel file access
    capabilities, this paper examines a number of issues pertaining to the
    implementation of VoD services on top of Vesta. We develop a prototype
    VoD experimentation environment on an IBM SP-1 and analyze Vesta's
    performance in video data retrieval for real-time playback.
    Specifically, we explore the impact of concurrent video streams
    competing for I/O node resources, cache effects, and video striping
    across multiple I/O nodes.},
}

@InCollection{nieuwejaar:strided2-book,
  author   = {Nieuwejaar, Nils and Kotz, David},
  title    = {Low-level Interfaces for High-level Parallel {I/O}},
  chapter  = 9,
  pages    = {205--223},
  crossref = {iopads-book},
  keyword  = {parallel I/O, multiprocessor file system interface, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    nieuwejaar:strided2.},
  abstract = {As the I/O needs of parallel scientific applications increase,
    file systems for multiprocessors are being designed to provide
    applications with parallel access to multiple disks. Many parallel file
    systems present applications with a conventional Unix-like interface
    that allows the application to access multiple disks transparently. By
    tracing all the activity of a parallel file system in a production,
    scientific computing environment, we show that many applications
    exhibit highly regular, but non-consecutive I/O access patterns. Since
    the conventional interface does not provide an efficient method of
    describing these patterns, we present three extensions to the interface
    that support {\em strided}, {\em nested-strided}, and
    {\em nested-batched} I/O requests. We show how these extensions can be
    used to express common access patterns.},
}

@InCollection{moyer:scalable-book,
  author   = {Moyer, Steven A. and Sunderam, V.~S.},
  title    = {Scalable Concurrency Control for Parallel File Systems},
  chapter  = 10,
  pages    = {225--243},
  crossref = {iopads-book},
  keyword  = {parallel I/O, parallel file system, concurrency control,
    synchronization, transaction, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    moyer:scalable.},
  abstract = {Parallel file systems employ data declustering to increase
    \mbox{I/O} throughput. As a result, a single read or write operation
    can generate concurrent data accesses on multiple storage devices.
    Unless a concurrency control mechanism is employed, familiar file
    access semantics are likely to be violated. This paper details the
    transaction-based concurrency control mechanism implemented in the
    PIOUS parallel file system. Performance results are presented
    demonstrating that sequential consistency semantics can be provided
    without loss of system scalability.},
}

@InCollection{durand:scheduling-book,
  author   = {Durand, Dannie and Jain, Ravi and Tseytlin, David},
  title    = {Improving the Performance of Parallel {I/O} Using Distributed
              Scheduling Algorithms},
  chapter  = 11,
  pages    = {245--269},
  crossref = {iopads-book},
  keyword  = {parallel I/O, distributed scheduling algorithm, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    durand:scheduling.},
  abstract = {The cost of data transfers, and in particular of I/O
    operations, is a growing problem in parallel computing. This
    performance bottleneck is especially severe for data-intensive
    applications such as multimedia information systems, databases, and
    Grand Challenge problems. A promising approach to alleviating this
    bottleneck is to schedule parallel I/O operations explicitly. \par
    Although centralized algorithms for batch scheduling of parallel I/O
    operations have previously been developed, they are not appropriate for
    all applications and architectures. We develop a class of decentralized
    algorithms for scheduling parallel I/O operations, where the objective
    is to reduce the time required to complete a given set of transfers.
    These algorithms, based on edge-coloring and matching of bipartite
    graphs, rely upon simple heuristics to obtain shorter schedules. We
    present simulation results indicating that the best of our algorithms
    can produce schedules whose length (or makespan) is within 2--20\% of
    the optimal schedule, a substantial improvement on previous
    decentralized algorithms. We discuss theoretical and experimental work
    in progress and possible extensions.},
}

@InCollection{sinclair:instability-book,
  author   = {Sinclair, J.~B. and Tang, J. and Varman, P.~J.},
  title    = {Placement-Related Problems in Shared Disk {I/O}},
  chapter  = 12,
  pages    = {271--289},
  crossref = {iopads-book},
  keyword  = {parallel I/O, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    sinclair:instability.},
  abstract = {In a shared-disk parallel I/O system, several processes may be
    accessing the disks concurrently. An important example is concurrent
    external merging arising in database management systems with multiple
    independent sort queries. Such a system may exhibit instability, with
    one of the processes racing ahead of the others and monopolizing I/O
    resources. This race can lead to serialization of the processes and
    poor disk utilization, even when the static load on the disks is
    balanced. The phenomenon can be avoided by proper layout of data on the
    disks, as well as through other I/O management strategies. This has
    implications for both data placement in multiple disk systems and task
    partitioning for parallel processing.},
}

@InCollection{baylor:vulcan-perf-book,
  author   = {Baylor, Sandra Johnson and Benveniste, Caroline and Hsu, Yarsun},
  title    = {Performance Evaluation of a Massively Parallel {I/O} Subsystem},
  chapter  = 13,
  pages    = {293--311},
  crossref = {iopads-book},
  keyword  = {parallel I/O architecture, performance evaluation, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    baylor:vulcan-perf.},
  abstract = {Presented are the trace-driven simulation results of a study
    conducted to evaluate the performance of the internal parallel I/O
    subsystem of the Vulcan massively parallel processor (MPP)
    architecture. The system sizes evaluated vary from 16 to 512 nodes. The
    results show that a compute node to I/O node ratio of four is the most
    cost effective for all system sizes, suggesting high scalability. Also,
    processor-to-processor communication effects are negligible for small
    message sizes and the greater the fraction of I/O reads, the better the
    I/O performance. Worst-case I/O node placement is within 13\% of more
    efficient placement strategies. Introducing parallelism into the
    internal I/O subsystem improves I/O performance significantly.},
}

@InCollection{vanderleest:contention-book,
  author   = {VanderLeest, Steven H. and Iyer, Ravishankar K.},
  title    = {Heterogeneous {I/O} Contention in a Single-bus Multiprocessor},
  chapter  = 14,
  pages    = {313--331},
  crossref = {iopads-book},
  keyword  = {parallel I/O, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    vanderleest:contention.},
  abstract = {None.},
}

@InCollection{schloss:hcsa-book,
  author   = {Schloss, Gerhard A. and Vernick, Michael},
  title    = {{HCSA}: A Hybrid Client-Server Architecture},
  chapter  = 15,
  pages    = {333--351},
  crossref = {iopads-book},
  keyword  = {parallel I/O architecture, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    schloss:hcsa.},
  abstract = {The {\em HCSA} (Hybrid Client-Server Architecture), a flexible
    system layout that combines the advantages of the traditional
    Client-Server Architecture (CSA) with those of the Shared Disk
    Architecture (SDA), is introduced. In {\em HCSA}, the traditional
    CSA-style I/O subsystem is modified to give the clients network access
    to both the server and the server's set of disks. Hence, the {\em HCSA}
    is more fault-tolerant than the CSA since there are two paths between
    any client and the shared data. Moreover, a simulation study
    demonstrates that the {\em HCSA} is able to support a larger number of
    clients than the CSA or SDA under similar system workloads. Finally,
    the {\em HCSA} can run applications in either a CSA mode, an SDA mode,
    or a combination of the two, thus offering backward compatibility with
    a large number of existing applications.},
}

@InCollection{yokota:nets-book,
  author   = {Yokota, Haruo and Mimatsu, Yasuyuki},
  title    = {A Scalable Disk System with Data Reconstruction Functions},
  chapter  = 16,
  pages    = {353--372},
  crossref = {iopads-book},
  keyword  = {parallel I/O architecture, disk array, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    yokota:nets.},
  abstract = {Scalable disk systems are required to implement well-balanced
    computer systems. We have proposed DR-nets, Data-Reconstruction
    networks, to construct the scalable parallel disk systems with high
    reliability. Each node of a DR-net has disks, and is connected by links
    to form an interconnection network. To realize the high reliability,
    nodes in a sub-network of the interconnection network organize a group
    of parity calculation proposed for RAIDs. Inter-node communication for
    calculating parity keeps the locality of data transfer, and it inhibits
    bottlenecks from occurring, even if the size of the network becomes
    very large. We have developed an experimental system using Transputers.
    In this chapter, we provide execution models for estimating the
    response time and throughput of DR-nets, and compare them to
    experimental results. We also discuss the reliability of the DR-nets
    and RAIDs.},
}

@InCollection{asthana:active-book,
  author   = {Asthana, Abhaya and Cravatts, Mark and Krzyzanowski, Paul},
  title    = {An Experimental Memory-based {I/O} Subsystem},
  chapter  = 17,
  pages    = {373--390},
  crossref = {iopads-book},
  keyword  = {parallel I/O architecture, pario bib},
  comment  = {Part of a whole book on parallel I/O; see iopads-book and
    asthana:active.},
  abstract = {We describe an I/O subsystem based on an active memory named
    SWIM (Structured Wafer-based Intelligent Memory) designed for efficient
    storage and manipulation of data structures. The key architectural idea
    in SWIM is to associate some processing logic with each memory chip
    that allows it to perform data manipulation operations locally and to
    communicate with a disk or a communication line through a backend port.
    The processing logic is specially designed to perform operations such
    as pointer dereferencing, memory indirection, searching and bounds
    checking efficiently. The I/O subsystem is built using an
    interconnected ensemble of such memory logic pairs. A complex
    processing task can now be distributed between a large number of small
    memory processors each doing a sub-task, while still retaining a common
    locus of control in the host CPU for higher level administrative and
    provisioning functions. We argue that active memory based processing
    enables more powerful, scalable and robust designs for storage and
    communications subsystems, that can support emerging network services,
    multimedia workstations and wireless PCS systems. A complete parallel
    hardware and software system constructed using an array of SWIM
    elements has been operational for over a year. We present results from
    application of SWIM to three network functions: a national phone
    database server, a high performance IP router, and a call screening
    agent.},
}

%% Crossref parent: must stay AFTER all @InCollection children (classic
%% BibTeX), and must keep `booktitle' since `title' is not inherited as
%% `booktitle' by the children.
@Book{iopads-book,
  editor    = {Jain, Ravi and Werth, John and Browne, James C.},
  title     = {Input/Output in Parallel and Distributed Computer Systems},
  booktitle = {Input/Output in Parallel and Distributed Computer Systems},
  publisher = {Kluwer Academic Publishers},
  year      = 1996,
  keyword   = {parallel I/O, parallel I/O architecture, parallel I/O
    algorithm, multiprocessor file system, workload characterization,
    parallel file access pattern, pario bib},
  comment   = {A book containing papers from IOPADS '94 and IOPADS '95, plus
    several survey/tutorial papers. See the bib entries with cross-ref to
    iopads-book.},
}