@comment{Technical report CACR 157. The non-standard "later" field names
  another citation key (bordawekar:exemplar), presumably a later version of
  this work; confirm against the rest of the database. Standard styles ignore
  the "later", "keywords", and "abstract" fields.}
@TechReport{bordawekar:exemplar-tr2,
  author      = {Bordawekar, Rajesh},
  title       = {Quantitative Characterization and Analysis of the {I/O} Behavior of a Commercial Distributed-shared-memory Machine},
  year        = {1998},
  month       = mar,
  number      = {CACR 157},
  institution = {Center for Advanced Computing Research, California Institute of Technology},
  later       = {bordawekar:exemplar},
  URL         = {http://www.cacr.caltech.edu/~rajesh/exemplar1.html},
  keywords    = {parallel I/O, pario-bib, workload characterization, distributed shared memory},
  abstract    = {This paper presents a unified evaluation of the I/O behavior of a commercial clustered DSM machine, the HP Exemplar. Our study has the following objectives: (1) To evaluate the impact of different interacting system components, namely, architecture, operating system, and programming model, on the overall I/O behavior and identify possible performance bottlenecks and (2) To provide hints to the users for achieving high out-of-box I/O throughput. We find that for the DSM machines that are built as a cluster of SMP nodes, integrated clustering of computing and I/O resources, both hardware and software, is not advantageous for two reasons. First, within an SMP node, the I/O bandwidth is often restricted by the performance of the peripheral components and cannot match the memory bandwidth. Second, since the I/O resources are shared as a global resource, the file-access costs become non-uniform and the I/O behavior of the entire system, in terms of the scalability and balance, degrades. We observe that the buffered I/O performance is determined not only by the I/O subsystem, but also by the programming model, global-shared memory subsystem, and data-communication mechanism. Moreover, programming-model support can be effectively used to overcome the performance constraints created by the architecture and operating system.
For example, on the HP Exemplar, users can achieve high I/O throughput by using features of the programming model that balance the sharing and locality of the user buffers and file systems. Finally, we believe that at present, the I/O subsystems are being designed in isolation and there is a need for mending the traditional memory-oriented design approach to address this problem.}
}