% Journal article by Rajesh Bordawekar on the I/O behavior of the HP Exemplar.
% The non-standard `earlier` field names the citation key of a related earlier
% entry (bordawekar:exemplar); fields unknown to a style are ignored by it.
% `month` uses the predefined macro so the style controls how it is rendered.
@Article{bordawekar:jexemplar,
  author   = {Bordawekar, Rajesh},
  title    = {Quantitative Characterization and Analysis of the {I/O} Behavior of a Commercial Distributed-shared-memory Machine},
  journal  = {IEEE Transactions on Parallel and Distributed Systems},
  year     = {2000},
  month    = may,
  volume   = {11},
  number   = {5},
  pages    = {509--526},
  earlier  = {bordawekar:exemplar},
  url      = {http://www.computer.org/tpds/td2000/l0509abs.htm},
  keywords = {parallel I/O, pario-bib, workload characterization, distributed shared memory},
  abstract = {This paper presents a unified evaluation of the I/O behavior of a commercial clustered DSM machine, the HP Exemplar. Our study has the following objectives: 1) To evaluate the impact of different interacting system components, namely, architecture, operating system, and programming model, on the overall I/O behavior and identify possible performance bottlenecks, and 2) To provide hints to the users for achieving high out-of-box I/O throughput. We find that for the DSM machines that are built as a cluster of SMP nodes, integrated clustering of computing and I/O resources, both hardware and software, is not advantageous for two reasons. First, within an SMP node, the I/O bandwidth is often restricted by the performance of the peripheral components and cannot match the memory bandwidth. Second, since the I/O resources are shared as a global resource, the file-access costs become nonuniform and the I/O behavior of the entire system, in terms of both scalability and balance, degrades. \par We observe that the buffered I/O performance is determined not only by the I/O subsystem, but also by the programming model, global-shared memory subsystem, and data-communication mechanism. Moreover, programming-model support can be used effectively to overcome the performance constraints created by the architecture and operating system.
For example, on the HP Exemplar, users can achieve high I/O throughput by using features of the programming model that balance the sharing and locality of the user buffers and file systems. Finally, we believe that at present, the I/O subsystems are being designed in isolation, and there is a need for mending the traditional memory-oriented design approach to address this problem.}
}