@Comment{cho:local -- author names are stored in "Last, First" form so the hyphenated given name "Szu-wen" cannot be misparsed as a lowercase von particle; month uses the standard three-letter macro; doi repeats the DOI already embedded in URL.}
@InProceedings{cho:local,
  author    = {Cho, Yong and Winslett, Marianne and Subramaniam, Mahesh and Chen, Ying and Kuo, Szu-wen and Seamons, Kent E.},
  title     = {Exploiting Local Data in Parallel Array {I/O} on a Practical Network of Workstations},
  booktitle = {Proceedings of the Fifth Workshop on Input/Output in Parallel and Distributed Systems},
  year      = {1997},
  month     = nov,
  pages     = {1--13},
  publisher = {ACM Press},
  address   = {San Jose, CA},
  doi       = {10.1145/266220.266221},
  URL       = {http://doi.acm.org/10.1145/266220.266221},
  keywords  = {parallel I/O, distributed system, pario-bib},
  abstract  = {A cost-effective way to run a parallel application is to use existing workstations connected by a local area network such as Ethernet or FDDI. In this paper, we present an approach for parallel I/O of multidimensional arrays on small networks of workstations with a shared-media interconnect, using the Panda I/O library.
    \par In such an environment, the message passing throughput per node is lower than the throughput obtainable from a fast disk and it is not easy for users to determine the configuration which will yield the best I/O performance.
    \par We introduce an I/O strategy that exploits local data to reduce the amount of data that must be shipped across the network, present experimental results, and analyze the results using an analytical performance model and predict the best choice of I/O parameters.
    \par Our experiments show that the new strategy results in a factor of 1.2--2.1 speedup in response time compared to the Panda version originally developed for the IBM SP2, depending on the array sizes, distributions and compute and I/O node meshes. Further, the performance model predicts the results within a 13\% margin of error.},
  comment   = {They examine a system that supports nodes that are both compute and I/O nodes. The assumption is that the application is writing data to a new file, and does not care to which disks the data goes.
    They are trying to decide which nodes should be used for I/O, given the distribution of data on compute nodes and the distribution desired across disks. They use a Hungarian algorithm to solve a weighted optimization problem on a bipartite graph connecting I/O nodes to compute nodes, in an attempt to minimize the data flow across the network. But there is no attempt to make a decision that might be sensible for a future read operation that may want to read in a different pattern.}
}