% Conference paper on active buffering with threads (ABT) in ROMIO.
% The abstract was cleaned of PDF line-break hyphens and missing apostrophes.
@InProceedings{ma:buffering,
  author    = {Ma, Xiaosong and Winslett, Marianne and Lee, Jonghyun and Yu, Shengke},
  title     = {Improving {MPI-IO} Output Performance with Active Buffering Plus Threads},
  booktitle = {Proceedings of the International Parallel and Distributed Processing Symposium},
  year      = {2003},
  month     = apr,
  publisher = {IEEE Computer Society Press},
  url       = {http://drl.cs.uiuc.edu/pubs/abt.pdf},
  keywords  = {parallel I/O, pario-bib},
  abstract  = {Efficient collective output of intermediate results to
    secondary storage becomes more and more important for scientific
    simulations as the gap between processing power/interconnection
    bandwidth and the I/O system bandwidth enlarges. Dedicated servers can
    offload I/O from compute processors and shorten the execution time, but
    it is not always possible or easy for an application to use them. We
    propose the use of active buffering with threads (ABT) for overlapping
    I/O with computation efficiently and flexibly without dedicated I/O
    servers. We show that the implementation of ABT in ROMIO, a popular
    implementation of MPI-IO, greatly reduces the application-visible cost
    of ROMIO's collective write calls, and improves an application's overall
    performance by hiding I/O cost and saving implicit synchronization
    overhead from collective write operations. Further, ABT is high-level,
    platform-independent, and transparent to users, giving users the benefit
    of overlapping I/O with other processing tasks even when the file system
    or parallel I/O library does not support asynchronous I/O.}
}