@techreport{thakur:astrophysics,
  author      = {Thakur, Rajeev and Lusk, Ewing and Gropp, William},
  title       = {{I/O} Characterization of a Portable Astrophysics Application on the {IBM SP} and {Intel Paragon}},
  institution = {Argonne National Laboratory},
  year        = {1995},
  month       = aug,
  number      = {MCS-P534-0895},
  note        = {Revised October 1995},
  url         = {http://www.mcs.anl.gov/~thakur/papers/astro.ps},
  keywords    = {file access pattern, workload characterization, parallel I/O, pario-bib},
  abstract    = {Many large-scale applications on parallel machines are bottlenecked by the I/O performance rather than the CPU or communication performance of the system. To improve the I/O performance, it is first necessary for system designers to understand the I/O requirements of various applications. This paper presents the results of a study of the I/O characteristics and performance of a real, I/O-intensive, portable, parallel application in astrophysics, on two different parallel machines---the IBM SP and the Intel Paragon. We instrumented the source code to record all I/O activity, and analyzed the resulting trace files. Our results show that, for this application, the I/O consists of fairly large writes, and writing data to files is faster on the Paragon, whereas opening and closing files are faster on the SP. We also discuss how the I/O performance of this application could be improved; particularly, we believe that this application would benefit from using collective I/O.},
  comment     = {Adds another data point to the collection of parallel scientific applications whose I/O has been characterized, a collection started in earnest by crandall:iochar. It's a pretty straightforward application; it just writes its matrices every few timesteps. The application writes whole matrices; the OS sees request sizes that are more a factor of the Chameleon library than of the application. Most of the I/O itself is not implemented in parallel, because they used UniTree on the SP, and because the Chameleon library sequentializes this kind of I/O through one node. Other numbers from the paper don't add much insight into the workload. Revised slightly in October 1995; the abstract represents that revision.},
}