@PhdThesis{thakur:thesis, author = {Rajeev Thakur}, title = {{Runtime Support for In-Core and Out-of-Core Data-Parallel Programs}}, year = {1995}, month = {May}, school = {Department of Electrical and Computer Engineering, Syracuse University}, URL = {http://www.mcs.anl.gov/~thakur/papers/phd_thesis.ps}, keywords = {parallel I/O, runtime library, pario-bib}, abstract = {Distributed memory parallel computers or distributed computer systems are widely recognized as the only cost-effective means of achieving teraflops performance in the near future. However, the fact remains that they are difficult to program and advances in software for these machines have not kept pace with advances in hardware. This thesis addresses several issues in providing runtime support for in-core as well as out-of-core programs on distributed memory parallel computers. This runtime support can be directly used in application programs for greater efficiency, portability and ease of programming. It can also be used together with a compiler to translate programs written in a high-level data-parallel language like High Performance Fortran (HPF) to node programs for distributed memory machines. \par In distributed memory programs, it is often necessary to change the distribution of arrays during program execution. This thesis presents efficient and portable algorithms for runtime array redistribution. The algorithms have been implemented on the Intel Touchstone Delta and are found to scale well with the number of processors and array size. This thesis also presents algorithms for all-to-all collective communication on fat-tree and two-dimensional mesh interconnection topologies. The performance of these algorithms on the CM-5 and Touchstone Delta is studied extensively. A model for estimating the time taken by these algorithms on the basis of system parameters is developed and validated by comparing with experimental results. \par A number of applications deal with very large data sets which cannot fit in main memory, and hence have to be stored in files on disks, resulting in out-of-core programs. This thesis also describes the design and implementation of efficient runtime support for out-of-core computations. Several optimizations for accessing out-of-core data are presented. An Extended Two-Phase Method is proposed for accessing sections of out-of-core arrays efficiently. This method uses collective I/O and the I/O workload is divided among processors dynamically, depending on the access requests. Performance results obtained using this runtime support for out-of-core programs on the Touchstone Delta are presented.} }