@TechReport{bordawekar:reorganize,
  author      = {Bordawekar, Rajesh and Choudhary, Alok and Thakur, Rajeev},
  title       = {Data Access Reorganizations in Compiling Out-of-core Data Parallel Programs on Distributed Memory Machines},
  year        = {1994},
  month       = sep,
  number      = {SCCS-622},
  institution = {NPAC},
  address     = {Syracuse, NY 13244},
  earlier     = {bordawekar:efficient},
  URL         = {ftp://erc.cat.syr.edu/ece/choudhary/PASSION/access_reorg.ps.Z},
  keywords    = {parallel I/O, compilation, pario-bib},
  comment     = {Basically they give a case study of out-of-core matrix multiplication to emphasize that the compiler's choice of loop ordering and matrix distribution for in-core matmult is not a very good choice for out-of-core matmult, because it causes too much I/O. By reorganizing the data and the loops, they get much better performance. In this particular case there are known algorithms which they should have used. In general they make the point that the compiler should consider several organizations, and estimate their costs, before generating code. They don't propose anything more sophisticated than to try all the possible organizations.}
}