% Brunet, Pedersen, and Johnsson (1994): conference paper on load-balanced
% out-of-core LU and QR factorization; the note field records the Harvard
% technical-report version (TR-20-94) and the URL field points to it.
@InProceedings{brunet:factor,
  author    = {Brunet, Jean-Philippe and Pedersen, Palle and Johnsson, S.~Lennart},
  title     = {Load-Balanced {LU} and {QR} Factor and Solve Routines for Scalable Processors with Scalable {I/O}},
  booktitle = {Proceedings of the 17th {IMACS} World Congress},
  year      = {1994},
  month     = jul,
  address   = {Atlanta, GA},
  note      = {Also available as Harvard University Computer Science Technical Report TR-20-94.},
  URL       = {ftp://das-ftp.harvard.edu/techreports/tr-20-94.ps.gz},
  keywords  = {parallel I/O, linear algebra, out-of-core, pario-bib},
  abstract  = {The concept of block-cyclic order elimination can be applied
               to out-of-core $LU$ and $QR$ matrix factorizations on
               distributed memory architectures equipped with a parallel I/O
               system. This elimination scheme provides load balanced
               computation in both the factor and solve phases and further
               optimizes the use of the network bandwidth to perform I/O
               operations. Stability of LU factorization is enforced by full
               column pivoting. Performance results are presented for the
               Connection Machine system CM-5.},
  comment   = {Short, not many details. Performance results show about 3.5
               Gflops for all problem sizes, both in-core on small N and
               out-of-core on large N.}
}