% SOLAR paper by Toledo and Gustavson (1996 workshop proceedings entry).
% The month is given as the predefined three-letter macro (unbraced) so each
% bibliography style can abbreviate or localise it; authors are written in
% the unambiguous "Last, First" form.  The abstract, keywords, and comment
% fields are annotations that the standard BibTeX styles do not print.
@InProceedings{toledo:solar,
  author    = {Toledo, Sivan and Gustavson, Fred G.},
  title     = {The Design and Implementation of {SOLAR}, a Portable Library
               for Scalable Out-of-Core Linear Algebra Computations},
  booktitle = {Proceedings of the Fourth Workshop on Input/Output in Parallel
               and Distributed Systems},
  year      = {1996},
  month     = may,
  pages     = {28--40},
  publisher = {ACM Press},
  address   = {Philadelphia},
  keywords  = {parallel I/O, out-of-core, linear algebra, pario-bib},
  abstract  = {SOLAR is a portable high-performance library for out-of-core
               dense matrix computations. It combines portability with high
               performance by using existing high-performance in-core
               subroutine libraries and by using an optimized matrix
               input-output library. SOLAR works on parallel computers,
               workstations, and personal computers. It supports in-core
               computations on both shared-memory and distributed-memory
               machines, and its matrix input-output library supports both
               conventional I/O interfaces and parallel I/O interfaces. This
               paper discusses the overall design of SOLAR, its interfaces,
               and the design of several important subroutines. Experimental
               results show that SOLAR can factor on a single workstation an
               out-of-core positive-definite symmetric matrix at a rate
               exceeding 215 Mflops, and an out-of-core general matrix at a
               rate exceeding 195 Mflops. Less than 16\% of the running time
               is spent on I/O in these computations. These results indicate
               that SOLAR's portability does not compromise its performance.
               We expect that the combination of portability, modularity, and
               the use of a high-level I/O interface will make the library an
               important platform for research on out-of-core algorithms and
               on parallel I/O.},
  comment   = {Sounds great. Library package that supports LAPACK-like
               functionality on in-core and out-of-core matrices. Good
               performance. Good portability (IBM workstation, IBM SP-2, and
               OS/2 laptop). They separate the matrix algorithms from the
               underlying I/O routines in an interesting way (read and write
               submatrices), leaving just enough information to allow the I/O
               system to do some higher-level optimizations.},
}