@InProceedings{seamons:compressed,
  author = {K. E. Seamons and M. Winslett},
  title = {A Data Management Approach for Handling Large Compressed Arrays in High Performance Computing},
  booktitle = {Proceedings of the Fifth Symposium on the Frontiers of Massively Parallel Computation},
  year = {1995},
  month = {February},
  pages = {119--128},
  URL = {http://bunny.cs.uiuc.edu/CADR/pubs/compression.ps},
  keywords = {parallel I/O, pario-bib},
  comment = {``This paper shows how compression can be used to speed up parallel i/o of large arrays. The current version of the paper focuses on improving write performance.'' They use chunked files as in seamons:interface, but each chunk is compressed on its compute node before it is written and decompressed on its compute node after it is read. Presumably this is only useful when you plan to read back whole chunks. They find that compression often improves overall performance, even when the compression time exceeds the (reduced) I/O time, because the I/O savings outweigh the compression cost. They also found that compression time and compression ratio can vary widely from chunk to chunk, leading to a severe load imbalance that unfortunately spoils some of the advantages when all compute nodes must wait for the slowest to finish.}
}
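
A rough single-process sketch of the chunk-wise scheme described in the comment above: each chunk is compressed independently before it is written, and an index of compressed sizes is kept so that whole chunks can be located, read back, and decompressed later. The function names, file layout, fixed chunk size, and use of zlib are illustrative assumptions, not details from the paper, and the per-node parallelism is omitted.

    # Sketch only: one process stands in for all compute nodes.
    import zlib

    CHUNK_SIZE = 1 << 20  # 1 MiB uncompressed chunks (illustrative)

    def write_compressed_chunks(data: bytes, path: str) -> list[int]:
        """Compress each fixed-size chunk independently and append it
        to `path`. Returns the list of compressed chunk sizes, which
        serves as the chunk index."""
        sizes = []
        with open(path, "wb") as f:
            for off in range(0, len(data), CHUNK_SIZE):
                comp = zlib.compress(data[off:off + CHUNK_SIZE])
                f.write(comp)
                sizes.append(len(comp))
        return sizes

    def read_chunk(path: str, sizes: list[int], i: int) -> bytes:
        """Read back and decompress whole chunk `i`, using the size
        index to find its offset in the file."""
        with open(path, "rb") as f:
            f.seek(sum(sizes[:i]))
            return zlib.decompress(f.read(sizes[i]))

For example, sizes = write_compressed_chunks(data, "array.chunks") followed by read_chunk("array.chunks", sizes, 0) recovers the first chunk; because compressed chunk sizes vary, the index is what makes whole-chunk reads possible, and that same size variability is the source of the load imbalance the comment mentions.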