% Aggarwal and Vitter, CACM 31(9), 1988: I/O bounds for external sorting,
% FFT, permuting, and matrix transposition (annotated pario-bib entry).
@Article{aggarwal:sorting,
  author   = {Alok Aggarwal and Jeffrey Scott Vitter},
  title    = {The Input/Output Complexity of Sorting and Related Problems},
  journal  = {Communications of the ACM},
  year     = {1988},
  month    = sep,
  volume   = {31},
  number   = {9},
  pages    = {1116--1127},
  keywords = {parallel I/O, sorting, pario-bib},
  abstract = {We provide tight upper and lower bounds, up to a constant
              factor, for the number of inputs and outputs~(I/Os) between
              internal memory and secondary storage required for five
              sorting-related problems: sorting, the fast Fourier transform
              (FFT), permutation networks, permuting, and matrix
              transposition. The bounds hold both in the worst case and in
              the average case, and in several situations the constant
              factors match. Secondary storage is modeled as a magnetic disk
              capable of transferring $P$~blocks each containing $B$~records
              in a single time unit; the records in each block must be input
              from or output to $B$~contiguous locations on the disk. We give
              two optimal algorithms for the problems, which are variants of
              merge sorting and distribution sorting. In particular we show
              for $P=1$ that the standard merge sorting algorithm is an
              optimal external sorting method, up to a constant factor in the
              number of~I/Os. Our sorting algorithms use the same number
              of~I/Os as does the permutation phase of key sorting, except
              when the internal memory size is extremely small, thus
              affirming the popular adage that key sorting is not faster. We
              also give a simpler and more direct derivation of Hong and
              Kung's lower bound for the FFT for the special case
              $B = P = O(1)$.},
  comment  = {Good comments on typical external sorts, and how big they are.
              Focuses on parallelism at the disk. They give tight theoretical
              bounds on the number of I/Os required to do external sorting
              and other problems (FFTs, matrix transpose, etc.). If $x$ is
              the number of blocks in the file and $y$ is the number of
              blocks that fit in memory, then the number of I/Os needed grows
              as $\Theta (x \log x / \log y)$. If parallel transfers of $p$
              blocks are allowed, speedup linear in $p$ is obtained.}
}