@Article{barve:jmergesort, author = {Rakesh D. Barve and Edward F. Grove and Jeffrey S. Vitter}, title = {Simple Randomized Mergesort on Parallel Disks}, journal = {Parallel Computing}, year = {1997}, month = {June}, volume = {23}, number = {4}, pages = {601--631}, publisher = {North-Holland (Elsevier Scientific)}, earlier = {barve:mergesort}, URL = {file://cs.duke.edu/pub/jsv/Papers/BGV96.Simple_Mergesort.ps.gz}, keywords = {parallel I/O algorithm, sorting, pario-bib}, abstract = {We consider the problem of sorting a file of N records on the D-disk model of parallel I/O in which there are two sources of parallelism. Records are transferred to and from disk concurrently in blocks of B contiguous records. In each I/O operation, up to one block can be transferred to or from each of the D disks in parallel. We propose a simple, efficient, randomized mergesort algorithm called SRM that uses a forecast-and-flush approach to overcome the inherent difficulties of simple merging on parallel disks. SRM exhibits a limited use of randomization and also has a useful deterministic version. Generalizing the technique of forecasting, our algorithm is able to read in, at any time, the ``right'' block from any disk, and using the technique of flushing, our algorithm evicts, without any I/O overhead, just the ``right'' blocks from memory to make space for new ones to be read in. The disk layout of SRM is such that it enjoys perfect write parallelism, avoiding fundamental inefficiencies of previous mergesort algorithms. By analysis of generalized maximum occupancy problems we are able to derive an analytical upper bound on SRM's expected overhead valid for arbitrary inputs. \par The upper bound derived on expected I/O performance of SRM indicates that SRM is provably better than disk-striped mergesort (DSM) for realistic parameter values D, M, and B. Average-case simulations show further improvement on the analytical upper bound. Unlike previously proposed optimal sorting algorithms, SRM outperforms DSM even when the number D of parallel disks is small.}, comment = {This paper formerly called barve:mergesort; I discovered that the paper had appeared in SPAA96, so the SPAA96 paper is now called barve:mergesort.} }