@InProceedings{dewitt:parsort,
  author    = {David J. DeWitt and Jeffrey F. Naughton and Donovan A. Schneider},
  title     = {Parallel Sorting on a Shared-Nothing Architecture using Probabilistic Splitting},
  booktitle = {Proceedings of the First International Conference on Parallel and Distributed Information Systems},
  year      = {1991},
  month     = dec,
  pages     = {280--291},
  keywords  = {parallel I/O, parallel database, external sorting, pario-bib},
  comment   = {Comparing exact and probabilistic splitting for external
               sorting on a database. Model and experimental results from
               Gamma machine. Basically, the idea is to decide on a splitting
               vector, which defines $N$ buckets for an $N$-process program,
               and have each program read its initial segment of the data and
               send each element to the appropriate bucket (other process).
               All elements received are written to disks as small sorted
               runs. Then each process mergesorts its runs. Probabilistic
               split uses only a sample of the elements to define the vector.},
}