@InProceedings{arge:sorting,
  author = {Lars Arge and Paolo Ferragina and Roberto Grossi and Jeffrey Scott
  Vitter},
  title = {Sequence sorting in secondary storage},
  booktitle = {Proceedings of Compression and Complexity of Sequences},
  year = {1998},
  month = {June},
  pages = {329--346},
  publisher = {IEEE Computer Society Press},
  address = {Salerno, Italy},
  keywords = {out-of-core algorithm, sorting algorithm, pario-bib},
  abstract = {We investigate the I/O complexity of the problem of sorting
  sequences (or strings of characters) in external memory, which is a
  fundamental component of many large-scale text applications. In the standard
  unit-cost RAM comparison model, the complexity of sorting K strings of total
  length N is Theta (K log/sub 2/ K+N). By analogy, in the external memory (or
  I/O) model, where the internal memory has size M and the block transfer size
  is B, it would be natural to guess that the I/O complexity of sorting
  sequences is Theta ((K/B)log/sub M/B/(K/B)+(N/B)), but the known algorithms
  do not come even close to achieving this bound. Our results show, somewhat
  counterintuitively, that the I/O complexity of string sorting depends upon
  the length of the strings relative to the block size. We first consider a
  simple comparison I/O model, where the strings are not allowed to be broken
  into their individual characters, and we show that the I/O complexity of
  string sorting in this model is Theta ((N/sub 1//B)log/sub M/B/(N/sub
  1//B)+K/sub 2/+(N/B)), where N/sub 1/ is the total length of all strings
  shorter than B and K/sub 2/ is the number of strings longer than B. We then
  consider two more general I/O comparison models in which string breaking is
  allowed. We obtain improved algorithms and in several cases lower bounds that
  match their I/O bounds. Finally, we develop more practical algorithms outside
  the comparison model.},
  comment = {This paper is really the same paper as arge:sorting-strings.}
}