@InProceedings{shah:algorithms,
  author    = {Rahul Shah and Peter J. Varman and Jeffrey Scott Vitter},
  title     = {Online algorithms for prefetching and caching on parallel disks},
  booktitle = {Proceedings of the Sixteenth Annual ACM Symposium on Parallelism in Algorithms and Architectures (SPAA 2004)},
  year      = {2004},
  month     = {June},
  pages     = {255--264},
  address   = {Barcelona, Spain},
  URL       = {http://doi.acm.org/10.1145/1007912.1007950},
  keywords  = {online algorithms, prefetching, caching, parallel disk model, threshold LRU, pario-bib},
  abstract  = {Parallel disks provide a cost-effective way of speeding up I/Os in applications that work with large amounts of data. The main challenge is to achieve as much parallelism as possible, using prefetching to avoid bottlenecks in disk access. Efficient algorithms have been developed for some particular patterns of accessing the disk blocks. In this paper, we consider general request sequences. When the request sequence consists of unique block requests, the problem is called prefetching and is a well-solved problem for arbitrary request sequences. When the reference sequence can have repeated references to the same block, we need to devise an effective caching policy as well. While optimum offline algorithms have recently been designed for the problem, in the online case, no effective algorithm was previously known. Our main contribution is a deterministic online algorithm, threshold-LRU, which achieves an $O((MD/L)^{2/3})$ competitive ratio, and a randomized online algorithm, threshold-MARK, which achieves an $O(\sqrt{MD/L} \log(MD/L))$ competitive ratio for the caching/prefetching problem on the parallel disk model (PDM), where $D$ is the number of disks, $M$ is the size of the fast memory buffer, and $M + L$ is the amount of lookahead available in the request sequence. The best-known lower bound on the competitive ratio is $\Omega(\sqrt{MD/L})$ for lookahead $L \geq M$ in both models. We also show that if the deterministic online algorithm is allowed to have twice the memory of the offline algorithm, then a tight competitive ratio of $\Theta(\sqrt{MD/L})$ can be achieved. This problem generalizes the well-known paging problem on a single disk to the parallel disk model.}
}