@PhdThesis{jensen:thesis,
  author = {David Wayne Jensen},
  title = {Disk {I/O} In High-Performance Computing Systems},
  year = {1993},
  school = {Univ. Illinois, Urbana-Champaign},
  keywords = {parallel I/O, file access pattern, multiprocessor file system, pario-bib},
  comment = {He looks at the effect of I/O traffic on memory access in a multistage network, and at custom mappings of file data to disks to support non-sequential I/O. He considers both the traditional ``multiuser'' workload and the case where an application accesses a single file in parallel. Assumes a dance-hall shared-memory MIMD base architecture (CEDAR). Disks are attached either to the memory or the processor side of the network, and in either case require four network traversals per read/write operation. Nice summary of previous parallel I/O architectures, and characterization of the workload. Main conclusions: the network is not an inherent bottleneck, but I/O traffic can cause up to a 50\% loss in memory-traffic bandwidth, and bursts of I/O can saturate the network. For a high I/O request rate (e.g., all procs active), spread each request over a small number of disks (e.g., one); for a low I/O request rate (e.g., one proc active), spread each request over many disks (e.g., all). Spreading each request narrowly avoids cache thrashing when multiple procs hit one disk node. However, if they are all reading the same data, there is no cache thrashing and you want to maximize parallelism across disks. When accessing disjoint parts of the same file, it is sometimes better to have one proc do all the accesses, because this avoids out-of-order requests that spoil prefetching, and it avoids contention among multiple procs. No single file-to-disk mapping worked for everything; interleaved (striped) mappings worked well for most sequential patterns, but ``sequential'' (partitioned) mappings worked better for multiple-process loads that tend to focus each process on one disk, e.g., an interleaved access pattern whose stride equals the number of disks. Thus, if your access pattern can give you disk locality, use a mapping that will provide it. A small sketch of the two mappings follows the entry.}
}
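
For intuition, here is a minimal Python sketch (not from the thesis) of the two
block-to-disk mappings the comment contrasts. NUM_DISKS, FILE_BLOCKS, and the
function names are illustrative assumptions, not Jensen's implementation.

    # Illustrative sketch of interleaved vs. partitioned file-to-disk
    # mappings; all parameters here are made-up, not from Jensen's thesis.

    NUM_DISKS = 4
    FILE_BLOCKS = 16

    def interleaved_disk(block: int) -> int:
        """Interleaved (striped): consecutive blocks round-robin across disks."""
        return block % NUM_DISKS

    def partitioned_disk(block: int) -> int:
        """Partitioned ("sequential"): each disk holds one contiguous extent."""
        blocks_per_disk = (FILE_BLOCKS + NUM_DISKS - 1) // NUM_DISKS
        return block // blocks_per_disk

    if __name__ == "__main__":
        # One process reading sequentially: striping spreads the load over
        # all disks; partitioning visits one disk at a time.
        seq = list(range(FILE_BLOCKS))
        print("sequential, interleaved:", [interleaved_disk(b) for b in seq])
        print("sequential, partitioned:", [partitioned_disk(b) for b in seq])

        # Process p reading with stride == NUM_DISKS: under the interleaved
        # mapping it stays on a single disk (disk locality); under the
        # partitioned mapping it sweeps across every disk.
        p = 1
        strided = list(range(p, FILE_BLOCKS, NUM_DISKS))
        print("strided, interleaved   :", [interleaved_disk(b) for b in strided])
        print("strided, partitioned   :", [partitioned_disk(b) for b in strided])

Running the demo shows the locality property the comment appeals to: the strided
process touches only disk 1 under the interleaved mapping but disks 0-3 under the
partitioned one, so which mapping wins depends on the access pattern.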