@InProceedings{schulz:semantic,
  author    = {Schulz, Martin and Reed, Daniel A.},
  title     = {Using Semantic Information to Guide Efficient {I/O} on Clusters},
  booktitle = {Proceedings of the Eleventh {IEEE} International Symposium on High Performance Distributed Computing},
  year      = {2002},
  pages     = {135--142},
  publisher = {IEEE Computer Society Press},
  address   = {Edinburgh, Scotland},
  keywords  = {I/O, data distribution, medical imaging application, parallel I/O, pario-bib},
  comment   = {The paper describes DIOM (Distributed I/O management), a
               system to manage data distributed to local disks of a cluster
               of workstations. The distribution process uses semantic
               information from both the data set and the application to
               decide how to distribute the data. The data is stored using a
               self-describing format (similar to HDF). The description of
               the data is either stored in a file header, or it is part of a
               central repository (format identified by file suffix). DIOM
               decides how to distribute the data based on the
               application-supplied splitting-pattern, of which there are
               three types: single (copy all data to a single node), block
               (divide data evenly between the nodes), round (stripe blocks
               in a round-robin fashion). Parameters such as stripe size,
               initial node, etc., are defined by the app.}
}