% Symposium paper published in Lecture Notes in Computer Science, vol. 3358;
% recorded as InProceedings with LNCS as the series so that the proceedings
% title, editors and publisher are actually rendered by standard styles.
@InProceedings{tsujita:mpi-io,
  author      = {Yuichi Tsujita},
  title       = {Effective nonblocking {MPI-I/O} in remote {I/O} operations using a multithreaded mechanism},
  booktitle   = {2nd International Symposium on Parallel and Distributed Processing and Applications; December 13--15, 2004; Hong Kong, People's Republic of China},
  editor      = {Cao, J. and Yang, L. T. and Guo, M. and Lau, F.},
  series      = {Lecture Notes in Computer Science},
  volume      = {3358},
  pages       = {34--43},
  year        = {2004},
  month       = nov,
  publisher   = {Springer-Verlag},
  address     = {Berlin},
  institution = {Kinki Univ, Fac Engn, Dept Elect Engn \& Comp Sci, Higashihiroshima, Hiroshima 7392116, Japan},
  copyright   = {(c)2005 The Thomson Corporation},
  url         = {http://www.springerlink.com/openurl.asp?genre=article&issn=0302-9743&volume=3358&spage=34},
  keywords    = {stampi, MPI-I/O, dynamic process creation, multithreaded, overlap computation and I/O, pario-bib},
  abstract    = {A flexible intermediate library named Stampi realizes seamless MPI operations on interconnected parallel computers. Dynamic process creation and MPI-I/O operations both inside a computer and among computers are available with it. MPI-I/O operations to a remote computer are realized by MPI-I/O processes of the Stampi library which are invoked on a remote computer using a vendor-supplied MPI-I/O library. If the vendor-supplied one is not available, a single MPI-I/O process is invoked on a remote computer, and it uses UNIX I/O functions instead of the vendor-supplied one. In nonblocking MPI-I/O functions with multiple user processes, the single MPI-I/O process carries out I/O operations required by the processes sequentially. This results in small overlap of computation by the user processes with I/O operations by the MPI-I/O process. Therefore performance of the nonblocking functions is poor with multiple user processes. To realize effective I/O operations, a Pthreads library has been implemented in the MPI-I/O mechanism, and multi-threaded I/O operations have been realized. The newly implemented MPI-I/O mechanism has been evaluated on inter-connected PC clusters, and higher overlap of the computation with the I/O operations has been achieved.},
  comment     = {also see tsujita:stampi*.}
}