@InProceedings{franke:filters,
  author    = {Franke, Ernest and Magee, Michael},
  title     = {Reducing Data Distribution Bottlenecks by Employing Data Visualization Filters},
  booktitle = {Proceedings of the Eighth IEEE International Symposium on High Performance Distributed Computing},
  year      = {1999},
  month     = aug,
  pages     = {255--262},
  publisher = {IEEE Computer Society Press},
  address   = {Redondo Beach, CA},
  url       = {http://computer.org/conferen/proceed/hpdc/0287/02870041abs.htm},
  keywords  = {distributed computing, filters, grid, input/output, parallel I/O, pario-bib, app-pario},
  abstract  = {Between 1994 and 1997, researchers at Southwest Research Institute (SwRI) investigated methods for distributing parallel computation and data visualization under the support of an internally funded Research Initiative Program entitled the Advanced Visualization Technology Project (AVTP). A hierarchical data cache architecture was developed to provide a flexible interface between the modeling or simulation computational processes and data visualization programs. Compared to conventional post facto data visualization approaches, this data cache structure provides many advantages including simultaneous data access by multiple visualization clients, comparison of experimental and simulated data, and visual analysis of computer simulation as computation proceeds. \par However, since the data cache was resident on a single workstation, this approach did not address the issue of scalability of methods for avoiding the data storage bottleneck by distributing the data across multiple networked workstations. Scalability through distributed database approaches is being investigated as part of the Applied Visualization using Advanced Network Technology Infrastructure (AVANTI) project. \par This paper describes a methodology currently under development that is intended to avoid bottlenecks that typically arise as the result of data consumers (e.g. visualization applications) that must access and process large amounts of data that has been generated and resides on other hosts, and which must pass through a central data cache prior to being used by the data consumer. The methodology is based on a fundamental paradigm that the end result (visualization) rendered by a data consumer can, in many cases, be produced using a reduced data set that has been distilled or filtered from the original data set. \par In the most basic case, the filtered data used as input to the data consumer may simply be a proper subset of massive data sets that have been distributed among hosts. For the general case, however, the filtered data may bear no resemblance to the original data since it is the result of processing the raw data set and distilling it to its visual ``essence'', i.e. the minimal data set that is absolutely required by the data consumer in order to perform the required rendering function. Data distribution bottlenecks for visualization applications are thereby reduced by avoiding the transfer of large amounts of raw data in favor of considerably distilled visual data. \par There are, of course, computational costs associated with this approach since raw data must be processed into its visual essence, but these computational costs may be distributed among multiple processors. It should be realized, however, that, in general, these computational costs would exist any way since, for the visualization to be performed, there must be a transformation between the raw data and the visualization primitives (e.g. line segments, polygon vertices, etc.) to be rendered. The main principal put forth by this paper is that if data distribution bottlenecks are to be minimized, the amount of raw data transferred should be reduced by employing data filtering processes that can be distributed among multiple hosts. \par The complete paper demonstrates, both analytically and experimentally, that this approach becomes increasingly effective (scalable) as the computational expense associated with the data filtering transformation rises.},
  comment   = {The goal of their work is to improve the performance of data visualization applications which use remote data generators (disk or running application) and data consumers (the visualization station) for visualization applications. They deal with network bottlenecks by using a distributed-redundant data cache to hold intermediate data between the data generator and the data consumer. They also reduce network traffic by applying data filters to the data at the distributed cache processors. The main argument is that since the data must be filtered before it is visualized, it makes more sense to perform the filter at the data cache so the computation can be distributed and to reduce the amount of data that needs to be transferred to the data consumer.},
}