0 ) {
                // Tally the word: increment its count when it is already a key of
                // $dict, otherwise create the entry with a count of 1.
                if( array_key_exists( $word, $dict ) ) {
                    $dict[$word] = $dict[$word] + 1; // increment existing entry
                } else {
                    $dict[$word] = 1; // add new entry to dictionary
                }
            }
        }
        $c = $c + 1;
    }
    fclose( $fd ); // close file

    // build word frequency based on each word in dictionary:
    // $features[$i][$j] = number of matches of dictionary word $j in entry $i of $text
    $features = Array();
    $keys = array_keys( $dict );
    $i = 0;
    foreach( $text as $t ) {
        $j = 0;
        foreach( $keys as $k ) {
            // The word is wrapped in single spaces before counting, so only
            // space-delimited occurrences match.
            // NOTE(review): a word at the very start or end of $t (no surrounding
            // space) would not be counted, and substr_count() does not count
            // overlapping matches -- confirm how $t is prepared before this point.
            $k2 = " " . $k . " ";
            $c = substr_count( $t, $k2 );
            // $features starts empty and every ($i, $j) cell is visited exactly
            // once, so assign directly; reading the not-yet-existing cell to add
            // to it would raise an "undefined array key" diagnostic.
            $features[$i][$j] = $c;
            $j = $j + 1;
        }
        $i = $i + 1;
    }

    // K-means clustering
    // your code here...

} else {
    print( "Could not open file... aborting\n" );
}

//
// point-wise add two arrays
//
// $A, $B  : arrays indexed 0..n-1
// returns : array $C with $C[$i] = $A[$i] + $B[$i] for every index of $A
//
// The length of $A drives the loop, so $B needs at least as many entries;
// any extra entries in $B are ignored.
//
function addArrays( $A, $B ) {
    $C = Array();
    $n = count( $A );
    for( $i=0 ; $i<$n ; $i++ ) {
        $C[$i] = $A[$i] + $B[$i];
    }
    return( $C );
}

//
// compute Euclidean distance between two vectors (cluster center and feature)
//
// $x, $y  : arrays indexed 0..n-1; the length of $x drives the loop
// returns : square root of the sum of squared component differences
//           (0 for empty input)
//
function computeDistance( $x, $y ) {
    $n = count( $x );
    $dist = 0;
    for( $i=0 ; $i<$n ; $i++ ) {
        $delta = $x[$i] - $y[$i];
        $dist = $dist + $delta * $delta;
    }
    return( sqrt( $dist ) );
}

//
// strip punctuation from a string
//
// Removes every occurrence of , . ; : - ( ) ? and the newline character.
// Characters are deleted, not replaced by a space, so "well-known" becomes
// "wellknown". Other characters (e.g. "!", quotes, "\r") are left untouched.
//
// $s      : input string
// returns : $s with the characters listed above removed
//
function stripPunctuation( $s ) {
    // One pass with a list of search strings; every replacement is the empty
    // string, so the order of the list does not affect the result.
    $remove = Array( ",", ".", ";", ":", "-", "(", ")", "?", "\n" );
    return( str_replace( $remove, "", $s ) );
}
?>