import java.util.*;

/**
 * Identifies the unique words in a document by collecting them into a set.
 *
 * <p>The document is a hard-coded string. It is split into words on runs of
 * spaces and basic punctuation, each word is lower-cased, and the results are
 * stored in a {@link TreeSet}, which keeps one copy of each word in sorted order.
 *
 * @author Haris Baig and Chris Bailey-Kellogg, Dartmouth CS 10, Fall 2012
 * @author Tim Pierson, CS 10, Winter 2017 -- added comments
 * @author Tim Pierson, Dartmouth CS10, provided for Winter 2024
 */
public class UniqueWords {
    /**
     * Splits the sample text into words and prints three lines to standard output:
     * the total number of words, the number of unique words (case-insensitive),
     * and the sorted set of unique words.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        String page = "Pretend that this string was loaded from a web page. We "
                + "won't go to all that trouble here. This string contains multiple "
                + "words. And multiple copies of multiple words. And multiple "
                + "words with multiple copies. It is to be used as a test to "
                + "demonstrate how sets work in removing redundancy by keeping only one copy of each thing. Is it very very redundant in having more than one copy of some words?";

        // Split on runs of spaces and the punctuation characters . , ? !
        // (String.split drops the trailing empty string left by the final '?').
        String[] allWords = page.split("[ .,?!]+");

        // Typed set of unique words; TreeSet keeps them sorted in natural String order.
        Set<String> uniqueWords = new TreeSet<>();

        // Add every word in lower case. Adding a word that is already present
        // leaves the set unchanged (add() returns false); nothing is overwritten.
        // Locale.ROOT keeps the lower-casing independent of the JVM's default
        // locale (e.g. Turkish would otherwise map 'I' to a dotless 'ı').
        for (String s : allWords) {
            uniqueWords.add(s.toLowerCase(Locale.ROOT));
        }

        // Length of the parsed array, i.e. every word including duplicates.
        System.out.println(allWords.length + " words");
        // Size of the set, i.e. the number of distinct words.
        System.out.println(uniqueWords.size() + " unique words");
        // The distinct words themselves, in sorted order.
        System.out.println(uniqueWords);
    }
}