Question

In: Computer Science

Your task is to count the frequency of words in a text file, and return the...

Your task is to count the frequency of words in a text file, and return the most frequent word with its count. (Must use the code below without changing algorithms)

For example, given the following text:

there are two ways of constructing a software design one way is to make it so simple that there are obviously no deficiencies and the other way is to make it so complicated that there are no obvious deficiencies.

Based on the example, your program should print out the following, along with the number of milliseconds it took to finish the computation:
The most frequent word is "there" with 3 occurrences.

The code is below; it doesn't seem to print out which word is the most frequent and how many occurrences of it there are.

import java.io.File;
import java.util.Scanner;
import java.util.Map.Entry;
import java.util.AbstractMap;
import java.util.LinkedList;

/**
 * Finds the most frequent word in a text file using two deliberately simple
 * strategies (parallel arrays and a linked list of entries) and prints how
 * long each one took.
 */
public class WordCountLinkedList254 {

    /**
     * Input file used when no path is passed on the command line.
     * NOTE(review): the literal ends with a space; kept as-is, but confirm it is intended.
     */
    private static final String DEFAULT_PATH = "/Users/jianguolu/Dropbox/254/code/dblp1k.txt ";

    /**
     * Counts word frequencies with two parallel arrays scanned linearly.
     *
     * @param tokens the words to count; {@code null} elements are ignored
     * @return an entry holding the most frequent word and its count; on a tie the
     *         word seen first wins, and an input without words yields {@code ("", 0)}
     */
    public static Entry<String, Integer> count_ARRAY(String[] tokens) {
        String[] words = new String[10000];
        int[] counts = new int[words.length];
        int size = 0; // number of distinct words stored so far

        for (String token : tokens) {
            if (token == null) {
                continue;
            }
            int slot = 0;
            while (slot < size && !words[slot].equals(token)) {
                slot++;
            }
            if (slot < size) {
                // Known word: bump it and stop, so it is not appended a second time.
                counts[slot]++;
                continue;
            }
            if (size == words.length) {
                // Grow instead of silently dropping words once the arrays are full.
                int grown = words.length * 2;
                words = java.util.Arrays.copyOf(words, grown);
                counts = java.util.Arrays.copyOf(counts, grown);
            }
            words[size] = token;
            counts[size] = 1;
            size++;
        }

        int maxCount = 0;
        String maxWord = "";
        // Bounded by size, so the scan can never index past the end of the arrays.
        for (int i = 0; i < size; i++) {
            if (counts[i] > maxCount) {
                maxWord = words[i];
                maxCount = counts[i];
            }
        }
        return new AbstractMap.SimpleEntry<String, Integer>(maxWord, maxCount);
    }

    /**
     * Counts word frequencies with a linked list of (word, count) entries
     * scanned linearly.
     *
     * @param tokens the words to count; {@code null} elements are ignored
     * @return an entry holding the most frequent word and its count; on a tie the
     *         word seen first wins, and an input without words yields {@code ("", 0)}
     */
    public static Entry<String, Integer> count_LINKED_LIST(String[] tokens) {
        LinkedList<Entry<String, Integer>> list = new LinkedList<Entry<String, Integer>>();
        for (String word : tokens) {
            if (word == null) {
                continue;
            }
            boolean found = false;
            for (Entry<String, Integer> e : list) {
                if (word.equals(e.getKey())) {
                    // The entry is updated in place. Writing it back with list.set(...)
                    // is unnecessary and, with a stale index, overwrote other words.
                    e.setValue(e.getValue() + 1);
                    found = true;
                    break;
                }
            }
            if (!found) {
                list.add(new AbstractMap.SimpleEntry<String, Integer>(word, 1));
            }
        }

        int maxCount = 0;
        String maxWord = "";
        // Iterate rather than list.get(i): indexed access walks the list on every call.
        for (Entry<String, Integer> e : list) {
            if (e.getValue() > maxCount) {
                maxWord = e.getKey();
                maxCount = e.getValue();
            }
        }
        return new AbstractMap.SimpleEntry<String, Integer>(maxWord, maxCount);
    }

    /**
     * Reads a file and splits it into lower-cased words, where a word is a
     * maximal run of ASCII letters.
     *
     * @param PATH path of the file to read
     * @return the words in file order
     * @throws Exception if the file cannot be opened
     */
    static String[] readText(String PATH) throws Exception {
        LinkedList<String> words = new LinkedList<String>();
        // Single pass; try-with-resources closes the scanner even if reading fails.
        try (Scanner doc = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+")) {
            while (doc.hasNext()) {
                // Locale.ROOT keeps the lower-casing independent of the machine's locale.
                words.add(doc.next().toLowerCase(java.util.Locale.ROOT));
            }
        }
        return words.toArray(new String[0]);
    }

    /**
     * Times both counting strategies on one input file and prints one line per
     * strategy.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the input file cannot be read
     */
    public static void main(String[] args) throws Exception {
        String PATH = args.length > 0 ? args[0] : DEFAULT_PATH;

        String[] tokens = readText(PATH);
        long startTime = System.currentTimeMillis();
        Entry<String, Integer> entry = count_LINKED_LIST(tokens);
        long endTime = System.currentTimeMillis();
        String time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());

        tokens = readText(PATH);
        startTime = System.currentTimeMillis();
        entry = count_ARRAY(tokens);
        endTime = System.currentTimeMillis();
        time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());
    }
}

Solutions

Expert Solution

import java.io.File;
import java.util.Scanner;
import java.util.Map.Entry;
import java.util.AbstractMap;
import java.util.LinkedList;

/**
 * Reports the most frequent word of a text file together with the time the
 * count took. Two counting strategies are provided: parallel arrays and a
 * linked list of (word, count) entries.
 */
public class WordCountLinkedList254 {

    /**
     * Input file used when no path is passed on the command line.
     * NOTE(review): the literal ends with a space; kept as-is, but confirm it is intended.
     */
    private static final String DEFAULT_PATH = "/Users/jianguolu/Dropbox/254/code/dblp1k.txt ";

    /**
     * Tallies words in a pair of parallel arrays that are searched linearly.
     *
     * @param tokens the words to tally; {@code null} elements are skipped
     * @return the most frequent word paired with its count. The earliest word wins
     *         a tie; {@code ("", 0)} is returned when there is nothing to count.
     */
    public static Entry<String, Integer> count_ARRAY(String[] tokens) {
        String[] words = new String[10000];
        int[] counts = new int[words.length];
        int used = 0; // how many distinct words are stored

        for (String token : tokens) {
            if (token == null) {
                continue;
            }
            int at = -1;
            for (int i = 0; i < used; i++) {
                if (words[i].equals(token)) {
                    at = i;
                    break; // without this the token would also be stored again as a new word
                }
            }
            if (at >= 0) {
                counts[at]++;
            } else {
                if (used == words.length) {
                    // Double the storage rather than dropping words that do not fit.
                    words = java.util.Arrays.copyOf(words, used * 2);
                    counts = java.util.Arrays.copyOf(counts, used * 2);
                }
                words[used] = token;
                counts[used] = 1;
                used++;
            }
        }

        int maxCount = 0;
        String maxWord = "";
        // Only the filled prefix is scanned, so a full array cannot be over-indexed.
        for (int i = 0; i < used; i++) {
            if (counts[i] > maxCount) {
                maxWord = words[i];
                maxCount = counts[i];
            }
        }
        return new AbstractMap.SimpleEntry<String, Integer>(maxWord, maxCount);
    }

    /**
     * Tallies words in a linked list of entries that is searched linearly.
     *
     * @param tokens the words to tally; {@code null} elements are skipped
     * @return the most frequent word paired with its count. The earliest word wins
     *         a tie; {@code ("", 0)} is returned when there is nothing to count.
     */
    public static Entry<String, Integer> count_LINKED_LIST(String[] tokens) {
        LinkedList<Entry<String, Integer>> list = new LinkedList<>();
        for (String word : tokens) {
            if (word == null) {
                continue;
            }
            Entry<String, Integer> match = null;
            for (Entry<String, Integer> e : list) {
                if (word.equals(e.getKey())) {
                    match = e;
                    break;
                }
            }
            if (match == null) {
                list.add(new AbstractMap.SimpleEntry<String, Integer>(word, 1));
            } else {
                // Mutating the entry is enough. The former list.set(i, e) always ran
                // with i == 0 and replaced the first word of the list with this one.
                match.setValue(match.getValue() + 1);
            }
        }

        int maxCount = 0;
        String maxWord = "";
        // Use the iterator: list.get(i) on a LinkedList walks from the head each time.
        for (Entry<String, Integer> e : list) {
            int count = e.getValue();
            if (count > maxCount) {
                maxWord = e.getKey();
                maxCount = count;
            }
        }
        return new AbstractMap.SimpleEntry<String, Integer>(maxWord, maxCount);
    }

    /**
     * Loads a file as a sequence of lower-cased words; every run of characters
     * other than ASCII letters acts as a separator.
     *
     * @param PATH path of the file to read
     * @return the words in the order they appear in the file
     * @throws Exception if the file cannot be opened
     */
    static String[] readText(String PATH) throws Exception {
        LinkedList<String> collected = new LinkedList<>();
        // One pass over the file; the scanner is closed on every exit path.
        try (Scanner in = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+")) {
            while (in.hasNext()) {
                // Locale.ROOT avoids locale-specific case mappings.
                collected.add(in.next().toLowerCase(java.util.Locale.ROOT));
            }
        }
        return collected.toArray(new String[0]);
    }

    /**
     * Counts the words of the input file with the linked-list strategy and
     * prints the winner and the elapsed milliseconds.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the input file cannot be read
     */
    public static void main(String[] args) throws Exception {
        String PATH = args.length > 0 ? args[0] : DEFAULT_PATH;
        String[] tokens = readText(PATH);
        long startTime = System.currentTimeMillis();
        Entry<String, Integer> entry = count_LINKED_LIST(tokens);
        long endTime = System.currentTimeMillis();
        String time = String.format("%12d", endTime - startTime);
        System.out.println("Most Frequent word is : " + entry.getKey() + ", which occured "+ entry.getValue() + " times.");
        System.out.println("time taken in milliseconds : " + time);
    }
}

Output is

Most Frequent word is : are, which occured 3 times.
time taken in milliseconds : 1

Screenshot is

Feel free to ask if you face any difficulty in understanding.

Please upvote if you find the solution helpful.


Related Solutions

Your task is to count the frequency of words in a text file, and return the...
Your task is to count the frequency of words in a text file, and return the most frequent word with its count. For example, given the following text: there are two ways of constructing a software design one way is to make it so simple that there are obviously no deficiencies and the other way is to make it so complicated that there are no obvious deficiencies. Based on the example your program should printout the following along with the...
How to count the number of words that only show up once in a text file,...
How to count the number of words that only show up once in a text file, and replace those words with a character '(unique)' using Python? Without list is better.
Given some data in a text file, the task is to scramble the text and output...
Given some data in a text file, the task is to scramble the text and output in a separate text file. So, we need to write a Python program that reads a text file, scrambles the words in the file and writes the output to a new text file. Rules to be followed: Words less than or equal to 3 characters need not be scrambled. Don’t scramble first and last char, so Scrambling can become Srbmnacilg or Srbmnailcg or Snmbracilg,...
The word count of the text that includes your “own words” should be at least 500...
The word count of the text that includes your “own words” should be at least 500 words (about 700 max.). The text should be typed double-spaced using a 12-point font size in Times New Roman. You are asked to make your own list of the best places to live (it can be one, two, or more) and convince your reader to move there. Build your case by using as many economic indicators you can think of. How important would the...
Write a C++ program to open and read a text file and count each unique token...
Write a C++ program to open and read a text file and count each unique token (word) by creating a new data type, struct, and by managing a vector of struct objects, passing the vector into and out of a function. Declare a struct TokenFreq that consists of two data members: (1) string value; and (2) int freq; Obviously, an object of this struct will be used to store a specific token and its frequency. For example, the following object...
Write a C++ program to create a text file. Your file should contain the following text:...
Write a C++ program to create a text file. Your file should contain the following text: Batch files are text files created by programmer. The file is written in notepad. Creating a text file and writing to it by using fstream: to write to a file, you need to open thew file as write mode. To do so, include a header filr to your program. Create an object of type fsrteam. Open the file as write mode. Reading from a...
Java 20 most frequent words in a text file. Words are supposed to be stored in...
Java 20 most frequent words in a text file. Words are supposed to be stored in array that counts each word. Write a program that will read an article, parse each line into words, and keep track of how many times each word occurred. Run this program for each of the two articles and print out the 20 most frequently appearing words in each article. You may think you need to use a StringTokenizer, but it turns out that is...
● Write a program that reads words from a text file and displays all the words...
● Write a program that reads words from a text file and displays all the words (duplicates allowed) in ascending alphabetical order. The words must start with a letter. Must use ArrayList. MY CODE IS INCORRECT PLEASE HELP THE TEXT FILE CONTAINS THESE WORDS IN THIS FORMAT: drunk topography microwave accession impressionist cascade payout schooner relationship reprint drunk impressionist schooner THE WORDS MUST BE PRINTED ON THE ECLIPSE CONSOLE BUT PRINTED OUT ON A TEXT FILE IN ALPHABETICAL ASCENDING ORDER...
● Write a program that reads words from a text file and displays all the words...
● Write a program that reads words from a text file and displays all the words (duplicates allowed) in ascending alphabetical order. The words must start with a letter. Must use ArrayList. THE TEXT FILE CONTAINS THESE WORDS IN THIS FORMAT: drunk topography microwave accession impressionist cascade payout schooner relationship reprint drunk impressionist schooner THE WORDS MUST BE PRINTED ON THE ECLIPSE CONSOLE BUT PRINTED OUT ON A TEXT FILE IN ALPHABETICAL ASCENDING ORDER IS PREFERRED THANK YOU IN ADVANCE...
Write a python program function to check the frequency of the words in text files. Make...
Write a python program function to check the frequency of the words in text files. Make sure to remove any punctuation and convert all words to lower case. If my text file is like this: Hello, This is Python Program? thAt chEcks% THE freQuency of the words! When is printed it should look like this: hello 1 this 1 is 1 python 1 program 1 that 1 checks 1 the 2 frequency 1 of 1 words 1
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT