Question

In: Computer Science

Your task is to count the frequency of words in a text file, and return the...

Your task is to count the frequency of words in a text file, and return the most frequent word with its count.

For example, given the following text:

there are two ways of constructing a software design one way is to make it so simple that there are obviously no deficiencies and the other way is to make it so complicated that there are no obvious deficiencies.

Based on the example your program should printout the following along with the milliseconds to finish the computing:
The most frequent word is "there" with 3 occurrences.

The code is below, it doesn't seem to printout what word is the most frequent and how many occurrences of it there are.

import java.io.File;
import java.util.Scanner;
import java.util.Map.Entry;
import java.util.AbstractMap;
import java.util.LinkedList;

public class WordCountLinkedList254{

    public static Entry count_ARRAY(String[] tokens) {

        int CAPACITY = 10000;
        String[] words = new String[CAPACITY];
        int[] counts = new int[CAPACITY];

        for (int j = 0; j < tokens.length; j++) {
            String token = tokens[j];
            for (int i = 0; i < CAPACITY; i++) {
                if (words[i] == null) {
                    words[i] = token;
                    counts[i] = 1;
                    break;
                } else if (words[i].equals(token))
                    counts[i] = counts[i] + 1;
            }
        }
        int maxCount = 0;
        String maxWord = "";
        for (int i = 0; i < CAPACITY & words[i] != null; i++) {
            if (counts[i] > maxCount) {
                maxWord = words[i];
                maxCount = counts[i];
            }
        }
        return new AbstractMap.SimpleEntry < String, Integer > (maxWord, maxCount);
    }
  
   public static Entry count_LINKED_LIST(String[] tokens) {
        LinkedList> list = new LinkedList> ();
        for (int j = 0; j < tokens.length; j++) {
            String word = tokens[j];
            boolean found = false;

            /* for (int i = 0; i < list.size(); i++) {
            Entry e = list.get(i);
            if (word.equals(e.getKey())) {
            e.setValue(e.getValue() + 1);
            list.set(i, e);
            found = true;
            break;
               }
            }*/

            int i = 0;
            for (Entry e: list) {
                if (word.equals(e.getKey())) {
                    e.setValue(e.getValue() + 1);
                    list.set(i, e);
                    i++;
                    found = true;
                    break;
                }
            }

            if (!found)
                list.add(new AbstractMap.SimpleEntry (word, 1));
        }
        int maxCount = 0;
        String maxWord = "";
        for (int i = 0; i < list.size(); i++) {
            int count = list.get(i).getValue();
            if (count > maxCount) {
                maxWord = list.get(i).getKey();
                maxCount = count;
            }
        }
        return new AbstractMap.SimpleEntry < String, Integer > (maxWord, maxCount);
    }
  
    static String[] readText(String PATH) throws Exception {
        Scanner doc = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+");
        int length = 0;
        while (doc.hasNext()) {
            doc.next();
            length++;
        }
      
        String[] tokens = new String[length];
        Scanner s = new Scanner(new File(PATH)).useDelimiter("[^a-zA-Z]+");
        length = 0;
        while (s.hasNext()) {
            tokens[length] = s.next().toLowerCase();
            length++;
        }
        doc.close();
      
        return tokens;
    }
  
    public static void main(String[] args) throws Exception {
      
        String PATH = "/Users/jianguolu/Dropbox/254/code/dblp1k.txt ";
        String[] tokens = readText(PATH);
        long startTime = System.currentTimeMillis();
        Entry entry = count_LINKED_LIST(tokens);
        long endTime = System.currentTimeMillis();
        String time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());
      
        tokens = readText(PATH);
        startTime = System.currentTimeMillis();
        entry = count_ARRAY(tokens);
        endTime = System.currentTimeMillis();
        time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());
    }
}

Solutions

Expert Solution

import java.io.File;
import java.util.Scanner;
import java.util.Map.Entry;
import java.util.AbstractMap;
import java.util.LinkedList;

public class WordCountLinkedList254 {

    public static Entry<String, Integer> count_ARRAY(String[] tokens) {

        int CAPACITY = 10000;
        String[] words = new String[CAPACITY];
        int[] counts = new int[CAPACITY];

        for (int j = 0; j < tokens.length; j++) {
            String token = tokens[j];
            for (int i = 0; i < CAPACITY; i++) {
                if (words[i] == null) {
                    words[i] = token;
                    counts[i] = 1;
                    break;
                } else if (words[i].equals(token))
                    counts[i] = counts[i] + 1;
            }
        }
        int maxCount = 0;
        String maxWord = "";
        for (int i = 0; i < CAPACITY & words[i] != null; i++) {
            if (counts[i] > maxCount) {
                maxWord = words[i];
                maxCount = counts[i];
            }
        }
        return new AbstractMap.SimpleEntry<String, Integer>(maxWord, maxCount);
    }

    public static Entry<String, Integer> count_LINKED_LIST(String[] tokens) {
        LinkedList<Entry<String, Integer>> list = new LinkedList<>();
        for (int j = 0; j < tokens.length; j++) {
            String word = tokens[j];
            boolean found = false;

            for (Entry<String, Integer> e : list) {
                if (word.equals(e.getKey())) {
                    e.setValue(e.getValue() + 1);
                    found = true;
                    break;
                }
            }

            if (!found)
                list.add(new AbstractMap.SimpleEntry<String, Integer>(word, 1));
        }
        
        int maxCount = 0;
        String maxWord = "";


        for (Entry<String, Integer> e : list) {
            int count = e.getValue();
            if (count > maxCount) {
                maxWord = e.getKey();
                maxCount = count;
            }
        }
        
        return new AbstractMap.SimpleEntry<String, Integer>(maxWord, maxCount);
    }

    static String[] readText(String PATH) throws Exception {
        Scanner doc = new Scanner(new File(PATH));
        doc.useDelimiter("[^a-zA-Z]+");
        int length = 0;
        while (doc.hasNext()) {
            doc.next();
            length++;
        }
        doc.close();

        String[] tokens = new String[length];
        Scanner s = new Scanner(new File(PATH));
        s.useDelimiter("[^a-zA-Z]+");
        length = 0;
        while (s.hasNext()) {
            tokens[length] = s.next().toLowerCase();
            length++;
        }
        s.close();

        return tokens;
    }

    public static void main(String[] args) throws Exception {

        String PATH = "data.txt";
        String[] tokens = readText(PATH);
        long startTime = System.currentTimeMillis();
        Entry<String, Integer> entry = count_LINKED_LIST(tokens);
        long endTime = System.currentTimeMillis();
        String time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());

        tokens = readText(PATH);
        startTime = System.currentTimeMillis();
        entry = count_ARRAY(tokens);
        endTime = System.currentTimeMillis();
        time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());
    }
}

i have corrected everything now.. The code works perfectly.. please please upvote.


Related Solutions

How to count the number of words that only show up once in a text file,...
How to count the number of words that only show up once in a text file, and replace those words with a character '(unique)' using Python? Without list is better.
The word count of the text that includes your “own words” should be at least 500...
The word count of the text that includes your “own words” should be at least 500 words (about 700 max.). The text should be typed double-spaced using a 12-point font size in Times New Roman. You are asked to make your own list of the best places to live (it can be one, two, or more) and convince your reader to move there. Build your case by using as many economic indicators you can think of. How important would the...
Write a C++ program to open and read a text file and count each unique token...
Write a C++ program to open and read a text file and count each unique token (word) by creating a new data type, struct, and by managing a vector of struct objects, passing the vector into and out of a function. Declare a struct TokenFreq that consists of two data members: (1) string value; and (2) int freq; Obviously, an object of this struct will be used to store a specific token and its frequency. For example, the following object...
Write a C++ program to create a text file. Your file should contain the following text:...
Write a C++ program to create a text file. Your file should contain the following text: Batch files are text files created by programmer. The file is written in notepad. Creating a text file and writing to it by using fstream: to write to a file, you need to open thew file as write mode. To do so, include a header filr to your program. Create an object of type fsrteam. Open the file as write mode. Reading from a...
Java 20 most frequent words in a text file. Words are supposed to be stored in...
Java 20 most frequent words in a text file. Words are supposed to be stored in array that counts eah word. Write a program that will read an article, parse each line into words, and keep track of how many times each word occurred. Run this program for each of the two articles and print out the 20 most frequently appearing words in each article. You may think you need to use a StringTokenizer, but it turns out that is...
● Write a program that reads words from a text file and displays all the words...
● Write a program that reads words from a text file and displays all the words (duplicates allowed) in ascending alphabetical order. The words must start with a letter. Must use ArrayList. MY CODE IS INCORRECT PLEASE HELP THE TEXT FILE CONTAINS THESE WORDS IN THIS FORMAT: drunk topography microwave accession impressionist cascade payout schooner relationship reprint drunk impressionist schooner THE WORDS MUST BE PRINTED ON THE ECLIPSE CONSOLE BUT PRINTED OUT ON A TEXT FILE IN ALPHABETICAL ASCENDING ORDER...
An HTML file is a text file that contains text to be displayed in a browser...
An HTML file is a text file that contains text to be displayed in a browser and __________ to be interpreted (read) by the browser formatting and styling the document Both C++ and javascript are computer programing languages; what are their differences? What HTML tag can let you insert javascript in to document Which attributes of the <script> tag tells the brorwser what kind of scripting language is insterted? (give an example) in the javascript section of the HTML file,...
Write a python program function to check the frequency of the words in text files. Make...
Write a python program function to check the frequency of the words in text files. Make sure to remove any punctuation and convert all words to lower case. If my text file is like this: Hello, This is Python Program? thAt chEcks% THE freQuency of the words! When is printed it should look like this: hello 1 this 1 is 1 python 1 program 1 that 1 checks 1 the 2 frequency 1 of 1 words 1
Write a C program to find out the number of words in an input text file...
Write a C program to find out the number of words in an input text file (in.txt). Also, make a copy of the input file. Solve in C programming.
Write a python program that does the following: Prompt for a file name of text words....
Write a python program that does the following: Prompt for a file name of text words. Words can be on many lines with multiple words per line. Read the file and convert the words to a list. Call a function you created called list_to_once_words(), that takes a list as an argument and returns a list that contains only words that occurred once in the file. Print the results of the function with an appropriate description. Think about everything you must...
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT