In: Computer Science
Your task is to count the frequency of words in a text file, and return the most frequent word with its count. (Must use the code below without changing algorithms)
For example, given the following text:
there are two ways of constructing a software design one way is to make it so simple that there are obviously no deficiencies and the other way is to make it so complicated that there are no obvious deficiencies.
Based on the example, your program should print out the
following, along with the time in milliseconds taken to finish the
computation:
The most frequent word is "there" with 3 occurrences.
The code is below; it does not seem to print out which word is the most frequent or how many times it occurs.
import java.io.File;
import java.util.Scanner;
import java.util.Map.Entry;
import java.util.AbstractMap;
import java.util.LinkedList;
/**
 * Finds the most frequent word in a text file with two deliberately simple
 * counting strategies, a pair of parallel arrays and a linked list of
 * word/count entries, and prints how long each strategy takes.
 */
public class WordCountLinkedList254 {

    /** Minimum number of slots allocated by {@link #count_ARRAY}. */
    private static final int CAPACITY = 10000;

    /** Input file used by {@link #main} when no path is given on the command line. */
    private static final String DEFAULT_PATH = "/Users/jianguolu/Dropbox/254/code/dblp1k.txt";

    /** Scanner delimiter: every run of characters that are not ASCII letters separates two words. */
    private static final String DELIMITER = "[^a-zA-Z]+";

    /**
     * Counts word frequencies with two parallel arrays searched linearly.
     *
     * @param tokens the words to count, in reading order
     * @return the most frequent word and its count; the word seen first wins a
     *         tie, and an empty input yields {@code ""} with a count of 0
     */
    public static Entry<String, Integer> count_ARRAY(String[] tokens) {
        // There can never be more distinct words than tokens, so sizing the
        // table by the input guarantees that no word is silently dropped.
        int capacity = Math.max(CAPACITY, tokens.length);
        String[] words = new String[capacity];
        int[] counts = new int[capacity];
        for (String token : tokens) {
            for (int i = 0; i < capacity; i++) {
                if (words[i] == null) {
                    // First free slot reached: the word has not been seen before.
                    words[i] = token;
                    counts[i] = 1;
                    break;
                }
                if (words[i].equals(token)) {
                    counts[i]++;
                    // Stop here; scanning on would reach a free slot and store
                    // the same word a second time.
                    break;
                }
            }
        }
        int maxCount = 0;
        String maxWord = "";
        // Short-circuit && keeps words[i] from being read once i == capacity.
        for (int i = 0; i < capacity && words[i] != null; i++) {
            if (counts[i] > maxCount) {
                maxWord = words[i];
                maxCount = counts[i];
            }
        }
        return new AbstractMap.SimpleEntry<String, Integer>(maxWord, maxCount);
    }

    /**
     * Counts word frequencies with a linked list of word/count entries that is
     * searched from the head for every token.
     *
     * @param tokens the words to count, in reading order
     * @return the most frequent word and its count; the word seen first wins a
     *         tie, and an empty input yields {@code ""} with a count of 0
     */
    public static Entry<String, Integer> count_LINKED_LIST(String[] tokens) {
        LinkedList<Entry<String, Integer>> list = new LinkedList<Entry<String, Integer>>();
        for (String word : tokens) {
            boolean found = false;
            for (Entry<String, Integer> e : list) {
                if (word.equals(e.getKey())) {
                    // The entry is updated in place. Writing it back with
                    // list.set(0, e) would overwrite the head entry and lose
                    // that word's count.
                    e.setValue(e.getValue() + 1);
                    found = true;
                    break;
                }
            }
            if (!found) {
                list.add(new AbstractMap.SimpleEntry<String, Integer>(word, 1));
            }
        }
        int maxCount = 0;
        String maxWord = "";
        // Iterate rather than call get(i): indexed access walks the list each time.
        for (Entry<String, Integer> e : list) {
            if (e.getValue() > maxCount) {
                maxWord = e.getKey();
                maxCount = e.getValue();
            }
        }
        return new AbstractMap.SimpleEntry<String, Integer>(maxWord, maxCount);
    }

    /**
     * Reads a text file and splits it into lower-cased words.
     *
     * <p>The file is read twice: once to count the words so the array can be
     * sized exactly, and once to fill it.
     *
     * @param PATH location of the text file
     * @return every word of the file, lower-cased, in reading order
     * @throws Exception if the file cannot be opened
     */
    static String[] readText(String PATH) throws Exception {
        File file = new File(PATH);
        int length = 0;
        try (Scanner counter = new Scanner(file).useDelimiter(DELIMITER)) {
            while (counter.hasNext()) {
                counter.next();
                length++;
            }
        }
        String[] tokens = new String[length];
        try (Scanner reader = new Scanner(file).useDelimiter(DELIMITER)) {
            for (int i = 0; i < length && reader.hasNext(); i++) {
                // Locale.ROOT keeps the lower-casing independent of the machine's locale.
                tokens[i] = reader.next().toLowerCase(java.util.Locale.ROOT);
            }
        }
        return tokens;
    }

    /**
     * Times both counting strategies on the same file and prints one line per
     * strategy: the elapsed milliseconds followed by the winning word and its count.
     *
     * @param args optional; {@code args[0]} overrides the default input path
     * @throws Exception if the input file cannot be read
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;

        String[] tokens = readText(path);
        long startTime = System.currentTimeMillis();
        Entry<String, Integer> entry = count_LINKED_LIST(tokens);
        long endTime = System.currentTimeMillis();
        String time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());

        tokens = readText(path);
        startTime = System.currentTimeMillis();
        entry = count_ARRAY(tokens);
        endTime = System.currentTimeMillis();
        time = String.format("%12d", endTime - startTime);
        System.out.println("time\t" + time + "\t" + entry.getKey() + ":" + entry.getValue());
    }
}
import java.io.File;
import java.util.Scanner;
import java.util.Map.Entry;
import java.util.AbstractMap;
import java.util.LinkedList;
/**
 * Reports the most frequent word of a text file. Two counting strategies are
 * provided, one backed by parallel arrays and one by a linked list of
 * word/count entries; {@link #main} runs and times the linked-list one.
 */
public class WordCountLinkedList254 {

    /** Smallest table size {@link #count_ARRAY} will allocate. */
    private static final int CAPACITY = 10000;

    /** File analysed by {@link #main} when the command line names no other. */
    private static final String DEFAULT_PATH = "/Users/jianguolu/Dropbox/254/code/dblp1k.txt";

    /** Scanner delimiter: any run of non-letter characters ends a word. */
    private static final String DELIMITER = "[^a-zA-Z]+";

    /**
     * Tallies words in two parallel arrays, locating each word by linear search.
     *
     * @param tokens the words to tally, in reading order
     * @return the word with the highest tally together with that tally; on a
     *         tie the word that appeared first is returned, and for an empty
     *         input the result is {@code ""} with 0
     */
    public static Entry<String, Integer> count_ARRAY(String[] tokens) {
        // The number of distinct words is bounded by the number of tokens, so a
        // table at least that large can never overflow.
        int capacity = Math.max(CAPACITY, tokens.length);
        String[] words = new String[capacity];
        int[] counts = new int[capacity];
        for (int j = 0; j < tokens.length; j++) {
            String token = tokens[j];
            for (int i = 0; i < capacity; i++) {
                if (words[i] == null) {
                    // Empty slot: this is a new word.
                    words[i] = token;
                    counts[i] = 1;
                    break;
                } else if (words[i].equals(token)) {
                    counts[i] = counts[i] + 1;
                    // Leave the scan, otherwise the word is also appended as a
                    // duplicate in the next empty slot.
                    break;
                }
            }
        }
        int maxCount = 0;
        String maxWord = "";
        // && instead of & so that words[i] is not evaluated when i == capacity.
        for (int i = 0; i < capacity && words[i] != null; i++) {
            if (counts[i] > maxCount) {
                maxWord = words[i];
                maxCount = counts[i];
            }
        }
        return new AbstractMap.SimpleEntry<>(maxWord, maxCount);
    }

    /**
     * Tallies words in a linked list of word/count entries, walking the list
     * from its head for every token.
     *
     * @param tokens the words to tally, in reading order
     * @return the word with the highest tally together with that tally; on a
     *         tie the word that appeared first is returned, and for an empty
     *         input the result is {@code ""} with 0
     */
    public static Entry<String, Integer> count_LINKED_LIST(String[] tokens) {
        LinkedList<Entry<String, Integer>> list = new LinkedList<>();
        for (int j = 0; j < tokens.length; j++) {
            String word = tokens[j];
            boolean found = false;
            for (Entry<String, Integer> e : list) {
                if (word.equals(e.getKey())) {
                    // Mutating the entry is sufficient. Storing it back at
                    // index 0 would replace the first word of the list and
                    // discard its tally.
                    e.setValue(e.getValue() + 1);
                    found = true;
                    break;
                }
            }
            if (!found) {
                list.add(new AbstractMap.SimpleEntry<>(word, 1));
            }
        }
        int maxCount = 0;
        String maxWord = "";
        // A for-each pass visits each node once; get(i) would restart from an end every time.
        for (Entry<String, Integer> e : list) {
            int count = e.getValue();
            if (count > maxCount) {
                maxWord = e.getKey();
                maxCount = count;
            }
        }
        return new AbstractMap.SimpleEntry<>(maxWord, maxCount);
    }

    /**
     * Loads the words of a text file, lower-cased.
     *
     * <p>Two passes are made over the file: the first counts the words so that
     * the result array has exactly the right size, the second fills it.
     *
     * @param PATH location of the text file
     * @return the file's words in reading order, lower-cased
     * @throws Exception if the file cannot be opened
     */
    static String[] readText(String PATH) throws Exception {
        File source = new File(PATH);
        int length = 0;
        try (Scanner doc = new Scanner(source).useDelimiter(DELIMITER)) {
            while (doc.hasNext()) {
                doc.next();
                length++;
            }
        }
        String[] tokens = new String[length];
        try (Scanner s = new Scanner(source).useDelimiter(DELIMITER)) {
            int filled = 0;
            while (filled < length && s.hasNext()) {
                // Locale.ROOT makes the lower-casing the same on every machine.
                tokens[filled] = s.next().toLowerCase(java.util.Locale.ROOT);
                filled++;
            }
        }
        return tokens;
    }

    /**
     * Runs the linked-list strategy on the input file and prints the most
     * frequent word, its count and the elapsed time in milliseconds.
     *
     * @param args optional; {@code args[0]} replaces the default input path
     * @throws Exception if the input file cannot be read
     */
    public static void main(String[] args) throws Exception {
        String path = args.length > 0 ? args[0] : DEFAULT_PATH;
        String[] tokens = readText(path);
        long startTime = System.currentTimeMillis();
        Entry<String, Integer> entry = count_LINKED_LIST(tokens);
        long endTime = System.currentTimeMillis();
        String time = String.format("%12d", endTime - startTime);
        System.out.println("Most Frequent word is : " + entry.getKey() + ", which occured "
                + entry.getValue() + " times.");
        System.out.println("time taken in milliseconds : " + time);
    }
}
Output is
Most Frequent word is : are, which occured 3 times.
time taken in milliseconds : 1
Screenshot is
Feel free to ask if you have any difficulty understanding the solution.
Please upvote if you find the solution helpful.