Question

In: Computer Science

Write a method that returns the 200tth most frequent word and its frequency from a text...

Write a method that returns the 200tth most frequent word and its frequency from a text file. Use the best text reading method with a merge sort. and/or other techniques. The method signature:

public static Integer countFAST(String fileName) throws Exception {...}

The function takes the text file name.

Solutions

Expert Solution

Source Code-

#include <stdio.h> 
#include <string.h> 
#include <ctype.h> 
  
# define MAX_CHARS 26 
# define MAX_WORD_SIZE 30 
  
struct TrieNode 
{ 
    bool isEnd;
    unsigned frequency;  
    int indexMinHeap; 
    TrieNode* child[MAX_CHARS];  
}; 
  
struct MinHeapNode 
{ 
    TrieNode* root; 
    unsigned frequency; 
    char* word;  
}; 
  
struct MinHeap 
{ 
    unsigned capacity; 
    int count; 
    MinHeapNode* array;  
}; 
  
TrieNode* newTrieNode() 
{ 
    TrieNode* trieNode = new TrieNode; 
  
    trieNode->isEnd = 0; 
    trieNode->frequency = 0; 
    trieNode->indexMinHeap = -1; 
    for( int i = 0; i < MAX_CHARS; ++i ) 
        trieNode->child[i] = NULL; 
  
    return trieNode; 
} 
  
MinHeap* createMinHeap( int capacity ) 
{ 
    MinHeap* minHeap = new MinHeap; 
  
    minHeap->capacity = capacity; 
    minHeap->count  = 0; 
  
    minHeap->array = new MinHeapNode [ minHeap->capacity ]; 
  
    return minHeap; 
} 


void swapMinHeapNodes ( MinHeapNode* a, MinHeapNode* b ) 
{ 
    MinHeapNode temp = *a; 
    *a = *b; 
    *b = temp; 
} 
  

void minHeapify( MinHeap* minHeap, int idx ) 
{ 
    int left, right, smallest; 
  
    left = 2 * idx + 1; 
    right = 2 * idx + 2; 
    smallest = idx; 
    if ( left < minHeap->count && 
         minHeap->array[ left ]. frequency < 
         minHeap->array[ smallest ]. frequency 
       ) 
        smallest = left; 
  
    if ( right < minHeap->count && 
         minHeap->array[ right ]. frequency < 
         minHeap->array[ smallest ]. frequency 
       ) 
        smallest = right; 
  
    if( smallest != idx ) 
    { 
        minHeap->array[ smallest ]. root->indexMinHeap = idx; 
        minHeap->array[ idx ]. root->indexMinHeap = smallest; 
  
        swapMinHeapNodes (&minHeap->array[ smallest ], &minHeap->array[ idx ]); 
  
        minHeapify( minHeap, smallest ); 
    } 
} 
  
void buildMinHeap( MinHeap* minHeap ) 
{ 
    int n, i; 
    n = minHeap->count - 1; 
  
    for( i = ( n - 1 ) / 2; i >= 0; --i ) 
        minHeapify( minHeap, i ); 
} 
  
void insertInMinHeap( MinHeap* minHeap, TrieNode** root, const char* word ) 
{ 
     
    if( (*root)->indexMinHeap != -1 ) 
    { 
        ++( minHeap->array[ (*root)->indexMinHeap ]. frequency ); 
  
        
        minHeapify( minHeap, (*root)->indexMinHeap ); 
    } 
  
    else if( minHeap->count < minHeap->capacity ) 
    { 
        int count = minHeap->count; 
        minHeap->array[ count ]. frequency = (*root)->frequency; 
        minHeap->array[ count ]. word = new char [strlen( word ) + 1]; 
        strcpy( minHeap->array[ count ]. word, word ); 
  
        minHeap->array[ count ]. root = *root; 
        (*root)->indexMinHeap = minHeap->count; 
  
        ++( minHeap->count ); 
        buildMinHeap( minHeap ); 
    } 
  
    else if ( (*root)->frequency > minHeap->array[0]. frequency ) 
    { 
  
        minHeap->array[ 0 ]. root->indexMinHeap = -1; 
        minHeap->array[ 0 ]. root = *root; 
        minHeap->array[ 0 ]. root->indexMinHeap = 0; 
        minHeap->array[ 0 ]. frequency = (*root)->frequency; 
  
        delete [] minHeap->array[ 0 ]. word; 
        minHeap->array[ 0 ]. word = new char [strlen( word ) + 1]; 
        strcpy( minHeap->array[ 0 ]. word, word ); 
  
        minHeapify ( minHeap, 0 ); 
    } 
} 
  
void insertUtil ( TrieNode** root, MinHeap* minHeap, 
                        const char* word, const char* dupWord ) 
{ 
    if ( *root == NULL ) 
        *root = newTrieNode(); 
  
    if ( *word != '\0' ) 
        insertUtil ( &((*root)->child[ tolower( *word ) - 97 ]), 
                         minHeap, word + 1, dupWord ); 
    else
    { 
        if ( (*root)->isEnd ) 
            ++( (*root)->frequency ); 
        else
        { 
            (*root)->isEnd = 1; 
            (*root)->frequency = 1; 
        } 
  
        insertInMinHeap( minHeap, root, dupWord ); 
    } 
} 
  
  
void insertTrieAndHeap(const char *word, TrieNode** root, MinHeap* minHeap) 
{ 
    insertUtil( root, minHeap, word, word ); 
} 
  
void displayMinHeap( MinHeap* minHeap ) 
{ 
    int i; 
  
    for( i = 0; i < minHeap->count; ++i ) 
    { 
        printf( "%s : %d\n", minHeap->array[i].word, 
                            minHeap->array[i].frequency ); 
    } 
} 
  

void printKMostFreq( FILE* fp, int k ) 
{ 
    MinHeap* minHeap = createMinHeap( k ); 
     
    TrieNode* root = NULL; 
  
    char buffer[MAX_WORD_SIZE]; 
  
    while( fscanf( fp, "%s", buffer ) != EOF ) 
        insertTrieAndHeap(buffer, &root, minHeap); 
  
    displayMinHeap( minHeap ); 
} 
  
int main() 
{ 
    int k = 200;   //helps in finding 200th most frequent word in a text file
    FILE *fp = fopen ("file.txt", "r"); 
    if (fp == NULL) 
        printf ("File doesn't exist "); 
    else
        printKMostFreq (fp, k); 
    return 0; 
} 
Let me know if you have any doubts or if you need anything to change. 

If you are satisfied with the solution, please leave a +ve feedback : ) Let me know for any help with any other questions.

Thank You!
===========================================================================

Related Solutions

Create a method that returns the third character from the String word. Be sure to use...
Create a method that returns the third character from the String word. Be sure to use a try catch block and use the appropriate exception to deal with indexes out of a String’s bound: StringIndexOutOfBoundsException. Return the character 'x' (lowercase) when this exception is caught. Do not use if statements and do not use the general exception handler Exception. Examples: thirdLetter("test") -> 's' public char thirdLetter(String word) { } ​should return the char 'r'
Java 20 most frequent words in a text file. Words are supposed to be stored in...
Java 20 most frequent words in a text file. Words are supposed to be stored in array that counts eah word. Write a program that will read an article, parse each line into words, and keep track of how many times each word occurred. Run this program for each of the two articles and print out the 20 most frequently appearing words in each article. You may think you need to use a StringTokenizer, but it turns out that is...
Write a method called mode that returns the most frequently occurring element of an array of...
Write a method called mode that returns the most frequently occurring element of an array of integers. Assume that the array has at least one element and that every element in the array has a value between 0 and 100 inclusive. Break ties by choosing the lower value. For example, if the array passed contains the values [27, 15, 15, 11, 27], your method should return 15. write a version of this method that does not rely on the values...
Write a function bracket_by_len that takes a word as an input argument and returns the word...
Write a function bracket_by_len that takes a word as an input argument and returns the word bracketed to indicate its length. Words less than five characters long are bracketed with << >>, words five to ten letters long are bracketed with (* *), and words over ten characters long are bracketed with /+ +/. Your function should require the calling function to provide as the first argument, space for the result, and as the third argument, the amount of space...
Write a function checkvertical(board, word, row, col), which returns True if the word word can be...
Write a function checkvertical(board, word, row, col), which returns True if the word word can be added to the board starting at position board[row][col] and going down. The restrictions are (1) it has to fit entirely on the board, (2) it has to intersect and match with one or more non-blank letters on the board, and (3) it cannot change any letters on the board, other than blanks (that is, overlapping mismatches are not allowed). My program isn't working :(...
In PYTHON: Write a function that receives a sentence and returns the last word of that...
In PYTHON: Write a function that receives a sentence and returns the last word of that sentence. You may assume that there is exactly one space between every two words, and that there are no other spaces at the sentence. To make the problem simpler, you may assume that the sentence contains no hyphens, and you may return the word together with punctuation at its end.
Write a python program function to check the frequency of the words in text files. Make...
Write a python program function to check the frequency of the words in text files. Make sure to remove any punctuation and convert all words to lower case. If my text file is like this: Hello, This is Python Program? thAt chEcks% THE freQuency of the words! When is printed it should look like this: hello 1 this 1 is 1 python 1 program 1 that 1 checks 1 the 2 frequency 1 of 1 words 1
IN JAVA Write a program with a method that returns an array. The method should accept...
IN JAVA Write a program with a method that returns an array. The method should accept as input a comma-delimited string with three values from a user. The array should store each value in a different element. Use Try..Catch error handling and print any failure messages, or print success from within method if the execution is successful (see Chapter 13 in the text). Call the method from the main method of the program to demonstrate its functionality by looping through...
Write a regular expressions that would match lines in text that contain word DATE: at the...
Write a regular expressions that would match lines in text that contain word DATE: at the beginning of a line followed by actual date in format YYYY-MM-DD. Space between colon ( : ) and date may or may not exist. In C, if you issue the following statement n << 2 where n is an integer, what will be value of n? In bash, if you define variable var = “date” and issue a statement echo `$var`, what output will...
Write a regular expressions that would match lines in text that contain word DATE: at the...
Write a regular expressions that would match lines in text that contain word DATE: at the beginning of a line followed by actual date in format YYYY-MM-DD. Space between colon ( : ) and date may or may not exist. In C, if you issue the following statement n << 2 where n is an integer, what will be value of n?
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT