In: Computer Science
Write a method ( C++ )
map occurance_map(const string path);
that reads in an ASCII text file and returns an association where each key is a word in the text file and each value is the number of occurrences of that word. Ignore punctuation and numbers. The method should be case-insensitive and should store the keys as lowercase. A word is defined as a string consisting entirely of alphanumeric characters or apostrophes (single-quote characters). For example, if the file contained
This is a sentence. Don't think of wier_d strings as words. Really, 123 is a nice number.
you would include sentence, don't, and 123 but not ., ,, or wier_d. Using single quotes as quotes, as in
'I should use double quotes'
is a user error and will catch the "words"
'I should use double quotes'
CodeToCopy:
count_occurences.cpp
#include <algorithm> /* for using find() function */
#include <fstream> /* for ifstream class */
#include <iomanip> /* for setw() manipulator */
#include <iostream> /* cout object */
#include <map> /* for map class */
#include <string> /* for string class */
#include <vector> /* for vector class */
/* ASCII code points used when splitting the input text into words: the
 * digit range, the upper- and lower-case letter ranges, the apostrophe, and
 * the characters treated as word endings (period, comma, space, newline). */
#define ZERO_ASCII 48 /* digit zero, first character of the digit range */
#define NINE_ASCII 57 /* digit nine, last character of the digit range */
#define UPPERCASE_A_ASCII 65 /* first upper-case letter */
#define UPPERCASE_Z_ASCII 90 /* last upper-case letter */
#define LOWERCASE_A_ASCII 97 /* first lower-case letter */
#define LOWERCASE_Z_ASCII 122 /* last lower-case letter */
#define APOSTROPHE_ASCII 39 /* single quote, accepted inside words */
#define PERIOD_ASCII 46 /* full stop */
#define COMMA_ASCII 44 /* comma */
#define SPACE_ASCII 32 /* blank */
#define NEWLINE_ASCII 10 /* line feed */
/* Makes every name from namespace std visible unqualified in the rest of
 * this file. */
using namespace std;
/* Records one occurrence of `word` in the map that `occurences_map` points
 * to. A word seen for the first time ends up with a count of 1; a word that
 * is already a key has its count raised by 1.
 *
 * occurences_map  map from word to number of occurrences; it is
 *                 dereferenced unconditionally
 * word            the key to count */
void add_word(std::map<std::string, int> * occurences_map, std::string word) {
    /* operator[] creates a missing entry with the value 0, so one increment
     * covers both the "new word" and the "known word" case */
    std::map<std::string, int> & counts = *occurences_map;
    ++counts[word];
}
/* Returns true when `c` may be part of a word: an ASCII digit, an ASCII
 * letter of either case, or an apostrophe. */
static bool is_word_character(char c) {
    return (c >= '0' && c <= '9') ||
           (c >= 'A' && c <= 'Z') ||
           (c >= 'a' && c <= 'z') ||
           c == '\'';
}

/* Returns true for the ASCII whitespace characters that separate tokens. */
static bool is_token_separator(char c) {
    return c == ' ' || c == '\n' || c == '\r' ||
           c == '\t' || c == '\v' || c == '\f';
}

/* Maps 'A'..'Z' to 'a'..'z' and returns every other character unchanged. */
static char to_ascii_lowercase(char c) {
    return (c >= 'A' && c <= 'Z') ? static_cast<char>(c - 'A' + 'a') : c;
}

/* Counts `word` in `counts` if it is non-empty, then empties it so the next
 * word can be collected. */
static void flush_word(std::map<std::string, int> & counts, std::string & word) {
    if (!word.empty()) {
        /* operator[] starts a missing entry at 0 */
        ++counts[word];
        word.clear();
    }
}

/* Reads the text file at `path` and returns how often each word occurs.
 *
 * A word is a run of ASCII letters, digits and apostrophes. Keys are stored
 * in lowercase, so "The" and "the" are counted together.
 *
 * Scanning rules:
 *   - whitespace (space, tab, CR, LF, VT, FF) ends the current token;
 *   - a period or comma ends the current word, which is counted;
 *   - any other non-word character before a word starts is ignored;
 *   - any other non-word character after a word has started is tolerated
 *     as trailing punctuation ("hello!", "\"quoted\"") as long as no further
 *     word character follows before the next period, comma or whitespace;
 *   - if a word character does follow (as in "wier_d"), the token is not a
 *     word: it is discarded up to the next whitespace.
 *
 * path     name of the file to read
 * returns  map from lowercase word to occurrence count; an empty map when
 *          the file cannot be opened (a message is written to std::cerr) */
std::map<std::string, int> occurance_map(const std::string path) {
    std::map<std::string, int> word_occurences;

    std::ifstream text_file(path.c_str());
    if (!text_file) {
        std::cerr << "Error: cannot open '" << path << "' for reading.\n";
        return word_occurences;
    }

    /* characters of the word currently being collected, already lowercased */
    std::string word;
    /* a non-word character was seen after `word` started */
    bool punctuation_after_word = false;
    /* the current token is malformed; ignore it up to the next whitespace */
    bool skipping_token = false;

    char ch;
    /* get() never skips whitespace and only fails at end of file or on a
     * read error, so nothing inside the loop can end the scan early */
    while (text_file.get(ch)) {
        if (is_token_separator(ch)) {
            flush_word(word_occurences, word);
            punctuation_after_word = false;
            skipping_token = false;
        }
        else if (skipping_token) {
            /* still inside a discarded token */
        }
        else if (is_word_character(ch)) {
            if (punctuation_after_word) {
                /* punctuation embedded in the token: not a word at all */
                word.clear();
                punctuation_after_word = false;
                skipping_token = true;
            }
            else {
                word.push_back(to_ascii_lowercase(ch));
            }
        }
        else if (ch == '.' || ch == ',') {
            flush_word(word_occurences, word);
            punctuation_after_word = false;
        }
        else if (!word.empty()) {
            punctuation_after_word = true;
        }
    }

    /* the file may end without a trailing separator */
    flush_word(word_occurences, word);

    /* text_file is closed by its destructor */
    return word_occurences;
}
/* main function */
int main(int argc, char **argv) {
/* checking number of command line arguments for user input-file */
if (argc != 2) {
cout << "Usage: " << argv[0] << ": <input-text-file>" << endl;
return 0;
}
/* calling occurance_map() function with path */
map<string, int> occurences = occurance_map(argv[1]);
/* iterating map to print key-value pair values */
map<string, int> ::iterator it = occurences.begin();
/* printing word and frequencies with proper format */
cout << setw(6) << right << "Word" << setw(20) << right <<"Frequency ";
cout << setw(15) << left << "-------" << setw(3) << right <<"---------- ";
for(; it != occurences.end(); ++it) {
cout << setw(15) << left << it->first << " " << setw(5) << right << it->second << " ";
}
return 0;
}
OutputScreenshot: