In: Computer Science
C++ Question
The first phase of compilation is called scanning or lexical analysis. This phase interprets the input program as a sequence of characters and produces a sequence of tokens, which will be used by the parser.
Write a C++ program that implements a simple scanner for a source file given as a command-line argument. The format of the tokens is described below. You may assume that the input is syntactically correct. Optionally, your program can build a symbol table (a hash table is a good choice), which contains an entry for each token that was found in the input. When all the input has been read, your program should produce a summary report that includes a list of all the tokens that appeared in the input, the number of times each token appears in the input and the class of each token. Your program should also list how many times tokens of each category appeared in the input.
Sample token format:
keyword -> if | then | else | begin | end
identifier -> character | character identifier
integer -> digit | digit integer
real -> integer.integer
special -> ( | ) | [ | ] | + | - | = | , | ;
digit -> 0|1|2|3|4|5|6|7|8|9
character -> a|b|c ... |z|A|B|C ... |Z
More details:
The following program tokenizes a given source file and classifies the tokens into five different classes, namely Keywords, Real, Special, Character and Digit. To add more items to a class, just modify and add the items to the unordered_set variables defined at the top of the file. Compile with the -std flag set to c++11.
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <unordered_map>
#include <unordered_set>
#include <vector>
#include <string>
#include <locale>
using namespace std;
// Token-class membership tables. To extend a class, add entries here.
// Declared const: they are read-only lookup tables, queried via find()
// and never modified after initialization.
const std::unordered_set<std::string> keywords =
{"if","then","else","begin","end"};

// Single-character special (punctuation/operator) tokens.
const std::unordered_set<std::string> special =
{"(",")","[","]","+","-","=",",",";"};

// The ten decimal digit tokens.
const std::unordered_set<std::string> digit_set =
{"0","1","2","3","4","5","6","7","8","9"};
// Returns true iff `key` is one of the reserved words in `keywords`.
bool isKeyword(string key) {
return keywords.count(key) > 0;
}
// Returns true iff `key` is a single special symbol such as "(" or ";".
bool isSpecial(string key) {
return special.count(key) > 0;
}
// Returns true iff `key` is a single decimal digit "0".."9".
bool isDigit(string key) {
return digit_set.count(key) > 0;
}
// Returns true iff `key` is a single ASCII letter (a-z or A-Z).
//
// Bug fix: the original combined the uppercase and lowercase range
// checks with &&. No character can lie in both ranges at once, so the
// function always returned false and no token was ever classified as
// a character.
bool isChar(std::string key) {
if (key.length() != 1)
return false;
const char c = key.at(0);
return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
}
// Returns true iff `key` matches the grammar rule
//   real -> integer.integer
// i.e. one or more digits, a single '.', then one or more digits.
//
// Bug fixes vs. the original:
//  * substr(0, found-1) dropped the last digit of the integer part and
//    produced an empty string for single-digit parts, so valid reals
//    like "1.2" were rejected.
//  * the dot position was stored in an int instead of
//    string::size_type before comparison with string::npos.
//  * each part was validated with isDigit(), which accepts only a
//    single digit; multi-digit parts such as "12.34" were rejected
//    even though the grammar allows them.
bool isReal(std::string key) {
const std::string::size_type dot = key.find('.');
if (dot == std::string::npos)
return false;
const std::string intPart = key.substr(0, dot);
const std::string fracPart = key.substr(dot + 1);
// Both sides of the dot must be non-empty and all digits; a second
// '.' inside fracPart fails the digit test, rejecting "1.2.3".
auto allDigits = [](const std::string& s) {
if (s.empty())
return false;
for (char c : s)
if (c < '0' || c > '9')
return false;
return true;
};
return allDigits(intPart) && allDigits(fracPart);
}
int main(int argc, char *argv[]) {
if(argc < 2) {
cout << "Incorrect usage! use: ./tokenize
filename.ext";
exit(-1);
}
ifstream fin;
fin.open(argv[1]);
unordered_map<string,vector<string>>
symbol_table;
vector<string> keyword_vec, real_vec, special_vec,
character_vec, digit_vec;
symbol_table["keyword"] = keyword_vec;
symbol_table["real"] = real_vec;
symbol_table["special"] = special_vec;
symbol_table["character"] = character_vec;
symbol_table["digit"] = digit_vec;
string temp;
while(fin) {
getline(fin,temp,' ');
if(isKeyword(temp))
symbol_table["keyword"].push_back(temp);
else if(isReal(temp))
symbol_table["real"].push_back(temp);
else if(isSpecial(temp))
symbol_table["special"].push_back(temp);
else if(isChar(temp))
symbol_table["character"].push_back(temp);
else if(isDigit(temp))
symbol_table["digit"].push_back(temp);
}
fin.close();
cout << "\n-----File Summary------";
cout << "\n-----------------------";
cout << "\nKeywords : \t" <<
symbol_table["keyword"].size();
cout << "\nReal Values : \t" <<
symbol_table["real"].size();
cout << "\nSpecials : \t" <<
symbol_table["special"].size();
cout << "\nCharacters : " <<
symbol_table["character"].size();
cout << "\nDigits : " <<
symbol_table["digit"].size();
return 0;
}