In: Computer Science
C++ Question
The first phase of compilation is called scanning or lexical analysis. This phase interprets the input program as a sequence of characters and produces a sequence of tokens, which will be used by the parser.
Write a C++ program that implements a simple scanner for a source file given as a command-line argument. The format of the tokens is described below. You may assume that the input is syntactically correct. Optionally, your program can build a symbol table (a hash table is a good choice), which contains an entry for each token that was found in the input. When all the input has been read, your program should produce a summary report that includes a list of all the tokens that appeared in the input, the number of times each token appears in the input and the class of each token. Your program should also list how many times tokens of each category appeared in the input.
Sample token format:
keyword -> if | then | else | begin | end
identifier -> character | character identifier
integer -> digit | digit integer
real -> integer.integer
special -> ( | ) | [ | ] | + | - | = | , | ;
digit -> 0|1|2|3|4|5|6|7|8|9
character -> a|b|c ... |z|A|B|C ... |Z
#include<stdio.h>
#include<string.h>
#include<algorithm>
#include<cctype>
#include<cstdlib>
#include<cstring>
#include<cstring>
#include<fstream>
#include<functional>
#include<iostream>
#include<map>
#include<string>
#include<vector>
using namespace std;
// Running tally of how many tokens fell into each class. The classifier
// functions index this map by key; "char" is the key they and the final report
// use for alphabetic tokens, so it is seeded here (the previous "character"
// seed was never read by anything).
std::map<std::string, int> classes = {
    {"keyword", 0}, {"identifier", 0}, {"digit", 0}, {"integer", 0},
    {"real", 0},    {"char", 0},       {"alpha", 0}};

// Single decimal digits, stored as one-character strings.
std::vector<std::string> ints = {"0", "1", "2", "3", "4", "5", "6", "7", "8", "9"};

// Reserved words of the toy language.
std::vector<std::string> key_words = {"if", "else", "then", "begin", "end"};

// Punctuation and operator symbols (the grammar's "special" class, despite
// the variable name). The last entry is ";" as listed in the grammar; it was
// previously ":" which the grammar does not define.
std::vector<std::string> identi_fiers = {"(", ")", "[", "]", "+", "=", ",", "-", ";"};

// Lowercase letters, stored as one-character strings.
std::vector<std::string> alpha_bets = {
    "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
    "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z"};

// Scratch iterator shared by the table-lookup classifiers.
std::vector<std::string>::iterator iters;
// Forward declarations of the token classifiers. Every classifier takes the
// token text by value as a string; the prototypes for is_identifier,
// is_integer and is_alpha previously said `char`, which declared extra
// overloads that were never defined.
bool is_keyword(std::string ch);
bool is_identifier(std::string ch);
bool is_integer(std::string ch);
bool is_digit(std::string ch);
bool is_real(std::string ch);
bool is_char(std::string ch);
bool is_alpha(std::string ch);

// Per-category token counters, all starting at zero.
int cnt_key = 0;
int cnt_ident = 0;
int cnt_digit = 0;
int cnt_int = 0;
int cnt_alpha = 0;
int cnt_char = 0;
int cnt_real = 0;
bool is_keyword(string ch)
{
iters =
find(key_words.begin(),key_words.end(),ch);
if(iters != key_words.end()){
classes["keyword"]++;
return true;
}
else{
return false
}
}
bool is_identifier(string ch)
{
iters =
find(identi_fiers.begin(),identi_fiers.end(),ch);
if(iters != identi_fiers.end()){
classes["identifier"]++;
return true;
}
else{
return false
}
}
bool is_digit(string ch)
{
iters = find(ints.begin(),ints.end(),ch);
if(iters != ints.end()){
classes["digit"]++;
classes["integer"]++;
return true;
}
else{
return false
}
}
bool is_integer(string ch)
{
char *p;
strtol(ch.c_str(), &p , 10);
if(*p == 0){
classes["digit"]++;
return true;
}
else{
return false
}
}
bool is_real(string ch)
{
bool isreal=false;
if(ch.find(".") != std::string::npos){
for(int
j=0;j<ch.length;j++){
if(ch[j] !=
'.'){
string search;
search = ch[j];
iters =
find(ints.begin(),ints.end(),search);
if(iters != ints.end()){
isreal = true;
}
else{
isreal = false;
}
}
else{;}
}
}
else {;}
if(isreal == true){
classes["real"]++;
return true;}
else {return false;}
}
bool is_alpha(string ch)
{
iters = find(alpha.begin(),alpha.end(),ch);
if(iters != alpha.end()){
classes["alpha"]++;
classes["char"]++;
return true;
}
else{
return false
}
}
bool is_char(string ch)
{
bool ischar = false;
for(int i=0;i<ch.length();i++){
if(isalpha(ch[i])
ischar =
true;
else
ischar =
false;
}
if(ch.length() >1 && ischar ==true)
{
classes["char"]++;
return true;}
else{
return false
}
}
int main(int argc, char *argv[])
{
int token_cnt = 0; //used to count tokens as they are
read.
//atleast one commandline argument must be
supplied
if(argc < 2)
{
cerr <<"Error: filename
argument not given" <<endl;
exit(1);
}
ifstream in_File(argv[1], ios::in) //open file for
input.
//check for errors in opening the file
if(!in_File){
cerr <<"File" <<argv[1]
<< "counld not be opened" <<endl;
exit(1);
}
char lines[100]; //char array used to hold lines of
text input
char *ps; //pointer used to tokenize string
while(in_File.getline(lines,100)){ //reads upto
100chars into lies array
ps = strtok(lines, "
~'!@#$^&*_{}:<>|?"); //assigns tokenpointer to first
token in line
while(ps != NULL){
if(is_keyword(ch)){
cnt_key++;
}
else if
(isIdentifier(ps)) {
cnt_ident++; }
else if
(is_char(ps)) {
cnt_char++; }
else if
(is_digit(ps)) {
cnt_digit++; }
else if
(is_integer(ps)) {
cnt_int++; }
else if
(is_real(ps)) {
cn++; }
else if
(is_alpha(ps)) {
cnt_alpha++; }
else {;}
ps =
strtok(lines, " ~'!@#$^&*_{}:<>|?"); //assigns
tokenpointer to first token in line
}
}
std::cout << "CLASS" <<"keyword" <<":"
<<classes["keyword"] <<"\n";
std::cout << "CLASS" <<"identifier" <<":"
<<classes["identifier"] <<"\n";
std::cout << "CLASS" <<"digit" <<":"
<<classes["digit"] <<"\n";
std::cout << "CLASS" <<"integer" <<":"
<<classes["integer"] <<"\n";
std::cout << "CLASS" <<"character" <<":"
<<classes["char"] <<"\n";
std::cout << "CLASS" <<"alphabet" <<":"
<<classes["alpha"] <<"\n";
std::cout << "CLASS" <<"real" <<":"
<<classes["real"] <<"\n";
in_File.close();
return 0;
}