Question

In: Computer Science

c++ "Programming For Big Data [ DvcScheduleV2.cpp and StaticArray.h and/or DynamicArray.h ] Assignment 5's runtime was...

c++ "Programming For Big Data [ DvcScheduleV2.cpp and StaticArray.h and/or DynamicArray.h ]
Assignment 5's runtime was too slow -- a couple of minutes or so. It's because of the duplicate-checking, with over 4 billion compares.

Rewrite the duplicate-checking logic from Assignment 5, using a technique from "Techniques For Big Data, Reading" to do fewer compares, and come up with the exact same results as Assignment 5.

You may use your StaticArray.h from Assignment 3 and/or your DynamicArray.h from assignments 4, but you may not use any STL containers. Submit the H file(s) you use in your solution, even if there are no changes since your previous work. Your project will be compiled for grading using the default stack memory size of 1MB.

Progress Bar

Since this version is supposed to be fast, there is no longer a need for a progress bar. Include one if you wish, or you may leave it out -- your choice. But if you do have a progress bar, do remember to "flush"...

You should get the same result as V1 and should run an order of magnitude faster."

For some reason I'm getting different results from my first version of the programs and the program I've written for this assignment.

Code for the prior assignment :

#define _CRT_SECURE_NO_WARNINGS

#include <iostream>
#include <fstream>
#include <string>

using namespace std;

#include <cstring>
#include "DynamicArray.h"

struct Class
{
string code;
int count;
};

int main()
{

DynamicArray <Class> sub;
DynamicArray <string> sem;
DynamicArray <string> sec;

int totalSubjects = 0;
int dup = 0;
int total = 0;
int counter = 0;
bool duplicate;
bool stored;

//for parsing inputfile
char* token;
char buf[1000];
const char* const tab = "\t";

//open input file
ifstream fin;
fin.open("dvc-schedule.txt");
if (!fin.good())
cout << "I/O error. File can't be found!\n";

//read the input file
while (fin.good())
{
//progress bar
if(counter % 1000 == 0)
cout << '.'; cout.flush();
duplicate = false;
stored = false;
string line;
getline(fin, line);
total++; //total lines processed
strcpy(buf, line.c_str());
if (buf[0] == 0) continue; // skip blank lines
//parse the line
const string term(token = strtok(buf, tab));
const string section(token = strtok(0, tab));
const string course((token = strtok(0, tab)) ? token : "");
const string instructor((token = strtok(0, tab)) ? token : "");
const string whenWhere((token = strtok(0, tab)) ? token : "");
if (course.find('-') == string::npos) continue;
const string code(course.begin(), course.begin() + course.find('-'));

//check for duplicates
for(int i = 0; i < counter; i++)
{
if(sem[i] == term && sec[i] == section)
{
dup++;
duplicate = true;
break;
}
}

if(duplicate == true)
continue;

sem[counter] = term;
sec[counter] = section;
counter++;

for(int i = 0; i < totalSubjects; i++)
{
if (sub[i].code == code)
{
sub[i].count++;
stored = true;
break;
}
}

if(stored == true)
continue;

Class y;
y.code = code;
y.count = 1;
sub[totalSubjects] = y;
totalSubjects++;
}
fin.close();
cout << endl;

//sort
for (int i = 0; i < totalSubjects; i++)
for (int j = i + 1; j < totalSubjects; j++)
if (sub[j].code < sub[i].code)
swap(sub[j], sub[i]);

//output
for(int i = 0; i < totalSubjects; i++)
{
cout << sub[i].code << ", " << sub[i].count << " section" << endl;
}
cout << "Total duplication: " << dup << endl;
cout << "Total counts: " << total << endl;
cout << "Total subjects: " << totalSubjects << endl;
}

Code for this assignment:

#define _CRT_SECURE_NO_WARNINGS

#include <iostream>
#include <fstream>
#include <string>

using namespace std;

#include <cstring>
#include "DynamicArray.h"

struct SectionsForTerm
{
string term;
int numberOfSectionsSeen;
DynamicArray<string> seenSectionNumbers;
};

struct Class
{
string code;
int count;
};

int main()
{

int numberOfTermsSeen = 0;
DynamicArray <SectionsForTerm> alreadySeen;
DynamicArray <Class> sub;

int totalSubjects = 0;
int dup = 0;
int total = 0;
int counter = 0;
bool match;
bool duplicate;
bool stored;

//for parsing inputfile
char* token;
char buf[1000];
const char* const tab = "\t";

//open input file
ifstream fin;
fin.open("dvc-schedule.txt");
if (!fin.good())
cout << "I/O error. File can't be found!\n";

//read the input file
while (fin.good())
{
//check for false
match = false;
duplicate = false;
stored = false;

//read lines
string line;
getline(fin, line);
total++; //total lines processed
strcpy(buf, line.c_str());
if (buf[0] == 0) continue; // skip blank lines

//parse the line
const string term(token = strtok(buf, tab));
const string section(token = strtok(0, tab));
const string course((token = strtok(0, tab)) ? token : "");
const string instructor((token = strtok(0, tab)) ? token : "");
const string whenWhere((token = strtok(0, tab)) ? token : "");
if (course.find('-') == string::npos) continue;
const string code(course.begin(), course.begin() + course.find('-'));

//check for duplicates
int i;
for(int i = 0; i < numberOfTermsSeen; i++)
{
if(alreadySeen[i].term == term)
{
match = true;
break;
}
}

if(match == true)
{
for(int j = 0; j < alreadySeen[i].numberOfSectionsSeen; j++)
if (alreadySeen[i].seenSectionNumbers[j]== section)
{
duplicate = true;
dup++;
break;
}

if (duplicate == true)
continue;

else
{
alreadySeen[i].seenSectionNumbers[alreadySeen[i].numberOfSectionsSeen]= section;
alreadySeen[i].numberOfSectionsSeen++;
}
}

else
{
alreadySeen[numberOfTermsSeen].term = term;
alreadySeen[i].numberOfSectionsSeen = 1;
numberOfTermsSeen++;
}

// check for same section
for(i = 0; i < totalSubjects; i++)
{
if(sub[i].code == code)
{
stored = true;
break;
}
}

if(stored == true)
{
sub[i].count++;
}

else
{
sub[totalSubjects].code = code;
sub[totalSubjects].count = 1;
totalSubjects++;
}

counter++;
}
fin.close();
cout << endl;

//sort
for (int i = 0; i < totalSubjects; i++)
for (int j = i + 1; j < totalSubjects; j++)
if (sub[j].code < sub[i].code)
swap(sub[j], sub[i]);

//output
for(int i = 0; i < totalSubjects; i++)
{
cout << sub[i].code << ", " << sub[i].count << " section" << endl;
}
cout << "Total duplication: " << dup << endl;
cout << "Total counts: " << total << endl;
cout << "Total subjects: " << totalSubjects << endl;
}

*When I compare the results of both programs the subjects and total are the same but the total duplication number is significantly off. Also the amount of sections per course is off? I'm not really sure how, why, or how to fix it?

Solutions

Expert Solution

Before answering your question i believe you are comfortable with dry running.

I am saying this, because in all my industrial experience biggest mistake developers make is they directly jump into the logic part, believe me your question is 90% solved if you can do so witj pen and paper.

Now, i tried both of your code with pen and paper, with my own data, it was small though, but i got an insight about your codes

In your first assignment and secon too, your logic about duplicate terms is completely fine and i appreciate your attemt in deciding the invariant about findind duplicate terms efficiently.

Now, what i believe your codes are getting diff values are due to counter, no of subjects in 1st code

And nooftermsseen in the second, try to matchup with that part, because that is the only way your code can lead to loopholes?

Now some good debugging skills for your future projects too.

Always try to debug your logic first on paper, not your code directly.

Secondly, always use print statements intuitively they are best friends of a developer.

Hope you will learn from this.

Happy learning ??


Related Solutions

Programming II: C++ - Programming Assignment Fraction Object with Operator Overloads Overview In this assignment, the...
Programming II: C++ - Programming Assignment Fraction Object with Operator Overloads Overview In this assignment, the student will write a C++ program that implements a “fraction” object. When writing the object, the student will demonstrate mastery of implementing overloaded operators in a meaningful way for the object. When completing this assignment, the student should demonstrate mastery of the following concepts: · Mathematical Modeling - Fractions · Operator Overloading – Binary Operators (Internal Overload) · Operator Overloading – Binary Operator (External...
Programming II: C++ - Programming Assignment Vector Overloads Overview In this assignment, the student will write...
Programming II: C++ - Programming Assignment Vector Overloads Overview In this assignment, the student will write a C++ program that overloads the arithmetic operators for a pre-defined Vector object. When completing this assignment, the student should demonstrate mastery of the following concepts: · Object-oriented Paradigm · Operator Overloading - Internal · Operator Overloading - External · Mathematical Modeling Assignment In this assignment, the student will implement the overloaded operators on a pre-defined object that represents a Vector. Use the following...
The assignment: C++ program or Java You need to use the following programming constructs/data structures on...
The assignment: C++ program or Java You need to use the following programming constructs/data structures on this assignment. 1. A structure named student which contains:   a. int ID; student id; b. last name; // as either array of char or a string c. double GPA;    2. An input file containing the information of at least 10 students. 3. An array of struct: read the student information from the input file into the array. 4. A stack: you can use the...
Programming Language Concept assignment: 1. Design abstract data type for matrices with integer elements in C++...
Programming Language Concept assignment: 1. Design abstract data type for matrices with integer elements in C++ language, including operations for matrix addition, subtraction, and multiplication! 2. Design abstract queue data types for float elements in C++ language, including operations for enqueue, dequeue, and empty. The dequeue operation removes the element and returns its value! 3. Set semaphores in Ada and use them to provide co-operation and synchronization of competitions in shared buffer instances!
C PROGRAMMING – Steganography In this assignment, you will write an C program that includes processing...
C PROGRAMMING – Steganography In this assignment, you will write an C program that includes processing input, using control structures, and bitwise operations. The input for your program will be a text file containing a large amount of English. Your program must extract the “secret message” from the input file. The message is hidden inside the file using the following scheme. The message is hidden in binary notation, as a sequence of 0’s and 1’s. Each block of 8-bits is...
The purpose of this C++ programming assignment is to practice using an array. This problem is...
The purpose of this C++ programming assignment is to practice using an array. This problem is selected from the online contest problem archive, which is used mostly by college students worldwide to challenge their programming ability and to prepare themselves for attending programming contests such as the prestige ACM International Collegiate Programming Contest. For your convenience, I copied the description of the problem below with my note on the I/O and a sample executable. Background The world-known gangster Vito Deadstone...
introduction: C PROGRAMMING For this assignment you will write an encoder and a decoder for a...
introduction: C PROGRAMMING For this assignment you will write an encoder and a decoder for a modified "book cipher." A book cipher uses a document or book as the cipher key, and the cipher itself uses numbers that reference the words within the text. For example, one of the Beale ciphers used an edition of The Declaration of Independence as the cipher key. The cipher you will write will use a pair of numbers corresponding to each letter in the...
Programming Assignment 1 Performance Assessment Objective: To write a C program (not C++) that calculates the...
Programming Assignment 1 Performance Assessment Objective: To write a C program (not C++) that calculates the average CPI, total processing time (T), and MIPS of a sequence of instructions, given the number of instruction classes, the CPI and total count of each instruction type, and the clock rate (frequency) of the machine. The following is what the program would look like if it were run interactively. Your program will read values without using prompts. Inputs: • Clock rate of machine...
Programming Assignment 1 Performance Assessment Objective: To write a C program (not C++) that calculates the...
Programming Assignment 1 Performance Assessment Objective: To write a C program (not C++) that calculates the average CPI, total processing time (T), and MIPS of a sequence of instructions, given the number of instruction classes, the CPI and total count of each instruction type, and the clock rate (frequency) of the machine. The following is what the program would look like if it were run interactively. Your program will read values without using prompts. Inputs: • Clock rate of machine...
CS 400 Assignment 5 Recursive/Backtracking: Generating Permutations WRITE CODE IN C++ PROGRAMMING LANGUAGE WITH COMMENTS INCLUDED...
CS 400 Assignment 5 Recursive/Backtracking: Generating Permutations WRITE CODE IN C++ PROGRAMMING LANGUAGE WITH COMMENTS INCLUDED Description: Mimic the code for N-queen problem (https://introcs.cs.princeton.edu/java/23recursion/Queens.java.html), develop a program that generates all permutations for the set {1, 2, 3, 4, 5}. The output should contain all 5! = 120 permutations. Output Sample: P#1: 1 2 3 4 5 P#2: 1 2 3 5 4 P#3: 1 2 4 3 5 ... P#120: 5 4 3 2 1 Hint: - Thoroughly study the...
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT