In: Computer Science
Sorting with Binary Search Tree
This assignment asks you to sort the lines of an input file (or from standard input) and print the sorted lines to an output file (or standard output). Your program, called bstsort (binary search tree sort), will take the following command line arguments:
% bstsort [-c] [-o output_file_name] [input_file_name]
If -c is present, the program needs to compare the strings case sensitive; otherwise, it's case insensitive. If the output_file_name is given with the -o option, the program will output the sorted lines to the given output file; otherwise, the output shall be the standard output. Similarly, if the input_file_name is given, the program will read from the input file; otherwise, the input will be from the standard input. You must use getopt() to parse the command line arguments to determine the cases. All strings will be no more than 100 characters long.
In addition to parsing and processing the command line arguments, your program needs to do the following:
1. You need to construct a binary search tree as you read from input. A binary search tree is a binary tree. Each node can have at most two child nodes (one on the left and one on the right),
both or either one can be empty. If a child node exists, it's the root of a binary search tree (we call subtree). Each node contains a key (in our case, it's a string) and a count of how many of
that string were included. If he left subtree of a node exists, it contains only nodes with keys less than the node's key. If the right subtree of a node exists, it contains only nodes with keys greater than the node's key. You can look up binary search tree on the web or in your Data Structure textbook. Note that you do not need to balance the binary search tree (that is, you can ignore all those rotation operations) in this assignment.
2. Initially the tree is empty (that is, the root is null). The program reads from the input file (or stdin) one line at a time; If the line is not an empty line and the line is not already in the tree, it should create a tree node that stores a pointer to the string and a count of 1 indicating this is the first occurrence of that string, and then insert the tree node to the binary search tree. An empty line would indicate the end of input for stdin, an empty line or end of file would indicate the end of input for an input file. If the line is not an empty line and the line is already in the tree, increase the count for that node indicating that there are multiple instances of that line.
3. You must develop two string comparison functions, one for case sensitive and the other for case insensitive. You must not use the strcmp() and strcasecmp() functions provided by the C library. You must implement your own version. You will be comparing the ASCII values. Note that using ASCII, all capital letters come before all lower case letters.
4. Once the program has read all the input (when EOF is returned or a blank line encountered), the program then performs an in-order traversal of the binary search tree to print out all the strings one line at a time to the output file or stdout. Next to the line include a count of how many times that line appeared. If the selection was for case insensitive then you should include either the first choice encountered, the last choice encountered or all capital letters.
5. Before the program ends, it must reclaim the tree! You can do this by performing a post-order traversal, i.e., reclaiming the children nodes before reclaiming the node itself. Make sure you also reclaim the memory occupied by the string as well.
6. It is required that you use getopt for processing the command line and use malloc or calloc and free functions for dynamically allocating and deallocating nodes and the buffers for the strings. It is required that you implement your own string comparison functions instead of using the corresponding libc functions.
Here's an example:
bash$ cat myfile
bob is working.
david is a new hire.
Bob is working.
alice is bob's boss.
charles doesn't like bob.
bash$ ./bstsort myfile
1 alice is bob's boss.
2 bob is working.
1 charles doesn't like bob.
1 david is a new hire.
//binarySort.c
#include "header.h"
int insertNode(struct Node *root, char *stringB, int caseSensitive){
// printf("Entering Insert Node Function\n");
// printf("\n~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n");
// printf("Current string being inserted CaseType: %d\nString A: '%s'\nString B: '%s'\n\n", caseSensitive, root->currLine,stringB);
// printf("Duplicate number '%d'\n", root->duplicate);
//check if same
int checkIfSame = sameString(root->currLine,stringB,caseSensitive);
//check if greaterThan
int greaterThanResult = greaterThan(root->currLine,stringB,caseSensitive);
// printf("\t same: '%d' , greaterThan: '%d'\n", checkIfSame, greaterThanResult);
if(checkIfSame ==1) {
root->duplicate++;
// printf("Duplicate number '%d'\n", root->duplicate);
// printf("Leaving Insert Node Function\n");
if(caseSensitive ==0) {
// printf("creating linked list\n");
// creates a temporary list
struct List *tempList;
tempList = (struct List*)malloc(sizeof(struct List));
tempList->currLine = stringB;
tempList->next = NULL;
if(root->list == NULL) {
root->list = tempList;
}else{
root->list->next = tempList;
}
return 0;
}
return 0; // success
}
if(greaterThanResult == 1) {
// printf("String A is Greater than String B\n");
// Means That String B is less than the value inside of root->currLine
if(root->left != NULL) {
// printf("\n\tLEFT CONTAINS '%s'\n", root->left->currLine);
insertNode(root->left, stringB, caseSensitive);
// printf("\tLEFT IS NOT NULL\n");
}else{
// printf("\tLEFT IS NULL\n");
struct Node *tempNode;
tempNode = (struct Node*)malloc(sizeof(struct Node));
tempNode->currLine = stringB;
tempNode->right = NULL;
tempNode->left = NULL;
tempNode->list = NULL;
tempNode->duplicate = 0;
root->left = tempNode;
// printf("\tAdding to LEFTNODE: '%s'\n", root->left->currLine);
// printf("Leaving Insert Node Function\n");
return 0;
}
}else if (greaterThanResult == 0) {
// printf("String B is Greater than String A\n");
if(root->right != NULL) {
// printf("\n\tRIGHT CONTAINS '%s'\n", root->right->currLine);
insertNode(root->right, stringB, caseSensitive);
// printf("\tRIGHT IS NOT NULL\n");
}else{
// printf("\tRIGHT IS NULL\n");
struct Node *tempNode;
tempNode = (struct Node*)malloc(sizeof(struct Node));
tempNode->currLine = stringB;
tempNode->right = NULL;
tempNode->left = NULL;
tempNode->list = NULL;
tempNode->duplicate = 0;
root->right = tempNode;
// printf("\tAdding to RIGHTNODE: '%s'\n", root->right->currLine);
// printf("Leaving Insert Node Function\n");
return 0;
}
}
// printf("Leaving Insert Node Function\n");
return 0;
}
void printInOrder(struct Node *root,FILE *fp){
// printf("\nEntering printOrder Function\n");
// This is only initialized once
static int index = 0;
if(root != NULL) {
// printf("Printing Out left\n");
printInOrder(root->left,fp);
// char *someString = root->currLine;
// stores the values inside of this global array
if(fp != NULL) {
fprintf(fp, "%s\n", root->currLine);
printLinkedList(fp, root);
}else{
printf("Index '%d' value'%s'\n", index, root->currLine);
printLinkedList(fp, root);
}
index++;
// printf("Printing out right\n");
printInOrder(root->right,fp);
}
// printf("\nLeaving printOrder function\n");
}
int printLinkedList(FILE *fp, struct Node *root){
// printf("Inside of printLinkedList\n");
if(root->list == NULL) {
return 0;
}
int true = 1;
struct List *tempList;
// printf("Linked List Items\n");
tempList = root->list;
while(true) {
if(fp != NULL) {
char message[] = "Linked List Item: ";
strcat(message, tempList->currLine);
fprintf(fp, "%s\n", message);
}else{
printf("\t\tLinkList Item: '%s'\n",tempList->currLine);
}
if(tempList->next == NULL) {
true = 0;
}else{
tempList = tempList->next;
}
}
// printf("Leaving printLinkedList\n");
return 0;
}
void printPostOrder(struct Node *root){
// printf("\nEntering printOrder Function\n");
// This is only initialized once
static int index = 0;
if(root != NULL) {
// printf("Printing Out left\n");
printPostOrder(root->left);
// printf("Printing out right\n");
printPostOrder(root->right);
free(root->currLine);
index++;
}
// printf("\nLeaving printOrder function\n");
}
//determineoutput.c
#include "header.h"
void determineOutput(char *fileName){
if(fileName != NULL) {
writeToOutputFile(fileName);
}else{
writeToScreen();
}
}
//determineread.c
#include "header.h"
// Returns 1 if true and 0 if false
int determineRead(char *fileName){
if(fileName != NULL) {
return 1;
}
return 0;
}
//header.h
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <ctype.h>
#include <stdlib.h>
// Struct Declartions
struct Node {
char *currLine;
int duplicate;
struct Node *left;
struct Node *right;
struct List *list;
} root;
struct List {
struct List *next;
char *currLine;
};
//##########Function Declarations
//String Functions
int sameString(char stringA[],char stringB[],int caseType);
void toUpperCase(char *word,int count);
void toLowerCase(char *word,int count);
int stringLength(char *word);
int greaterThan(char stringA[],char stringB[],int caseType);
//Command Line Function
void parseCommandLineOptions(int argc, char *argv[],int *caseSensitive,char **outputFile,char **inputFile);
//OpenFile Function
int determineRead(char *fileName);
void readFile(char *fileName, int caseSensitive);
void readFromInput(int caseSensitive);
//WriteFile Function
void determineOutput(char *fileName);
void writeToOutputFile(char *fileName);
void writeToScreen();
//BinarySort
int insertNode(struct Node *root,char *line, int caseSensitive);
void printInOrder(struct Node *root,FILE *fp);
void printPostOrder(struct Node *root);
int printLinkedList(FILE *fp, struct Node *root);
//main.c
#include "header.h"
int main(int argc, char *argv[]){
printf("Welcome To assignment 1\n");
int caseSensitive = 0;
char *outputFile = NULL;
char *inputFile = NULL;
parseCommandLineOptions(argc,argv,&caseSensitive,&outputFile,&inputFile);
// printf("Case '%d' , outputFile '%s' inputfile '%s'\n",caseSensitive,outputFile,inputFile);
int readType = determineRead(inputFile);
// printf("Read Type '%d'\n", readType);
if(readType){
readFile(inputFile,caseSensitive);
}else{
readFromInput(caseSensitive);
}
determineOutput(outputFile);
printPostOrder(&root);
printf("End Of Assingment\n");
return 0;
}
//parsecommandlineoption.c
#include "header.h"
void parseCommandLineOptions(int argc, char *argv[],int *caseSensitive,char **outputFile,char **inputFile){
int c;
/*
opterr: If the value of this variable is nonzero then getopts prints an error message
to the standard error stream if it encounters an unkown option character or an
option with a missing required argument (this is the default behavior)
Setting it to 0 will not make getopts print any messages, but it returns
the character ? to indicate an error
*/
opterr = 0;
while((c = getopt(argc,argv, "co:")) != -1) {
switch(c) {
case 'c':
*caseSensitive = 1;
break;
case 'o':
/*
optarg: This variable is set by getopt to point at the value of the
option argument, for those that accept arguments
*/
*outputFile = optarg;
break;
case '?':
/*
optopt: When getopt encounters an unkown option character or an option
with a missing required argument it stores that option character
in this variable
*/
if(optopt == 'c')
fprintf(stderr, "Option -%c requires an argument.\n", optopt);
else if(isprint(optopt))
/*
isprint: Checks whether a passed character is printable
*/
fprintf(stderr, "Unkown Option '-%c'\n", optopt);
else
// %x prints out hex
fprintf(stderr, "Unkown Option character '\\x%x'\n", optopt);
exit(1);
default:
abort();
}
}
*inputFile = argv[optind];
// for(index = optind; index < argc; index++) {
// printf("Non-option argument %s\n\n", argv[index]);
// }
}
//readfile.c
#include "header.h"
/*
Reads in the text from the passed in file
and then outputs those values into a the
binarytree creation function
*/
void readFile(char *fileName,int caseType){
// filePath Location
char path[254];
if(getcwd(path, sizeof(path)) == NULL) {
fprintf(stderr, "Error with getting path\n");
exit(1);
}
FILE *fp = NULL;
strcat(path,"/");
strcat(path,fileName);
fp = fopen(path,"r");
if(!fp) {
fprintf(stderr, "File Not Found '-%s'\n", path);
exit(1);
}
root.left = NULL;
root.right = NULL;
root.duplicate = 0;
char lineBuffer[1024];
char ch = NULL;
int count = 0;
int initalizeRoot = 0;
/*
Streams through the file char by char
Creates a line from those chars
stores those lines in an array
*/
while((ch = getc(fp)) != EOF) {
if((initalizeRoot == 0) && (ch == '\n') && (count != 0)){
// This should only be printed once
char *temp = (char *)malloc(sizeof(char));
lineBuffer[count] = '\0';
strcpy(temp,lineBuffer);
// printf("First Output '%s'\n", temp);
root.currLine = temp;
initalizeRoot++;
count= 0;
}else if(ch == '\n' && count != 0 && initalizeRoot > 0) {
// Copies the lineBuffer String into a arrayStorage
char *temp = (char *)malloc(sizeof(char));
lineBuffer[count] = '\0';
// printf("Output '%s'\n", lineBuffer);
// printf("In the root Node '%s'\n", root.currLine);
strcpy(temp, lineBuffer);
insertNode(&root,temp,caseType);
count = 0;
}
if(ch != '\n') {
lineBuffer[count] = ch;
count++;
}
}
// Closes the file
fclose(fp);
// printf("End of ReadFile\n\n");
// int value = binarySort(lineStorage,index);
}
//readfilefrominput.c
#include "header.h"
void readFromInput(int caseSensitive){
// intiate reading from the terminal input
// printf("Inside of Read Input function\n");
//Creates root node
root.left = NULL;
root.right = NULL;
root.list = NULL;
root.duplicate = 0;
int end = 0;
char *inputString = (char *)malloc(sizeof(char));
char *rootVal = (char *)malloc(sizeof(char));
printf("Enter Values To Sort\n");
scanf("%[^\n]", rootVal );
root.currLine = rootVal;
// printf("Root Value '%s'\n", root.currLine);
// Loops through users inputs then creates nodes for the tree
while(end == 0){
// Scanf needs spaces infront of it so that it can ignore
// the \n which was created by the previous scanf
printf("Enter Values To Sort\n");
scanf(" %[^\n]", inputString );
printf("You Typed '%s'\n", inputString);
char *temp = (char *)malloc(sizeof(char));
strcpy(temp, inputString);
// printf("The value of root '%s'\n", root.currLine);
insertNode(&root, temp, caseSensitive);
printf("Enter 1 to stop and 0 to continue\n");
scanf(" %d", &end );
};
printf("Thank you\n");
}
//stringcomparision.c
#include "header.h"
int stringLength(char *word){
int count = 0;
while(word[count]) count++;
return count;
}
void toUpperCase(char *word, int count){
int a =0;
for(a = 0; a < count; a++) {
if(word[a] > 96) {
word[a] = word[a] - 32;
}
}
}
void toLowerCase(char *word, int count){
int a =0;
for(a = 0; a < count; a++) {
if(word[a] < 96) {
word[a] = word[a] + 32;
}
}
}
/*
Returns 1 if true or 0 if false
*/
int sameString(char stringA[], char stringB[], int caseType){
/*
Case type will determine if method should be ran with
checking for case or not
1 = true
0 = false
*/
// printf("\t a:%s | b:%s type:%d\n",stringA,stringB,caseType);
int lengthA = stringLength(stringA);
int lengthB = stringLength(stringB);
if(lengthA != lengthB) return 0;
// Creates temporary placeholders
char tempA[lengthA], tempB[lengthB];
strcpy(tempA,stringA);
strcpy(tempB,stringB);
int size = 0;
// Does not check for case
if(caseType == 0) {
// printf("\tDoes not check for case\n");
toUpperCase(tempA, lengthA);
toUpperCase(tempB, lengthA);
for(size = 0; size < lengthA; size++) {
if(tempA[size] != tempB[size]) {
return 0;
}
}
return 1;
}
// Case checking matters
for(size = 0; size < lengthA; size++) {
if(tempA[size] != tempB[size]) {
// printf("\tTEst\n");
return 0;
}
}
return 1;
}
/*
This program will essentially determine if stringA is greater than StringB
It will return:
0 if stringA is less than StringB
1 if stringA is greater than StringB
*/
int greaterThan(char stringA[],char stringB[],int caseType){
/*
Determines if one string is greater than the other
This gets called on after the method that determines if
the strings are the same
Case types
0 = false;
1 = true;
*/
int lengthA = stringLength(stringA);
int lengthB = stringLength(stringB);
// Temporary holders
char tempA[lengthA], tempB[lengthB];
strcpy(tempA,stringA);
strcpy(tempB,stringB);
int count = 0;
// Does not check for case
if(caseType == 0) {
toUpperCase(tempA, lengthA);
toUpperCase(tempB, lengthB);
if(lengthB < lengthA ) {
for(count = 0; count < lengthB; count++) {
if(tempA[count] != tempB[count]) {
if(tempA[count] > tempB[count]) {
return 1;
}else{
return 0;
}
}
}
}else {
// printf("Inside of String comparison Less Than\n");
for(count = 0; count < lengthA; count++) {
if(tempA[count] != tempB[count]) {
if(tempA[count] > tempB[count]) {
return 1;
}else{
return 0;
}
}
}
}
}
if(lengthB < lengthA ) {
for(count = 0; count < lengthB; count++) {
if(tempA[count] != tempB[count]) {
if(tempA[count] > tempB[count]) {
return 1;
}else{
return 0;
}
}
}
}else {
for(count = 0; count < lengthA; count++) {
if(tempA[count] != tempB[count]) {
if(tempA[count] > tempB[count]) {
return 1;
}else{
return 0;
}
}
}
}
return 0;
}
//writetooutputfile.c
#include "header.h"
void writeToOutputFile(char *fileName){
// printf("Inside of writeToOutputFile function\n");
// get current Directory
char path[1024];
if(getcwd(path,sizeof(path)) == NULL) {
fprintf(stderr, "Error with getting path\n");
exit(1);
}
FILE *fp = NULL;
strcat(path,"/");
strcat(path,fileName);
fp = fopen(path,"w+");
if(!fp) {
fprintf(stderr, "File Not Found '-%s'\n", path);
exit(1);
}
printInOrder(&root,fp);
fprintf(fp, "\n");
fclose(fp);
}
//writetoscreen.c
#include "header.h"
void writeToScreen(){
FILE *fp = NULL;
printInOrder(&root,fp);
}
//test.txt
bob is working.
david is a new hire.
alice is bob's boss.
charles doesn't like bob.
apple
Apple
APPLE