Question

In: Computer Science

External sort: Write an external merge sort in c++ that takes in an input file with...

External sort: Write an external merge sort in c++ that takes in an input file with 120 integers. You are allowed to hold a maximum of 20 integers in memory. You must create 2 files to process the integers and they must be sorted and placed in the original file.

Solutions

Expert Solution

// C++ program to implement external sorting using

// merge sort

#include <bits/stdc++.h>

using namespace std;

  

struct MinHeapNode

{

    // The element to be stored

    int element;

  

    // index of the array from which the element is taken

    int i;

};

  

// Prototype of a utility function to swap two min heap nodes

void swap(MinHeapNode* x, MinHeapNode* y);

  

// A class for Min Heap

class MinHeap

{

    MinHeapNode* harr; // pointer to array of elements in heap

    int heap_size;     // size of min heap

  

public:

    // Constructor: creates a min heap of given size

    MinHeap(MinHeapNode a[], int size);

  

    // to heapify a subtree with root at given index

    void MinHeapify(int);

  

    // to get index of left child of node at index i

    int left(int i) { return (2 * i + 1); }

  

    // to get index of right child of node at index i

    int right(int i) { return (2 * i + 2); }

  

    // to get the root

    MinHeapNode getMin() { return harr[0]; }

  

    // to replace root with new node x and heapify()

    // new root

    void replaceMin(MinHeapNode x)

    {

        harr[0] = x;

        MinHeapify(0);

    }

};

  

// Constructor: Builds a heap from a given array a[]

// of given size

MinHeap::MinHeap(MinHeapNode a[], int size)

{

    heap_size = size;

    harr = a; // store address of array

    int i = (heap_size - 1) / 2;

    while (i >= 0)

    {

        MinHeapify(i);

        i--;

    }

}

  

// A recursive method to heapify a subtree with root

// at given index. This method assumes that the

// subtrees are already heapified

void MinHeap::MinHeapify(int i)

{

    int l = left(i);

    int r = right(i);

    int smallest = i;

    if (l < heap_size && harr[l].element < harr[i].element)

        smallest = l;

    if (r < heap_size && harr[r].element < harr[smallest].element)

        smallest = r;

    if (smallest != i)

    {

        swap(&harr[i], &harr[smallest]);

        MinHeapify(smallest);

    }

}

  

// A utility function to swap two elements

void swap(MinHeapNode* x, MinHeapNode* y)

{

    MinHeapNode temp = *x;

    *x = *y;

    *y = temp;

}

  

// Merges two subarrays of arr[].

// First subarray is arr[l..m]

// Second subarray is arr[m+1..r]

void merge(int arr[], int l, int m, int r)

{

    int i, j, k;

    int n1 = m - l + 1;

    int n2 = r - m;

  

    /* create temp arrays */

    int L[n1], R[n2];

  

    /* Copy data to temp arrays L[] and R[] */

    for(i = 0; i < n1; i++)

        L[i] = arr[l + i];

    for(j = 0; j < n2; j++)

        R[j] = arr[m + 1 + j];

  

    /* Merge the temp arrays back into arr[l..r]*/

    i = 0; // Initial index of first subarray

    j = 0; // Initial index of second subarray

    k = l; // Initial index of merged subarray

    while (i < n1 && j < n2)

    {

        if (L[i] <= R[j])

            arr[k++] = L[i++];

        else

            arr[k++] = R[j++];

    }

  

    /* Copy the remaining elements of L[], if there

       are any */

    while (i < n1)

        arr[k++] = L[i++];

  

    /* Copy the remaining elements of R[], if there

       are any */

    while(j < n2)

        arr[k++] = R[j++];

}

  

/* l is for left index and r is right index of the

   sub-array of arr to be sorted */

void mergeSort(int arr[], int l, int r)

{

    if (l < r)

    {

        // Same as (l+r)/2, but avoids overflow for

        // large l and h

        int m = l + (r - l) / 2;

  

        // Sort first and second halves

        mergeSort(arr, l, m);

        mergeSort(arr, m + 1, r);

  

        merge(arr, l, m, r);

    }

}

  

FILE* openFile(char* fileName, char* mode)

{

    FILE* fp = fopen(fileName, mode);

    if (fp == NULL)

    {

        perror("Error while opening the file.\n");

        exit(EXIT_FAILURE);

    }

    return fp;

}

  

// Merges k sorted files. Names of files are assumed

// to be 1, 2, 3, ... k

void mergeFiles(char *output_file, int n, int k)

{

    FILE* in[k];

    for (int i = 0; i < k; i++)

    {

        char fileName[2];

  

        // convert i to string

        snprintf(fileName, sizeof(fileName), "%d", i);

  

        // Open output files in read mode.

        in[i] = openFile(fileName, "r");

    }

  

    // FINAL OUTPUT FILE

    FILE *out = openFile(output_file, "w");

  

    // Create a min heap with k heap nodes. Every heap node

    // has first element of scratch output file

    MinHeapNode* harr = new MinHeapNode[k];

    int i;

    for (i = 0; i < k; i++)

    {

        // break if no output file is empty and

        // index i will be no. of input files

        if (fscanf(in[i], "%d ", &harr[i].element) != 1)

            break;

  

        harr[i].i = i; // Index of scratch output file

    }

    MinHeap hp(harr, i); // Create the heap

  

    int count = 0;

  

    // Now one by one get the minimum element from min

    // heap and replace it with next element.

    // run till all filled input files reach EOF

    while (count != i)

    {

        // Get the minimum element and store it in output file

        MinHeapNode root = hp.getMin();

        fprintf(out, "%d ", root.element);

  

        // Find the next element that will replace current

        // root of heap. The next element belongs to same

        // input file as the current min element.

        if (fscanf(in[root.i], "%d ", &root.element) != 1 )

        {

            root.element = INT_MAX;

            count++;

        }

  

        // Replace root with next element of input file

        hp.replaceMin(root);

    }

  

    // close input and output files

    for (int i = 0; i < k; i++)

        fclose(in[i]);

  

    fclose(out);

}

  

// Using a merge-sort algorithm, create the initial runs

// and divide them evenly among the output files

void createInitialRuns(char *input_file, int run_size,

                       int num_ways)

{

    // For big input file

    FILE *in = openFile(input_file, "r");

  

    // output scratch files

    FILE* out[num_ways];

    char fileName[2];

    for (int i = 0; i < num_ways; i++)

    {

        // convert i to string

        snprintf(fileName, sizeof(fileName), "%d", i);

  

        // Open output files in write mode.

        out[i] = openFile(fileName, "w");

    }

  

    // allocate a dynamic array large enough

    // to accommodate runs of size run_size

    int* arr = (int*)malloc(run_size * sizeof(int));

  

    bool more_input = true;

    int next_output_file = 0;

  

    int i;

    while (more_input)

    {

        // write run_size elements into arr from input file

        for (i = 0; i < run_size; i++)

        {

            if (fscanf(in, "%d ", &arr[i]) != 1)

            {

                more_input = false;

                break;

            }

        }

  

        // sort array using merge sort

        mergeSort(arr, 0, i - 1);

  

        // write the records to the appropriate scratch output file

        // can't assume that the loop runs to run_size

        // since the last run's length may be less than run_size

        for (int j = 0; j < i; j++)

            fprintf(out[next_output_file], "%d ", arr[j]);

  

        next_output_file++;

    }

  

    // close input and output files

    for (int i = 0; i < num_ways; i++)

        fclose(out[i]);

  

    fclose(in);

}

  

// For sorting data stored on disk

void externalSort(char* input_file, char *output_file,

                  int num_ways, int run_size)

{

    // read the input file, create the initial runs,

    // and assign the runs to the scratch output files

    createInitialRuns(input_file, run_size, num_ways);

  

    // Merge the runs using the K-way merging

    mergeFiles(output_file, run_size, num_ways);

}

  

  

// Driver program to test above

int main()

{

    // No. of Partitions of input file.

    int num_ways = 10;

  

    // The size of each partition

    int run_size = 1000;

  

    char input_file[] = "input.txt";

    char output_file[] = "output.txt";

  

    FILE* in = openFile(input_file, "w");

  

    srand(time(NULL));

  

    // generate input

    for (int i = 0; i < num_ways * run_size; i++)

        fprintf(in, "%d ", rand());

  

    fclose(in);

  

    externalSort(input_file, output_file, num_ways,

                run_size);

  

    return 0;

}


Related Solutions

C Write a function int sort(int** arr, int n) that takes as input an array of...
C Write a function int sort(int** arr, int n) that takes as input an array of int pointers, multiplies the ints the pointers point to by -1, and then sorts the array such that the values pointed to by the pointers are in decreasing order. For example, if the array is size 10 the pointer at index 0 will point to the largest value after multiplying by -1 and the pointer at index 9 will point to the smallest value...
Write a program that takes three sets ’A’, ’B’, ’C’ as input read from the file...
Write a program that takes three sets ’A’, ’B’, ’C’ as input read from the file prog2 input.txt. The first line of the file corresponds to the set ’A’, the second line is the set ’B’, and the third line is the set ’C’. Every element of each set is a character, and the characters are separated by space. Implement algorithms for the following operations on the sets. Each of these algorithms must be in separate methods or subroutines. The...
write a java merge sort called MERGE-SORT-A(), Using recursive calls and NO INSERTION-SORT() as a sub-procedure.
write a java merge sort called MERGE-SORT-A(), Using recursive calls and NO INSERTION-SORT() as a sub-procedure.
Write a function in C that uses the Merge Sort sorting algorithm with arrays. The function...
Write a function in C that uses the Merge Sort sorting algorithm with arrays. The function must not be void and must output type int* i.e. it must take the form: int* merge_sort(int a[], int n) where a[ ] is the input matrix and n is the size of the matrix. You may use an auxiliary functions such as "merge." The returned array should be sorted using merge_sort and should not modify the array that was input (a[ ] ).
(C++)Counting Sort: Write C++ codes for counting sort. The input array is A = {20, 18,...
(C++)Counting Sort: Write C++ codes for counting sort. The input array is A = {20, 18, 5, 7, 16, 10, 9, 3, 12, 14, 0}
[In Python] Write a program that takes a .txt file as input. This .txt file contains...
[In Python] Write a program that takes a .txt file as input. This .txt file contains 10,000 points (i.e 10,000 lines) with three co-ordinates (x,y,z) each. From this input, use relevant libraries and compute the convex hull. Now, using all the points of the newly constructed convex hull, find the 50 points that are furthest away from each other, hence giving us an evenly distributed set of points.
Write a program in Java to sort the given array using merge sort, quick sort, insertion...
Write a program in Java to sort the given array using merge sort, quick sort, insertion sort, selection sort and bubble sort based on the input from the user which sorting technique they wanted to use. Get the array size, array elements from the user, and also display the sorted array along with the name of the sorting technique used.
C++.how to write a selection sort to sort it. read_book_data() This member function takes one parameter,...
C++.how to write a selection sort to sort it. read_book_data() This member function takes one parameter, a string that contains the name of a file. This string parameter can be a C++ string or a C string (your choice). The function returns nothing. This constructor should do the following: Declare and open an input file stream variable using the file name string passed in as a parameter. Check to make sure the file was opened successfully. If not, print an...
in c++ Write a function that takes a C string as an input parameter and reverses...
in c++ Write a function that takes a C string as an input parameter and reverses the string. The function should use two pointers, front and rear. The front pointer should initially reference the first character in the string, and the rear pointer should initially reference the last character in the string. Reverse the string by swapping the characters referenced by front and rear, then increment front to point to the next character and decrement rear to point to the...
C Programming file.c takes in one input argument that denotes a file to be read. It...
C Programming file.c takes in one input argument that denotes a file to be read. It needs to convert the contents of that file into a character array (char *) and then into a an unsigned character array (unsigned char *). Please fix and or complete the program, and explain any of the changes too: ---- #include <stdio.h> #include <stdlib.h> #include <string.h> int main(int argc, char *argv[]) { FILE *f; f = fopen(argv[1], "r"); if( !f ) { exit(1); }...
ADVERTISEMENT
ADVERTISEMENT
ADVERTISEMENT