In: Computer Science
Create a C++ program using native C++ (STL is acceptable) that produces Huffman code for a string of text entered by the user. Must accept all ASCII characters.
Provide an explanation for how you got frequencies of characters, how you sorted them, and how you got codes.
//This is huffmann coding
#include <iostream>
#include <string>
#include <queue>
#include <unordered_map>
using namespace std;
// A Tree node
struct Node
{
char ch;
int freq;
Node *left, *right;
};
// function to allocate a new tree node
Node* getNode(char ch, int freq, Node* left, Node* right)
{
Node* node = new Node();
node->ch = ch;
node->freq = freq;
node->left = left;
node->right = right;
return node;
}
// comparison object to be used to order the heap
struct comp
{
bool operator()(Node* l, Node* r)
{
// highest priority item has lowest frequency
return l->freq > r->freq;
}
};
// traverse the Huffman Tree and store Huffman Codes
// in a map.
void encode(Node* root, string str,
unordered_map<char, string> &huffmanCode)
{
if (root == nullptr)
return;
// found a leaf node
if (!root->left && !root->right) {
huffmanCode[root->ch] = str;
}
encode(root->left, str + "0", huffmanCode);
encode(root->right, str + "1", huffmanCode);
}
// traverse the Huffman Tree and decode the encoded string
void decode(Node* root, int &index, string str)
{
if (root == nullptr) {
return;
}
// found a leaf node
if (!root->left && !root->right)
{
cout << root->ch;
return;
}
index++;
if (str[index] =='0')
decode(root->left, index, str);
else
decode(root->right, index, str);
}
// Builds Huffman Tree and decode given input text
void huffmanTree(string text)
{
// count frequency of appearance of each character
// and store it in a map
unordered_map<char, int> freq;
for (char ch: text) {
freq[ch]++;
}
// Create a priority queue to store live nodes of
// Huffman tree;
priority_queue<Node*, vector<Node*>, comp> p;
// Create a leaf node for each character and add it
// to the priority queue.
for (auto pair: freq) {
p.push(getNode(pair.first, pair.second, nullptr, nullptr));
}
// do till there is more than one node in the queue
while (p.size() != 1)
{
// Remove the two nodes of highest priority
// (lowest frequency) from the queue
Node *left = p.top(); p.pop();
Node *right = p.top(); p.pop();
// Create a new internal node with these two nodes
// as children and with frequency equal to the sum
// of the two nodes' frequencies. Add the new node
// to the priority queue.
int sum = left->freq + right->freq;
p.push(getNode('\0', sum, left, right));
}
// root stores pointer to root of Huffman Tree
Node* root = p.top();
// traverse the Huffman Tree and store Huffman Codes
// in a map. Also prints them
unordered_map<char, string> code;
encode(root, "", code);
cout << "Huffman Codes are :\n" << '\n';
for (auto pair: code) {
cout << pair.first << " " << pair.second << '\n';
}
cout << "\nOriginal string was :\n" << text << '\n';
// print encoded string
string str = "";
for (char ch: text) {
str += code[ch];
}
cout << "\nEncoded string is :\n" << str << '\n';
// traverse the Huffman Tree again and this time
// decode the encoded string
int index = -1;
cout << "\nDecoded string is: \n";
while (index < (int)str.size() - 2) {
decode(root, index, str);
}
}
// Huffman coding algorithm
int main()
{
string text = "This is a demo.";
huffmanTree(text);
return 0;
}
Explanation of Code
1. We have used queue and unordered map which are 2 standard libraries to implement Huffman Coding.
#include <queue>
#include <unordered_map>
2. In Huffman coding first we have to fetch the frequencies of each character from inputted string.
3. Then we have to make a huffman tree and assign binary codes to each character.
4. Then we have to replace each character in the string with corresponding binary code.
5. Last but not the least we have to decode it by replacing each binary code with the corresponding character in the string.
6. We have made a getNode() which adds a new node to the huffman tree.
Node* getNode(char ch, int freq, Node* left, Node*
right)
{
Node* node = new Node();
node->ch = ch;
node->freq = freq;
node->left = left;
node->right = right;
return node;
}
7. In the comp structure we have a function which takes 2 parameters left and right pointer which returns the lowest frequency from the priority queue.
struct comp
{
bool operator()(Node* l, Node* r)
{
// highest priority item has lowest
frequency
return l->freq >
r->freq;
}
};
8. In main() we have made a string text on which we will perform huffman coding. Then we have made a call to huffmanTree().
int main()
{
string text = "This is a demo.";
huffmanTree(text);
return 0;
}
9. Now in the huffmanTree() we have made a container map which stores the values in the format key and its corresponding value. The key represents the char and int represents the associated frequency of that character. Then, with the help of a range based for loop we have calculated the frequency of each character.
// count frequency of appearance of each
character
// and store it in a map
unordered_map<char, int> freq;
for (char ch: text) {
freq[ch]++;
}
10. Then we have made a priority queue p which will store the leaf nodes of the tree.
priority_queue<Node*, vector<Node*>, comp> p;
11. Then we have used a range based for loop. The freq represents the character and its associated frequency in the string. Now we need to insert the element to the priority queue. For inserting we have used the push method and we simply insert the character and its frequency. So, both the parameters will be inserted in the priority queue.
for (auto pair: freq) {
p.push(getNode(pair.first,
pair.second, nullptr, nullptr));
}
12. Next we have a while loop in which we will pop the 2 lowest frequency characters and add them together to create a new node with these 2 nodes. push() will add the new node to the priority queue.
while (p.size() != 1)
{
// Remove the two nodes of highest
priority
// (lowest frequency) from the
queue
Node *left = p.top();
p.pop();
Node *right = p.top();
p.pop();
// Create a new
internal node with these two nodes
// as children and with frequency
equal to the sum
// of the two nodes' frequencies.
Add the new node
// to the priority queue.
int sum = left->freq +
right->freq;
p.push(getNode('\0', sum, left,
right));
}
13. Then we have made a root node which stores the address of the root node in huffman tree.
Node* root = p.top();
14. Then we have made another map called code and it will store the character and the associated binary code. For converting characters to binary codes we called the encode().
unordered_map<char, string> code;
encode(root, "", code);
15. In encode() we have traversed the huffman tree and then we have stored them in the map. Here we have already assigned that the left edges will have 0 and right edges will have 1. We have called encode recursively so that we can append the 0s and 1s depending on where we are moving in the tree.
void encode(Node* root, string
str,unordered_map<char, string> &huffmanCode)
{
if (root == nullptr)
return;
// found a leaf node
if (!root->left && !root->right){
huffmanCode[root->ch] = str;
}
encode(root->left, str + "0", huffmanCode);
encode(root->right, str + "1", huffmanCode);
}
16. Now in the decode() we are traversing the huffman tree and then we decode the encoded string.
17. The first if condition works it root is null and second if works until we reach to the leaf node. Then we have incremented top_index because it was set to -1 and then in the if condition if top_index is 0 then we traverse to the left-subtree otherwise we traverse to the right-subtree. After executing this decode function we will get the original characters of our string text.
void decode(Node* root, int &index, string
str)
{
if (root == nullptr) {
return;
}
// found a leaf node
if (!root->left && !root->right)
{
cout << root->ch;
return;
}
index++;
if (str[index] =='0')
decode(root->left, index, str);
else
decode(root->right, index, str);
}
18. Now come to huffmanTree() after the statement where we called the encode(). We have used a for loop and we are printing the huffman codes for each character of string text.
cout << "Huffman Codes are :\n" <<
'\n';
for (auto pair: code) {
cout << pair.first << "
" << pair.second << '\n';
}
19. Then we have printed the encoded string.
cout << "\nOriginal string was :\n" << text << '\n';
20. Then we have made top_index assigned to -1 and after that we have called the decode() which decodes the encoded string. We have called the decode function recursively because we have to traverse the binary codes of each character. After decoding we get the original string.
string str = "";
for (char ch: text) {
str += code[ch];
}
cout << "\nEncoded string is :\n" << str << '\n';
// traverse the Huffman Tree again and
this time
// decode the encoded string
int index = -1;
cout << "\nDecoded string is: \n";
while (index < (int)str.size() - 2) {
decode(root, index, str);
}
OUTPUT