Open In App

K Most Frequent Words in a File

Last Updated : 26 Mar, 2025
Comments
Improve
Suggest changes
Like Article
Like
Report

Given a book of words and an integer K, design a dynamic data structure that finds the top K most frequent words in the book. Assume you have enough main memory to accommodate all the words. The structure should also allow new words to be added while it is held in main memory.

Examples:

Input: fileData = "Welcome to the world of Geeks. This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks"
Output:
"your" : 3
"well" : 3
"and" : 4
"to" : 4
"Geeks" : 6

Using Hash Map and Heap

  • Store all words and their frequencies in a hash map.
  • Store top k frequent items in a min heap (Please refer k Largest Elements in an Array for details)
  • Print the words and their frequencies in the decreasing order of frequencies.

Important Points about Implementations

  • In Python, we have a direct function most_common()
  • In JavaScript, we do not have direct implementation of min heap, so we have used sorting.
C++
#include <bits/stdc++.h>
using namespace std;

// Prints the k most frequent whitespace-separated words of `text`, one
// "word : count" line per word, highest count first. Words are compared
// exactly as written (case-sensitive, punctuation stays attached).
// Prints nothing when k <= 0.
void processText(const string& text, int k) {

    // A negative k would be converted to a huge unsigned value in the
    // size comparison below, so the heap would never be trimmed.
    if (k <= 0) return;
    const size_t limit = static_cast<size_t>(k);

    // Count every word.
    unordered_map<string, int> freqMap;
    istringstream iss(text);
    for (string word; iss >> word;) ++freqMap[word];

    // Min heap keyed on (count, word): whenever it holds more than
    // `limit` entries the smallest is dropped, so the largest survive.
    priority_queue<pair<int, string>, vector<pair<int, string>>, greater<>> pq;
    for (const auto& entry : freqMap) {
        pq.emplace(entry.second, entry.first);
        if (pq.size() > limit) pq.pop();
    }

    // The heap yields ascending counts; collect them, then walk backwards
    // so the most frequent word is printed first.
    vector<pair<int, string>> res;
    res.reserve(pq.size());
    while (!pq.empty()) {
        res.push_back(pq.top());
        pq.pop();
    }
    for (auto it = res.rbegin(); it != res.rend(); ++it)
        cout << it->second << " : " << it->first << endl;
}

int main() {
    // Demo run: a string literal stands in for file contents so the
    // example can be compiled and executed without an input file.
    const string sample = "Welcome to the world of Geeks Geeks for Geeks is great";
    const int topK = 5;
    processText(sample, topK);

    // To process a real file instead, read it into a string first:
    // ifstream file("file.txt");
    // if (!file) {
    //     cerr << "File doesn't exist" << endl;
    //     return 1;
    // }
    // stringstream buffer;
    // buffer << file.rdbuf();
    // processText(buffer.str(), topK);

    return 0;
}
Java
import java.util.*;
import java.io.*;

public class Main {
    /**
     * Prints the {@code k} most frequent words of {@code text}, one
     * "word : count" line per word, highest count first.
     *
     * Words are separated by runs of whitespace and compared exactly as
     * written (case-sensitive, punctuation stays attached). Empty tokens
     * are never counted, so empty input or repeated spaces print no
     * blank word.
     *
     * @param text the text to analyse
     * @param k    the maximum number of words to print; nothing is
     *             printed when {@code k <= 0}
     */
    public static void processText(String text, int k) {

        // Count every non-empty word.
        Map<String, Integer> freqMap = new HashMap<>();
        for (String word : text.split("\\s+")) {
            if (!word.isEmpty()) {
                freqMap.merge(word, 1, Integer::sum);
            }
        }

        // Min heap keyed on count. Integer.compare avoids the overflow a
        // subtraction-based comparator can produce.
        PriorityQueue<Map.Entry<String, Integer>> pq = new PriorityQueue<>(
            (a, b) -> Integer.compare(a.getValue(), b.getValue())
        );
        for (Map.Entry<String, Integer> entry : freqMap.entrySet()) {
            pq.offer(entry);
            // Dropping the smallest keeps only the k largest counts.
            if (pq.size() > k) {
                pq.poll();
            }
        }

        // The heap yields ascending counts; reverse for the report.
        List<Map.Entry<String, Integer>> res = new ArrayList<>();
        while (!pq.isEmpty()) {
            res.add(pq.poll());
        }
        Collections.reverse(res);
        for (Map.Entry<String, Integer> entry : res) {
            System.out.println(entry.getKey() + " : " + entry.getValue());
        }
    }

    public static void main(String[] args) {
        String text = "Welcome to the world of Geeks Geeks for Geeks is great";
        processText(text, 5);

        // To read from a file instead, load its contents into a string:
        // String fromFile = new String(
        //     java.nio.file.Files.readAllBytes(java.nio.file.Paths.get("file.txt")));
        // processText(fromFile, 5);

        // A string is used instead of a file so the example can be
        // tested and run as-is.
    }
}
Python
from collections import Counter


def process_text(text, k):
    """Print the k most frequent words of ``text``, most frequent first.

    Words are the whitespace-separated tokens of ``text``. Each result is
    printed on its own line as ``word : count``.
    """
    # Tally every token, then let Counter pick the k largest tallies.
    tally = Counter()
    tally.update(text.split())

    for entry in tally.most_common(k):
        word, freq = entry
        print(f'{word} : {freq}')

if __name__ == '__main__':
    # Demo run: a literal string stands in for file contents.
    sample, top_k = 'Welcome to the world of Geeks Geeks for Geeks is great', 5
    process_text(sample, top_k)
    # To read the input from a file instead:
    # with open('file.txt', 'r') as file:
    #     process_text(file.read(), top_k)
C#
using System;
using System.Collections.Generic;
using System.Linq;

class MainClass {
    /// <summary>
    /// Prints the <paramref name="k"/> most frequent words of
    /// <paramref name="text"/>, one "word : count" line per word,
    /// highest count first.
    /// </summary>
    /// <param name="text">Space-separated words; empty tokens produced by
    /// repeated spaces are ignored.</param>
    /// <param name="k">Maximum number of words to print. Nothing is
    /// printed when it is zero or negative.</param>
    public static void ProcessText(string text, int k) {

        // Count every word with a single dictionary lookup per token.
        var freqMap = new Dictionary<string, int>();
        foreach (string word in text.Split(' ', StringSplitOptions.RemoveEmptyEntries)) {
            freqMap.TryGetValue(word, out int seen);
            freqMap[word] = seen + 1;
        }

        // PriorityQueue dequeues the SMALLEST priority first, so using the
        // raw count as priority makes it a min heap: trimming to k entries
        // discards the least frequent words and keeps the top k.
        var pq = new PriorityQueue<string, int>();
        foreach (var entry in freqMap) {
            pq.Enqueue(entry.Key, entry.Value);
            if (pq.Count > k) {
                pq.Dequeue();
            }
        }

        // Dequeue() returns only the element, so TryDequeue is used to get
        // the count back as well. Entries come out in ascending order.
        var res = new List<KeyValuePair<string, int>>();
        while (pq.TryDequeue(out var topWord, out var topFreq)) {
            res.Add(new KeyValuePair<string, int>(topWord, topFreq));
        }
        res.Reverse(); // To get the highest frequency first
        foreach (var entry in res) {
            Console.WriteLine($"{entry.Key} : {entry.Value}");
        }
    }

    public static void Main(string[] args) {
        string text = "Welcome to the world of Geeks Geeks for Geeks is great";
        ProcessText(text, 5);
    }
}
JavaScript
/**
 * Prints the k most frequent words of `text`, one "word : count" line per
 * word, highest count first.
 *
 * Words are separated by runs of whitespace; empty tokens are ignored.
 * Nothing is printed unless k is a positive number.
 *
 * @param {string} text - The text to analyse.
 * @param {number} k - Maximum number of words to print.
 */
function processText(text, k) {

    // A Map is used instead of a plain object so that words such as
    // "constructor" or "toString" do not collide with inherited
    // Object.prototype properties.
    const freqMap = new Map();
    for (const word of text.split(/\s+/)) {
        if (word === '') continue;
        freqMap.set(word, (freqMap.get(word) || 0) + 1);
    }

    // slice(-0) is slice(0) and would return every entry, so a
    // non-positive k is handled before slicing.
    if (!(k > 0)) return;

    // Sort ascending by count, keep the last k, then flip to descending.
    const sortedWords = [...freqMap.entries()].sort((a, b) => a[1] - b[1]);
    const res = sortedWords.slice(-k).reverse();

    for (const [word, freq] of res) {
        console.log(`${word} : ${freq}`);
    }
}

// Demo run: a literal string stands in for file contents.
const text = 'Welcome to the world of Geeks Geeks for Geeks is great';
const topCount = 5;
processText(text, topCount);
// To read the input from a file instead:
// const fs = require('fs');
// fs.readFile('file.txt', 'utf8', (err, data) => {
//     if (err) {
//         console.error("File doesn't exist");
//         return;
//     }
//     processText(data, topCount);
// });


Time Complexity : O(n + n Log k) where n is the number of words in the file. We assume that every word is of constant length.


Using Trie and Min Heap

The approach leverages a Trie to efficiently store and search words as they are read from the file, while simultaneously keeping track of each word's occurrence count. Each Trie node is enhanced with an additional field, indexMinHeap, which indicates the position of the word in the Min Heap if it is currently among the top k frequent words (or -1 if it is not). In parallel, a Min Heap of fixed size k is maintained to record the k most frequent words encountered so far. Each node in the Min Heap contains the word, its frequency, and a pointer to the corresponding Trie leaf node. As words are processed, the algorithm updates their frequencies in the Trie and then reflects these changes in the Min Heap by either updating an existing entry, inserting a new entry if space is available, or replacing the root of the Min Heap (which represents the least frequent word among the top k) when the new word’s frequency exceeds it.

Step-by-Step Process to Execute the Code

  • Open the input file and ensure it is accessible; report an error if the file cannot be opened.
  • Read words from the file one by one. For each word, insert it into the Trie: if the word already exists, increment its frequency counter; if not, create a new node and initialize its count to 1.
  • For every word inserted or updated in the Trie, update the Min Heap as follows:
    • If the word is already present in the Min Heap (i.e., its indexMinHeap, stored in the field named ind in the code below, is not -1), simply update its frequency in the heap and call heapify() at the respective index.
    • If the word is not present and the Min Heap has available space, insert the new word into the heap, update its corresponding Trie node's indexMinHeap, and rebuild the heap.
    • If the Min Heap is full, compare the frequency of the new word with the frequency at the root of the heap (the smallest frequency among the top k). If the new word’s frequency is not higher, do nothing; if it is higher, replace the root with the new word, update the Trie node of the word being replaced (setting its indexMinHeap to -1), and call heapify() on the root to restore the heap property.
  • After processing all words, the Min Heap will contain the k most frequent words. Finally, iterate over the Min Heap and print each word along with its frequency.

Below is given the implementation:

C++
#include <bits/stdc++.h>
using namespace std;

// A trie node. Each node owns its 26 children (one per letter a-z).
class Node {
public:
    bool isEnd;           // true when a word terminates at this node
    unsigned freq;        // number of times that word has been inserted
    int ind;              // slot of the word in the min heap, or -1 if absent
    vector<Node*> child;  // owned child pointers, nullptr where unused

    Node() : isEnd(false), freq(0),
    ind(-1), child(26, nullptr) {}

    // Children are owned raw pointers, so copying a node would lead to a
    // double delete; copying is therefore disabled.
    Node(const Node&) = delete;
    Node& operator=(const Node&) = delete;

    // Frees the whole subtree. Without this, deleting the root released
    // only the root node and leaked every other node of the trie.
    ~Node() {
        for (Node* c : child)
            delete c;
    }
};

// One slot of the min heap: a word, its count, and a pointer back to the
// trie node where that word ends.
class minHeapNode {
public:
    Node* root = nullptr;  // trie node of the word; null for an unused slot
    unsigned freq = 0;     // count used as the heap key
    string word;           // text of the word, empty for an unused slot

    minHeapNode() {}
};

class MinHeap {
public:
    int cap;
    int count;
    vector<minHeapNode> arr;

    MinHeap(int cap) : 
    cap(cap), count(0), arr(cap) {}

    void swapNodes(int a, int b) {
        swap(arr[a], arr[b]);
        arr[a].root->ind = a;
        arr[b].root->ind = b;
    }

    void heapify(int idx) {
        int left = 2 * idx + 1;
        int right = 2 * idx + 2;
        int mini = idx;
        if (left < count && 
            arr[left].freq < arr[mini].freq)
            mini = left;
        if (right < count && 
            arr[right].freq < arr[mini].freq)
            mini = right;
        if (mini != idx) {
            swapNodes(idx, mini);
            heapify(mini);
        }
    }

    void build() {
        for (int i = (count - 1) / 2; i >= 0; --i)
            heapify(i);
    }
};

// Reflects one new occurrence of `word` in the min heap.
//   mH   - heap of the most frequent words seen so far
//   root - trie node where `word` ends; its freq is already updated
//   word - the word text stored in a newly created heap entry
void insert(MinHeap& mH, Node* root,
                    const string& word) {

    // A zero-capacity heap has no slot 0; without this guard the
    // "heap is full" branch below would read arr[0] out of bounds.
    if (mH.cap <= 0)
        return;

    // Case 1: word is already in mH,
    // so update its freq.
    if (root->ind != -1) {
        ++mH.arr[root->ind].freq;
        // The key only grew, so the entry can only need to move down.
        mH.heapify(root->ind);
    }

    // Case 2: Word is not in mH and
    // there's still room.
    else if (mH.count < mH.cap) {
        minHeapNode node;
        node.root = root;
        node.freq = root->freq;
        node.word = word;
        mH.arr[mH.count] = node;
        root->ind = mH.count++;
        mH.build();
    }

    // Case 3: Heap is full and freq of new
    // word is greater than the root.
    else if (root->freq > mH.arr[0].freq) {
        // The evicted word is no longer tracked by the heap.
        mH.arr[0].root->ind = -1;
        minHeapNode node;
        node.root = root;
        node.freq = root->freq;
        node.word = word;
        mH.arr[0] = node;
        root->ind = 0;
        mH.heapify(0);
    }
}

// Recursively walks/creates the trie path for `word` starting at character
// `index`, then bumps the word's count and updates the heap.
//   root  - current trie node; created here when null
//   mH    - heap of the most frequent words
//   word  - word being inserted
//   index - position in `word` handled by this call
// Letters are matched case-insensitively. If any character is not a-z the
// recursion stops and the word is not counted.
void insertUtil(Node*& root, MinHeap& mH,
    const string& word, size_t index = 0) {
    if (!root)
        root = new Node();
    if (index < word.size()) {
        // tolower requires a value representable as unsigned char; passing
        // a negative char (any non-ASCII byte) is undefined behaviour.
        int pos = tolower(static_cast<unsigned char>(word[index])) - 'a';
        if (pos >= 0 && pos < 26)
            insertUtil(root->child[pos],
                mH, word, index + 1);
    } else {
        // End of the word: first sighting marks the node, later ones count.
        if (root->isEnd)
            ++root->freq;
        else {
            root->isEnd = true;
            root->freq = 1;
        }
        insert(mH, root, word);
    }
}

// Entry point for adding one word: starts the trie walk at the first
// character and lets insertUtil update the heap.
void insertTrieAndHeap(const string& word,
    Node*& root, MinHeap& mH) {
    const size_t firstChar = 0;
    insertUtil(root, mH, word, firstChar);
}

void displayMinHeap(const MinHeap& mH) {
    for (int i = 0; i < mH.count; ++i)
        cout << mH.arr[i].word << " : " 
        << mH.arr[i].freq << endl;
}

void printKMostFreq(ifstream& file, int k) {
    MinHeap mH(k);
    Node* root = nullptr;

    // to process the words in file
    string word;
    while (file >> word) {
        insertTrieAndHeap(word, root, mH);
    }
    displayMinHeap(mH);
    
    // Clean up the Trie memory
    if (root) {
        delete root;
    }
}

void printKMostFreq(string str, int k) {
    MinHeap mH(k);
    Node* root = nullptr;
    
    istringstream iss(str);
    string word;
    while (iss >> word) {
        insertTrieAndHeap(word, root, mH);
    }

    displayMinHeap(mH);
    
    // Clean up the Trie memory
    if (root) {
        delete root;
    }
}

int main() {
    // Demo run: a string is used instead of a file so the example can be
    // tested and run as-is.
    const int k = 5;
    const string sample = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks";
    printKMostFreq(sample, k);

    // To read from a file instead:
    // ifstream file("file.txt");
    // if (!file) {
    //     cerr << "File doesn't exist" << endl;
    //     return 1;
    // }
    // printKMostFreq(file, k);

    return 0;
}
Java
import java.io.*;
import java.util.*;
import java.util.regex.*;

/** A trie node with one child slot per letter a-z. */
class Node {
    /** True when a word terminates at this node. */
    boolean isEnd = false;
    /** Number of times that word has been inserted. */
    int freq = 0;
    /** Slot of the word in the min heap, or -1 when it is not in the heap. */
    int ind = -1;
    /** Child nodes; null where no child exists. */
    Node[] child = new Node[26];

    Node() {
        // All fields are set by their initializers above.
    }
}

/** One heap slot: a word, its count, and the trie node where it ends. */
class MinHeapNode {
    /** Trie node of the word; null for an unused slot. */
    Node root = null;
    /** Count used as the heap key. */
    int freq = 0;
    /** Text of the word; empty for an unused slot. */
    String word = "";

    MinHeapNode() {
        // All fields are set by their initializers above.
    }
}

/**
 * Fixed-capacity array-backed min heap ordered by {@code MinHeapNode.freq}.
 * Every move of an entry also rewrites the {@code ind} field of its trie
 * node so the trie always knows the entry's current slot.
 */
class MinHeap {
    int cap;
    int count;
    MinHeapNode[] arr;

    /** Allocates {@code cap} slots, each holding an unused entry. */
    MinHeap(int cap) {
        this.cap = cap;
        this.count = 0;
        this.arr = new MinHeapNode[cap];
        int slot = 0;
        while (slot < cap) {
            arr[slot] = new MinHeapNode();
            slot++;
        }
    }

    /** Exchanges two slots and records the new positions in the trie. */
    void swapNodes(int a, int b) {
        MinHeapNode movesToB = arr[a];
        MinHeapNode movesToA = arr[b];
        arr[a] = movesToA;
        arr[b] = movesToB;
        movesToA.root.ind = a;
        movesToB.root.ind = b;
    }

    /** Sifts the entry at {@code idx} down until neither child is smaller. */
    void heapify(int idx) {
        while (true) {
            int l = 2 * idx + 1;
            int r = l + 1;
            int smallest = idx;
            if (l < count && arr[l].freq < arr[smallest].freq) {
                smallest = l;
            }
            if (r < count && arr[r].freq < arr[smallest].freq) {
                smallest = r;
            }
            if (smallest == idx) {
                return;
            }
            swapNodes(idx, smallest);
            idx = smallest;
        }
    }

    /** Re-establishes the heap property over all live slots. */
    void build() {
        int i = (count - 1) / 2;
        while (i >= 0) {
            heapify(i);
            i--;
        }
    }
}

/**
 * Finds the k most frequent words of a text using a trie for counting and
 * a size-k min heap for tracking the current top k.
 */
class GfG {

    /** Builds the heap entry describing the word that ends at {@code root}. */
    private static MinHeapNode entryFor(Node root, String word) {
        MinHeapNode node = new MinHeapNode();
        node.root = root;
        node.freq = root.freq;
        node.word = word;
        return node;
    }

    /**
     * Reflects one new occurrence of {@code word} in the min heap.
     *
     * @param mH   heap of the most frequent words seen so far
     * @param root trie node where the word ends; its freq is already updated
     * @param word word text stored in a newly created heap entry
     */
    static void insert(MinHeap mH, Node root, String word) {
        // A zero-capacity heap has no slot 0; without this guard the
        // "heap is full" branch below would throw on arr[0].
        if (mH.cap <= 0) {
            return;
        }

        if (root.ind != -1) {
            // Case 1: already in the heap. The key only grew, so the
            // entry can only need to move down.
            ++mH.arr[root.ind].freq;
            mH.heapify(root.ind);
        } else if (mH.count < mH.cap) {
            // Case 2: not in the heap and there is still room.
            mH.arr[mH.count] = entryFor(root, word);
            root.ind = mH.count++;
            mH.build();
        } else if (root.freq > mH.arr[0].freq) {
            // Case 3: heap is full and the word beats the current minimum.
            // The evicted word is no longer tracked by the heap.
            mH.arr[0].root.ind = -1;
            mH.arr[0] = entryFor(root, word);
            root.ind = 0;
            mH.heapify(0);
        }
    }

    /**
     * Recursively walks/creates the trie path for {@code word} starting at
     * character {@code index}, then bumps the count and updates the heap.
     * If any character is not a-z (after lowercasing) the recursion stops
     * and the word is not counted.
     */
    static void insertUtil(Node root, MinHeap mH, String word, int index) {
        if (index < word.length()) {
            int pos = Character.toLowerCase(word.charAt(index)) - 'a';
            if (pos >= 0 && pos < 26) {
                if (root.child[pos] == null) {
                    root.child[pos] = new Node();
                }
                insertUtil(root.child[pos], mH, word, index + 1);
            }
        } else {
            // End of the word: first sighting marks the node, later ones count.
            if (root.isEnd) {
                ++root.freq;
            } else {
                root.isEnd = true;
                root.freq = 1;
            }
            insert(mH, root, word);
        }
    }

    /** Adds one word, starting the trie walk at its first character. */
    static void insertTrieAndHeap(String word, Node root, MinHeap mH) {
        insertUtil(root, mH, word, 0);
    }

    /** Prints every live heap entry as "word : count", in heap order. */
    static void displayMinHeap(MinHeap mH) {
        for (int i = 0; i < mH.count; ++i) {
            System.out.println(mH.arr[i].word + " : " + mH.arr[i].freq);
        }
    }

    /**
     * Feeds every word of {@code text} to the trie and heap. Words are the
     * pieces between runs of non-word characters. They are lowercased with
     * Locale.ROOT so the result does not depend on the default locale
     * (under a Turkish locale "I" would become a dotless i and be dropped).
     */
    private static void addWords(String text, Node root, MinHeap mH) {
        for (String word : text.split("\\W+")) {
            if (!word.isEmpty()) {
                insertTrieAndHeap(word.toLowerCase(Locale.ROOT), root, mH);
            }
        }
    }

    /** Prints the k most frequent words read line by line from {@code file}. */
    static void printKMostFreq(BufferedReader file, int k) throws IOException {
        MinHeap mH = new MinHeap(k);
        Node root = new Node();
        String line;

        while ((line = file.readLine()) != null) {
            addWords(line, root, mH);
        }
        displayMinHeap(mH);
    }

    /** Prints the k most frequent words of the string {@code str}. */
    static void printKMostFreq(String str, int k) {
        MinHeap mH = new MinHeap(k);
        Node root = new Node();

        addWords(str, root, mH);

        displayMinHeap(mH);
    }

    public static void main(String[] args) throws IOException {
        int k = 5;

        // to read from file
        // BufferedReader file = new BufferedReader(new FileReader("file.txt"));
        // printKMostFreq(file, k);

        // using string instead of file to
        // test and run the code
        String str = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks";
        printKMostFreq(str, k);
    }
}
Python
import heapq
import string

class Node:
    """A trie node with one child slot per letter a-z."""

    def __init__(self):
        # Child nodes; None where no child exists.
        self.child = [None for _ in range(26)]
        # Slot of the word in the min heap, or -1 when it is not there.
        self.ind = -1
        # Number of times the word ending here has been inserted.
        self.freq = 0
        # True when a word terminates at this node.
        self.isEnd = False

class MinHeapNode:
    """One heap slot: a word, its count, and the trie node where it ends."""

    def __init__(self):
        # Unused slot: no trie node, zero count, empty word.
        self.root, self.freq, self.word = None, 0, ""

class MinHeap:
    """Fixed-capacity array-backed min heap ordered by entry ``freq``.

    Every move of an entry also rewrites the ``ind`` attribute of its trie
    node so the trie always knows the entry's current slot.
    """

    def __init__(self, cap):
        self.cap = cap
        self.count = 0
        self.arr = []
        for _ in range(cap):
            self.arr.append(MinHeapNode())

    def swapNodes(self, a, b):
        """Exchange two slots and record the new positions in the trie."""
        moves_to_b = self.arr[a]
        moves_to_a = self.arr[b]
        self.arr[a] = moves_to_a
        self.arr[b] = moves_to_b
        moves_to_a.root.ind = a
        moves_to_b.root.ind = b

    def heapify(self, idx):
        """Sift the entry at ``idx`` down until neither child is smaller."""
        while True:
            smallest = idx
            for kid in (2 * idx + 1, 2 * idx + 2):
                if kid < self.count and self.arr[kid].freq < self.arr[smallest].freq:
                    smallest = kid
            if smallest == idx:
                return
            self.swapNodes(idx, smallest)
            idx = smallest

    def build(self):
        """Re-establish the heap property over all live slots."""
        i = (self.count - 1) // 2
        while i >= 0:
            self.heapify(i)
            i -= 1

def insert(mH, root, word):
    """Reflect one new occurrence of ``word`` in the min heap.

    mH is the heap of the most frequent words seen so far, root is the trie
    node where ``word`` ends (its ``freq`` is already updated) and word is
    the text stored in a newly created heap entry.
    """

    # A zero-capacity heap has no slot 0; without this guard the
    # "heap is full" branch below would raise IndexError on arr[0].
    if mH.cap <= 0:
        return

    # Case 1: word is already in mH,
    # so update its freq.
    if root.ind != -1:
        mH.arr[root.ind].freq += 1
        # The key only grew, so the entry can only need to move down.
        mH.heapify(root.ind)

    # Case 2: Word is not in mH and
    # there's still room.
    elif mH.count < mH.cap:
        node = MinHeapNode()
        node.root = root
        node.freq = root.freq
        node.word = word
        mH.arr[mH.count] = node
        root.ind = mH.count
        mH.count += 1
        mH.build()

    # Case 3: Heap is full and freq of new
    # word is greater than the root.
    elif root.freq > mH.arr[0].freq:
        # The evicted word is no longer tracked by the heap.
        mH.arr[0].root.ind = -1
        node = MinHeapNode()
        node.root = root
        node.freq = root.freq
        node.word = word
        mH.arr[0] = node
        root.ind = 0
        mH.heapify(0)

def insertUtil(root, mH, word, index=0):
    """Walk/create the trie path for ``word`` and update the heap.

    Recurses one character per call starting at ``index``. Letters are
    matched case-insensitively. If any character is not a single a-z
    letter after lowercasing, the recursion stops and the word is not
    counted.
    """
    if root is None:
        # Rebinding only affects this call; the caller's reference is not
        # updated, so callers are expected to pass an existing node.
        root = Node()
    if index < len(word):
        ch = word[index].lower()
        # str.lower() can expand one character into several (for example
        # U+0130), and ord() raises TypeError on anything but a single
        # character, so such characters are treated as unsupported.
        pos = ord(ch) - ord('a') if len(ch) == 1 else -1
        if 0 <= pos < 26:
            if root.child[pos] is None:
                root.child[pos] = Node()
            insertUtil(root.child[pos], mH, word, index + 1)
    else:
        # End of the word: first sighting marks the node, later ones count.
        if root.isEnd:
            root.freq += 1
        else:
            root.isEnd = True
            root.freq = 1
        insert(mH, root, word)

def insertTrieAndHeap(word, root, mH):
    """Add one word, starting the trie walk at its first character."""
    first_char = 0
    insertUtil(root, mH, word, first_char)

def displayMinHeap(mH):
    """Print every live heap entry as ``word : count``, in heap order."""
    for entry in mH.arr[:mH.count]:
        print(entry.word, ":", entry.freq)

def printKMostFreq(file, k):
    """Print the k most frequent words read from the open ``file``."""
    heap = MinHeap(k)
    trie = Node()

    # Read everything at once and split on whitespace.
    tokens = file.read().split()
    for token in tokens:
        insertTrieAndHeap(token, trie, heap)

    displayMinHeap(heap)

def printKMostFreqString(str, k):
    """Print the k most frequent words of the string ``str``."""
    heap = MinHeap(k)
    trie = Node()

    # Split on whitespace and feed each token to the trie and heap.
    tokens = str.split()
    for token in tokens:
        insertTrieAndHeap(token, trie, heap)

    displayMinHeap(heap)

if __name__ == "__main__":
    # Number of words to report.
    k = 5

    # To read from a file instead:
    # with open("file.txt", "r") as file:
    #     printKMostFreq(file, k)

    # A string is used instead of a file so the example can be
    # tested and run as-is.
    sample_text = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks"
    printKMostFreqString(sample_text, k)
C#
using System;
using System.IO;
using System.Collections.Generic;
using System.Text.RegularExpressions;

/// <summary>A trie node with one child slot per letter a-z.</summary>
class Node {
    /// <summary>True when a word terminates at this node.</summary>
    public bool isEnd = false;
    /// <summary>Number of times that word has been inserted.</summary>
    public int freq = 0;
    /// <summary>Slot of the word in the min heap, or -1 when absent.</summary>
    public int ind = -1;
    /// <summary>Child nodes; null where no child exists.</summary>
    public Node[] children = new Node[26];

    public Node() {
        // All fields are set by their initializers above.
    }
}

/// <summary>
/// One heap slot: a word, its count, and the trie node where it ends.
/// </summary>
class MinHeapNode {
    /// <summary>Trie node of the word; null for an unused slot.</summary>
    public Node root = null;
    /// <summary>Count used as the heap key.</summary>
    public int freq = 0;
    /// <summary>Text of the word; empty for an unused slot.</summary>
    public string word = "";

    public MinHeapNode() {
        // All fields are set by their initializers above.
    }
}

/// <summary>
/// Fixed-capacity array-backed min heap ordered by <c>MinHeapNode.freq</c>.
/// Every move of an entry also rewrites the <c>ind</c> field of its trie
/// node so the trie always knows the entry's current slot.
/// </summary>
class MinHeap {
    public int cap;
    public int count;
    public MinHeapNode[] arr;

    /// <summary>Allocates <paramref name="cap"/> unused slots.</summary>
    public MinHeap(int cap) {
        this.cap = cap;
        this.count = 0;
        this.arr = new MinHeapNode[cap];
        int slot = 0;
        while (slot < cap) {
            arr[slot] = new MinHeapNode();
            slot++;
        }
    }

    /// <summary>Exchanges two slots and records the new positions in the trie.</summary>
    public void SwapNodes(int a, int b) {
        MinHeapNode movesToB = arr[a];
        MinHeapNode movesToA = arr[b];
        arr[a] = movesToA;
        arr[b] = movesToB;
        movesToA.root.ind = a;
        movesToB.root.ind = b;
    }

    /// <summary>Sifts the entry at <paramref name="idx"/> down until neither child is smaller.</summary>
    public void Heapify(int idx) {
        while (true) {
            int l = 2 * idx + 1;
            int r = l + 1;
            int smallest = idx;
            if (l < count && arr[l].freq < arr[smallest].freq) {
                smallest = l;
            }
            if (r < count && arr[r].freq < arr[smallest].freq) {
                smallest = r;
            }
            if (smallest == idx) {
                return;
            }
            SwapNodes(idx, smallest);
            idx = smallest;
        }
    }

    /// <summary>Re-establishes the heap property over all live slots.</summary>
    public void Build() {
        int i = (count - 1) / 2;
        while (i >= 0) {
            Heapify(i);
            i--;
        }
    }
}

/// <summary>
/// Finds the k most frequent words of a text using a trie for counting and
/// a size-k min heap for tracking the current top k.
/// </summary>
class GfG {

    /// <summary>Builds the heap entry describing the word that ends at <paramref name="root"/>.</summary>
    private static MinHeapNode EntryFor(Node root, string word) {
        MinHeapNode node = new MinHeapNode();
        node.root = root;
        node.freq = root.freq;
        node.word = word;
        return node;
    }

    /// <summary>Reflects one new occurrence of <paramref name="word"/> in the min heap.</summary>
    /// <param name="mH">Heap of the most frequent words seen so far.</param>
    /// <param name="root">Trie node where the word ends; its freq is already updated.</param>
    /// <param name="word">Word text stored in a newly created heap entry.</param>
    static void Insert(MinHeap mH, Node root, string word) {

        // A zero-capacity heap has no slot 0; without this guard the
        // "heap is full" branch below would throw on arr[0].
        if (mH.cap <= 0) {
            return;
        }

        // Case 1: word is already in mH,
        // so update its freq.
        if (root.ind != -1) {
            ++mH.arr[root.ind].freq;
            // The key only grew, so the entry can only need to move down.
            mH.Heapify(root.ind);
        }

        // Case 2: Word is not in mH and
        // there's still room.
        else if (mH.count < mH.cap) {
            mH.arr[mH.count] = EntryFor(root, word);
            root.ind = mH.count++;
            mH.Build();
        }

        // Case 3: Heap is full and freq of new
        // word is greater than the root.
        else if (root.freq > mH.arr[0].freq) {
            // The evicted word is no longer tracked by the heap.
            mH.arr[0].root.ind = -1;
            mH.arr[0] = EntryFor(root, word);
            root.ind = 0;
            mH.Heapify(0);
        }
    }

    /// <summary>
    /// Recursively walks/creates the trie path for <paramref name="word"/>
    /// starting at character <paramref name="index"/>, then bumps the count
    /// and updates the heap. If any character is not a-z after lowercasing,
    /// the recursion stops and the word is not counted.
    /// </summary>
    static void InsertUtil(Node root, MinHeap mH, string word, int index = 0) {
        if (index < word.Length) {
            // Invariant lowercasing keeps the a-z mapping independent of the
            // current culture (Turkish cultures map 'I' to a dotless i).
            int pos = char.ToLowerInvariant(word[index]) - 'a';
            if (pos >= 0 && pos < 26) {
                if (root.children[pos] == null) {
                    root.children[pos] = new Node();
                }
                InsertUtil(root.children[pos], mH, word, index + 1);
            }
        } else {
            // End of the word: first sighting marks the node, later ones count.
            if (root.isEnd) {
                ++root.freq;
            } else {
                root.isEnd = true;
                root.freq = 1;
            }
            Insert(mH, root, word);
        }
    }

    /// <summary>Adds one word, starting the trie walk at its first character.</summary>
    static void InsertTrieAndHeap(string word, Node root, MinHeap mH) {
        InsertUtil(root, mH, word);
    }

    /// <summary>Prints every live heap entry as "word : count", in heap order.</summary>
    static void DisplayMinHeap(MinHeap mH) {
        for (int i = 0; i < mH.count; ++i) {
            Console.WriteLine(mH.arr[i].word + " : " + mH.arr[i].freq);
        }
    }

    /// <summary>
    /// Feeds every word of <paramref name="text"/> to the trie and heap.
    /// Words are the pieces between runs of non-word characters and are
    /// lowercased with the invariant culture before insertion.
    /// </summary>
    private static void AddWords(string text, Node root, MinHeap mH) {
        foreach (string word in Regex.Split(text, @"\W+")) {
            if (!string.IsNullOrEmpty(word)) {
                InsertTrieAndHeap(word.ToLowerInvariant(), root, mH);
            }
        }
    }

    /// <summary>Prints the k most frequent words read line by line from <paramref name="file"/>.</summary>
    static void PrintKMostFreq(StreamReader file, int k) {
        MinHeap mH = new MinHeap(k);
        Node root = new Node();

        // to process the words in file
        string line;
        while ((line = file.ReadLine()) != null) {
            AddWords(line, root, mH);
        }
        DisplayMinHeap(mH);
    }

    /// <summary>Prints the k most frequent words of the string <paramref name="str"/>.</summary>
    static void PrintKMostFreq(string str, int k) {
        MinHeap mH = new MinHeap(k);
        Node root = new Node();

        AddWords(str, root, mH);

        DisplayMinHeap(mH);
    }

    public static void Main() {
        int k = 5;

        // to read from file
        // using (StreamReader file = new StreamReader("file.txt")) {
        //     PrintKMostFreq(file, k);
        // }

        // using string instead of file to
        // test and run the code
        string str = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks";
        PrintKMostFreq(str, k);
    }
}
JavaScript
/** A trie node with one child slot per letter a-z. */
class Node {
    constructor() {
        // True when a word terminates at this node.
        this.isEnd = false;
        // Number of times that word has been inserted.
        this.freq = 0;
        // Slot of the word in the min heap, or -1 when it is not there.
        this.ind = -1;
        // Child nodes; null where no child exists.
        this.child = Array.from({ length: 26 }, () => null);
    }
}

/** One heap slot: a word, its count, and the trie node where it ends. */
class MinHeapNode {
    constructor() {
        // Unused slot: no trie node, zero count, empty word.
        Object.assign(this, { root: null, freq: 0, word: "" });
    }
}

/**
 * Fixed-capacity array-backed min heap ordered by entry `freq`.
 * Every move of an entry also rewrites the `ind` field of its trie node so
 * the trie always knows the entry's current slot.
 */
class MinHeap {
    constructor(cap) {
        this.cap = cap;
        this.count = 0;
        const slots = new Array(cap).fill(null);
        for (let i = 0; i < slots.length; i++) {
            slots[i] = new MinHeapNode();
        }
        this.arr = slots;
    }

    /** Exchanges two slots and records the new positions in the trie. */
    swapNodes(a, b) {
        const movesToB = this.arr[a];
        const movesToA = this.arr[b];
        this.arr[a] = movesToA;
        this.arr[b] = movesToB;
        movesToA.root.ind = a;
        movesToB.root.ind = b;
    }

    /** Sifts the entry at idx down until neither child is smaller. */
    heapify(idx) {
        for (;;) {
            const l = 2 * idx + 1;
            const r = 2 * idx + 2;
            let smallest = idx;
            if (l < this.count && this.arr[l].freq < this.arr[smallest].freq) {
                smallest = l;
            }
            if (r < this.count && this.arr[r].freq < this.arr[smallest].freq) {
                smallest = r;
            }
            if (smallest === idx) {
                return;
            }
            this.swapNodes(idx, smallest);
            idx = smallest;
        }
    }

    /** Re-establishes the heap property over all live slots. */
    build() {
        let i = Math.floor((this.count - 1) / 2);
        while (i >= 0) {
            this.heapify(i);
            i--;
        }
    }
}

/**
 * Reflects one new occurrence of `word` in the min heap.
 *
 * @param {MinHeap} mH - Heap of the most frequent words seen so far.
 * @param {Node} root - Trie node where the word ends; its freq is already updated.
 * @param {string} word - Word text stored in a newly created heap entry.
 */
function insert(mH, root, word) {

    // A zero-capacity heap has no slot 0; without this guard the
    // "heap is full" branch below would throw reading arr[0].freq.
    if (!(mH.cap > 0)) {
        return;
    }

    // Case 1: word is already in mH,
    // so update its freq.
    if (root.ind !== -1) {
        mH.arr[root.ind].freq++;
        // The key only grew, so the entry can only need to move down.
        mH.heapify(root.ind);
    }

    // Case 2: Word is not in mH and
    // there's still room.
    else if (mH.count < mH.cap) {
        let node = new MinHeapNode();
        node.root = root;
        node.freq = root.freq;
        node.word = word;
        mH.arr[mH.count] = node;
        root.ind = mH.count++;
        mH.build();
    }

    // Case 3: Heap is full and freq of new
    // word is greater than the root.
    else if (root.freq > mH.arr[0].freq) {
        // The evicted word is no longer tracked by the heap.
        mH.arr[0].root.ind = -1;
        let node = new MinHeapNode();
        node.root = root;
        node.freq = root.freq;
        node.word = word;
        mH.arr[0] = node;
        root.ind = 0;
        mH.heapify(0);
    }
}

/**
 * Walks/creates the trie path for `word` starting at character `index`,
 * then bumps the word's count and updates the heap. Letters are matched
 * case-insensitively; if a character falls outside a-z the walk stops and
 * the word is not counted.
 */
function insertUtil(root, mH, word, index = 0) {
    const base = 'a'.charCodeAt(0);
    let node = root ? root : new Node();

    // Descend one character at a time, creating missing children.
    for (let i = index; i < word.length; i++) {
        const pos = word[i].toLowerCase().charCodeAt(0) - base;
        if (!(pos >= 0 && pos < 26)) {
            return;
        }
        if (!node.child[pos]) {
            node.child[pos] = new Node();
        }
        node = node.child[pos];
    }

    // End of the word: first sighting marks the node, later ones count.
    if (node.isEnd) {
        node.freq++;
    } else {
        node.isEnd = true;
        node.freq = 1;
    }
    insert(mH, node, word);
}

/** Adds one word, starting the trie walk at its first character. */
function insertTrieAndHeap(word, root, mH) {
    const firstChar = 0;
    insertUtil(root, mH, word, firstChar);
}

/** Prints every live heap entry as "word : count", in heap order. */
function displayMinHeap(mH) {
    let slot = 0;
    while (slot < mH.count) {
        const entry = mH.arr[slot];
        console.log(entry.word + " : " + entry.freq);
        slot++;
    }
}

/**
 * Prints the k most frequent words of `str` using the trie and min heap.
 *
 * @param {string} str - Whitespace-separated words.
 * @param {number} k - Capacity of the heap, i.e. how many words to report.
 */
function printKMostFreq(str, k) {
    let mH = new MinHeap(k);
    let root = new Node();

    // split(/\s+/) yields an empty string for leading or trailing
    // whitespace (and for empty input). An empty word would end at the trie
    // root and be reported as "" in the result, so it is skipped.
    let words = str.split(/\s+/);
    for (let word of words) {
        if (word === "") {
            continue;
        }
        insertTrieAndHeap(word, root, mH);
    }

    displayMinHeap(mH);
}

/** Demo entry point: runs the algorithm on a built-in sample text. */
function main() {
    // A string is used instead of a file so the example can be
    // tested and run as-is.
    const sample = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks";
    const k = 5;
    printKMostFreq(sample, k);
}

main();

Output
your : 3
well : 3
and : 4
to : 4
Geeks : 6

The above output is for a file with the following content.

Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks.


Next Article

Similar Reads