import heapq
import string
class Node:
    """One node of the lowercase-letter trie used to count words."""

    def __init__(self):
        # One slot per letter 'a'..'z'; None means no child for that letter.
        self.child = [None for _ in range(26)]
        # Position of this word's entry in the heap array, or -1 when the
        # word is not currently held by the heap.
        self.ind = -1
        # Number of times the word ending at this node has been inserted.
        self.freq = 0
        # True once at least one inserted word ends at this node.
        self.isEnd = False
class MinHeapNode:
    """A heap slot: a word, its count, and a link to its trie node."""

    def __init__(self):
        # The word's text as it was first handed to the heap.
        self.word = ""
        # Occurrence count used as the heap ordering key.
        self.freq = 0
        # Trie node for the word; None while the slot is an unused placeholder.
        self.root = None
class MinHeap:
    """Array-backed binary min-heap of heap entries ordered by ``freq``.

    Only the first ``count`` slots of ``arr`` are live; the remaining slots
    hold placeholder entries. Whenever two live entries trade places, the
    ``ind`` field of their trie nodes is updated to the new positions.
    """

    def __init__(self, cap):
        """Create an empty heap with ``cap`` pre-allocated placeholder slots."""
        self.count = 0
        self.cap = cap
        self.arr = [MinHeapNode() for _ in range(cap)]

    def swapNodes(self, a, b):
        """Exchange the entries at positions ``a`` and ``b``.

        Both entries must have a trie node in ``root``; its ``ind`` is
        rewritten to the entry's new position.
        """
        slots = self.arr
        first, second = slots[a], slots[b]
        slots[a] = second
        slots[b] = first
        second.root.ind = a
        first.root.ind = b

    def heapify(self, idx):
        """Sift the entry at ``idx`` down until neither child is smaller."""
        while True:
            smallest = idx
            # Left child is examined first, then the right one, so the right
            # child only wins when it is strictly smaller than the left.
            for child in (2 * idx + 1, 2 * idx + 2):
                if child < self.count and self.arr[child].freq < self.arr[smallest].freq:
                    smallest = child
            if smallest == idx:
                return
            self.swapNodes(idx, smallest)
            idx = smallest

    def build(self):
        """Re-establish the heap property over the first ``count`` entries."""
        last_start = (self.count - 1) // 2
        for position in reversed(range(last_start + 1)):
            self.heapify(position)
def _newHeapEntry(root, word):
    """Build a heap entry for ``word`` that copies the trie node's count."""
    entry = MinHeapNode()
    entry.root = root
    entry.freq = root.freq
    entry.word = word
    return entry


def insert(mH, root, word):
    """Record one more occurrence of ``word`` in the min-heap ``mH``.

    ``root`` is the trie node at which ``word`` ends; its ``freq`` must
    already include the occurrence being reported. ``root.ind`` is kept in
    sync with the word's position in ``mH.arr`` (-1 when not in the heap).

    A heap that can hold no entries (``cap`` of zero or less) is left
    untouched instead of raising ``IndexError`` on ``mH.arr[0]``.
    """
    # Case 1: word is already in mH, so bump its count. The key only grew,
    # so sifting down from its position restores the heap property.
    if root.ind != -1:
        mH.arr[root.ind].freq += 1
        mH.heapify(root.ind)
        return

    # Case 2: word is not in mH and there is still room. Append it to the
    # live region and rebuild.
    if mH.count < mH.cap:
        mH.arr[mH.count] = _newHeapEntry(root, word)
        root.ind = mH.count
        mH.count += 1
        mH.build()
        return

    # Zero-capacity heap: there is no minimum to compare against or evict.
    if mH.count == 0:
        return

    # Case 3: heap is full and the new word is more frequent than the
    # current minimum. Evict the minimum and take its slot.
    if root.freq > mH.arr[0].freq:
        mH.arr[0].root.ind = -1
        mH.arr[0] = _newHeapEntry(root, word)
        root.ind = 0
        mH.heapify(0)
def insertUtil(root, mH, word, index=0):
    """Insert ``word`` into the trie under ``root`` and report it to ``mH``.

    Characters from position ``index`` (non-negative) onward are lowercased
    and followed through the trie, creating missing child nodes on the way.
    When the end of the word is reached, the final node's count is updated
    and ``insert`` is called with the original, un-lowercased ``word``.

    If any character is outside 'a'..'z' the word is abandoned: nothing is
    counted and the heap is not touched, although nodes already created for
    the leading letters stay in the trie.

    The walk is iterative, so the length of a token is not limited by the
    interpreter's recursion limit.
    """
    # A None root gets a throwaway trie that the caller never sees; this is
    # kept only for compatibility with the previous behaviour.
    if root is None:
        root = Node()

    node = root
    base = ord('a')
    # Lowercase the tail as a whole: some characters lowercase to more than
    # one code point, which ord() on a single-character lower() cannot take.
    for ch in word[index:].lower():
        pos = ord(ch) - base
        if not 0 <= pos < 26:
            return
        if node.child[pos] is None:
            node.child[pos] = Node()
        node = node.child[pos]

    # End of word reached: count this occurrence, then update the heap.
    if node.isEnd:
        node.freq += 1
    else:
        node.isEnd = True
        node.freq = 1
    insert(mH, node, word)
def insertTrieAndHeap(word, root, mH):
    """Add ``word`` to the trie rooted at ``root`` and update the heap ``mH``.

    Convenience entry point that starts ``insertUtil`` at the first character.
    """
    insertUtil(root, mH, word, 0)
def displayMinHeap(mH):
    """Print ``word : freq`` for every live heap entry.

    Entries are printed in heap-array order, which is not sorted by count.
    """
    for entry in mH.arr[:mH.count]:
        print(entry.word, ":", entry.freq)
def printKMostFreq(file, k):
    """Print the ``k`` most frequent whitespace-separated words in ``file``.

    ``file`` is an open text file object; its whole content is read with
    ``file.read()`` and then processed exactly like an in-memory string.
    """
    contents = file.read()
    printKMostFreqString(contents, k)
def printKMostFreqString(str, k):
    """Print the ``k`` most frequent whitespace-separated words in ``str``.

    Each word is inserted into a fresh trie and a fresh heap of capacity
    ``k``; the surviving heap entries are then printed.
    """
    trie = Node()
    heap = MinHeap(k)
    tokens = str.split()
    for token in tokens:
        insertTrieAndHeap(token, trie, heap)
    displayMinHeap(heap)
if __name__ == "__main__":
    k = 5
    # To read the words from a file instead:
    #     with open("file.txt", "r") as file:
    #         printKMostFreq(file, k)
    # A literal string is used here so the demo runs without an input file.
    # It is named ``text`` rather than ``str`` so the builtin ``str`` type is
    # not shadowed at module level.
    text = "Welcome to the world of Geeks . This portal has been created to provide well written well thought and well explained solutions for selected questions If you like Geeks for Geeks and would like to contribute here is your chance You can write article and mail your article to contribute at geeksforgeeks org See your article appearing on the Geeks for Geeks main page and help thousands of other Geeks"
    printKMostFreqString(text, k)