Practical File: Be (Cse) 6 Semester
Practical File: Be (Cse) 6 Semester
Compiler Design
January, 2018 – May, 2018
Submitted By
Om Prakash Sharma
Roll Number:
UE153068
Submitted To
Mr. Aakashdeep Sir
Token: -
A lexical token is a sequence of characters that can be treated as a unit in the
grammar of the programming languages.
Example of tokens:
▪ Type token (id, number, real, . . .)
▪ Punctuation tokens (IF, void, return, . . .)
▪ Alphabetic tokens (keywords)
Code: -
# Practical 1: a hand-written lexical analyser for a small C/C++-like language.
# NOTE(review): the indentation and several branches of the original scanning
# loop were lost during extraction (an `elif` appears with no matching `if`);
# the loop below reconstructs the apparent intent — comments, string literals,
# numbers, two-character operators, symbols, keywords and identifiers — and
# should be confirmed against the original program.

ops = ['+', '*', '/', '-', "++", "--"]
keywords = ["auto", "break", "bool", "case", "char", "const", "continue", "default",
            "do", "double", "else", "enum", "extern", "float", "for", "goto",
            "if", "int", "iostream", "include", "long", "namespace", "main", "std",
            "register", "return", "short", "signed", "sizeof", "static",
            "struct", "switch", "typedef", "union", "unsigned", "using",
            "void", "volatile", "while", "true", "false"]
symbols = ["(", ")", "{", "}", ";", ",", "<<", ">>", "#", "<", ">"]


def tokenize(src):
    """Scan *src* and return a list of (lexeme, kind) pairs.

    kind is one of: 'comment', 'string', 'number', 'operator', 'symbol',
    'keyword', 'identifier'.  Whitespace and any character that fits no
    category are skipped silently.
    """
    tokens = []
    i = 0
    n = len(src)
    while i < n:
        ch = src[i]
        two = src[i:i + 2]
        if two == "//":                        # line comment: runs to end of line
            j = i + 2
            while j < n and src[j] != "\n":
                j += 1
            tokens.append((src[i:j], "comment"))
            i = j + 1
        elif ch == '"':                        # string literal, no escape handling
            j = i + 1
            while j < n and src[j] != '"':
                j += 1
            tokens.append((src[i:j + 1], "string"))
            i = j + 1
        elif ch.isnumeric():                   # integer constant
            j = i
            while j < n and src[j].isnumeric():
                j += 1
            tokens.append((src[i:j], "number"))
            i = j
        elif two in ("++", "--", "<<", ">>"):  # two-character tokens before one-char
            tokens.append((two, "operator" if two in ops else "symbol"))
            i += 2
        elif ch in ops:
            tokens.append((ch, "operator"))
            i += 1
        elif ch in symbols:
            tokens.append((ch, "symbol"))
            i += 1
        elif ch.isalpha() or ch == "_":        # identifier or reserved keyword
            j = i
            while j < n and (src[j].isalnum() or src[j] == "_"):
                j += 1
            word = src[i:j]
            tokens.append((word, "keyword" if word in keywords else "identifier"))
            i = j
        else:                                  # whitespace / anything unhandled
            i += 1
    return tokens


if __name__ == "__main__":
    # The original script read "program.txt" at module level; guard it so the
    # module can be imported (and tested) without the file being present.
    with open("program.txt", "r") as f:
        for lexeme, kind in tokenize(f.read()):
            print(kind + ":", lexeme)
OUTPUT SNAPSHOT: -
#include <iostream>
using namespace std;
int main()
{
int n, i;
bool isPrime = true;
return 0;
}
Output Snapshot.
Entered Program: -
#include <iostream>
using namespace std;
int main()
{
string str = "C++ Programming";
return 0;
}
Output Snapshot
Entered Test Case 3.
Entered Program
#include <iostream>
using namespace std;
int main()
{
cout << "Hello, World!";
return 0;
}
Output Snapshot
PRACTICAL No.2
%{
Declarations
%}
Definitions
%%
Rules
%%
User subroutines
The optional Declarations and User subroutines sections are used for
ordinary C code that you want copied verbatim to the generated C file.
Declarations are copied to the top of the file, user subroutines to the bottom.
The optional Definitions section is where you specify options for the
scanner and can set up definitions to give names to regular expressions as a
simple substitution mechanism that allows for more readable entries in the
Rules section that follows.
The required Rules section is where you
specified the patterns that identify your tokens and the action to perform
upon recognizing each token.
Flex Rules
Arbitrary character . The period matches any single character except newline.
LETTER [A-Za-z]
Comment [//]
LST [ \n\t\r]+
ID ({LETTER}|_)({LETTER}|{DIGIT})*
SEPERATORS [;,.:]
Code: -
%{
/* Practical 2: token recognition with flex.
 * NOTE(review): the rules for keywords, identifiers, numbers and comments
 * were lost in extraction (only the whitespace and operator rules survived);
 * they are reconstructed here from the definitions section above. */
#include <stdio.h>
#include <math.h>
#include <conio.h>
%}
DIGIT      [0-9]
LETTER     [A-Za-z]
COMMENT    "//".*
LST        [ \n\t\r]+
ID         ({LETTER}|_)({LETTER}|{DIGIT})*
SEPERATORS [;,.:]
KEYWORDS   (include|iostream|bool|auto|double|using|cin|cout|namespace|std|int|struct|break|else|long|switch|case|enum|register|typedef|char|extern|return|union|const|float|short|unsigned|continue|for|signed|void|default|goto|sizeof|volatile|do|if|static|while)
%%
{LST}        { /* ignore whitespace */ }
{COMMENT}    { printf("A Comment: %s\n", yytext); }
{KEYWORDS}   { printf("A Keyword: %s\n", yytext); }
{ID}         { printf("An Identifier: %s\n", yytext); }
{DIGIT}+     { printf("A Number: %s\n", yytext); }
{SEPERATORS} { printf("A Separator: %s\n", yytext); }
"("|"#"|")"|"++"|"+"|"--"|"-"|"*"|"/"|"%"|"<<"|"<"|">>"|">"|"="|"=="|"!="|">="|"<="|"&&"|"&"|"||"|"|"|"^"|"["|"]"|"{"|"}"|"?" { printf("An Operator: %s\n", yytext); }
.            { /* skip anything unrecognised */ }
%%
/* Drive the scanner over program.txt; getch() keeps the console open. */
int main()
{
    yyin = fopen("program.txt", "r");
    yylex();
    getch();
    return 0;
}
/* No further input files after EOF. */
int yywrap()
{
    return 1;
}
OUTPUT SNAPSHOT: -
Entered Program 1.
#include <iostream>
using namespace std;
int main()
{
int n, i;
bool isPrime = true;
return 0;
}
Output: -
Entered Program 2.
#include <iostream>
using namespace std;
int main()
{
string str = "C++ Programming";
return 0;
}
Output: -
PRACTICAL No. 3
The regular expression parser, which we will create here will support these
three operations:
What is NFA?
List state keeps track of the starting and final state of each operation.
Code: -
# Python program to convert an infix regular expression to postfix form.
# NOTE(review): extraction dropped the method headers of this class; they are
# reconstructed from the surviving statements (a standard shunting-yard
# converter with regex operator precedence: '|' < '.' < '*').
class Conversion:
    """Stack-based infix -> postfix converter for regular expressions."""

    def __init__(self, capacity):
        self.top = -1
        self.capacity = capacity     # maximum stack depth (not enforced)
        self.array = []              # the operator stack
        # Precedence setting: union < concatenation < Kleene star.
        self.precedence = {'|': 1, '.': 2, '*': 3}
        self.output = []             # postfix result, built symbol by symbol

    def isEmpty(self):
        return self.top == -1

    def peek(self):
        return self.array[-1]

    def pop(self):
        if not self.isEmpty():
            self.top -= 1
            return self.array.pop()
        else:
            return "$"               # sentinel for popping an empty stack

    def push(self, op):
        self.top += 1
        self.array.append(op)

    # is operand: alphabet symbols are operands, everything else an operator
    def isOperand(self, ch):
        return ch.isalpha()

    def notGreater(self, i):
        """True when operator *i* binds no tighter than the stack top."""
        try:
            a = self.precedence[i]
            b = self.precedence[self.peek()]
            return a <= b
        except KeyError:
            return False             # '(' or empty stack: never pop past it

    def infixToPostfix(self, exp):
        """Convert *exp* to postfix; returns the postfix string or -1 on
        mismatched parentheses."""
        for i in exp:
            # operand: add it to output directly
            if self.isOperand(i):
                self.output.append(i)
            elif i == '(':
                self.push(i)
            elif i == ')':
                while not self.isEmpty() and self.peek() != '(':
                    a = self.pop()
                    self.output.append(a)
                if not self.isEmpty() and self.peek() != '(':
                    return -1
                else:
                    self.pop()       # discard the '('
            # An operator is encountered
            else:
                while not self.isEmpty() and self.notGreater(i):
                    self.output.append(self.pop())
                self.push(i)
        # flush the remaining operators
        while not self.isEmpty():
            self.output.append(self.pop())
        regex = "".join(self.output)
        return regex


# driver: convert the sample expression and show the postfix result
exp = "a.b*.(a|b)*"
obj = Conversion(len(exp))
regex = obj.infixToPostfix(exp)
print(regex)
# Thompson's construction: build an NFA from the postfix regex in `regex`.
# Each transition is [from_state, symbol, to_state]; "e" denotes an epsilon
# move.  Each fragment on `state` is a [start, end] pair of its own states.
# NOTE(review): the epsilon-transition appends for '.', '|' and '*' were lost
# in extraction; they are reconstructed from the standard Thompson rules.
s_id = 1
Transitions = []
state = []                       # stack of NFA fragments
for i in range(len(regex)):
    ch = regex[i]
    if ch == "a" or ch == "b":
        # elementary NFA: st --ch--> end
        st_state = s_id
        end_state = s_id + 1
        state.append([st_state, end_state])
        s_id = s_id + 2
        Transitions.append([st_state, ch, end_state])
    elif ch == ".":
        # concatenation: run fragment a, then fragment b
        b = state.pop()
        a = state.pop()
        st_state = s_id
        end_state = s_id + 1
        state.append([st_state, end_state])
        s_id = s_id + 2
        Transitions.append([st_state, "e", a[0]])
        Transitions.append([a[1], "e", b[0]])
        Transitions.append([b[1], "e", end_state])
    elif ch == "|":
        # union: branch into either fragment, then join
        b = state.pop()
        a = state.pop()
        st_state = s_id
        end_state = s_id + 1
        state.append([st_state, end_state])
        s_id = s_id + 2
        Transitions.append([st_state, "e", a[0]])
        Transitions.append([st_state, "e", b[0]])
        Transitions.append([a[1], "e", end_state])
        Transitions.append([b[1], "e", end_state])
    elif ch == "*":
        # Kleene star: skip the fragment entirely or repeat it
        a = state.pop()
        st_state = s_id
        end_state = s_id + 1
        state.append([st_state, end_state])
        s_id = s_id + 2
        Transitions.append([st_state, "e", a[0]])
        Transitions.append([a[1], "e", end_state])
        Transitions.append([st_state, "e", end_state])
        Transitions.append([a[1], "e", a[0]])
for i in range(len(Transitions)):
    print(Transitions[i])
OUTPUT SNAPSHOT: -
Test case 1.
Test Case 2.
Test Case 3.
PRACTICAL No. 4
Aim: - To convert NFA into DFA.
Theory: -
Deterministic Finite Automata (DFA)
In a DFA, for a particular input character, the machine goes to one state only. A
transition function is defined on every state for every input symbol. Also, in a
DFA, null (or ε) moves are not allowed, i.e., a DFA cannot change state without
any input character.
For example, below DFA with ∑ = {0, 1} accepts all strings ending with 0.
Implementation: -
Two main functions used in this Program are: -
1. Epsilon-Closure(T) – This function takes a set of NFA states T and returns the
set of all states reachable from the states in T using ε-transitions alone
(including the states of T themselves).
2. Move(a, T) – This function takes two inputs, the first being the input character
and the second a set of states. This function returns a list containing all the states
that can be reached on applying input “a” on the set of states T.
Code: -
# epsilon closure and Move function
# NOTE(review): extraction dropped the `def` line of Move, the epsilon test
# inside epsilon_closure, and fused the unmark/Not_in helpers; all are
# reconstructed here following the standard subset-construction algorithm.
# `Transitions` and `state` come from the NFA built in the previous practical.

def epsilon_closure(T):
    """Extend state set T (in place) with everything reachable on "e" moves;
    returns T sorted."""
    t_stack = T[:]                       # work-list of states still to expand
    while len(t_stack) != 0:
        t = t_stack.pop()
        for i in range(len(Transitions)):
            # follow only epsilon transitions leaving state t
            if Transitions[i][0] == t and Transitions[i][1] == "e":
                u = Transitions[i][2]
                if u not in T:
                    T.append(u)
                    t_stack.append(u)
    T.sort()
    return (T)

#print(epsilon_closure([5]))

def Move(a, T):
    """Return the sorted list of states reachable from set T on symbol a
    (non-epsilon transitions only)."""
    result = []
    for i in range(len(T)):
        for t in Transitions:
            if t[0] == T[i] and t[1] == a:
                result.append(t[2])
    result.sort()
    return result

# quick sanity checks on the helpers
T = epsilon_closure([7])
print(T)
T = Move("a", T)
print(T)

# Subset construction: start from the epsilon-closure of the NFA start state.
NFA_st_state = []
NFA_st_state.append(state[-1][0])        # start state of the last-built fragment
DFA_states = []
DFA_states.append([epsilon_closure(NFA_st_state), "unmark"])
DFA_trans = []
input_symbol = ["a", "b"]

def unmark(T):
    """True while at least one DFA state is still unprocessed ("unmark")."""
    for t in T:
        if t[1] == "unmark":
            return True
    return False

def Not_in(U, DFA_states):
    """True when the state set U has not been added to DFA_states yet."""
    for i in range(len(DFA_states)):
        if DFA_states[i][0] == U:
            return False
    return True

while unmark(DFA_states):
    # pick the first unmarked DFA state and mark it processed
    for i in range(len(DFA_states)):
        if DFA_states[i][1] == "unmark":
            T = DFA_states[i][0]
            DFA_states[i][1] = "mark"
            break
    for ch in input_symbol:
        U = Move(ch, T)
        U = epsilon_closure(U)
        if len(U) != 0:
            DFA_trans.append([T, ch, U])
            if Not_in(U, DFA_states):
                DFA_states.append([U, "unmark"])

print("DFA_states")
for i in range(len(DFA_states)):
    print(DFA_states[i][0])

# report the DFA states containing the NFA start / accepting states
for i in range(len(DFA_states)):
    if state[-1][0] in DFA_states[i][0]:
        print("Start state:", DFA_states[i][0])
for i in range(len(DFA_states)):
    if state[-1][1] in DFA_states[i][0]:
        print("Accepting state:", DFA_states[i][0])

print("\nDFA_transitions\n")
for i in range(len(DFA_trans)):
    print(DFA_trans[i])
The production is left-recursive if the leftmost symbol on the right side is the
same as the non terminal on the left side. For example,
expr → expr + term.
E → E + T | T
T → T * F | F
F → ( E ) | id
After removing the left recursion this becomes:
E → T E`
E` → + T E` | ε
T → F T`
T` → * F T` | ε
F → ( E ) | id
Implementation: -
String formatting is used for removing in the following manner: -
A - > Aa | b
# NOTE(review): extraction has dropped the indentation and several statements
# of this left-recursion-removal fragment (loop bodies and an `if` branch are
# visibly missing), so it is not runnable as-is.  The comments below describe
# what the surviving lines appear to do; confirm against the original program.
for i in range(c):
# presumably one pass per production of the grammar — TODO confirm
trans = []
# `trans` appears to collect right-hand sides for the current non-terminal
for i in range(count):
# loop body lost in extraction; presumably fills `trans` from the input
new_trans=[]
new_trans1=[]
# new_trans / new_trans1 appear to hold the alpha- and beta-parts used when
# rewriting a left-recursive A -> Aa | b as A -> bA', A' -> aA' | e — TODO confirm
for i in range(len(trans)):
if trans[i][0]==s_state:
# branch body lost in extraction
else:
new_trans1.append("e")
# "e" marks epsilon, consistent with the epsilon marker used elsewhere in
# this file
Test Case 1.
Test Case 2.
Test Case 3.
PRACTICAL NO. 6
AIM: - To implement Recursive Descent Parser.
Theory: - Recursive Descent Parsing
Recursive descent is a top-down parsing technique that constructs the parse
tree from the top and the input is read from left to right. It uses procedures
for every terminal and non-terminal entity. This parsing technique recursively
parses the input to make a parse tree, which may or may not require back-
tracking. But the grammar associated with it (if not left factored) cannot avoid
back-tracking. A form of recursive-descent parsing that does not require any
back-tracking is known as predictive parsing.
This parsing technique is regarded recursive as it uses context-free grammar
which is recursive in nature.
Implementation: -
The Grammar used for the parser is
E -> T | T + E
Variable pt and save are used for backtracking and to keep the track of the
pointer.
If the pointer reaches the end of the string entered by the user then the input
is accepted by the parser.
E -> T | T + E
"""
# Practical 6: backtracking recursive-descent parser for the grammar
#   E -> T + E | T
#   T -> int * T | ( E ) | int
# NOTE(review): the bodies of E2, T2 and T3 and the token-count read were lost
# in extraction; they are reconstructed from the grammar stated above.

string = []   # the token list being parsed
pt = 0        # cursor into `string` (global so every procedure can advance it)


def parse(tokens):
    """Parse *tokens* (a list of terminal strings, e.g. ["int", "+", "int"]).

    Returns True when the whole input derives E — mirrors main()'s
    "Accepted" check (the cursor must reach the end of the input).
    """
    global string, pt
    string = list(tokens)
    pt = 0
    E()
    return pt == len(string)


def match(char):
    """Consume the next token if it equals *char*; return success."""
    global pt
    if pt == len(string):
        return False
    if string[pt] == char:
        pt = pt + 1
        return True
    else:
        return False


def E1():
    # E -> T
    return T()


def E2():
    # E -> T + E
    return T() and match('+') and E()


def E():
    """Try E -> T + E first; on failure restore the cursor and try E -> T."""
    global pt
    save = pt
    if E2() == True:
        return True
    else:
        pt = save
        return E1()


def T1():
    # T -> int
    return match('int')


def T2():
    # T -> int * T
    return match('int') and match('*') and T()


def T3():
    # T -> ( E )
    return match('(') and E() and match(')')


def T():
    """Try the alternatives of T in order, restoring pt between attempts."""
    global pt
    save = pt
    if T2() == True:
        return True
    else:
        pt = save
    if T3() == True:
        return True
    else:
        pt = save
        return T1()


def main():
    """Read the token count, then one token per line, and report the verdict."""
    global string, pt
    string = []
    pt = 0
    n = int(input())          # number of tokens — reconstructed, TODO confirm
    for i in range(n):
        string.append(input())
    E()
    if pt == len(string):
        print("Accepted", pt)
    else:
        print("Not Accepted", pt)


if __name__ == "__main__":
    main()
OUTPUT SNAPSHOT: -
1. Entered String ( int * int )
Implementation: -
Compare(a, b): - function is used for comparing two strings that start with
same character. This function returns the max length of the common string in
a and b.
Minimum pattern that is common in all the productions is found out using
the Compare(a, b) function inside the for loop with the help of variable of
min to keep track the minimum count.
def compare(a, b):
    """Return the length of the longest common prefix of strings a and b.

    Used by the left-factoring pass to find the pattern shared by two
    productions that start with the same character.
    (NOTE(review): the `def` line was lost in extraction and has been
    reconstructed from the description above this code.)
    """
    count = 0
    # only the shorter string bounds the comparison
    if len(a) < len(b):
        j = len(a)
    else:
        j = len(b)
    for i in range(j):
        if a[i] == b[i]:
            count = count + 1
        else:
            break
    return count
# Left factoring pass.  `count` productions for non-terminal `s_state` are
# read into `trans`; alternatives sharing a common prefix are rewritten as
#   A -> <prefix>A'        with tails collected in l2 ("e" = epsilon).
# NOTE(review): the production-reading loop body and the "already grouped"
# guard were lost in extraction and are reconstructed here.

trans = []
for i in range(count):
    trans.append(input())            # reconstructed — TODO confirm input format

# pair each production's first character with its index
s1 = []
for i in range(len(trans)):
    s1.append([trans[i][0], i])

done = []   # first characters already grouped
l1 = []     # left-factored production heads
l2 = []     # tails remaining after the common prefix
for i in range(len(s1)):
    if s1[i][0] not in done:
        # gather every production starting with the same character
        t = [s1[i]]
        done.append(s1[i][0])
        for j in range(i + 1, len(s1)):
            if s1[j][0] == s1[i][0]:
                t.append(s1[j])
        if len(t) > 1:
            # shortest common prefix over the whole group
            cut = compare(trans[t[0][1]], trans[t[1][1]])
            for k in range(1, len(t) - 1):
                if compare(trans[t[k][1]], trans[t[k + 1][1]]) < cut:
                    cut = compare(trans[t[k][1]], trans[t[k + 1][1]])
            l1.append([trans[t[0][1]][:cut] + s_state + "'"])
            for k in range(len(t)):
                if len(trans[t[k][1]][cut:]) == 0:
                    l2.append("e")   # whole production was the prefix: epsilon tail
                else:
                    l2.append([trans[t[k][1]][cut:]])
        else:
            # no sharing: production kept unchanged
            l1.append([trans[t[0][1]]])
    else:
        pass                          # character already handled in a group
print(l1)
print(l2)
OUTPUT SNAPSHOT: -
Test Case 1.
Test Case 2.
Test Case 3.
EXPERIMENT NO. 8
CODE: -
# Print the grammar entered by the user, one non-terminal per line.
# (`n`, `nt` and `prod` are read earlier, outside this excerpt.)
for i in range(n):
    print(nt[i], " -> ", end=' ')
    #for j in range(len(prod[i])):
    print(prod[i][:], sep=" | ")

# first[i] accumulates FIRST(nt[i]).
first = []
for i in range(n):
    first.append([])


def findfirst(c):
    """Compute FIRST(c) for non-terminal c, accumulating into first[].

    NOTE(review): only the indentation of this function was lost in
    extraction; the statements themselves are unchanged.
    """
    global first
    n = nt.index(c)
    for item in prod[n]:
        if item[0] == "e":
            first[n].append("e")  # adding epsilon in first if in production
        elif item[0].islower() == True or item[0].isalpha() == False:
            if item == "id":      # for adding id instead of i
                first[n].append("id")
            else:
                # adding terminals in first if they exist in the production
                first[n].append(item[0])
        elif item[0].isupper() == True:
            # production starts with a non-terminal: take its FIRST set
            findfirst(item[0])
            for f in first[nt.index(item[0])]:
                first[n].append(f)
            if "e" in first[nt.index(item[0])]:
                # first symbol can vanish: also take FIRST of the next symbol
                # NOTE(review): assumes item[1] is also a non-terminal — holds
                # for the test grammar, confirm for others
                findfirst(item[1])   # recursion call
                for f in first[nt.index(item[1])]:
                    first[n].append(f)
    return first[n]


findfirst("E")
findfirst("X")
findfirst("Y")
for i in range(n):
    print("first of", nt[i], "is", first[i])
Finding Follow of the Grammar
# follow[i] accumulates FOLLOW(nt[i]); FIRST sets were computed above.
# NOTE(review): only the indentation of this section was lost in extraction.
follow = []
for i in range(n):
    follow.append([])

# Rule: for A -> ...Xb, the terminal b (or FIRST(b) minus epsilon when b is a
# non-terminal) goes into FOLLOW(X).
for i in range(len(nt)):
    for j in range(len(prod)):
        for k in range(len(prod[j])):
            for l in range(len(prod[j][k])):
                if prod[j][k][l] == nt[i] and l != len(prod[j][k]) - 1:
                    if prod[j][k][l + 1].islower() == True or prod[j][k][l + 1].isalpha() == False:
                        # next symbol is a terminal: it follows nt[i] directly
                        follow[i].append(prod[j][k][l + 1])
                    elif prod[j][k][l + 1].isupper() == True:
                        # next symbol is a non-terminal: add its FIRST set
                        for m in first[nt.index(prod[j][k][l + 1])]:
                            if m != "e" and m not in follow[nt.index(prod[j][k][l])]:
                                follow[nt.index(prod[j][k][l])].append(m)

# Rule: for A -> ...B (or A -> ...BC with epsilon in FIRST(C)),
# FOLLOW(A) is added to FOLLOW(B).
for i in range(len(prod)):
    for j in range(len(prod[i])):
        if len(prod[i][j]) >= 2:
            if prod[i][j][-1].isupper() == True:
                # production ends in a non-terminal
                for m in follow[i]:
                    if m not in follow[nt.index(prod[i][j][-1])]:
                        follow[nt.index(prod[i][j][-1])].append(m)
            if prod[i][j][-2].isupper() == True and prod[i][j][-1].isupper() == True and "e" in first[nt.index(prod[i][j][-1])]:
                # last symbol can vanish: FOLLOW(A) also reaches the
                # second-to-last non-terminal
                for m in follow[i]:
                    if m not in follow[nt.index(prod[i][j][-2])]:
                        follow[nt.index(prod[i][j][-2])].append(m)

for i in range(n):
    print("follow of", nt[i], "is", follow[i])
OUTPUT SNAPSHOT: -
Test Case 1.
The Grammar Entered by the User