Ex No 1: IMPLEMENTATION OF SYMBOL TABLE USING C
AIM:
To write a C program to implement a symbol table.
ALGORITHM:
Step 1: Start the program.
Step 2: Get the input from the user with the terminating symbol ‘$’.
Step 3: Allocate memory for the variable by dynamic memory allocation function.
Step 4: If the next character of the symbol is an operator then only the memory is allocated.
Step 5: While reading, the input symbol is inserted into symbol table along with its memory address.
Step 6: The steps are repeated till ‘$’ is reached.
Step 7: To search for a variable, enter the variable to be searched; the symbol table is checked for the
corresponding variable, and the variable along with its address is displayed as the result.
Step 8: Stop the program.
PROGRAM CODING:
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>

/*
 * Symbol-table demo: read an expression terminated by '$', echo it, then
 * enter every identifier that is followed by an operator (+ - * =) or that
 * ends the expression into a small table together with a freshly allocated
 * address, and finally look one symbol up by name.
 */
#define MAX_EXPR 15   /* capacity of the expression buffer */
#define MAX_SYM   5   /* capacity of the symbol table      */

int main(void)
{
    int i = 0, j = 0, x = 0, n, flag = 0, ci;
    void *p, *add[MAX_SYM];                 /* address allocated per symbol */
    char ch, srch, b[MAX_EXPR], d[MAX_SYM], c;

    printf("Expression terminated by $ : ");
    /* Read until '$' or EOF; the bound on i prevents overflowing b[]. */
    while ((ci = getchar()) != EOF && ci != '$' && i < MAX_EXPR)
        b[i++] = (char)ci;
    n = i - 1;                              /* index of last character read */

    printf("Given Expression : ");
    for (i = 0; i <= n; i++)
        printf("%c", b[i]);

    printf("\n Symbol Table\n");
    printf("Symbol\taddr\ttype");
    while (j <= n) {
        c = b[j];
        /* A letter is an identifier when it ends the expression or is
         * immediately followed by an operator.  x is bounded so the
         * table arrays cannot overflow. */
        if (isalpha((unsigned char)c) && x < MAX_SYM) {
            if (j == n) {
                p = malloc(1);              /* one byte per symbol (was malloc(c): size from the char code) */
                add[x] = p;
                d[x] = c;
                printf("\n%c\t%p\tidentifier\n", c, p);   /* %p, not %d, for pointers */
                x++;                        /* bug fix: the last symbol was never counted */
            } else {
                ch = b[j + 1];
                if (ch == '+' || ch == '-' || ch == '*' || ch == '=') {
                    p = malloc(1);
                    add[x] = p;
                    d[x] = c;
                    printf("\n%c\t%p\tidentifier\n", c, p);
                    x++;
                }
            }
        }
        j++;
    }

    printf("\nThe symbol to be searched : ");
    if (scanf(" %c", &srch) != 1)           /* " %c" skips the newline left in stdin */
        return 1;
    for (i = 0; i < x; i++) {               /* bug fix: was i <= x, reading past the table */
        if (srch == d[i]) {
            printf("\nSymbol Found");
            printf("\n%c%s%p\n", srch, " @address ", add[i]);
            flag = 1;
        }
    }
    if (flag == 0)
        printf("\nSymbol Not Found");
    return 0;
}
OUTPUT:
2
Result: Thus the C program for implementing symbol table has been executed and verified.
Ex No 2: IMPLEMENTATION OF LEXICAL ANALYZER USING C
AIM:
To write a c program to implement the lexical analyzer.
ALGORITHM:
PROGRAM:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * Toy lexical analyzer: reads whitespace-separated tokens from add.c and
 * writes one classification per token (keyword / function / operator /
 * special character / constant / identifier) to output.txt.
 */
int main(void)
{
    int i, b;
    char str[64];                           /* bug fix: original read into an uninitialized char* (UB) */
    char *end;
    long c;
    const char *key[]  = {"int", "char", "for", "while", "void", "main"};
    /* NOTE(review): '+' appears twice in the original table; kept as-is
     * to preserve the documented behavior — possibly '%' was intended. */
    const char oper[]  = {'+', '-', '*', '/', '&', '+', '=', '!'};
    const char spl[]   = {'(', ')', '{', '}', '[', ']', ';', ':', '%'};
    const char *func[] = {"printf", "scanf", "getch", "clrscr"};  /* "gatch" typo fixed */
    FILE *fp1, *fp2;

    fp1 = fopen("add.c", "r");
    if (fp1 == NULL) {                      /* bug fix: fopen was never checked */
        perror("add.c");
        return 1;
    }
    fp2 = fopen("output.txt", "w");
    if (fp2 == NULL) {
        perror("output.txt");
        fclose(fp1);
        return 1;
    }

    /* Testing fscanf's return value avoids the classic while(!feof) bug
     * that processes the last token twice. */
    while (fscanf(fp1, "%63s", str) == 1) {
        b = 0;
        for (i = 0; i < (int)(sizeof key / sizeof key[0]); i++) {
            if (strcmp(str, key[i]) == 0) {
                b = 1;
                fprintf(fp2, "%s is a keyword\n", str);
                break;
            }
        }
        for (i = 0; i < (int)(sizeof func / sizeof func[0]); i++) {
            if (strcmp(str, func[i]) == 0) {
                b = 1;
                fprintf(fp2, "%s is a function\n", str);
                break;
            }
        }
        /* Bound by the real array size: the original scanned 7 of 8
         * operator entries and 11 entries of the 9-element spl[] (OOB read). */
        for (i = 0; i < (int)(sizeof oper / sizeof oper[0]); i++) {
            if (str[0] == oper[i]) {
                b = 1;
                fprintf(fp2, "%s is a operator\n", str);
                break;
            }
        }
        for (i = 0; i < (int)(sizeof spl / sizeof spl[0]); i++) {
            if (str[0] == spl[i]) {
                b = 1;
                fprintf(fp2, "%s is a special character\n", str);
                break;
            }
        }
        if (b == 0) {
            /* strtol with an end-pointer check replaces atoi: it accepts
             * "0" as a constant and reports partial parses as identifiers. */
            c = strtol(str, &end, 10);
            if (end != str && *end == '\0')
                fprintf(fp2, "%ld is a constant\n", c);  /* bug fix: original passed an int to %s (UB) */
            else
                fprintf(fp2, "%s is a identifier\n", str);
        }
    }
    fclose(fp1);
    fclose(fp2);
    return 0;
}
OUTPUT:
ADD.c
/* Sample input file (add.c) scanned by the lexical analyzer above;
 * its token-by-token classification is shown in OUTPUT.TXT below. */
void main()
{
int a=10;
printf("sample %d",a);
}
OUTPUT.TXT
void is a keyword
main() is a identifier
{ is a special character
int is a keyword
a=10; is a identifier
printf("sample is a identifier
%d",a); is a special character
} is a special character
5
RESULT: Thus the program for implementing of lexical analysis using C was written and
executed successfully.
AIM:
To study about the basics of LEX and YACC Compiler
DESCRIPTION:
Lex helps to write programs whose control flow is directed by instances of regular
expressions in the input stream. It is well suited for editor-script type transformations and for
segmenting input in preparation for a parsing routine.
Lex source is a table of regular expressions and corresponding program fragments. The table
is translated to a program which reads an input stream, copying it to an output stream and
partitioning the input into strings which match the given expressions. As each such string is
recognized the corresponding program fragment is executed. The recognition of the expressions is
performed by a deterministic finite automaton generated by Lex. The program fragments written by
6
the user are executed in the order in which the corresponding regular expressions occur in the input
stream.
The lexical analysis programs written with Lex accept ambiguous specifications and choose
the longest match possible at each input point. If necessary, substantial lookahead is performed on
the input, but the input stream will be backed up to the end of the current partition, so that the user
has general freedom to manipulate it.
Lex can generate analyzers in either C or Ratfor, a language which can be translated
automatically to portable Fortran. It is available on the PDP-11 UNIX, Honeywell GCOS, and IBM
OS systems. This manual, however, will only discuss generating analyzers in C on the UNIX system,
which is the only supported form of Lex under UNIX Version 7. Lex is designed to simplify
interfacing with Yacc, for those with access to this compiler-compiler system.
1. Introduction
Lex is a program generator designed for lexical processing of character input streams. It
accepts a high-level, problem oriented specification for character string matching, and produces a
program in a general purpose language which recognizes regular expressions. The regular
expressions are specified by the user in the source specifications given to Lex. The Lex written code
recognizes these expressions in an input stream and partitions the input stream into strings matching
the expressions. At the boundaries between strings program sections provided by the user are
executed. The Lex source file associates the regular expressions and the program fragments. As each
expression appears in the input to the program written by Lex, the corresponding fragment is
executed.
Lex is not a complete language, but rather a generator representing a new language feature
which can be added to different programming languages, called ``host languages.'' Just as general
purpose languages can produce code to run on different computer hardware, Lex can write code in
different host languages. The host language is used for the output code generated by Lex and also for
the program fragments added by the user. Compatible run-time libraries for the different host
languages are also provided. This makes Lex adaptable to different environments and different users.
Lex turns the user's expressions and actions (called source in this memo) into the host general-
purpose language; the generated program is named yylex. The yylex program will recognize
expressions in a stream (called input in this memo) and perform the specified actions for each
expression as it is detected. See Figure 1.
+-------+
Source -> | Lex | -> yylex
+-------+
+-------+
Input -> | yylex | -> Output
+-------+
An overview of Lex
Figure 1
7
For a trivial example, consider a program to delete from the input all blanks or tabs at the ends of
lines.
%%
[ \t]+$ ;
is all that is required. The program contains a %% delimiter to mark the beginning of the rules, and
one rule. This rule contains a regular expression which matches one or more instances of the
characters blank or tab (written \t for visibility, in accordance with the C language convention) just
prior to the end of a line. The brackets indicate the character class made of blank and tab; the +
indicates ``one or more ...''; and the $ indicates ``end of line,'' as in QED. No action is specified, so
the program generated by Lex (yylex) will ignore these characters. Everything else will be copied.
To change any remaining string of blanks or tabs to a single blank, add another rule:
%%
[ \t]+$ ;
[ \t]+ printf(" ");
The finite automaton generated for this source will scan for both rules at once, observing at
the termination of the string of blanks or tabs whether or not there is a newline character, and
executing the desired rule action. The first rule matches all strings of blanks or tabs at the end of
lines, and the second rule all remaining strings of blanks or tabs.
Lex can be used alone for simple transformations, or for analysis and statistics gathering on a
lexical level. Lex can also be used with a parser generator to perform the lexical analysis phase; it is
particularly easy to interface Lex and Yacc [3]. Lex programs recognize only regular expressions;
Yacc writes parsers that accept a large class of context free grammars, but require a lower level
analyzer to recognize input tokens. Thus, a combination of Lex and Yacc is often appropriate. When
used as a preprocessor for a later parser generator, Lex is used to partition the input stream, and the
parser generator assigns structure to the resulting pieces. The flow of control in such a case (which
might be the first half of a compiler, for example) is shown in Figure 2. Additional programs, written
by other generators or by hand, can be added easily to programs written by Lex.
lexical grammar
rules rules
| |
v v
+---------+ +---------+
| Lex | | Yacc |
+---------+ +---------+
| |
v v
+---------+ +---------+
Input -> | yylex | -> | yyparse | -> Parsed input
+---------+ +---------+
Lex with Yacc
Figure 2
Yacc users will realize that the name yylex is what Yacc expects its lexical analyzer to be
named, so that the use of this name by Lex simplifies interfacing.
8
Lex generates a deterministic finite automaton from the regular expressions in the source [4].
The automaton is interpreted, rather than compiled, in order to save space. The result is still a fast
analyzer. In particular, the time taken by a Lex program to recognize and partition an input stream is
proportional to the length of the input. The number of Lex rules or the complexity of the rules is not
important in determining speed, unless rules which include forward context require a significant
amount of rescanning. What does increase with the number and complexity of rules is the size of the
finite automaton, and therefore the size of the program generated by Lex.
In the program written by Lex, the user's fragments (representing the actions to be performed
as each regular expression is found) are gathered as cases of a switch. The automaton interpreter
directs the control flow. Opportunity is provided for the user to insert either declarations or
additional statements in the routine containing the actions, or to add subroutines outside this action
routine.
Lex is not limited to source which can be interpreted on the basis of one character lookahead.
For example, if there are two rules, one looking for ab and another for abcdefg, and the input stream
is abcdefh, Lex will recognize ab and leave the input pointer just before cd. . . Such backup is more
costly than the processing of simpler languages.
2. Lex Source.
In the outline of Lex programs shown above, the rules represent the user's control decisions;
they are a table, in which the left column contains regular expressions (see section 3) and the right
column contains actions, program fragments to be executed when the expressions are recognized.
Thus an individual rule might appear
9
mechanise printf("mechanize");
petrol printf("gas");
would be a start. These rules are not quite enough, since the word petroleum would become gaseum;
a way of dealing with this will be described later.
The definitions of regular expressions are very similar to those in QED [5]. A regular
expression specifies a set of strings to be matched. It contains text characters (which match the
corresponding characters in the strings being compared) and operator characters (which specify
repetitions, choices, and other features). The letters of the alphabet and the digits are always text
characters; thus the regular expression
integer
matches the string integer wherever it appears and the expression
a57D
looks for the string a57D.
The operator characters are
" \ [ ] ^ - ? . * + | ( ) $ / { } % < >
and if they are to be used as text characters, an escape should be used. The quotation mark operator
(") indicates that whatever is contained between a pair of quotes is to be taken as text characters.
Thus
xyz"++"
matches the string xyz++ when it appears. Note that a part of a string may be quoted. It is harmless
but unnecessary to quote an ordinary text character; the expression
"xyz++"
is the same as the one above. Thus by quoting every non-alphanumeric character being used as a text
character, the user can avoid remembering the list above of current operator characters, and is safe
should further extensions to Lex lengthen the list.
An operator character may also be turned into a text character by preceding it with \ as in
xyz\+\+
which is another, less readable, equivalent of the above expressions. Another use of the quoting
mechanism is to get a blank into an expression; normally, as explained above, blanks or tabs end a
rule. Any blank character not contained within [] (see below) must be quoted. Several normal C
escapes with \ are recognized: \n is newline, \t is tab, and \b is backspace. To enter \ itself, use \\.
Since newline is illegal in an expression, \n must be used; it is not required to escape tab and
backspace. Every character but blank, tab, newline and the list above is always a text character.
Character classes :Classes of characters can be specified using the operator pair []. The
construction [abc] matches a single character, which may be a, b, or c. Within square brackets, most
operator meanings are ignored. Only three characters are special: these are \ - and ^. The - character
indicates ranges. For example,
[a-z0-9<>_]
10
indicates the character class containing all the lower case letters, the digits, the angle brackets, and
underline. Ranges may be given in either order. Using - between any pair of characters which are not
both upper case letters, both lower case letters, or both digits is implementation dependent and will
get a warning message. (E.g., [0-z] in ASCII is many more characters than it is in EBCDIC). If it is
desired to include the character - in a character class, it should be first or last; thus
[-+0-9]
matches all the digits and the two signs.
In character classes, the ^ operator must appear as the first character after the left bracket; it
indicates that the resulting string is to be complemented with respect to the computer character set.
Thus
[^abc]
matches all characters except a, b, or c, including all special or control characters; or
[^a-zA-Z]
is any character which is not a letter. The \ character provides the usual escapes within character
class brackets.
Arbitrary character. To match almost any character, the operator character . is the class of all
characters except newline. Escaping into octal is possible although non-portable:
[\40-\176]
matches all printable characters in the ASCII character set, from octal 40 (blank) to octal 176 (tilde).
Optional expressions: the operator ? indicates an optional element of an expression. Thus
ab?c
matches either ac or abc.
a*
is any number of consecutive a characters, including zero; while
a+
is one or more instances of a. For example,
[a-z]+
is all strings of lower case letters and indicates all alphanumeric strings with a leading alphabetic
character.
[A-Za-z][A-Za-z0-9]*
This is a typical expression for recognizing identifiers in computer languages.
(ab|cd)
11
matches either ab or cd. Note that parentheses are used for grouping, although they are not necessary
on the outside level;
ab|cd
would have sufficed. Parentheses can be used for more complex expressions:
(ab|cd+)?(ef)*
matches such strings as abefef, efefef, cdef, or cddd; but not abc, abcd, or abcdef.
Context sensitivity. Lex will recognize a small amount of surrounding context. The two simplest
operators for this are ^ and $. If the first character of an expression is ^, the expression will only be
matched at the beginning of a line (after a newline character, or at the beginning of the input stream).
This can never conflict with the other meaning of ^, complementation of character classes, since that
only applies within the [] operators. If the very last character is $, the expression will only be
matched at the end of a line (when immediately followed by newline). The latter operator is a special
case of the / operator character, which indicates trailing context. The expression
ab/cd
matches the string ab, but only if followed by cd. Thus
ab$
is the same as
ab/\n
Left context is handled in Lex by start conditions as explained in section 10. If a rule is only to be
executed when the Lex automaton interpreter is in start condition x, the rule should be prefixed by
<x>
using the angle bracket operator characters. If we considered ``being at the beginning of a line'' to be
start condition ONE, then the ^ operator would be equivalent to
<ONE>
Start conditions are explained more fully later.
Repetitions and Definitions. The operators {} specify either repetitions (if they enclose numbers) or
definition expansion (if they enclose a name). For example
{digit}
looks for a predefined string named digit and inserts it at that point in the expression. The definitions
are given in the first part of the Lex input, before the rules. In contrast,
a{1,5}
looks for 1 to 5 occurrences of a.
Finally, initial % is special, being the separator for Lex source segments.
4. Lex Actions.
When an expression written as above is matched, Lex executes the corresponding action.
This section describes some features of Lex which aid in writing actions. Note that there is a default
action, which consists of copying the input to the output. This is performed on all strings not
otherwise matched. Thus the Lex user who wishes to absorb the entire input, without producing any
output, must provide rules to match everything. When Lex is being used with Yacc, this is the normal
situation. One may consider that actions are what is done instead of copying the input to the output;
12
thus, in general, a rule which merely copies can be omitted. Also, a character combination which is
omitted from the rules and which appears as input is likely to be printed on the output, thus calling
attention to the gap in the rules.
One of the simplest things that can be done is to ignore the input. Specifying a C null
statement, ; as an action causes this result. A frequent rule is
[ \t\n] ;
which causes the three spacing characters (blank, tab, and newline) to be ignored.
Another easy way to avoid writing actions is the action character |, which indicates that the
action for this rule is the action for the next rule. The previous example could also have been written
" " |
"\t" |
"\n" ;
with the same result, although in different style. The quotes around \n and \t are not required.
In more complex actions, the user will often want to know the actual text that matched some
expression like [a-z]+. Lex leaves this text in an external character array named yytext. Thus, to print
the name found, a rule like
[a-z]+ printf("%s", yytext);
will print the string in yytext. The C function printf accepts a format argument and data to be printed;
in this case, the format is ``print string'' (% indicating data conversion, and s indicating string type),
and the data are the characters in yytext. So this just places the matched string on the output. This
action is so common that it may be written as ECHO:
[a-z]+ ECHO;
is the same as the above. Since the default action is just to print the characters found, one might ask
why give a rule, like this one, which merely specifies the default action? Such rules are often
required to avoid matching some other rule which is not desired. For example, if there is a rule
which matches read it will normally match the instances of read contained in bread or readjust; to
avoid this, a rule of the form [a-z]+ is needed. This is explained further below.
Sometimes it is more convenient to know the end of what has been found; hence Lex also
provides a count yyleng of the number of characters matched. To count both the number of words
and the number of characters in words in the input, the user might write [a-zA-Z]+ {words++; chars
+= yyleng;} which accumulates in chars the number of characters in the words recognized. The last
character in the string matched can be accessed by yytext[yyleng-1]
13
1) Any line which is not part of a Lex rule or action which begins with a blank or tab is copied into
the Lex generated program. Such source input prior to the first %% delimiter will be external to any
function in the code; if it appears immediately after the first %%, it appears in an appropriate place
for declarations in the function written by Lex which contains the actions. This material must look
like program fragments, and should precede the first Lex rule. As a side effect of the above, lines
which begin with a blank or tab, and which contain a comment, are passed through to the generated
program. This can be used to include comments in either the Lex source or the generated code. The
comments should follow the host language convention.
2) Anything included between lines containing only %{ and %} is copied out as above. The
delimiters are discarded. This format permits entering text like preprocessor statements that must
begin in column 1, or copying lines that do not look like programs.
3) Anything after the third %% delimiter, regardless of formats, etc., is copied out after the Lex
output.
6. Usage.
There are two steps in compiling a Lex source program. First, the Lex source must be turned
into a generated program in the host general purpose language. Then this program must be compiled
and loaded, usually with a library of Lex subroutines. The generated program is on a file named
lex.yy.c. The I/O library is defined in terms of the C standard library [6].
The C programs generated by Lex are slightly different on OS/370, because the OS compiler
is less powerful than the UNIX or GCOS compilers, and does less at compile time. C programs
generated on GCOS and UNIX are the same.
UNIX. The library is accessed by the loader flag -ll. So an appropriate set of commands is lex source
cc lex.yy.c -ll. The resulting program is placed on the usual file a.out for later execution. To use Lex
with Yacc see below. Although the default Lex I/O routines use the C standard library, the Lex
automata themselves do not do so; if private versions of input, output and unput are given, the
library can be avoided.
7. Character Set.
The programs generated by Lex handle character I/O only through the routines input, output,
and unput. Thus the character representation provided in these routines is accepted by Lex and
employed to return values in yytext. For internal use a character is represented as a small integer
which, if the standard library is used, has a value equal to the integer value of the bit pattern
representing the character on the host computer. Normally, the letter a is represented as the same
form as the character constant 'a'. If this interpretation is changed, by providing I/O routines which
translate the characters, Lex must be told about it, by giving a translation table. This table must be in
the definitions section, and must be bracketed by lines containing only ``%T''. The table contains
lines of the form
{integer} {character string}
which indicate the value associated with each character. Thus the next example
%T
1 Aa
2 Bb
14
...
26 Zz
27 \n
28 +
29 -
30 0
31 1
...
39 9
%T
Sample character table:
maps the lower and upper case letters together into the integers 1 through 26, newline into 27, + and
- into 28 and 29, and the digits into 30 through 39. Note the escape for newline. If a table is supplied,
every character that is to appear either in the rules or in any valid input must be included in the table.
No character may be assigned the number 0, and no character may be assigned a bigger number than
the size of the hardware character set.
%{ code%}
4) Start conditions, given in the form
%S name1 name2 ...
5) Character set tables, in the form
%T
number space character-string
...
%T
6) Changes to internal array sizes, in the form
%x nnn
where nnn is a decimal integer representing an array size and x selects the parameter as follows:
Letter Parameter
p positions
n states
15
e tree nodes
a transitions
k packed character classes
o output array size
Lines in the rules section have the form ``expression action'' where the action may be continued on
succeeding lines by using braces to delimit it.
AIM:
To write a LEX program to implement Lexical analyser.
ALGORITHM:
Step 1: Start the program
Step 2 : Using %{ and %} declare the flag comment=0
Step 3 : Specify the coordinates
Step 4 : In the main function, get the regular expression through a C program as run time
arguments
Step 5 : Open the given program file with C exe and print the identical tokens in the program
Step 6 : Stop the program
PROGRAM
16
%{
/* LEX lexical analyzer: classifies tokens of a C source file read on stdin. */
#include <stdio.h>
%}

identifier [a-zA-Z][a-zA-Z0-9]*

%%
#.*              printf("\n%s is PREPROCESSOR DIRECTIVE\n", yytext);
int    |
float  |
double |
char   |
for    |
if               printf("%s is a keyword\n", yytext);
{identifier}\(   printf("\n\n FUNCTION CALL\n %s", yytext);
\{               printf("BLOCK BEGINS\n");
\}               printf("BLOCK ENDS\n");
"=="   |
">="   |
"<="   |
\<     |
\>               printf("%s is a RELATIONAL OPERATOR\n", yytext);
=                printf("%s is a ASSIGNMENT OPERATOR\n", yytext);
[0-9]+           printf("%s is NUMBER\n", yytext);
\(               { ECHO; printf("\n"); }
\)               { ECHO; printf("\n"); }
"++"             printf("%s is a INCREMENTAL OPERATOR\n", yytext);
\+     |
\-     |
\*               printf("%s is a ARITHMETIC OPERATOR \n", yytext);
\;               { ECHO; printf("\n"); }
%%
int main(void)
{
    yylex();
    return 0;
}
int yywrap(void)
{
    return 1;
}
SAMPLE C PROGRAM
/* Sample input program (samp.c) fed to the LEX analyzer in the run below. */
#include <stdio.h>
int main()
{
    int a = 16;
    printf("The value of a is%d ", a);
    return 0;
}
OUTPUT
[user@localhost ~]$ lex lex2.l
[user@localhost ~]$ cc lex.yy.c
[user@localhost ~]$ ./a.out <samp.c
RESULT:
Thus the LEX program to implement Lexical analyser is executed and verified successfully.
AIM:
To study about the basics of YACC Compiler
Computer program input generally has some structure; in fact, every computer program that
does input can be thought of as defining an ``input language'' which it accepts. An input language
may be as complex as a programming language, or as simple as a sequence of numbers.
Unfortunately, usual input facilities are limited, difficult to use, and often are lax about checking
their inputs for validity.
Yacc provides a general tool for describing the input to a computer program. The Yacc user
specifies the structures of his input, together with code to be invoked as each such structure is
recognized. Yacc turns such a specification into a subroutine that handles the input process;
18
frequently, it is convenient and appropriate to have most of the flow of control in the user's
application handled by this subroutine.
The input subroutine produced by Yacc calls a user-supplied routine to return the next basic
input item. Thus, the user can specify his input in terms of individual input characters or in terms of
higher level constructs such as names and numbers. The user-supplied routine may also handle
idiomatic features such as comment and continuation conventions, which typically defy easy
grammatical specification.
Yacc is written in portable C. The class of specifications accepted is a very general one:
LALR(1) grammars with disambiguating rules.
In addition to compilers for C, APL, Pascal, RATFOR, etc., Yacc has also been used for less
conventional languages, including a phototypesetter language, several desk calculator languages, a
document retrieval system, and a Fortran debugging system.
1. Introduction
Yacc provides a general tool for imposing structure on the input to a computer program. The
Yacc user prepares a specification of the input process; this includes rules describing the input
structure, code to be invoked when these rules are recognized, and a low-level routine to do the basic
input. Yacc then generates a function to control the input process. This function, called a parser, calls
the user-supplied low-level input routine (the lexical analyzer) to pick up the basic items (called
tokens) from the input stream. These tokens are organized according to the input structure rules,
called grammar rules; when one of these rules has been recognized, then user code supplied for this
rule, an action, is invoked; actions have the ability to return values and make use of the values of
other actions.
Yacc is written in a portable dialect of C[1] and the actions, and output subroutine, are in C as well.
Moreover, many of the syntactic conventions of Yacc follow C.
The heart of the input specification is a collection of grammar rules. Each rule describes an
allowable structure and gives it a name. For example, one grammar rule might be
An important part of the input process is carried out by the lexical analyzer. This user routine reads
the input stream, recognizing the lower level structures, and communicates these tokens to the
parser. For historical reasons, a structure recognized by the lexical analyzer is called a terminal
symbol, while the structure recognized by the parser is called a nonterminal symbol. To avoid
confusion, terminal symbols will usually be referred to as tokens.
19
There is considerable leeway in deciding whether to recognize structures using the lexical analyzer
or grammar rules. For example, the rules
. . .
Literal characters such as ``,'' must also be passed through the lexical analyzer, and are also
considered tokens.
Specification files are very flexible. It is relatively easy to add to the above example the rule
The input being read may not conform to the specifications. These input errors are detected
as early as is theoretically possible with a left-to-right scan; thus, not only is the chance of reading
and computing with bad input data substantially reduced, but the bad data can usually be quickly
found. Error handling, provided as part of the input specifications, permits the reentry of bad data, or
the continuation of the input process after skipping over the bad data.
In some cases, Yacc fails to produce a parser when given a set of specifications. For example,
the specifications may be self contradictory, or they may require a more powerful recognition
mechanism than that available to Yacc. The former cases represent design errors; the latter cases can
often be corrected by making the lexical analyzer more powerful, or by rewriting some of the
grammar rules. While Yacc cannot handle all possible specifications, its power compares favorably
with similar systems;
2. Basic Specifications
Names refer to either tokens or nonterminal symbols. Yacc requires token names to be
declared as such. In addition, for reasons discussed in Section 3, it is often desirable to include the
lexical analyzer as part of the specification file; it may be useful to include other programs as well.
Thus, every specification file consists of three sections: the declarations, (grammar) rules, and
programs. The sections are separated by double percent ``%%'' marks. (The percent ``%'' is generally
used in Yacc specifications as an escape character.)
20
In other words, a full specification file looks like
declarations
%%
rules
%%
programs
The declaration section may be empty. Moreover, if the programs section is omitted, the second %%
mark may be omitted also;
%%
rules
Blanks, tabs, and newlines are ignored except that they may not appear in names or multi-character
reserved symbols. Comments may appear wherever a name is legal; they are enclosed in /* . . . */, as
in C and PL/I.
The rules section is made up of one or more grammar rules. A grammar rule has the form:
A : BODY ;
A represents a nonterminal name, and BODY represents a sequence of zero or more names and
literals. The colon and the semicolon are Yacc punctuation.
Names may be of arbitrary length, and may be made up of letters, dot ``.'', underscore ``_'', and non-
initial digits. Upper and lower case letters are distinct. The names used in the body of a grammar rule
may represent tokens or nonterminal symbols.
A literal consists of a character enclosed in single quotes ``'''. As in C, the backslash ``\'' is an escape
character within literals, and all the C escapes are recognized. Thus
'\n' newline
'\r' return
'\'' single quote ``'''
'\\' backslash ``\''
'\t' tab
'\b' backspace
'\f' form feed
'\xxx' ``xxx'' in octal
For a number of technical reasons, the NUL character ('\0' or 0) should never be used in grammar
rules.
If there are several grammar rules with the same left hand side, the vertical bar ``|'' can be used to
avoid rewriting the left hand side. In addition, the semicolon at the end of a rule can be dropped
before a vertical bar. Thus the grammar rules
A : B C D ;
A : E F ;
A : G ;
can be given to Yacc as
21
A : B C D
| E F
| G
;
It is not necessary that all grammar rules with the same left side appear together in the grammar rules
section, although it makes the input much more readable, and easier to change.
If a nonterminal symbol matches the empty string, this can be indicated in the obvious way:
empty : ;
Names representing tokens must be declared; this is most simply done by writing
Of all the nonterminal symbols, one, called the start symbol, has particular importance. The parser is
designed to recognize the start symbol; thus, this symbol represents the largest, most general
structure described by the grammar rules. By default, the start symbol is taken to be the left hand
side of the first grammar rule in the rules section. It is possible, and in fact desirable, to declare the
start symbol explicitly in the declarations section using the %start keyword:
%start symbol
The end of the input to the parser is signaled by a special token, called the endmarker. If the tokens
up to, but not including, the endmarker form a structure which matches the start symbol, the parser
function returns to its caller after the endmarker is seen; it accepts the input. If the endmarker is seen
in any other context, it is an error.
It is the job of the user-supplied lexical analyzer to return the endmarker when appropriate; see
section 3, below. Usually the endmarker represents some reasonably obvious I/O status, such as
``end-of-file'' or ``end-of-record''.
3: Lexical Analysis
The user must supply a lexical analyzer to read the input stream and communicate tokens (with
values, if desired) to the parser. The lexical analyzer is an integer-valued function called yylex. The
function returns an integer, the token number, representing the kind of token read. If there is a value
associated with that token, it should be assigned to the external variable yylval.
The parser and the lexical analyzer must agree on these token numbers in order for communication
between them to take place. The numbers may be chosen by Yacc, or chosen by the user. In either
case, the ``# define'' mechanism of C is used to allow the lexical analyzer to return these numbers
symbolically. For example, suppose that the token name DIGIT has been defined in the declarations
section of the Yacc specification file. The relevant portion of the lexical analyzer might look like:
yylex(){
22
extern int yylval;
int c;
. . .
c = getchar();
. . .
switch( c ) {
. . .
case '0':
case '1':
. . .
case '9':
yylval = c-'0';
return( DIGIT );
. . .
}
. . .
The intent is to return a token number of DIGIT, and a value equal to the numerical value of the
digit. Provided that the lexical analyzer code is placed in the programs section of the specification
file, the identifier DIGIT will be defined as the token number associated with the token DIGIT.
This mechanism leads to clear, easily modified lexical analyzers; the only pitfall is the need to avoid
using any token names in the grammar that are reserved or significant in C or the parser; for
example, the use of token names if or while will almost certainly cause severe difficulties when the
lexical analyzer is compiled. The token name error is reserved for error handling, and should not be
used naively (see Section 7).
Yacc turns the specification file into a C program, which parses the input according to the
specification given. The algorithm used to go from the specification to the parser is complex, and
will not be discussed here (see the references for more information). The parser itself, however, is
relatively simple, and understanding how it works, while not strictly necessary, will nevertheless
make treatment of error recovery and ambiguities much more comprehensible.
The parser produced by Yacc consists of a finite state machine with a stack. The parser is also
capable of reading and remembering the next input token (called the lookahead token). The current
state is always the one on the top of the stack. The states of the finite state machine are given small
integer labels; initially, the machine is in state 0, the stack contains only state 0, and no lookahead
token has been read.
The machine has only four actions available to it, called shift, reduce, accept, and error. A
move of the parser is done as follows:
1. Based on its current state, the parser decides whether it needs a lookahead token to decide what
action should be done; if it needs one, and does not have one, it calls yylex to obtain the next token.
2. Using the current state, and the lookahead token if needed, the parser decides on its next action,
and carries it out. This may result in states being pushed onto the stack, or popped off of the stack,
and in the lookahead token being processed or left alone.
23
The shift action is the most common action the parser takes. Whenever a shift action is taken, there
is always a lookahead token. For example, in state 56 there may be an action:
IF shift 34
which says, in state 56, if the lookahead token is IF, the current state (56) is pushed down on the
stack, and state 34 becomes the current state (on the top of the stack). The lookahead token is
cleared.
The reduce action keeps the stack from growing without bounds. Reduce actions are appropriate
when the parser has seen the right hand side of a grammar rule, and is prepared to announce that it
has seen an instance of the rule, replacing the right hand side by the left hand side. It may be
necessary to consult the lookahead token to decide whether to reduce, but usually it is not; in fact,
the default action (represented by a ``.'') is often a reduce action.
Reduce actions are associated with individual grammar rules. Grammar rules are also given small
integer numbers, leading to some confusion. The action
. reduce 18
refers to grammar rule 18, while the action
IF shift 34
refers to state 34.
5. Error Handling
Error handling is an extremely difficult area, and many of the problems are semantic ones.
When an error is found, for example, it may be necessary to reclaim parse tree storage, delete or alter
symbol table entries, and, typically, set switches to avoid generating any further output.
It is seldom acceptable to stop all processing when an error is found; it is more useful to
continue scanning the input to find further syntax errors. This leads to the problem of getting the
parser ``restarted'' after an error. A general class of algorithms to do this involves discarding a
number of tokens from the input string, and attempting to adjust the parser so that input can
continue.
To allow the user some control over this process, Yacc provides a simple, but reasonably general,
feature. The token name ``error'' is reserved for error handling. This name can be used in grammar
rules; in effect, it suggests places where errors are expected, and recovery might take place. The
parser pops its stack until it enters a state where the token ``error'' is legal. It then behaves as if the
token ``error'' were the current lookahead token, and performs the action encountered. The
lookahead token is then reset to the token that caused the error. If no special error rules have been
specified, the processing halts when an error is detected.
In order to prevent a cascade of error messages, the parser, after detecting an error, remains in error
state until three tokens have been successfully read and shifted. If an error is detected when the
parser is already in error state, no message is given, and the input token is quietly deleted.
24
6. Reserved Words
Some programming languages permit the user to use words like ``if'', which are normally
reserved, as label or variable names, provided that such use does not conflict with the legal use of
these names in the programming language. This is extremely hard to do in the framework of Yacc; it
is difficult to pass information to the lexical analyzer telling it ``this instance of `if' is a keyword, and
that instance is a variable''.
7. A Simple Example
This example gives the complete Yacc specification for a small desk calculator; the desk
calculator has 26 registers, labeled ``a'' through ``z'', and accepts arithmetic expressions made up of
the operators +, -, *, /, % (mod operator), & (bitwise and), | (bitwise or), and assignment. If an
expression at the top level is an assignment, the value is not printed; otherwise it is. As in C, an
integer that begins with 0 (zero) is assumed to be octal; otherwise, it is assumed to be decimal.
As an example of a Yacc specification, the desk calculator does a reasonable job of showing
how precedences and ambiguities are used, and demonstrating simple error recovery. The major
oversimplifications are that the lexical analysis phase is much simpler than for most applications,
and the output is produced immediately, line by line. Note the way that decimal and octal integers
are read in by the grammar rules; This job is probably better done by the lexical analyzer.
RESULT:
Ex 6 (a) YACC program to recognize a valid arithmetic expression that uses operators +, -, * &
/
Aim:
To write a YACC Program to recognize a valid arithmetic expression that uses operators +, -,
* and /.
Algorithm:
25
Step 1:Start the program
Step 2: Using %{ and %} declare the variables yylval
Step 3: Specify the coordinates
Step 4: In the main function, get the expression through a C program as run time arguments
Step 5: Open the given programs file with C exe and print the string is valid or not
Step 6: Stop the program
LEX
%{
/* Ex 6(a) scanner: returns NUMBER for digit runs (value stored in
   yylval), ID for letter runs, skips tabs, ends the parse at newline
   (returns 0 = endmarker), and passes any other character -- the
   operators and parentheses -- through to the parser unchanged.
   y.tab.h supplies the token codes generated by `yacc -d ex4a.y`. */
#include"y.tab.h"
extern yylval;
%}
%%
[0-9]+ {yylval=atoi(yytext); return NUMBER;}
[a-zA-Z]+ {return ID;}
[\t]+ ;
\n {return 0;}
. {return yytext[0];}
%%
YACC
%{
/* Ex 6(a) parser: accepts arithmetic expressions over NUMBER and ID
   tokens built with + - * /, unary minus and parentheses.
   Precedence/associativity is declared below; * and / bind tighter
   than + and - because they are declared later. */
#include<stdio.h>
%}
%token NUMBER ID
%left '+' '-'
%left '*' '/'
%%
expr: expr '+' expr
|expr '-' expr
|expr '*' expr
|expr '/' expr
|'-'NUMBER
|'-'ID
|'('expr')'
|NUMBER
|ID
;
%%
/* Runs the parser; yyparse() returns normally only when the whole
   line formed a valid expression (the lexer returns 0 at '\n'). */
main()
{
printf("Enter the expression\n");
yyparse();
printf("\nExpression is valid\n");
exit(0);
}
/* Called by yyparse() on a syntax error; reports and terminates. */
int yyerror(char *s)
{
printf("\nExpression is invalid");
exit(0);
}
OUTPUT
$lex ex4a.l
$yacc –d ex4a.y
$cc lex.yy.c y.tab.c –ll
$./a.out
Enter the expression
(a*b+5)
Expression is valid
$./a.out
Enter the expression
(a+6-)
Expression is invalid
RESULT:
Thus the YACC program to recognize a valid arithmetic expression that uses operators +, -, *
and / was written, executed and verified.
Ex 6.b) Program to recognize a valid variable which starts with a letter followed by any number of
letters or digits.
Aim:
To write a YACC Program to recognize a valid variable which starts with a letter followed by any
number of letters or digits.
Algorithm:
Program:
LEX
27
%{
/* Ex 6(b) scanner: returns DIGIT for digit runs (value in yylval) and
   LETTER for letter runs; newline ends the input (returns 0), and any
   other character is passed through to the parser, where it will
   trigger a syntax error. */
#include"y.tab.h"
extern yylval;
%}
%%
[0-9]+ {yylval=atoi(yytext); return DIGIT;}
[a-zA-Z]+ {return LETTER;}
[\t] ;
\n return 0;
. {return yytext[0];}
%%
%%
YACC
%{
/* Ex 6(b) parser: a valid variable is a LETTER optionally followed by
   any sequence of LETTERs and DIGITs (so it cannot start with a
   digit). */
#include<stdio.h>
%}
%token LETTER DIGIT
%%
variable: LETTER|LETTER rest
;
rest: LETTER rest
|DIGIT rest
|LETTER
|DIGIT
;
%%
/* Accepts when yyparse() returns, i.e. the whole input matched. */
main()
{
yyparse();
printf("The string is a valid variable\n");
}
/* Called by yyparse() on a syntax error; reports and terminates. */
int yyerror(char *s)
{
printf("this is not a valid variable\n");
exit(0);
}
OUTPUT
$lex p4b.l
$yacc –d p4b.y
$cc lex.yy.c y.tab.c –ll
$./a.out
input34
The string is a valid variable
$./a.out
28
89file
This is not a valid variable
RESULT:
Thus the YACC program to recognize a valid variable which starts with a letter followed by any
number of letters or digits was written, executed and verified.
AIM:
To write semantic rules to the YACC program and implement a calculator that takes an
expression with digits + and * and computes and prints its values.
DESCRIPTION
In this programs two classical tools for compilers, Lex and Yacc are used to create a simple, desk-
calculator program that performs addition, subtraction, multiplication, and division operations.
LEX tool : Input to Lex is divided into three sections with %% dividing the sections. This is best
illustrated by example.
….definitions……..
%%
……rules……….
29
%%
……subroutines……….
YACC tool
%token symbol...symbol
Declare the given symbols as tokens (terminal symbols). These symbols are added as constant
constructors for the token concrete type.
%token <type>symbol...symbol
Declare the given symbols as tokens with an attached attribute of the given type.
%start symbol...symbol
Declare the given symbols as entry points for the grammar.
%type <type>symbol...symbol
%left symbol...symbol
%right symbol...symbol
%nonassocsymbol...symbol
Associate precedences and associativities to the given symbols. All symbols on the same line are given
the same precedence. They have higher precedence than symbols declared before in a %left, %right or
%nonassoc line. They have lower precedence than symbols declared after in a %left, %right or
%nonassoc line. The symbols are declared to associate to the left (%left), to the right (%right), or to be
non-associative (%nonassoc).
%% ……………….
……………………%%
In this program two classical tools for compilers are user, that are
o Lex: A Lexical Analyzer Generator
o Yacc: “Yet Another Compiler Compiler” (Parser Generator)
Lex creates programs that scan tokens one by one.
Yacc takes a grammar (sentence structure) and generates a parser.
In the first part of the program contains source code for Lex tool and the second part of the program
contains YACC tool which groups the tokens logically.
30
To create the desk calculator example program, do the following:
1. Process the yacc grammar file using the -d optional flag (which informs the yacc command to create a
file that defines the tokens used in addition to the C language source code):
yacc -d desk.yacc
2. Use the ls command to verify that the following files were created:
y.tab.c
The C language source file that the yacc command created for the parser
y.tab.h
A header file containing define statements for the tokens used by the parser
4. Use the ls command to verify that the following file was created:
lex.yy.c
The C language source file that the lex command created for the lexical analyzer
6. Use the ls command to verify that the following files were created:
y.tab.o
The object file for the y.tab.c source file
lex.yy.o
The object file for the lex.yy.c source file
a.out
31
OR
To move the program to a file with a more descriptive name, as in the following example, and run it,
type:
$ mv a.out calculate
$ calculate
ALGORITHM
YACC TOOL
%{
/* Desk-calculator parser: evaluates double-valued expressions built
   with + - * /, unary minus (via the UMINUS pseudo-token) and
   parentheses.  "NAME = expression" prints the variable name and its
   value; a bare expression prints just the value. */
#include<math.h>
#include<stdio.h>
%}
%union
{
double dval;
char vblname;
}
%token <vblname> NAME
%token <dval> NUMBER
%left '+' '-'
%left '*' '/'
%nonassoc UMINUS
%type <dval> expression
%%
statement: NAME '=' expression { printf("%c = %g \n",$1,$3); }
| expression { printf("= %g \n",$1); }
;
expression: expression '+' expression { $$ = $1 + $3; }
| expression '-' expression { $$ = $1 - $3; }
| expression '*' expression { $$ = $1 * $3; }
| expression '/' expression { if($3 == 0.0)
{
/* yyerror() below exits, so $$ is never used on this path */
yyerror("Divide by zero");
}
else
$$ = $1 / $3;
}
| '(' expression ')' { $$ = $2; }
| '-' expression %prec UMINUS { $$ = -$2; }
| NUMBER { $$ = $1; }
;
%%
main()
{
yyparse();
}
/* Error hook: was "intyyerror" -- the missing space defined a function
   named intyyerror and left yyerror unresolved at link time. */
int yyerror (char *s)
{
printf("%s\n",s);
exit(0);
}
OUTPUT
[root@localhost ~] # lex ex4c.l
[root@localhost ~] # yacc -d ex4c.y
[root@localhost ~] # cc lex.yy.c y.tab.c -ll
[root@localhost ~] #./a.out
((2+3) + (4+5))
= 14
RESULT
Thus the program using lex and yacc tool is developed and a calculator operation is achieved.
Ex No: 7 Convert the BNF rules into YACC form and write code to generate
abstract syntax tree.
AIM:
33
To convert BNF rules into YACC form and write code to generate abstract syntax tree.
ALGORITHM
identifier [a-zA-Z][_a-zA-Z0-9]*
number [0-9]+|([0-9]*\.[0-9]+)
%%
if return IF;
else return ELSE;
while return WHILE;
int |
char |
float return TYPE;
{identifier} {strcpy(yylval.var,yytext);
return VAR;}
{number} {strcpy(yylval.var,yytext);
return NUM;}
\< |
\> |
\>= |
\<= |
== {strcpy(yylval.var,yytext);
return RELOP;}
[ \t] ;
\n LineNo++;
. return yytext[0];
%%
YACC TOOL
34
<ex5.y>
%{
#include<string.h>
#include<stdio.h>
struct quad
{
char op[5];
char arg1[10];
char arg2[10];
char result[10];
}QUAD[30];
struct stack
{
int items[100];
int top;
}stk;
int Index=0,tIndex=0,StNo,Ind,tInd;
extern int LineNo;
%}
%union
{
char var[10];
}
%token <var> NUM VAR RELOP
%token MAIN IF ELSE WHILE TYPE
%%
CODE: BLOCK
| STATEMENT CODE
| STATEMENT
;
STATEMENT: DESCT ';'
| ASSIGNMENT ';'
| CONDST
35
| WHILEST
;
CONDST: IFST{
Ind=pop();
sprintf(QUAD[Ind].result,"%d",Index);
Ind=pop();
sprintf(QUAD[Ind].result,"%d",Index);
}
| IFST ELSEST
;
36
BLOCK {
strcpy(QUAD[Index].op,"GOTO");
strcpy(QUAD[Index].arg1,"");
strcpy(QUAD[Index].arg2,"");
strcpy(QUAD[Index].result,"-1");
push(Index);
Index++;
}
;
ELSEST: ELSE{
tInd=pop();
Ind=pop();
push(tInd);
sprintf(QUAD[Ind].result,"%d",Index);
}
BLOCK{
Ind=pop();
sprintf(QUAD[Ind].result,"%d",Index);
}
;
37
strcpy(QUAD[Index].result,"-1");
push(Index);
Index++;
}
;
%%
extern FILE *yyin;
/* Driver for the BNF-to-quadruple translator: if a file name is given
   on the command line it becomes the lexer's input (yyin), otherwise
   stdin is parsed; after yyparse() the generated quadruple table is
   printed.
   NOTE(review): the table-header printf literal appears to have been
   split across source lines by text extraction -- confirm against the
   original listing before compiling. */
int main(int argc,char *argv[])
{
FILE *fp;
int i;
if(argc>1)
{
fp=fopen(argv[1],"r");
if(!fp)
{
printf("\n File not found");
exit(0);
}
yyin=fp; /* route the lexer's input to the named file */
}
yyparse();
printf("\n\n\t\t ----------------------------""\n\t\t Pos Operator Arg1 Arg2 Result" "\n\t\t
--------------------");
for(i=0;i<Index;i++)
{
printf("\n\t\t %d\t %s\t %s\t %s\t
%s",i,QUAD[i].op,QUAD[i].arg1,QUAD[i].arg2,QUAD[i].result);
}
printf("\n\t\t -----------------------");
printf("\n\n");
return 0;
}
/* Pushes a quadruple index onto the global stack for later
   backpatching of jump targets.
   NOTE(review): stk.top relies on file-scope zero initialisation, so
   the first push lands at items[1] while pop() tests for -1 as the
   empty marker -- confirm the intended initial value of top against
   the full source. */
void push(int data)
{
stk.top++;
if(stk.top==100)
{
/* items[] holds 100 entries; index 100 would be out of bounds */
printf("\n Stack overflow\n");
exit(0);
}
stk.items[stk.top]=data;
}
/* Pops and returns the most recently saved quadruple index; the
   program terminates on underflow.  (A stray page number extracted
   into the middle of this function was removed.)
   NOTE(review): see push() -- the -1 empty marker assumes top is
   initialised to -1 somewhere outside the visible code. */
int pop()
{
int data;
if(stk.top==-1)
{
printf("\n Stack underflow\n");
exit(0);
}
data=stk.items[stk.top--];
return data;
}
void AddQuadruple(char op[5],char arg1[10],char arg2[10],char result[10])
{
strcpy(QUAD[Index].op,op);
strcpy(QUAD[Index].arg1,arg1);
strcpy(QUAD[Index].arg2,arg2);
sprintf(QUAD[Index].result,"t%d",tIndex++);
strcpy(result,QUAD[Index++].result);
}
/* yacc error hook: reports the current input line (LineNo is
   maintained by the lexer).  Declared with an explicit int return and
   a return value -- the original relied on implicit int and fell off
   the end without returning anything. */
int yyerror()
{ printf("\n Error on line no:%d",LineNo);
return 0;
}
SAMPLE OUTPUT:
39
$lex ex5.l
$yacc –d ex5.y
$gcc lex.yy.c y.tab.c –ll –lm
$./a.out <test.c
RESULT:
Thus the LEX and YACC programs were written and executed to convert BNF rules into YACC
form and write code to generate abstract syntax tree.
40
AIM:
To implement the type checking concept in C Language.
ALGORITHM:
Step-1: Read the variables.
Step-2: Check the size of these variables.
Step-3: On any assignment operation, if the size of the variable on the left hand side does not match with
the size of the result obtained in the right hand side, then type checker must pop the error statement.
Step-4: Else the assignment can be done successfully.
PROGRAM:
#include<stdio.h>
#include<conio.h>
/* Rudimentary size-based type check: the assignment b = a is allowed
   only when both operands occupy the same number of bytes.  With both
   declared int the sizes always match; uncommenting the float b line
   exercises the mismatch path. */
void main()
{ int a=5;
//float b=8.45;
int b;
clrscr();
if(sizeof(a)==sizeof(b))
{ printf("\n Type conversion possible");
b=a;
/* was "%f": printing an int with a double conversion specifier is
   undefined behaviour; %d matches b's type */
printf("\n %d",b);
}
else
printf("\n type conversion not possible");
getch();
}
OUTPUT:
Type conversion possible
RESULT:
Thus the simple type checker have been successfully implemented in C Language.
AIM:
41
To write a C program to implement Control flow and data flow analysis.
ALGORITHM:
STEP 1: Preprocess the input C file (also called the translation unit). This generates
the preprocessed translation unit.
STEP 2: Parse the preprocessed translation unit as an abstract syntax tree (AST).
STEP 3: Traverse the AST, creating a graph node n for each function declaration. Add (function
name, n) to a map.
STEP 4: Traverse the AST, building a graph of the control flow. Consider how you are going to
represent the following, special cases in the control flow graph:
Labelled statements
if/else
if not followed by else.
goto
switch
Fall-through cases and break within switch.
Loops such as do...while, while, and for.
break within a loop
continue within a loop
return
Regular function calls
Calling the target of a function pointer
End of a void function definition (no return)
End of int main() and int main(int, char**), which does not require return
exit
Intermediate values
STEP 5: Output the graph in DOT format DOT format.
PROGRAM:
#include <stddef.h>
#include <stdio.h>
#include <stdlib.h>
/* Global used by the control-flow demo. */
int g_a;
/* Initialises the demo's global state.  (A stray page number that the
   text extraction dropped between the signature and the body was
   removed.) */
void init()
{
g_a = 3;
}
/* Returns the constant 4; the control-flow demo calls it inside a
   loop condition to exercise a call edge in the graph. */
int return_4()
{
int value = 4;
return value;
}
/* Teardown hook for the control-flow demo; intentionally empty. */
void uninit()
{
/* nothing to release */
}
if (argc <= 1) {
usage(argv[0]);
}
if (argc > 2) {
printf("You only need to pass one argument.\n");
}
else {
init();
}
before_switch: j = 0;
switch_i: switch (i) {
case 3:
for(; j < 3; ++j)
printf(".");
case 17:
for(; j < 17; ++j)
printf(".");
if (i == 3 || i == 17)
printf("\n");
case -4:
printf("You picked one of my favorite numbers (17, 3, and -4)!\n");
break;
43
case -1:
printf("Cleaning up\n");
goto cleanup;
default:
printf("I don't like that number.\n");
}
j = 0;
do_loop_1: do {
if (j++ % 2 == 0)
continue;
if (j == 10)
break;
j = 10;
while (j > 0) {
if (4 == return_4())
break;
--j;
}
cleanup:
uninit();
return EXIT_SUCCESS;
}
44
RESULT:
Thus the C Program to implement control flow and data flow analysis has been executed and
verified.
ALGORITHM:
INSERTION
PUSH(item)
1. If (item = max of stack)
Print “overflow”
Return
2. top = top + 1
3. stack[top] = item
4. Return
DELETION
POP(item)
1. If (top = - 1)
Print “underflow”
Return
2. Item = stack[top]
3. top = top - 1
4. Return
DISPLAY
1. If top = - 1
Print “underflow”
2. repeat step 3 for i = top to i >= 0
3. Print stack[i]
4. Return
PROGRAM:
#include<stdio.h>
#include<conio.h>
#define MAXSIZE 10
void push();
int pop();
void traverse();
int stack[MAXSIZE];
int Top=-1;
/* Menu loop for the array-stack demo: reads a choice, dispatches to
   push/pop/traverse, and repeats while the user answers Y/y.
   (A stray page number extracted into the declarations was removed.) */
void main()
{
int choice;
char ch;
do
{
clrscr();
printf("\n1. PUSH ");
printf("\n2. POP ");
printf("\n3. TRAVERSE ");
printf("\nEnter your choice");
scanf("%d",&choice);
switch(choice)
{
case 1: push();
break;
case 2: printf("\nThe deleted element is %d",pop());
break;
case 3: traverse();
break;
default: printf("\nYou Entered Wrong Choice");
}
printf("\nDo You Wish To Continue (Y/N)");
fflush(stdin); /* Turbo C idiom; undefined behaviour in standard C */
/* leading space skips the newline left by scanf("%d"), so the prompt
   works even where fflush(stdin) is a no-op */
scanf(" %c",&ch);
}
while(ch=='Y' || ch=='y');
}
/* Reads an integer from the user and pushes it onto the global array
   stack; when the stack is full the program reports and terminates
   (original contract).  (A stray page number extracted into the
   full-stack branch was removed.) */
void push()
{
int item;
if(Top == MAXSIZE - 1)
{
printf("\nThe Stack Is Full");
getch();
exit(0);
}
else
{
printf("Enter the element to be inserted");
scanf("%d",&item);
Top= Top+1;
stack[Top] = item;
}
}
/* Removes and returns the top element of the global array stack; the
   program reports and terminates on underflow (original contract). */
int pop()
{
int value;
if(Top == -1)
{
printf("The stack is Empty");
getch();
exit(0);
}
value = stack[Top];
Top = Top - 1;
return value;
}
/* Prints the stack contents from top to bottom, one label per element
   (this matches the sample output, where the label precedes every
   value); the program terminates when the stack is empty (original
   contract).  (A stray page number extracted into the empty-stack
   branch was removed.) */
void traverse()
{
int i;
if(Top == -1)
{
printf("The Stack is Empty");
getch();
exit(0);
}
else
{
for(i=Top;i>=0;i--)
{
printf("Traverse the element");
printf("\n%d",stack[i]);
}
}
}
SAMPLE INPUT AND OUTPUT:
1. PUSH
2. POP
3. TRAVERSE
Enter your choice 3
Traverse the element
4Traverse the element
3Traverse the element
2Traverse the element
1
Do You Wish To Continue (Y/N)
RESULT:
Thus the C Program for implementing stack using array is executed and the required output is
obtained.
49
To write a C program to implement stack using linked list.
ALGORITHM:
INSERTION: PUSH( )
1. t = newnode( )
2. Enter info to be inserted
3. Read n
4. tinfo = n
5. tnext = top
6. top = t
7. Return
DELETION: POP ( )
1. If (top = NULL)
Print “ underflow”
Return
2. x = top
3. top = top next
4. delnode(x)
5. Return
PROGRAM:
#include<stdio.h>
#include<conio.h>
struct stack
{
int no;
struct stack *next;
}
*start=NULL;
typedef struct stack st;
void push();
int pop();
void display();
/* Menu loop for the linked-list stack demo: reads a choice, dispatches
   to push/pop/display, and repeats while the user answers Y/y.
   (A stray page number extracted into the loop body was removed.) */
void main()
{
char ch;
int choice,item;
do
{
clrscr();
printf("\n 1: push");
printf("\n 2: pop");
printf("\n 3: display");
printf("\n Enter your choice");
scanf("%d",&choice);
switch (choice)
{
case 1: push();
break;
case 2: item=pop();
printf("The delete element in %d",item);
break;
case 3: display();
break;
default : printf("\n Wrong choice");
};
printf("\n do you want to continue(Y/N)");
fflush(stdin); /* Turbo C idiom; undefined behaviour in standard C */
/* leading space skips the pending newline, portable across libcs */
scanf(" %c",&ch);
}
while (ch=='Y'||ch=='y');
}
/* Reads an integer from the user and pushes it onto the linked stack
   by making the new node the list head.  The malloc result is now
   checked -- the original dereferenced it unconditionally. */
void push()
{
st *node;
node=(st *)malloc(sizeof(st));
if(node==NULL)
{
printf("\n Memory allocation failed");
return;
}
printf("\n Enter the number to be insert");
scanf("%d",&node->no);
node->next=start;
start=node;
}
51
/* Removes the head node of the linked stack and returns its value;
   the program reports and terminates when the stack is empty
   (original contract).  Two defects fixed: the original returned
   temp->no AFTER free(temp) (use-after-free), and called exit() with
   no argument. */
int pop()
{
st *temp;
int value;
temp=start;
if(start==NULL)
{
printf("stack is already empty");
getch();
exit(0);
}
value=temp->no; /* read the payload before freeing the node */
start=start->next;
free(temp);
return value;
}
/* Prints every element of the linked stack from head (top) to tail.
   The original dereferenced temp->next while temp could be NULL,
   crashing on an empty stack; an explicit empty-list guard and a
   single-pass loop (same output for non-empty lists) fix that. */
void display()
{
st *temp;
temp=start;
if(temp==NULL)
{
printf("\nStack is empty");
return;
}
while(temp!=NULL)
{
printf("\nno=%d",temp->no);
temp=temp->next;
}
}
RESULT:
Thus the C Program for implementing stack using linked list is executed and the required output
is obtained.
Ex No 10c: HEAP STORAGE ALLOCATION STRATEGY
AIM:
53
To write a C program to implement heap data structure.
ALGORITHM:
PROGRAM:
#include"stdio.h"
#include"conio.h"
#include"stdlib.h"
#define TRUE 1
#define FALSE 0
typedef struct Heap
{
int data;
struct Heap *next;
}node;
node *create();
void main()
{/
*local declarations*/
int choice,val;
char ans;
node *head;
void display(node *);
node *search(node *,int);
node *insert(node *);
void dele(node **);
head=NULL;
do
{ clrscr();
printf(“\n Program to perform various operations on heap
using dynamic memory management”);
printf (“\n1.Create”):
printf (“\n2.Display”):
54
printf (“\n3.Insert an element in a list”);
printf (“\n4.Delete an element from list”);
printf (“\n5.Quit”);
printf (“\n Enter Your Choice(1-5)”);
scanf(“%d,&choice”);
switch(choice)
{ case 1:head=create();
break;
case 2:display(head);
break;
case 3:head=insert(head);
break;
case 4:dele(&head);
break;
case 5:exit(0);
default:clrscr();
printf(“Invalid Choice,Try again”);
getch();
}
}while(choice!=5);
}
/*The create function creates a list of allocated node
*Input:None
*Output:Retyurns a pointer to head of list
*Parameter Passing Methopd:Node
**/
node *create()
{
node *temp,*new,* head;
int val,flag;
char ans=’y’;
node *get_node();
55
temp=NULL;
flag=TRUE;
/*flag to indicate whether a new node is created for the first time or not*/
do
{
printf(“\n Enter the Element”);
scanf(“%d”,&val);
/*allocate new node*/
new =get_node();
if(new==NULL)
printf(“\n Memory is not allocated”);
new-> data=val;
if (flag==TRUE)/* Executed only for the first time*/
{
head=new;
temp=head; /*head is the first node in the heap*/
flag=FALSE;
} else
{/
*temp keeps track of the most recently created node*/
temp->next=new;
temp=new;
}
printf(\nDo you want to enter more elements?(y/n)”);
ans=getch();
}while(ans= = ‘y’);
printf(“\nThe list is created”);
getch();
clrscr();
return head;
}
node *get_node()
56
{
node *temp;
temp=(node*)malloc(sizeof(node));
//using the mem. Allocation function
temp->next=NULL;
return temp;
}
/*
*The display function
*Input:Address of the first node of the list
*Output:Displays the list of allocated nodes
*Parameter Passing Method : call by value
*Called by main
**/
/* Prints the list as "d1->d2->...->NULL", or a message when the list
   is empty.  Reconstructed from a corrupted extraction: typographic
   quotes were restored to ASCII, "temp= =NULL" to "temp==NULL", and
   "print" to "printf"; a stray page number was removed. */
void display(node *head)
{
node *temp;
temp=head;
if(temp==NULL)
{
printf("\n The list is empty\n");
getch();
clrscr();
return;
}
while(temp!=NULL)
{
printf("%d->",temp->data);
temp=temp->next;
}
printf("NULL");
getch();
clrscr();
}
/*
*The search function
*Input: Address of the starting node and the element which is *to be searched
*Output:Searches for the element in list
*If found returns pointer to that node Otherwise NULL
*Parameter passing Method:call by value
*Called by:main
*Calls:None
**/
node *search(node *head,int key)
{
node*temp;
int found;
temp=head;
if (temp= =Null)
{
printf(“The linked list is empty\n”);
getch();
clrscr();
return NULL;
}
found=FALSE;
While(temp!= NULL && found= =FALSE)
{i
f(temp->data != key)
temp = temp->next;
else
found = True;
}
if(found == TRUE)
58
{
printf(“\n The Elements is present in the list”\n);
getch();
return temp;
} else
printf(“\n The Element is not present in the list\n”);
getch();
return NULL;
}
/*
*The insert function
*Input: Address of starting node of the list
*Output:inserts element into the list
*Parameter Passing Methods: call by value
*Called by : main
*Calls : search()
**/
node *insert(node *head)
{i
nt choice;
node *insert_head(node*);
void insert_after(node*);
void insert_last(node*);
printf(“\n”1.Insert a node as a head node”);
printf(“\n”1.Insert a node as a last node”);
printf(“\n”1.Insert a node as at the intermediate position in the list ”);
printf(“\n”1.Enter your choice for insertion of node ”);
scanf(“%d”,&choice);
switch(choice)
{ case 1:head =
insert_head(head);
break;
59
case2:insert_last(head);
break;
case2:insert_after (head);
break;
} return head;
}
/*Insertion of node at first position*/
node *insert_head(node*head)
{
node *New,*temp;
New = get_node();
printf (“\n Enter the element which you want to insert ”);
scanf(“%d”,&New->data);
if(head == NULL)
head = New;
else
{t
emp=head;
New->next = temp;
head= New;
} return head;
}
/*Insertion of node at last position*/
void insert_last(node *head)
{
node *New,*temp;
New = get_node();
printf (“\n Enter the element which you want to insert ”);
scanf(“%d”,&New->data);
if(head == NULL)
{
head = New;
60
} else
{t
emp=head;
while(temp->next!=NULL)
temp=temp->next;
temp->next=New;
New->next=NULL;
}}
/*Insertion of node at intermediate position*/
void insert_after(node *head)
{i
nt key;
node *New,*temp;
New = get_node();
printf(“Enter the element after which you want to insert ”);
scanf(“%d”,&key);
temp=head;
do
{i
f(temp->data==key)
{
printf (“Enter element which you want to insert ”);
scanf(“%d”,&New->data);
New->next=temp->next;
temp->next=New;
return;
} else
temp=temp->next;
}while(temp!=NULL);
}
/*
*The get prev function
61
*Input: Address of starting node and the elemnt to be *searched
*Output:looks for the element in the list
*If found returns pointer to the previous node otherwise NULL
*Parameter Passing Methods: call by value
*Called by : dele()
*Calls : none
**/
node *get_prev(node *head,int val)
{
node*temp.*prev;
int flag;
temp = head;
if(temp == NULL)
return NULL;
flag = FALSE;
prev = NULL;
while(temp!=NULL && !flag)
{i
f(temp->data!=val)
{
prev = temp;
temp = temp->next;
} else
flag = TRUE;
}
if(flag) /*if Flag is true*/
return prev;
else
return NULL;
}
/*
*The get prev function
62
*Input: Address of starting node and the elemnt to be *searched
*Output:looks for the element in the list
*If found returns pointer to the previous node otherwise NULL
*Parameter Passing Methods: call by value
*Called by : dele()
*Calls : none
**/
void dele(node **head)
{i
nt key;
node *New,*temp;
temp=*head;
if (temp== NULL)
{
printf (“\n The list is empty\n ”);
getch();
clrscr();
return;
}
clrscr();
printf("\nENTER the Element you want to delete:");
scanf("%d".&key);
temp= search(*head,key);
if(temp !=NULL)
{
prev = get_prev(*head,key);
if(prev != NULL)
{
prev ->next = temp-> next;
free(temp);
} else
{
63
*head = temp->next;
free(temp); // using the mem. Dellocation function
}
printf(“\n”The Element is deleted\n”);
getch();
clrscr();
}}
SAMPLE INPUT AND OUTPUT:
Program to perform various operations on heap using Dynamic memory management.
1. Create
2. Display
3. Insert an element in a list
4. Delete an element from list
5. Quit
Enter your choice(1-5) 1
Enter the element: 10
Do you want to enter more elements? (y/n) y
Enter the element:20
Do you want to enter more elements?(y/n)y
Enter the element:30
Do you want to enter more elements?(y/n)n
The List is created
Program to perform various operations on Heap using Dynamic memory management.
1. Create
2. Display
3. Insert an element in a list
4. Delete an element from list
5. Quit
Enter your choice(1-5) 4
Enter the element you want to delete: 20
The element is present in the list
The element is deleted
64
Program to perform various operations on Heap using Dynamic memory management.
1. Create
2. Display
3. Insert an element in a list
4. Delete an element from list
5. Quit
Enter your choice(1-5) 2
10-> 30-> NULL
Result:
Thus the C Program to implement data structure has been executed and verified.
Ex No 11 CONSTRUCTION OF DAG
65
AIM
To write a program for optimization of the given input code using constant folding
technique.
ALGORITHM
Case 2: else if Node's left child's label >= 1 && Node's right child's label == 0
{
gencode(Node's left child);
print "op Node's right child's data,R[top]"
}
Case 3: else if Node's left child's label < Node's right child's label
{
int temp;
Swap Register Stack's top and second top element;
gencode(Node's right child);
temp=pop();
gencode(Node's left child);
push(temp);
Swap Register Stack's top and second top element;
print "op R[top-1],R[top]"
}
66
Case 4: else if Node's left child's label >= Node's right child's label
{
int temp;
gencode(Node's left child);
temp=pop();
gencode(Node's right child);
push(temp);
print "op R[top-1],R[top]"
}
else if Node is leaf node and it is left child of it's immediate parent
{
print "MOV Node's data,R[top]"
}
PROGRAM:
#include<stdlib.h>
#include<iostream>
using namespace std;
/* We will implement DAG as Strictly Binary Tree where each node has zero or two children */
struct bin_tree
{
char data;
int label;
struct bin_tree *right, *left;
};
typedef bin_tree node;
/* NOTE(review): this class was damaged in the scanned listing.  Several
 * member functions (insertnode, insert, findinteriornodelabel, push,
 * gencode, print_inorder) lost their headers and parts of their bodies at
 * page breaks, and stray page numbers (67, 68, ...) were captured as code.
 * The comments below annotate only what is visible; reconstruct the missing
 * members from the original source before compiling. */
class dag
{
private:
/* R is stack for storing registers */
int R[10];
int top;
/* op will be used for opcode name w.r.t. arithmetic operator e.g. ADD for + */
char *op;
public:
67
/* Sets 'top' from the root's label.  NOTE(review): everything from
 * "if(!(*tree))" onward is actually the body of a node-allocating
 * insertnode(node **tree, char val) whose header was lost in the scan --
 * 'tree', 'temp' and 'val' are not in scope here. */
void initializestack(node *root)
{
/* value of top = index of topmost element of stack R = label of Root of tree(DAG) minus one */
top=root->label - 1;
if(!(*tree))
{
temp = (node *)malloc(sizeof(node));
temp->left = temp->right = NULL;
temp->data = val;
temp->label=-1;   /* -1 appears to mark "label not yet computed" */
*tree = temp;
}
}
/* NOTE(review): fragment of the interactive insert() routine -- its opening
 * lines are missing.  It reads how many children each node has and recurses. */
insertnode(tree, val);
cout << "\nEnter number of children of " << val <<" :";
cin >> numofchildren;
if(numofchildren==2)
{
cout << "\nEnter Left Child of " << val <<" :";
cin >> l;
insertnode(&(*tree)->left,l);
cout << "\nEnter Right Child of " << val <<" :";
68
cin >> r;
insertnode(&(*tree)->right,r);
insert(&(*tree)->left,l);
insert(&(*tree)->right,r);
}
}
/* NOTE(review): fragment of findinteriornodelabel() -- header and part of
 * the body lost in the scan.  Visible logic: labels are assigned bottom-up;
 * when both children carry equal labels the parent gets label+1. */
else
{
tree->label=val;
}
else if(tree->right->label==-1)
{
findinteriornodelabel(tree->right);
}
else
{
if(tree->left->label == tree->right->label)
69
{
tree->label=(tree->left->label)+1;
}
else
{
}
}
}
}
/* function print_inorder() will print inorder of nodes. Here we are also printing label of each node
of tree(DAG) */
/* function swap() will swap the top and second top elements of Register stack R */
/* NOTE(review): despite the comment above, the code swaps R[0] and R[1],
 * not R[top] and R[top-1] -- presumably the register stack grows downward;
 * confirm against the original source. */
void swap()
{
int temp;
temp=R[0];
R[0]=R[1];
R[1]=temp;
}
70
/* Pops and returns the register currently on top of stack R. */
int pop()
{
int temp=R[top];
top--;
return temp;
}
/* function push() will increment top by one and will insert element at top position of Register stack
*/
71
/* NOTE(review): orphaned tail of gencode() -- emits "<op> <right-operand>,R[top]";
 * the remaining code-generation cases were lost in the scan. */
cout << op << " " << tree->right->data << ",R[" << R[top] << "]\n";
}
72
};
/* Driver: builds the DAG interactively via dag::insert().
 * Fixes over the printed listing: 'val' was passed to insert() without ever
 * being initialized (undefined behavior) -- the root symbol is now read from
 * the user first; the unused locals tmp, i and temp were dropped. */
int main()
{
    node *root = NULL;   /* start with an empty DAG */
    char val;
    dag d;
    std::cout << "\nEnter Root Node : ";
    std::cin >> val;     /* root symbol must be known before insertion */
    d.insert(&root, val);
    return 0;
}
Thus the C++ program to generate assembly code using a DAG has been executed and verified.
74
AIM:
To implement the back end of the compiler which takes the three address code and produces
the 8086 assembly language instructions that can be assembled and run using a 8086 assembler. The
target assembly instructions can be simple move, add, sub, jump. Also simple addressing modes are
used.
DESCRIPTION: The back end is responsible for translating the intermediate representation of the
source code from the middle-end into assembly code.
OBJECTIVE : To implement the back end of the compiler which takes the three address code and
produces the 8086 assembly language instructions that can be assembled and run using a 8086 assembly.
In the first part of the program Open a file with read mode and read the content of the file one by one and
get the first three address code. Check the arithmetic operator If the operator is an addition (+) then
display the assembly code “ADD” and store the result to the corresponding R and if the operator is a
subtraction (-) then display the assembly code “SUB” and store the result to the corresponding register
HOW TO EXECUTE THE PROGRAM :
75
as separate passes, or the front end may call the back end as a subroutine, passing it the intermediate
representation.
This approach mitigates complexity separating the concerns of the front end, which typically
revolve around language semantics, error checking, and the like, from the concerns of the back end,
which concentrates on producing output that is both efficient and correct.
ADVANTAGES AND LIMITATIONS:
In this program, single back end is developed for single source language. It also has the advantage of
allowing the use of a single back end for multiple source languages, and similarly allows the use of
different back ends for different targets.
APPLICATIONS: This program can be used to develop a back end of a compiler using C programming
language.
ALGORITHM:
#include<stdio.h>
#include<conio.h>
#include<ctype.h>
#include<stdlib.h>
/* Back end: reads three-address statements such as "X=a-b" from
 * d:\codein.txt and prints naive 8086-style target code (MOV plus ADD/SUB
 * into registers R0, R1, ...).  Turbo-C calls (clrscr, getch) are kept to
 * match the rest of this record.
 * Fixes over the printed listing: fused tokens "inti"/"charip", curly
 * quotes in the fopen path, the missing space in "elseprintf", and the
 * error path now returns instead of falling through and using the NULL
 * file handle. */
void main()
{
int i=2,j=0,k=2,k1=0;       /* ip[i+k] indexes the operand being moved */
char ip[10];
FILE *fp;
clrscr();
fp=fopen("d:\\codein.txt","r");
if(fp==NULL)
{
printf("\nError in Opening the file");
getch();
return;                     /* original listing fell through with fp==NULL */
}
clrscr();
/* first pass: echo the source statements */
while(!feof(fp))
{
fscanf(fp,"%s\n",ip);
printf("\t\t%s\n",ip);
}
rewind(fp);
printf("\n------------------------------\n");
printf("\tStatement \t\t target code\n");
printf("\n------------------------------\n");
/* second pass: emit MOV plus ADD/SUB per statement */
while(!feof(fp))
{
fscanf(fp,"%s",ip);
printf("\t%s",ip);
printf("\t\tMOV %c,R%d\n\t",ip[i+k],j);
if(ip[i+1]=='+')
printf("\t\tADD");
else
printf("\t\tSUB");
if(islower(ip[i]))
printf("%c,R%d\n\n",ip[i+k1],j);
else
printf("%c,%c\n",ip[i],ip[i+2]);
j++;k1=2;k=0;               /* after the first statement the operand offsets shift */
}
printf("\n-------------------------------\n");
getch();
fclose(fp);
}
SAMPLE INPUT FILE : codein.txt
X=a-b
Y=a-c
z=a+b
77
C=a-b
C=a-b
SAMPLE OUTPUT:
------------------------------
Statement target code
------------------------------
X=a-b MOV b,R0
SUBa,R0
Y=a-c MOV a,R1
SUBc,R1
z=a+b MOV a,R2
ADDb,R2
C=a-b MOV a,R3
SUBb,R3
C=a-b MOV a,R4
SUBb,R4
-------------------------------
RESULT:
Thus the C Program for implementing back end of the compiler is executed and the required
output is obtained.
Ex No 13 CODE OPTIMIZATION TECHNIQUES (CONSTANT FOLDING)
AIM
78
To write a program for optimization of the given input code using constant folding
technique.
ALGORITHM
PROGRAM:
#include<stdio.h>
#include<string.h>
#include<conio.h>
#include<stdlib.h>
#include<ctype.h>
/* One recorded constant-folding substitution: 'str' is the expression text
 * found in the input and 'new_Str' the folded constant that replaces it.
 * Fix over the printed listing: the struct was opened with '(' instead of
 * '{', which cannot compile. */
struct ConstFold
{
char new_Str[10];   /* folded (computed) constant, as text */
char str[10];       /* original expression text */
}Opt_Data[20];      /* up to 20 substitutions per run */
/* Driver: reads d:\code.txt one line at a time and hands each line to
 * ReadInput(), which writes the constant-folded form to d:\output.txt.
 * Fixes over the printed listing: 'file' -> 'FILE', curly quotes -> plain
 * quotes, the 'Bufer' typo, and the file handles are checked before use. */
int main()
{
FILE *In_file,*Out_file;
char Buffer[100],ch;
int i=0;
In_file = fopen("d:\\code.txt","r");
Out_file = fopen("d:\\output.txt","w");
if(In_file == NULL || Out_file == NULL)
{
printf("\nError in opening the files");
return 1;                 /* original listing read from an unchecked handle */
}
clrscr( );
while(1)
{
ch = fgetc(In_file);
i=0;
/* collect one full line into Buffer */
while(1)
{
if(ch == '\n')
break;
Buffer[i++]=ch;
ch = fgetc(In_file);
if(ch == EOF)
break;
}//End while
if(ch ==EOF)
break;
Buffer[i]='\0';
ReadInput(Buffer, Out_file);//writing to the output file
}//End while
return 0;
}//End main
80
/* NOTE(review): the opening of this function -- apparently the tail of
 * ReadInput(), which concatenates the (folded) tokens back into 'temp' and
 * writes the line to the output file -- was lost at a page break in the
 * scanned listing.  The lines below start mid-function and still contain
 * curly quotes and the 'tem' typo from the printout; reconstruct from the
 * original source before compiling. */
{
strcat(tem,Token[i]);
if(Token[i+1][0]!=’,’||Token[i+1][0] != ‘,’)
strcat(temp,” “);
}//End for
strcat(temp,”\n\0”);
fwrite(&temp,strlen(temp),1,Out_file);
}
/*The Gen_Token function breaks the input line into tokens*/
int Gen_Token(char str[], char Token[][10])
{
int i=0;j=0,k=0;
while(str[k]!=’\0’)
{
j=0;
while(str[k] ==’ ‘|| str[k] ==’\t’)
k++;
while(str[k])!=’ ’&& str[k]!=’\0’
&& str[k]!= ‘=’ && str[k] != ‘/’
&& str[k]!= ‘+’ && str[k] != ‘-’
&& str[k]!= ‘*’ && str[k] != ‘,’ && str[k]!= ‘;’)
Token[i][j++] = str[k++];
Token[i++][j] = ‘\0’;
if(str[k] == ‘=’|| str[k] == ‘/’|| str[k] == ‘+’|| str[k]
== ‘-’|| str[k] == ‘*’|| str[k] == ‘*’|| str[k] == ‘,’||
str[k] == ‘;’)
{ Token[i][0] = str[k++];
Token[i++][1] = ‘\0’;
}//End if
if (str[k] == ‘\0’)
break; }//End while
return i; }
#include<stdio.h>
/* Constant-folding example, BEFORE optimization: 'pi' is a named constant
 * the optimizer can substitute into the expression.
 * Fixes over the printed listing: curly quotes in the printf format, and
 * 'r' was read uninitialized (undefined behavior) -- it is given a value
 * so the example is well-defined. */
int main()
{
float pi=3.14,r=1.0,a;
a = pi*r*r;          /* folding candidate: pi is known at compile time */
printf("a = %f",a);
return 0;
}
#include<stdio.h>
/* Constant-folding example, AFTER optimization: the compiler has replaced
 * 'pi' with its literal value 3.14 in the expression.
 * Fixes over the printed listing: curly quotes in the printf format, and
 * 'r' was read uninitialized (undefined behavior) -- initialized here. */
int main()
{
float pi = 3.14, r = 1.0, a;
a = 3.14 * r * r;    /* pi folded to its constant value */
printf("a = %f",a);
return 0;
}
RESULT:
Thus the program for code optimization was implemented, executed and verified.
AIM
82
To write a C program to implement Shift Reduce Parser
.
ALGORITHM
PROGRAM
#include<stdio.h>
#include<conio.h>
#include<string.h>
/* shift()/reduce() both return the updated parse-stack top index. */
int shift(int,int);
int reduce(int,int);
/* One grammar production: 'l' is the single-character LHS nonterminal and
 * 'r' its RHS string.  The same struct doubles as a plain character buffer:
 * ip holds the input sentence, pr[] the productions, st the parse stack. */
struct pro
{
char l,r[10];
}ip,pr[10],st;
/* Shift-reduce parser driver: reads n productions and an input sentence,
 * then alternately shifts input symbols onto the stack and attempts
 * reductions.  Rejects when the input is exhausted and the stack holds a
 * single symbol that is not the start symbol pr[0].l.
 * Fixes over the printed listing: exit() was called with no argument and
 * without <stdlib.h> -- replaced by return from main; "ENter" typo fixed. */
void main()
{
int n=0,i=0,j=0,l=0,s=0,k=0;
clrscr();
printf("Enter the number of production");
scanf("%d",&n);
printf("\n Enter the Production");
for(i=0;i<n;i++)
{
printf("%d",i+1);
pr[i].l=getche();            /* LHS is a single character */
printf("->");
scanf("%s",pr[i].r);
}
printf("\n Enter the input");
scanf("%s",ip.r);
printf("\n\t STACK\t\tINPUT\t\tACTION");
printf("\n \t $\t\t%s$",ip.r);
k=l=strlen(ip.r);
for(j=0;j<=k;j++)            /* at most one shift per input symbol */
{
if(l!=0)
{
s=shift(s,l);
l=strlen(ip.r);
}
else if(l==0&&strlen(st.r)==1&&(st.r[0]!=pr[0].l))
{
printf("\t\t ERROR\a\a");
printf("\n Enter the valid input for the given production");
getch();
return;                      /* listing called exit() with no argument */
}
s=reduce(s,n);
}
getch();
}
int shift(int s,int l)
{
int i;
st.r[s]=ip.r[0];
s++;
l--;
for(i=0;i<l+1;i++)
{
if(ip.r[i+1]!='\0')
ip.r[i]=ip.r[i+1];
else if(ip.r[i+1]=='\0')
ip.r[i]='\0';
}
printf("\t\t SHIFT\n\t$%s\t\t%s$",st.r,ip.r);
return(s);
}
/* Tries to reduce the parse stack using the productions in pr[] and returns
 * the updated stack-top index.  Two passes:
 *   pass 1 -- a lowercase terminal on top that matches a one-symbol RHS is
 *             replaced in place by its LHS (e.g. a -> E for E->a);
 *   pass 2 -- if the ENTIRE stack equals some RHS string, it is replaced by
 *             that production's LHS and the stack is truncated.
 * NOTE(review): pass 2 compares the whole stack, not just a suffix, so
 * deeper reductions rely on repeated calls from main().  The stray "84"
 * below is a page number captured by the scan -- remove before compiling. */
int reduce(int s,int n)
{
int a,b,c,i,g;
char ch;
/* pass 1: single-terminal reduction */
for(i=0;i<n;i++)
{
c=strlen(pr[i].r);          /* NOTE(review): computed but unused in this pass */
ch=st.r[s-1];               /* current stack top */
if((pr[i].r[0]==ch)&&islower(ch))
{
st.r[s-1]=pr[i].l;
printf("\t\tREDUCE\n\t$%s\t\t%s$",st.r,ip.r);
}
}
/* pass 2: whole-stack match against a full RHS */
for(i=0;i<n;i++)
{
a=strlen(st.r);
84
b=strlen(pr[i].r);
g=strcmp(st.r,pr[i].r);
if(a==b&&g==0)
{
st.r[s-a]=pr[i].l;          /* LHS replaces the matched handle */
for(c=0;c<s;c++)
st.r[c+1]='\0';             /* clear everything above it */
s=(s-a)+1;
printf("\t\tREDUCE\n\t$%s\t\t%s$",st.r,ip.r);
}
}
return(s);
}
OUTPUT
Enter the no of productions:3
Enter the productions:
1E->E+E
2E->E*E
3E->a
Enter the input a+a*a
RESULT
Thus the C program to implement Shift Reduce Parser is executed and verified successfully .
Ex No 15 IMPLEMENTATION OF LR PARSER
AIM:
85
To write a C program to construct LR Parsing Table.
ALGORITHM:
Step1: Start the program.
Step2: Read the context free grammar.
Step3: Get the input from the user
Step4: Push into stack do step 5 to 7.
Step5: Otherwise goto step 8.
Step6: If the starting symbol of inp is terminal then shift it into stack by pushing top+=1.
Step7: Reduce the terminal as stack content by the production.
Step8: Goto step 4.
Step9: Stop the program.
PROGRAM:
#include<stdio.h>
#include<conio.h>
#include<stdlib.h>
#include<string.h>
/* LR shift action: appends "<symbol><state>" (from input[0] and reln) to
 * the parse stack, points 'top' at the start of the state number just
 * pushed (two digits when reln >= 10), and removes the consumed symbol
 * from the front of the input. */
shift()
{
int src, dst;
char rest[20];
sprintf(rel, "%c%d", input[0], reln);
strcat(stack, rel);
/* 'top' indexes the first digit of the state just pushed */
top = strlen(stack) - ((reln >= 10) ? 2 : 1);
/* copy input[1..] into a scratch buffer, then back over input */
for (src = 1, dst = 0; src < strlen(input); src++, dst++)
rest[dst] = input[src];
rest[dst] = '\0';
strcpy(input, rest);
printf("\t\t\t shift");
printf("\n %s \t\t\t %s", stack, input);
return 0;
}
/* LR reduce action.  'reln' arrives negative; negating it selects the
 * production NT[reln] -> prodn[reln].  The loop walks down the stack
 * counting grammar symbols (bytes outside '0'..'9', i.e. not state digits)
 * until |RHS| of them are found, truncates the stack there, pushes the LHS
 * nonterminal, then pushes the goto state ptab[sttop][cur] computed by
 * get(0).  NOTE(review): loop variable 'i' and the tables NT, prodn, ptab
 * are file-scope globals declared in a part of the listing not shown. */
reduce()
{
int plen=0,ntcount=0;
char nt,ch,cat[10];
reln=reln*-1;
nt=NT[reln];
plen=strlen(prodn[reln]);
for(i=strlen(stack)-1;i>=0;i--)
{
ch=stack[i];
if(!(ch>=48 && ch<=57))   /* 48..57 are ASCII '0'..'9': skip state digits */
ntcount++;
if(ntcount==plen)
break;
}
stack[i]='\0';            /* cut the handle off the stack */
sprintf(cat,"%c",nt);
strcat(stack,cat);        /* push the LHS nonterminal */
get(0);                   /* look up the goto entry for (state below, LHS) */
sprintf(cat,"%d",ptab[sttop][cur]);
strcat(stack,cat);        /* push the goto state */
printf("\t\treduce%c->%s",NT[reln],prodn[reln]);
printf("\n%s\t\t\t%s",stack,input);
top=strlen(stack)-strlen(cat);
return 0;
}
/* Translates the current symbol into a parse-table column index 'cur' and
 * extracts the current state number into 'sttop'.
 *   ipflag==1: symbol is the next input character; the state is the digit
 *              string on top of the stack (multi-digit when stlen-1 != top).
 *   ipflag==0: called from reduce(); the symbol is stack[i] and the state
 *              is the single digit beneath it.
 * NOTE(review): the stray "87" below is a page number captured by the scan
 * -- it even splits the if/else, so it must be removed before compiling.
 * The globals in, st, stlen, sttop, cur, invalid and the column constants
 * IDR, PLU, AST, OPS, CPS, DOL, E, T, F are declared in a part of the
 * listing that was not captured. */
get(int ipflag)
{
int diff,l=0,m=0;   /* NOTE(review): 'diff' is never used in the visible code */
if(ipflag)
{
in=input[0];
stlen=strlen(stack);
if(stlen-1==top)
sprintf(st,"%c",stack[top]);   /* one-digit state number */
else
{
for(l=top;l<=stlen;l++)        /* multi-digit state number */
st[m++]=stack[l];
st[m]='\0';
}
sttop=atoi(st);
}
87
else
{
in=stack[i];
sprintf(st,"%c",stack[i-1]);
sttop=atoi(st);
}
/* map the symbol to its parse-table column */
switch(in)
{
case'i':cur=IDR;
break;
case'+':cur=PLU;
break;
case'*':cur=AST;
break;
case'(':cur=OPS;
break;
case')':cur=CPS;
break;
case'$':cur=DOL;
break;
case'E':cur=E;
break;
case'T':cur=T;
break;
case'F':cur=F;
break;
default:
printf("\n %d invalid input symbol",stack[top]);
invalid=1;        /* unknown symbol aborts the parse loop in main() */
break;
}
return 0;
}
/* Driver: prints the fixed expression grammar, reads the input string and
 * runs the table-driven LR loop.  Convention encoded in ptab (declared in
 * a part of the listing not shown): entry > 0 and != 100 -> shift to that
 * state, entry < 0 -> reduce by that production, 100 -> accept, 0 -> error.
 * NOTE(review): the stray "88" below is a scanned page number, and the
 * first status printf prints 'stack' before any visible initialization --
 * check the original source. */
main()
{
clrscr();
printf("\n LR PARSER \n");
printf("\n given cfg");
for(i=1;i<7;i++)   /* productions 1..6 of the fixed grammar */
{
printf("\n %c->%s",NT[i],prodn[i]);
}
printf("\n enter the input string");
scanf("%s",input);
printf("\n input:%s\n stack \t\t input \t\t\t action \n %s\t\t\t %s$",input,stack,input);
strcat(input,"$");   /* '$' marks end of input */
while(!invalid)
88
{
stlen=strlen(stack);
get(1);                       /* classify lookahead, fetch current state */
reln=ptab[sttop][cur];
if(reln>0 && reln!=100)
shift();
else if(reln<0)
reduce();
else if(reln==100)
break;                        /* accept */
else
invalid=1;                    /* empty table entry: syntax error */
}
if(invalid)
printf("\n rejected");
else
printf("\n accepted");
getch();
return 0;
}
OUTPUT:
Given cfg
E->E+T
89
E->T
T->T*F
T->F
F->(E)
F->i
RESULT:
Thus the C program to implement L-R Parser is executed and verified successfully.
90
91