Thanks to visit codestin.com
Credit goes to www.scribd.com

0% found this document useful (0 votes)
26 views61 pages

CD Final File Compiler Design Lab File

cpmp desin lab

Uploaded by

asharma1821032
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
26 views61 pages

CD Final File Compiler Design Lab File

cpmp desin lab

Uploaded by

asharma1821032
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 61

lOMoARcPSD|34549134

lOMoARcPSD|34549134

DELHI TECHNOLOGICAL
UNIVERSITY

Compiler Design
Lab File

Submitted by: Submitted to:


Devesh Mittal Dr. Pawan S. Mehra

2K21/CO/152
A2(G3)
lOMoARcPSD|34549134

INDEX
S. No. EXPERIMENT Date

1 Write a program to convert Non-Deterministic Finite Automata 13/01/2022


(NFA) to Deterministic Finite Automata (DFA).

2 Program to build a DFA to accept strings that start and end with 20/01/2022
same character(given a string of characters a & b)

3 Program to detect tokens in a Program 27/01/2022


(Eg-Keywords,operators, identifiers etc)
4 Write a program to implement lexical analyser 03/02/2022

5 Write a program to implement recursive descent parser 24/02/2022

6 Program to find first and follow of the given grammar 10/03/2022

Program to eliminate left factoring in the given grammar


7 24/03/2022

8 Program to eliminate left recursion in a grammar. 31/03/2022

2
lOMoARcPSD|34549134

EXPERIMENT - 1
AIM
Write a program to convert Non-Deterministic Finite Automata (NFA) to Deterministic Finite
Automata (DFA).

THEORY
Deterministic Finite Automaton
In DFA, for each input symbol, one can determine the state to which the machine will move.
Hence, it is called Deterministic Automaton. As it has a finite number of states, the machine is
called Deterministic Finite Machine or Deterministic Finite Automaton.

Formal Definition of a DFA

A DFA can be represented by a 5-tuple (Q, ∑, δ, q0, F) where −


 Q is a finite set of states.
 ∑ is a finite set of symbols called the alphabet.
 δ is the transition function where δ: Q × ∑ → Q
 q0 is the initial state from where any input is processed (q0 ∈ Q).
 F is a set of final state/states of Q (F ⊆ Q).

Non-Deterministic Finite Automaton


In NDFA, for a particular input symbol, the machine can move to any combination of the states
in the machine. In other words, the exact state to which the machine moves cannot be
determined. Hence, it is called Non-deterministic Automaton. As it has finite number of states,
the machine is called Non-deterministic Finite Machine or Non-deterministic Finite
Automaton.
Formal Definition of an NDFA
An NDFA can be represented by a 5-tuple (Q, ∑, δ, q0, F) where −
 Q is a finite set of states.
 ∑ is a finite set of symbols called the alphabets.

3
lOMoARcPSD|34549134

 δ is the transition function where δ: Q × ∑ → 2^Q


(Here the power set of Q, written 2^Q, has been taken because in the case of an NDFA, from a state,
a transition can occur to any combination of the states in Q.)
 q0 is the initial state from where any input is processed (q0 ∈ Q).
 F is a set of final state/states of Q (F ⊆ Q).
Converting NFA to DFA
In NFA, when a specific input is given to the current state, the machine goes to multiple states.
It can have zero, one or more than one move on a given input symbol. On the other hand,
in DFA, when a specific input is given to the current state, the machine goes to only one state.
DFA has only one move on a given input symbol.
Let, M = (Q, ∑, δ, q0, F) is an NFA which accepts the language L(M). There should be
equivalent DFA denoted by M' = (Q', ∑', q0', δ', F') such that L(M) = L(M').

Steps for converting NFA to DFA:


Step 1: Initially Q' = ϕ
Step 2: Add q0 of NFA to Q'. Then find the transitions from this start state.
Step 3: In Q', find the possible set of states for each input symbol. If this set of states is not in Q',
then add it to Q'.
Step 4: In DFA, the final state will be all the states which contain F(final states of NFA)

Example :

NFA Transition Table:

4
lOMoARcPSD|34549134

State 0 1

→q0 q0 q1

q1 {q1, q2} q1

*q2 q2 {q1, q2}

DFA Transition Table :

State 0 1

→[q0] [q0] [q1]

[q1] [q1, q2] [q1]

*[q2] [q2] [q1, q2]

*[q1, q2] [q1, q2] [q1, q2]

DFA Transition Diagram

5
lOMoARcPSD|34549134

CODE

#include<math.h>

#include<iostream>
#include<queue>
#include<string>
#include<unordered_map>
#include<unordered_set>
#include<vector>

using namespace std ;

// Computes the DFA state reached from the composite state `str` on input
// symbol number `alphabet`.
//
// Every character of `str` is treated as the name of one NFA state; `map`
// gives, for each NFA state, one target string per input symbol. The result
// is the set union of the targets of all member states, returned as a string
// of distinct characters in ascending order. Canonicalising the result this
// way makes equal sets compare equal, so the caller's visited-set check
// terminates (plain concatenation turns "AB" into "ABB", "ABBB", ...).
//
// States missing from `map`, and symbol numbers outside a state's transition
// list, contribute nothing. An empty result denotes the dead state.
std::string unionStates(const std::string& str, int alphabet,
                        const std::unordered_map<std::string, std::vector<std::string> >& map)
{
    bool member[256] = {false};

    for (std::size_t i = 0; i < str.length(); i++)
    {
        const std::unordered_map<std::string, std::vector<std::string> >::const_iterator entry =
            map.find(std::string(1, str[i]));
        if (entry == map.end())
            continue;
        if (alphabet < 0 || static_cast<std::size_t>(alphabet) >= entry->second.size())
            continue;

        const std::string& targets = entry->second[alphabet];
        for (std::size_t t = 0; t < targets.length(); t++)
            member[static_cast<unsigned char>(targets[t])] = true;
    }

    std::string ans;
    for (int code = 0; code < 256; code++)
        if (member[code])
            ans += static_cast<char>(code);

    return ans;
}

// Interactive driver for the NFA -> DFA conversion.
//
// Reads the NFA's state names, the number of input symbols and one transition
// string per (state, symbol) pair ("X" means no transition), then explores
// the composite states reachable from the initial state breadth-first and
// prints the resulting transition table.
int main()
{
    int states = 0, alphabets = 0;

    cout << "Enter number of States : " << endl;
    cin >> states;

    // take input for states
    cout << "\nEnter " << states << " states : \n";
    vector<string> sa(states > 0 ? states : 0);
    for (size_t i = 0; i < sa.size(); i++)
    {
        cin >> sa[i];
    }

    cout << "\nEnter number of inputs : " << endl;
    cin >> alphabets;

    // transitions[state][symbol] = string of NFA states reached
    unordered_map<string, vector<string> > transitions;
    cout << "\n\nEnter the transitions (Enter X for null) : \n";

    for (size_t s = 0; s < sa.size(); s++)
    {
        for (int a = 0; a < alphabets; a++)
        {
            cout << "\nState\tInput";
            cout << "\n" << sa[s] << "\t" << a << endl;
            string transition;
            cout << "Trnasition to : ";
            cin >> transition;

            if (transition == "X")
                transition = "";

            transitions[sa[s]].push_back(transition);
        }
    }

    string is, fs;
    cout << "\nEnter initial and final states : " << endl;
    cin >> is >> fs;   // fs is read to keep the prompt sequence; it is not used below

    if (!cin)
    {
        cerr << "Invalid input" << endl;
        return 1;
    }

    // dfa[compositeState][symbol] = composite state reached
    unordered_map<string, vector<string> > dfa;
    queue<string> q;
    unordered_set<string> visited;

    q.push(is);
    while (!q.empty())
    {
        string present = q.front();
        q.pop();

        if (visited.count(present))
            continue;

        visited.insert(present);

        for (int a = 0; a < alphabets; a++)
        {
            string ans = unionStates(present, a, transitions);
            dfa[present].push_back(ans);
            q.push(ans);
        }
    }

    cout << "\n\nFINAL ANSWER" << endl;

    for (unordered_map<string, vector<string> >::iterator i = dfa.begin(); i != dfa.end(); i++)
    {
        cout << i->first << "\t\t";

        for (int a = 0; a < alphabets; a++)
        {
            cout << i->second[a] << "\t";
        }

        cout << endl;
    }

    return 0;
}

9
lOMoARcPSD|34549134

OUTPUT

LEARNING OUTCOME

 What is Deterministic Finite Automaton


 Formal definition of Deterministic Finite Automaton
 What is acceptance
 Acceptance by Deterministic Finite Automaton
 Check if a string is accepted or rejected by a DFA

10
lOMoARcPSD|34549134

EXPERIMENT - 2
AIM
Write a program for acceptance of string by DFA

THEORY
Deterministic Finite Automaton
In DFA, for each input symbol, one can determine the state to which the machine will move.
Hence, it is called Deterministic Automaton. As it has a finite number of states, the machine is
called Deterministic Finite Machine or Deterministic Finite Automaton.

Formal Definition of a DFA

A DFA can be represented by a 5-tuple (Q, ∑, δ, q0, F) where −


 Q is a finite set of states.
 ∑ is a finite set of symbols called the alphabet.
 δ is the transition function where δ: Q × ∑ → Q
 q0 is the initial state from where any input is processed (q0 ∈ Q).
 F is a set of final state/states of Q (F ⊆ Q).

Acceptance by DFA
A string w is accepted by a DFA < Q, Σ, q0, δ, A >, if and only if δ*( q0, w ) ϵ A . That is a
string is accepted by a DFA if and only if the DFA starting at the initial state ends in an accepting
state after reading the string. If the DFA does not end on a final state, then the string is rejected.
A language L is accepted by a DFA < Q, Σ, q0, δ, A >, if and only if L = { w | δ*( q0 , w ) ϵ
A } . That is, the language accepted by a DFA is the set of strings accepted by the DFA. If any
one string in the set is not accepted by the DFA then the Language is rejected by the DFA.

A DFA A = (Q, Σ, s, F, T) accepts a string w iff T(s, w) ∈ F.
The language of the automaton A is

11
lOMoARcPSD|34549134

L(A)={w | A accepts w}.


More formally,
L(A)={w | T(Start(A),w) ∈ Final(A)}

Example 1 :

This DFA accepts { ε } because it can go from the initial state to the accepting state (also the
initial state) without reading any symbol of the alphabet, i.e. by reading the empty string ε. It
accepts nothing else because reading any symbol would take it to state 1, which is not an
accepting state, and it stays there.

Example 2 :

This DFA does not accept any string because it has no accepting state. Thus the language it
accepts is the empty set Φ .

Example 3 :

12
lOMoARcPSD|34549134

This DFA has a cycle: 1 - 2 - 1 and it can go through this cycle any number of times by reading
substring ab repeatedly.
To find the language it accepts, first from the initial state go to state 1 by reading one a.
Then from state 1 go through the cycle 1 - 2 - 1 any number of times by reading
substring ab any number of times to come back to state 1. This is represented by (ab)*. Then
from state 1 go to state 2 and then to state 3 by reading aa. Thus a string that is accepted by this
DFA can be represented by a(ab)*aa .

CODE

#include<stdio.h>
#include<conio.h>

/* Number of input symbols actually in use (entries of c[]). */
int ninputs;

/* Looks up the DFA transition for a symbol from a state; -1 if none. */
int check(char, int);

/* Transition table: dfa[state][symbol index] = next state. */
int dfa[10][10];

/* c[] holds the input symbols; string[] holds the word being tested. */
char c[10], string[10];
int main()
{
int nstates, nfinals;
int f[10];
int i,j;
printf("Enter the number of states in the DFA : ");
scanf("%d",&nstates);
printf("\nEnter the number of input symbol : ");
scanf("%d",&ninputs);

13
lOMoARcPSD|34549134

printf("\nEnter the input symbols\t"); for(i=0;


i<ninputs; i++)
{
printf("\n\n %d input\t", i+1);
printf("%c",c[i]=getch());
}
printf("\n\nEnter number of final states\t");
scanf("%d",&nfinals);

for(i=0;i<nfinals;i++)
{
printf("\n\nFinal state %d : q",i+1);
scanf("%d",&f[i]);
}

printf(" "); printf("\


n\nDefine transition rule as (initial state, input symbol ) = final state\n"); for(i=0; i<nstates;
i++)
{
for(j=0; j<ninputs; j++)
{
printf("\n(q%d , %c ) = q",i,c[j]);
scanf("%d",&dfa[i][j]);
}
}

do
{ i=
0;
int final=0, s=0; printf("\n\nEnter
Input String : "); scanf("%s",string);

while(string[i]!='\0')

14
lOMoARcPSD|34549134

if((s=check(string[i++],s))<0)
break;

for(i=0 ;i<nfinals ;i++)


if(f[i] ==s )
final=1;
if(final==1)
printf("\nVALID STRING");
else
printf("\nINVALID STRING");
getch();

printf("\n\nDo you want to continue? (y/n) ");


} while(getch()=='y');

// getch();
}

/*
 * Returns the state reached from state `d` on input symbol `b`, i.e.
 * dfa[d][j] where j is the position of `b` in the symbol table c[].
 * Returns -1 when `b` is not one of the declared symbols or when `d` is not
 * a valid row of the 10x10 transition table.
 */
int check(char b, int d)
{
    int j;

    if (d < 0 || d >= 10)
        return -1;

    for (j = 0; j < ninputs && j < 10; j++)
        if (b == c[j])
            return dfa[d][j];

    return -1;
}

OUTPUT

Screenshot 1 –
(DFA to accept Binary strings that ends with “01”)

15
lOMoARcPSD|34549134

16
lOMoARcPSD|34549134

(DFA to accept strings containing exactly two '0' over input alphabets ∑ = {0, 1})

LEARNING OUTCOME

 What is Deterministic Finite Automaton


 Formal definition of Deterministic Finite Automaton
 What is acceptance
 Acceptance by Deterministic Finite Automaton
 Check if a string is accepted or rejected by a DFA

17
lOMoARcPSD|34549134

EXPERIMENT - 3
AIM
Write a program to find different tokens in a program.

THEORY
Lexical Analysis is the first phase of the compiler also known as a scanner. It converts the
High level input program into a sequence of Tokens.
 Lexical Analysis can be implemented with the Deterministic finite Automata.
 The output is a sequence of tokens that is sent to the parser for syntax analysis

Token
A lexical token is a sequence of characters that can be treated as a unit in the grammar of the
programming languages.

Example of tokens:
Type tokens (id, number, real, . . . )
Punctuation tokens (IF, void, return, . . . )
Alphabetic tokens (keywords)

Keywords; Examples-for, while, if etc.


Identifier; Examples-Variable name, function name, etc.
Operators; Examples '+', '++', '-' etc.
Separators; Examples ',' ';' etc

Example of Non-Tokens:
Comments, preprocessor directive, macros, blanks, tabs, newline, etc.

18
lOMoARcPSD|34549134

Lexeme: The sequence of characters matched by a pattern to form


the corresponding token or a sequence of input characters that comprises a single token is
called a lexeme. eg- “float”, “abs_zero_Kelvin”, “=”, “-”, “273”, “;” .

How Lexical Analyzer functions

1. Tokenization i.e. Dividing the program into valid tokens.


2. Remove white space characters.
3. Remove comments.
4. It also provides help in generating error messages by providing row numbers and
column numbers.

The lexical analyzer identifies the error with the help of the automaton and the
grammar of the given language on which it is based (such as C or C++), and gives the row number and
column number of the error.

Suppose we pass a statement through the lexical analyzer: a = b + c;
It will generate a token sequence like this: id = id + id;
where each id refers to its variable in the symbol table, which holds all of its details.

19
lOMoARcPSD|34549134

Example :

int max(int i);

Lexical analyzer first read int and finds it to be valid and accepts as token max is
read by it and found to be a valid function name after reading ( int is also a token ,
then again i as another token and finally ;

Answer: Total number of tokens 7 : int, max, (, int, i, ), ;

CODE

#include <iostream>
#include <bits/stdc++.h>
using namespace std;

// Returns 'true' if the character is a DELIMITER: a space, an operator,
// a comma, a semicolon or any bracket. The string terminator '\0' is not
// a delimiter.
bool isDelimiter(char ch)
{
    switch (ch) {
    case ' ': case '+': case '-': case '*': case '/':
    case ',': case ';': case '>': case '<': case '=':
    case '(': case ')': case '[': case ']':
    case '{': case '}':
        return true;
    default:
        return false;
    }
}

// Returns 'true' if the character is an OPERATOR: one of + - * / > < =.
bool isOperator(char ch)
{
    switch (ch) {
    case '+': case '-': case '*': case '/':
    case '>': case '<': case '=':
        return true;
    default:
        return false;
    }
}

20
lOMoARcPSD|34549134

// Reports whether the lexeme may be an identifier. Only the first character
// is inspected: the lexeme is rejected when it starts with a decimal digit
// or with a delimiter, and accepted otherwise.
bool validIdentifier(char* str)
{
    const char first = str[0];
    const bool startsWithDigit = (first >= '0' && first <= '9');

    if (startsWithDigit || isDelimiter(first))
        return false;
    return true;
}

// Returns 'true' if the string is a KEYWORD.
// The comparison is exact and case-sensitive. "for" is part of the table so
// that loop headers are not reported as identifiers.
bool isKeyword(char* str)
{
    static const char* const keywords[] = {
        "if", "else", "while", "do", "for", "break", "continue",
        "int", "double", "float", "return", "char", "case",
        "sizeof", "long", "short", "typedef", "switch", "unsigned",
        "void", "static", "struct", "goto"
    };
    const int count = sizeof(keywords) / sizeof(keywords[0]);

    for (int k = 0; k < count; k++)
        if (strcmp(str, keywords[k]) == 0)
            return (true);
    return (false);
}

// Returns 'true' if the string is an INTEGER: an optional leading '-'
// followed by one or more decimal digits. The empty string and a lone "-"
// are not integers.
bool isInteger(char* str)
{
    int i = (str[0] == '-') ? 1 : 0;   // skip a leading sign

    if (str[i] == '\0')
        return (false);                // no digits at all

    for (; str[i] != '\0'; i++) {
        if (str[i] < '0' || str[i] > '9')
            return (false);
    }
    return (true);
}

// Returns 'true' if the string is a REAL NUMBER: an optional leading '-',
// then decimal digits containing exactly one '.' and at least one digit.
// Strings without a decimal point (plain integers) are not real numbers.
bool isRealNumber(char* str)
{
    bool hasDecimal = false;
    bool hasDigit = false;
    int i = (str[0] == '-') ? 1 : 0;   // skip a leading sign

    for (; str[i] != '\0'; i++) {
        if (str[i] == '.') {
            if (hasDecimal)
                return (false);        // a second decimal point
            hasDecimal = true;
        } else if (str[i] >= '0' && str[i] <= '9') {
            hasDigit = true;
        } else {
            return (false);
        }
    }
    return (hasDecimal && hasDigit);
}

22
lOMoARcPSD|34549134

// Returns a freshly malloc'ed, NUL-terminated copy of str[left..right]
// (both indices inclusive). The caller owns the returned buffer.
char* subString(char* str, int left, int right)
{
    const int count = right - left + 1;
    char* piece = (char*)malloc(sizeof(char) * (count + 1));

    int offset = 0;
    while (left + offset <= right) {
        piece[offset] = str[left + offset];
        ++offset;
    }
    piece[count] = '\0';

    return piece;
}

// Parsing the input STRING.


void parse(char* str)
{
int left = 0, right = 0; int
len = strlen(str);

while (right <= len && left <= right) { if


(isDelimiter(str[right]) == false)
right++;

if (isDelimiter(str[right]) == true && left == right) { if


(isOperator(str[right]) == true)
printf("'%c' IS AN OPERATOR\n", str[right]);

right++; left
= right;
} else if (isDelimiter(str[right]) == true && left != right
|| (right == len && left != right)) { char*
subStr = subString(str, left, right - 1);

if (isKeyword(subStr) == true) printf("'%s' IS


A KEYWORD\n", subStr);

23
lOMoARcPSD|34549134

else if (isInteger(subStr) == true) printf("'%s'


IS AN INTEGER\n", subStr);

else if (isRealNumber(subStr) == true) printf("'%s' IS


A REAL NUMBER\n", subStr);

else if (validIdentifier(subStr) == true


&& isDelimiter(str[right - 1]) == false)
printf("'%s' IS A VALID IDENTIFIER\n", subStr);

else if (validIdentifier(subStr) == false


&& isDelimiter(str[right - 1]) == false) printf("'%s' IS
NOT A VALID IDENTIFIER\n", subStr);
left = right;
}
}
return;
}

// DRIVER FUNCTION
// Reads one line of source text from standard input and prints its tokens.
int main()
{
    // maximum length of string is 100 here (99 characters plus the NUL)
    char str[100] = "";
    cout << "Enter code : ";
    cin.getline(str, 100);
    cout << "\n";
    parse(str); // calling the parse function

    return (0);
}

24
lOMoARcPSD|34549134

OUTPUT

LEARNING OUTCOME

 Lexical Analysis is the first phase of the compiler also known as a scanner.
 It converts the High level input program into a sequence of Tokens.
 A lexical token is a sequence of characters that can be treated as a unit in the grammar
of the programming languages.
 The sequence of characters matched by a pattern to form
the corresponding token or a sequence of input characters that comprises a single token is
called a lexeme.
 C++ program to identify token in a program.

25
lOMoARcPSD|34549134

EXPERIMENT - 4
AIM
To implement lexical analyser.

THEORY
Lexical analysis is the first phase of a compiler. It takes the modified source code from language
preprocessors that are written in the form of sentences. The lexical analyzer breaks these
syntaxes into a series of tokens, by removing any whitespace or comments in the source code.
If the lexical analyzer finds a token invalid, it generates an error. The lexical analyzer works
closely with the syntax analyzer. It reads character streams from the source code, checks for legal
tokens, and passes the data to the syntax analyzer when it demands.

CODE

#include<bits/stdc++.h>
#include<stdlib.h>
#include<string.h>
#include<ctype.h>
using namespace std;

// Returns 1 when `buffer` is exactly one of the 32 C keywords, 0 otherwise.
// The table is static so it is not rebuilt on every call.
int isKeyword(char buffer[]) {
    static const char* const keywords[32] = {
        "auto", "break", "case", "char", "const", "continue", "default",
        "do", "double", "else", "enum", "extern", "float", "for", "goto",
        "if", "int", "long", "register", "return", "short", "signed",
        "sizeof", "static", "struct", "switch", "typedef", "union",
        "unsigned", "void", "volatile", "while"
    };

    for (int i = 0; i < 32; ++i) {
        if (strcmp(keywords[i], buffer) == 0)
            return 1;
    }
    return 0;
}
// Prints `label`, then the items joined by `sep`, then a newline.
// The newline is printed even when there are no items.
template <typename T>
static void printCategory(const char* label, const vector<T>& items, const char* sep) {
    cout << label;
    for (size_t f = 0; f < items.size(); ++f) {
        if (f != 0)
            cout << sep;
        cout << items[f];
    }
    cout << "\n";
}

// Scans "lexicalinput.txt" character by character and prints the distinct
// keywords, identifiers (first letter only), math operators, logical
// operators, numeric literals and other punctuation it contains.
//
// Words and numbers are accumulated in std::string buffers, so tokens of any
// length are safe. End of file is treated as a final newline so the last
// pending word or number is still reported.
int main() {
    const char logical_op[] = "><";
    const char math_op[] = "+-*/=";
    const char other[] = ",;(){}[]':";

    ifstream fin("lexicalinput.txt");
    if (!fin.is_open()) {
        cout << "error while opening the file\n";
        return 1;
    }

    bool seenSymbol[256] = {false};     // punctuation/operators already listed
    bool seenIdentifier[26] = {false};  // identifier initials already listed
    vector<string> k, nu;
    vector<char> id, lo, ma, ot;
    string word, number;

    bool atEnd = false;
    while (!atEnd) {
        const int raw = fin.get();
        atEnd = (raw == EOF);
        const char ch = atEnd ? '\n' : static_cast<char>(raw);
        const unsigned char code = static_cast<unsigned char>(ch);

        // Punctuation and operators: record each distinct character once.
        // NUL is excluded because strchr() would match the terminator.
        if (ch != '\0' && !seenSymbol[code]) {
            if (strchr(other, ch) != NULL) {
                ot.push_back(ch);
                seenSymbol[code] = true;
            } else if (strchr(math_op, ch) != NULL) {
                ma.push_back(ch);
                seenSymbol[code] = true;
            } else if (strchr(logical_op, ch) != NULL) {
                lo.push_back(ch);
                seenSymbol[code] = true;
            }
        }

        // Numeric literals: digits and '.' accumulate until a space,
        // newline or ';' ends the literal.
        if (isdigit(code) || ch == '.') {
            number += ch;
        } else if ((ch == ' ' || ch == '\n' || ch == ';') && !number.empty()) {
            nu.push_back(number);
            number.clear();
        }

        // Words: alphanumerics accumulate until a space or newline.
        if (isalnum(code)) {
            word += ch;
        } else if ((ch == ' ' || ch == '\n') && !word.empty()) {
            if (isKeyword(&word[0]) == 1) {
                k.push_back(word);
            } else if (word[0] >= 'a' && word[0] <= 'z') {
                const int slot = word[0] - 'a';
                if (!seenIdentifier[slot]) {
                    id.push_back(word[0]);
                    seenIdentifier[slot] = true;
                }
            }
            word.clear();
        }
    }
    fin.close();

    printCategory("Keywords: ", k, ", ");
    printCategory("Identifiers: ", id, ", ");
    printCategory("Math Operators: ", ma, ", ");
    printCategory("Logical Operators: ", lo, ", ");
    printCategory("Numerical Values: ", nu, ", ");
    printCategory("Others: ", ot, " ");
    return 0;
}

31
lOMoARcPSD|34549134

OUTPUT

LEARNING OUTCOME

 What is Deterministic Finite Automaton


 Formal definition of Deterministic Finite Automaton
 What is acceptance
 Acceptance by Deterministic Finite Automaton
 Check if a string is accepted or rejected by a DFA

32
lOMoARcPSD|34549134

EXPERIMENT - 5
AIM
Write a program to implement recursive descent parser.

THEORY
Recursive descent is a top-down parsing technique that constructs the parse tree from the top
and the input is read from left to right. It uses procedures for every terminal and nonterminal
entity. This parsing technique recursively parses the input to make a parse tree, which may or
may not require backtracking. But the grammar associated with it (if not left factored) cannot
avoid back-tracking. A form of recursive-descent parsing that does not require any back-
tracking is known as predictive parsing.

Consider the grammar used before for simple arithmetic expressions:


P ---> E
E ---> E + T | E - T | T
T ---> T * S | T / S | S
S ---> F ^ S | F
F ---> ( E ) | char

The above grammar won't work for recursive descent because of the left recursion in the
second and third rules. (The recursive function for E would immediately call E recursively,
resulting in an indefinite recursive regression.)

In order to eliminate left recursion, one simple method is to introduce new notation: curly
brackets, where {xx} means "zero or more repetitions of xx", and parentheses () used for
grouping, along with the or-symbol: |. Because of the many metasymbols, it is a good idea to
enclose all terminals in single quotes. Also put a '$' at the end. The resulting grammar looks as
follows :

P ---> E '$'
E ---> T {('+'|'-') T}
T ---> S {('*'|'/') S}

33
lOMoARcPSD|34549134

S ---> F '^' S | F
F ---> '(' E ')' | char

Now the grammar is suitable for creation of a recursive descent parser. Notice that this is a
different grammar that describes the same language, that is the same sentences or strings of
terminal symbols. A given sentence will have a similar parse tree to one given by the previous
grammar, but not necessarily the same parse tree.

One could alter the first grammar in other ways to make it work for recursive descent. For
example, one could write the rule for E as:
E ---> T '+' E | T

CODE

#include<stdio.h>
#include<conio.h>
#include<string.h>

char input[100];
int i,l;

void main()
{
printf("\nRecursive descent parsing for the grammar shown below:\n"); printf("\
nE->TE'\nE'->+TE'/@\nT->FT'\nT'->*FT'/@\nF->(E)/ID\n"); printf("\nEnter
the string to be checked:");
gets(input);

if(E())
{
if(input[i+1]=='\0') printf("\nString
is accepted");
else
printf("\nString is not accepted");
}

else
printf("\nString not accepted");

34
lOMoARcPSD|34549134

getch();
}

int T(void);
int EP(void);

/* E -> T E' : returns 1 when a T followed by an E' is recognised at the
   cursor, 0 otherwise. */
int E(void)
{
    return T() && EP();
}

int T(void);

/* E' -> + T E' | epsilon : when the cursor is on '+', consumes it and
   requires a T followed by another E'; otherwise matches the empty string
   and returns 1 without consuming anything. */
int EP(void)
{
    if (input[i] != '+')
        return 1;

    i++;
    return T() && EP();
}

int F(void);
int TP(void);

/* T -> F T' : returns 1 when an F followed by a T' is recognised at the
   cursor, 0 otherwise. */
int T(void)
{
    return F() && TP();
}

int F(void);

/* T' -> * F T' | epsilon : when the cursor is on '*', consumes it and
   requires an F followed by another T'; otherwise matches the empty string
   and returns 1 without consuming anything. */
int TP(void)
{
    if (input[i] != '*')
        return 1;

    i++;
    return F() && TP();
}

int E(void);

/* F -> ( E ) | ID : recognises a parenthesised expression or a single
   ASCII letter at the cursor. Returns 1 and advances the cursor on
   success, 0 otherwise. */
int F(void)
{
    if (input[i] == '(')
    {
        i++;
        if (!E())
            return 0;
        if (input[i] != ')')
            return 0;
        i++;
        return 1;
    }

    if ((input[i] >= 'a' && input[i] <= 'z') || (input[i] >= 'A' && input[i] <= 'Z'))
    {
        i++;
        return 1;
    }

    return 0;
}

OUTPUT

LEARNING OUTCOME

 Recursive descent is a top-down parsing technique


 The input is read from left to right
 Implementation of Recursive Descent Parser in C++

37
lOMoARcPSD|34549134

EXPERIMENT - 7

AIM
Program to eliminate left factoring in the given grammar

THEORY
A grammar is said to be left factored when it is of the form: –

A -> αβ1 | αβ2 | αβ3 | …… | αβn | γ
i.e the productions start with the same terminal (or set of terminals).

On seeing the input α we cannot immediately tell which production to choose to expand A.
Hence, left factoring is a grammar transformation that is useful for producing grammar suitable
for predictive or top-down parsing. When the choice between two alternative A-productions is
not clear, we may be able to rewrite the productions to defer the decision until enough of the
input has been seen to make the right choice.

For the grammar,

A -> αβ1 | αβ2 | αβ3 | …… | αβn | γ
The equivalent left factored grammar will be: –

A -> αA’ | γ
A’ -> β1 | β2 | β3 | …… | βn

CODE

#include <stdbool.h>
#include <stdio.h>

38
lOMoARcPSD|34549134

#include <string.h>

/* Right-hand side of one non-terminal: up to 10 alternatives of up to
   9 characters each; `length` is the number of alternatives in use. */
typedef struct production
{
    int length;
    char prod[10][10];
} production;

/* The whole grammar: one production slot per letter 'A'..'Z' (indexed by
   letter - 'A') and the number of non-terminals in use. */
typedef struct grammer
{
    int number;
    production nonT[26];
} grammer;

/* The single grammar instance every routine works on. */
grammer gram;

void printProd(production *);
void printGram();

/*
 * Returns the index of the first right-hand-side character of a production
 * such as "A -> aAb": the position just after the first '>' with any spaces
 * that follow skipped. When the string has no '>', the result is its length.
 */
int getF(char *str)
{
    const size_t len = strlen(str);
    size_t i = 1;

    while (i < len && str[i - 1] != '>') i++;   /* step past the arrow */
    while (i < len && str[i] == ' ') i++;       /* skip blanks after it */
    return (int)i;
}

39
lOMoARcPSD|34549134

/*
 * Parses one production line ("A -> aAb | aAab | e") into the grammar slot
 * of its head letter. Alternatives are separated by '|'; spaces are dropped.
 *
 * Lines whose first character is not 'A'..'Z' are ignored. At most 10
 * alternatives are stored and each is truncated to 9 characters, matching
 * the fixed size of production.prod.
 */
void setProd(char *str)
{
    if (str[0] < 'A' || str[0] > 'Z')
        return;   /* the head letter is used as an array index */

    production *produce = &gram.nonT[str[0] - 'A'];
    const size_t len = strlen(str);
    int prod = 0;
    size_t i = (size_t)getF(str);

    while (i < len && prod < 10)
    {
        int j = 0;
        for (; i < len && str[i] != '|'; i++)
            if (str[i] != ' ' && j < 9)
                produce->prod[prod][j++] = str[i];
        produce->prod[prod++][j] = '\0';
        i++;   /* step over the '|' separator */
    }

    produce->length = prod;
}

/*
 * Copies the tail of `src` that starts at index i + 1 (terminator included)
 * into `dest`. When that tail is empty, `dest` receives "e", the symbol this
 * program uses for the null production.
 */
void copy(char *src, char *dest, int i)
{
    strcpy(dest, src + i + 1);

    if (dest[0] == '\0')
    {
        dest[0] = 'e';
        dest[1] = '\0';
    }
}

/*
 * Performs one left-factoring step on `prod`.
 *
 * Position by position, the most frequent character among the alternatives
 * still in the candidate group is chosen; alternatives that do not carry it
 * leave the group for good. This repeats while at least two alternatives
 * agree, which yields a common prefix of length j. The grouped alternatives
 * are then replaced by a single "prefix + N", where N is the first unused
 * non-terminal letter, and their suffixes become the alternatives of N.
 *
 * Returns true when a factoring step was carried out. Returns false when no
 * two alternatives share a prefix, when no unused non-terminal is left, or
 * when the rewritten alternative would not fit into its 10-byte slot.
 */
bool factorProd(production *prod)
{
    bool prev[10] = {1, 1, 1, 1, 1, 1, 1, 1, 1, 1};   /* group membership */
    int c[128] = {0};                                  /* per-character tally */
    char ch = '\0';
    int j = 0, maxx;

    while (true)
    {
        maxx = 0;
        for (int i = 0; i < 10; i++)
            if (prev[i] && strlen(prod->prod[i]) > (size_t)j)
            {
                const unsigned char sym = (unsigned char)prod->prod[i][j];
                if (sym < 128)
                    c[sym]++;
            }

        for (int i = 0; i < 128; i++)
        {
            if (c[i] > maxx) maxx = c[i], ch = (char)i;
            c[i] = 0;
        }

        if (maxx < 2) break;

        /* Keep only alternatives that were already in the group AND match
           at this position; once dropped, an alternative must not rejoin. */
        for (int i = 0; i < 10; i++)
            prev[i] = prev[i] && (size_t)j < strlen(prod->prod[i]) && prod->prod[i][j] == ch;
        j++;
    }

    if (j == 0) return false;
    if (j > 8) return false;   /* prefix + new letter + NUL needs j + 2 bytes */

    int newNonT = 0, p_no = 0;
    while (newNonT < 26 && gram.nonT[newNonT].length != 0) newNonT++;
    if (newNonT == 26) return false;   /* every letter is already in use */

    production *newP = &gram.nonT[newNonT];

    for (int i = 0; i < 10; i++)
        if (prev[i] && prod->prod[i][0] != '\0')
        {
            copy(prod->prod[i], newP->prod[p_no], j - 1);   /* suffix after the prefix */
            prod->prod[i][j] = newNonT + 'A';
            prod->prod[i][j + 1] = '\0';
            p_no++;
        }

    gram.number++;
    newP->length = p_no;

    /* Keep the first rewritten alternative and blank out its duplicates. */
    int i = 0, num = 0;
    while (i < 10 && prev[i] == false) i++;
    for (i++; i < 10; i++)
        if (prev[i] && prod->prod[i][0] != '\0')
            prod->prod[i][0] = '\0', num++;

    prod->length -= num;
    return true;
}

/*
 * Left-factors every non-terminal of the grammar, printing the grammar after
 * each successful step. Stops with a message once all 26 non-terminal
 * letters are in use, because every step needs a fresh one.
 */
void leftFactor()
{
    for (int i = 0; i < 26; i++)
    {
        if (gram.number >= 26)
        {
            printf("No new production can be formed.\n");
            return;
        }

        if (gram.nonT[i].length < 2)
            continue;

        /* Re-check the capacity before every step: each one adds a letter. */
        while (gram.number < 26 && factorProd(&gram.nonT[i]))
        {
            printf("\n\nAfter left factoring production from %c:", i + 'A');
            printGram();
        }
    }
}

/*
 * Reads the number of productions and then one production per line, prints
 * the grammar, left-factors it and prints the final result.
 */
int main()
{
    char str[100];

    printf("Enter the number of productions: ");
    if (scanf("%d", &(gram.number)) != 1 || gram.number < 0 || gram.number > 26)
    {
        printf("\nThe number of productions must be between 0 and 26.\n");
        return 1;
    }

    printf("\nEnter productions in the format:\n");
    printf("A -> aAb | aAab | e\n");
    printf("(Spaces will be skipped, e is null)\n");

    char c;
    for (int i = 0; i < gram.number; i++)
    {
        scanf("%c", &c);                   /* consume the pending newline */
        if (scanf("%99[^\n]", str) == 1)   /* bounded by sizeof str */
            setProd(str);
    }

    printf("\nInput Grammer:");
    printGram();

    leftFactor();

    printf("\n\nFinally..................");
    printGram();
    printf("\n");
    return 0;
}

/* Prints the alternatives of one production separated by "| ". The first
   slot is always printed; later slots are printed only when non-empty. */
void printProd(production *prod)
{
    int slot = 0;

    printf("%s ", prod->prod[slot]);
    while (++slot < 10)
    {
        if (prod->prod[slot][0] == '\0')
            continue;
        printf("| %s ", prod->prod[slot]);
    }
}

/* Prints every non-terminal that has at least one alternative, one per
   line, in the form "X -> alt | alt ...". */
void printGram()
{
    int letter;

    for (letter = 0; letter < 26; letter++)
    {
        if (gram.nonT[letter].length == 0)
            continue;

        printf("\n%c -> ", letter + 'A');
        printProd(&gram.nonT[letter]);
    }
}

45
lOMoARcPSD|34549134

OUTPUT

LEARNING OUTCOME

 We have learned about Factoring and what is “Left Factoring”.


 We have learned how to identify a “Left Factoring” in a given language and write a
program for the same.

46
lOMoARcPSD|34549134

EXPERIMENT - 8

AIM
Program to eliminate left recursion in a grammar.

THEORY
The production is left-recursive if the leftmost symbol on the right side is the same as the non
terminal on the left side. For example, expr → expr + term.

For each rule which contains a left-recursive option,


A -> Aα | β
introduce a new nonterminal A' and rewrite the rule as
A -> βA'
A' -> αA' | epsilon

Thus the production:


E --> E + T | T

is left-recursive with "E" playing the role of "A", "+ T" playing the role of α, and "T" playing
the role of β. Introducing the new nonterminal E', the production can be replaced by:

E --> T E'
E' --> ε | + T E'

For example, the left-recursive grammar is:


E → E+T | T
T → T*F | F
F → (E) | id

We can redefine E and T without left-recursion as:


E → TE`
E` → + TE` | ε
T → FT`

47
lOMoARcPSD|34549134

T` → * FT` | ε
F → (E) | id

CODE

#include <bits/stdc++.h>
#include <iostream>
#define pb push_back
using namespace std;

// prod[x]: alternatives entered for non-terminal 'A' + x.
// ans[x]:  rewritten alternatives for 'A' + x; ans[x + 26] holds those of
//          the primed non-terminal introduced for it.
set<string> prod[26], ans[52];

// used[x] is set when a production for 'A' + x has been entered.
bool used[26];

// Maps a non-terminal letter to its table index ('A' -> 0, 'B' -> 1, ...).
int h(char ch)
{
    const int base = 'A';
    return ch - base;
}

// Inverse of h(): maps a table index back to its letter (0 -> 'A').
char rh(int x)
{
    const int code = 'A' + x;
    return code;
}

// A symbol counts as a terminal unless it is '^' or an upper-case letter
// 'A'..'Z' (the characters this program uses for non-terminals).
bool isTerminal(char ch)
{
    const bool isCaret = (ch == '^');
    const bool isUpper = (ch >= 'A' && ch <= 'Z');
    return !(isCaret || isUpper);
}

// Rewrites the immediately left-recursive non-terminal with index `i`
// (A -> A alpha | beta) into right-recursive form:
//   A  -> beta A'          stored in ans[i]
//   A' -> alpha A' | ^     stored in ans[i + 26]
// `conv` holds the left-recursive alternatives (each starting with A itself)
// and `S` the remaining ones. "^" is the empty alternative this function
// adds to A'. A degenerate alternative consisting of A alone has an empty
// alpha and is dropped, since it would only yield A' -> A'.
void convert_to_right(int i, set<string> conv, set<string> S)
{
    const string primed = string(1, rh(i)) + "'";

    for (set<string>::iterator j = S.begin(); j != S.end(); j++)
    {
        ans[i].insert(*j + primed);
    }

    for (set<string>::iterator j = conv.begin(); j != conv.end(); j++)
    {
        const string alpha = (*j).substr(1);   // strip the leading A
        if (alpha.empty())
            continue;
        ans[i + 26].insert(alpha + primed);
    }

    ans[i + 26].insert("^");
}

// Reads productions of the form "A->alt|alt|..." (one whitespace-free token
// each), removes left recursion non-terminal by non-terminal in alphabetical
// order, and prints the resulting grammar.
int main()
{
    int m;
    cout<<"\nEnter number of productions: ";
    cin>>m;
    cout<<"\nEnter production rules:\n";

    string s;

    for(int n=0;n<m;n++)
    {
        if(!(cin>>s)) break;

        // The head letter is used as an array index, so it must be 'A'..'Z'.
        if(s[0]<'A'||s[0]>'Z')
        {
            cerr<<"Skipping production with invalid head: "<<s<<endl;
            continue;
        }

        int x=h(s[0]);
        used[x]=1;

        size_t j=1;
        while(j<s.size()&&(s[j]=='-'||s[j]=='>')) j++;
        while(j<s.size())
        {
            string v="";
            while(j<s.size()&&s[j]==' ') j++;
            while(j<s.size()&&s[j]!='|')
            {
                v+=s[j];
                j++;
            }
            prod[x].insert(v);
            j++;   // step over '|'
        }
    }

    for(int i=0;i<26;i++)
    {
        if(prod[i].empty()) continue;

        set<string> S=prod[i], conv, aux;
        set<string>::iterator it;

        // Alternatives that start with an earlier non-terminal get that
        // non-terminal's already rewritten alternatives substituted in.
        for(it=S.begin();it!=S.end();it++)
        {
            string v=*it;
            int j=h(v[0]);
            if(j>=0&&j<i)
            {
                for(set<string>::iterator k=ans[j].begin();k!=ans[j].end();k++)
                    aux.insert(*k+v.substr(1));
            }
            else
                aux.insert(v);
        }

        // Split into left-recursive alternatives and the rest.
        for(it=aux.begin();it!=aux.end();it++)
        {
            string v=*it;
            if(h(v[0])==i)
                conv.insert(v);
            else
                ans[i].insert(v);
        }

        if(conv.size()==0) continue;

        S=ans[i];
        ans[i].clear();
        convert_to_right(i, conv, S);
    }

    cout<<"Productions after removal of left Recursion\n";
    for(int i=0;i<26;i++)
    {
        if(ans[i].empty()) continue;
        cout<<rh(i)<<"-> ";

        for(set<string>::iterator j=ans[i].begin();j!=ans[i].end();j++)
        {
            if(j!=ans[i].begin()) cout<<" | ";
            cout<<*j;
        }
        cout<<endl;

        if(ans[i+26].empty())
            continue;

        cout<<rh(i)<<"'-> ";

        for(set<string>::iterator j=ans[i+26].begin();j!=ans[i+26].end();j++)
        {
            if(j!=ans[i+26].begin()) cout<<" | ";
            cout<<*j;
        }
        cout<<endl;
    }

    return 0;
}

OUTPUT

52
lOMoARcPSD|34549134

EXPERIMENT - 6

AIM
Write a program to compute First and Follow.

THEORY
First(α) is the set of terminals that begin the strings derived from α. Follow(A) is the set of
terminals that can appear immediately to the right of A in some sentential form. First and Follow
are used in the construction of the parsing table.

For example, if A → abc | def | ghi, then First(A) = { a, d, g }.

• To compute First:

  - If X is a terminal, then First(X) = {X}.
  - If X → ε is a production, add ε to First(X).
  - If X is a non-terminal and X → Y1 Y2 … Yk is a production, place z in First(X) if z
    is in First(Yi) for some i and ε is in all of First(Y1) … First(Yi-1). If ε is in
    First(Yj) for every j = 1 … k, also add ε to First(X).

• To compute Follow:

  - Place $ in Follow(S), where S is the start symbol and $ is the end-of-input marker.
  - If there is a production A → α B β, everything in First(β) except ε is placed in
    Follow(B).
  - If there is a production A → α B, or a production A → α B β where First(β)
    contains ε, everything in Follow(A) is placed in Follow(B).

So, now let's see the C code to compute First and Follow.

53
lOMoARcPSD|34549134

CODE
#include<stdio.h>
#include<ctype.h>
#include<string.h>

void followfirst(char, int, int);


void follow(char c);

void findfirst(char, int, int);


int count, n = 0;
char calc_first[10][100];

char calc_follow[10][100];
int m = 0;

char production[10][10];
char f[10], first[10];
int k;
char ck;
int e;

int main(int argc, char **argv)


{
int jm = 0;
int km = 0;
int i, choice;
char c, ch;
count = 8;

strcpy(production[0], "S=AR");
strcpy(production[1], "R=+AR");
strcpy(production[2], "R=#");

54
lOMoARcPSD|34549134

strcpy(production[3], "A=FY");
strcpy(production[4], "Y=*FY");
strcpy(production[5], "Y=#");
strcpy(production[6], "F=(S)");
strcpy(production[7], "F=i");

int kay;
char done[count];
int ptr = -1;

for(k = 0; k < count; k++) { for(kay =


0; kay < 100; kay++) {
calc_first[k][kay] = '!';
}
}
int point1 = 0, point2, xxx;

for(k = 0; k < count; k++)


{
c = production[k][0];
point2 = 0;
xxx = 0;

for(kay = 0; kay <= ptr; kay++)


if(c == done[kay])
xxx = 1;

if (xxx == 1)
continue;

findfirst(c, 0, 0);
ptr += 1;

done[ptr] = c;
printf("\n First(%c) = { ", c);
calc_first[point1][point2++] = c;

55
lOMoARcPSD|34549134

for(i = 0 + jm; i < n; i++) {


int lark = 0, chk = 0;

for(lark = 0; lark < point2; lark++) {

if (first[i] == calc_first[point1][lark])
{
chk = 1;
break;
}
}
if(chk == 0)
{
printf("%c, ", first[i]); calc_first[point1]
[point2++] = first[i];
}
}
printf("}");
jm = n;
point1++;
}
printf("\n\n");
printf(" \n\n");
char donee[count];
ptr = -1;

for(k = 0; k < count; k++) { for(kay =


0; kay < 100; kay++) {
calc_follow[k][kay] = '!';
}
}
point1 = 0;
int land = 0;
for(e = 0; e < count; e++)
{

56
lOMoARcPSD|34549134

ck = production[e][0];
point2 = 0;
xxx = 0;

for(kay = 0; kay <= ptr; kay++)


if(ck == donee[kay])
xxx = 1;

if (xxx == 1)
continue;
land += 1;

follow(ck);
ptr += 1;

donee[ptr] = ck;
printf(" Follow(%c) = { ", ck);
calc_follow[point1][point2++] = ck;

for(i = 0 + km; i < m; i++) {


int lark = 0, chk = 0;
for(lark = 0; lark < point2; lark++)
{
if (f[i] == calc_follow[point1][lark])
{
chk = 1;
break;
}
}
if(chk == 0)
{
printf("%c, ", f[i]); calc_follow[point1]
[point2++] = f[i];
}
}
printf(" }\n");

57
lOMoARcPSD|34549134

km = m;
point1++;
}
}
void follow(char c)
{
int i, j;
if(production[0][0] == c) {
f[m++] = '$';
}
for(i = 0; i < 10; i++)
{
for(j = 2;j < 10; j++)
{
if(production[i][j] == c)
{
if(production[i][j+1] != '\0')
{
followfirst(production[i][j+1], i, (j+2));
}

if(production[i][j+1]=='\0' && c!=production[i][0])


{
follow(production[i][0]);
}
}
}
}
}
void findfirst(char c, int q1, int q2)
{
int j;

58
lOMoARcPSD|34549134

if(!(isupper(c))) {
first[n++] = c;
}
for(j = 0; j < count; j++)
{
if(production[j][0] == c)
{
if(production[j][2] == '#')
{
if(production[q1][q2] == '\0')
first[n++] = '#';
else if(production[q1][q2] != '\0'
&& (q1 != 0 || q2 != 0))
{
findfirst(production[q1][q2], q1, (q2+1));
}
else
first[n++] = '#';
}
else if(!isupper(production[j][2]))
{
first[n++] = production[j][2];
}
else
{
findfirst(production[j][2], j, 3);
}
}
}
}

/*
 * Helper of follow(): appends to f[] the symbols that c can start with.
 *
 * c      - the symbol that comes right after the nonterminal whose Follow set
 *          is being computed.
 * c1, c2 - production[c1][c2] is the symbol after c in the same production;
 *          it is consulted when First(c) contains epsilon ('#').
 *
 * A symbol that is not an upper-case letter is appended directly. Otherwise
 * the First set recorded for c in calc_first[] is appended without its
 * epsilon; for an epsilon the search moves on to the next symbol of the
 * production, or to Follow of the left-hand side when the production ends.
 */
void followfirst(char c, int c1, int c2)
{
    const int width = (int)sizeof(calc_first[0]);
    int i, j;

    /* isupper() requires a value representable as unsigned char. */
    if (!isupper((unsigned char)c)) {
        f[m++] = c;
        return;
    }

    /* Locate the row that main() filled for this nonterminal. */
    for (i = 0; i < count; i++) {
        if (calc_first[i][0] == c)
            break;
    }

    /* No First set was recorded for c (it has no production of its own).
     * Scanning row `count` would read an unrelated or out-of-range row. */
    if (i == count)
        return;

    /* '!' marks the end of the recorded set; never run past the row. */
    for (j = 1; j < width && calc_first[i][j] != '!'; j++) {
        if (calc_first[i][j] != '#') {
            f[m++] = calc_first[i][j];
        } else if (production[c1][c2] == '\0') {
            follow(production[c1][0]);
        } else {
            followfirst(production[c1][c2], c1, c2 + 1);
        }
    }
}

60
lOMoARcPSD|34549134

OUTPUT

LEARNING OUTCOME

• We have learned and briefly grasped the concept of “First and Follow”.
• We have also learned how to compute the “First and Follow” sets for a given
  grammar.

61

You might also like