[CSE304] - Compiler
Construction
Amity University, Greater Noida
Practical File
Supervisor:
Ms Meenakshi
Department of Computer Science and Engineering
Avinash Prasad Sinha
A41105222211
B-Tech (Computer Science Engineering) Semester – VI
Table Of Contents
Serial Main Content
Number
1. Consider the following regular expressions: a) (0 + 1)* + 01 b) (ab*c + (def)+
+ a*d+e)+ c) ((a + b)(c + d))+ + abcd. Write separate programs for each of
the regular expressions mentioned above.
2. Design a Lexical analyser for identifying different types of token used in C
language.
3. Write a program which accepts a regular expression from the user and
generates a regular grammar which is equivalent to the R.E. entered by the user.
The grammar will be printed to a text file, with only one production rule in
each line. Also, make sure that all production rules are displayed in
compact form, e.g. the production rules S--> aB, S--> cd, S--> PQ should be
written as S--> aB | cd | PQ and not as three different production rules.
Also, there should not be any repetition of production rules.
4. Write a program to eliminate left recursion
5. Write a program for Recursive Descent Calculator.
6. Write a program that recognizes different types of English words
7. Consider the following grammar: S --> ABC A--> abA | ab B--> b | BC C-->
c | cC Following any suitable parsing technique (prefer top-down), design
a parser which accepts a string and tells whether the string is accepted by
above grammar or not.
8. Write a program which accepts a regular grammar with no left-recursion, and
no null-production rules, and then it accepts a string and reports
whether the string is accepted by the grammar or not.
9. Design a parser which accepts a mathematical expression (containing integers
only). If the expression is valid, then evaluate the expression else
report that the expression is invalid. [Note: Design first the Grammar and then
implement using Shift-Reduce parsing technique. Your program should
generate an output file clearly showing each step of parsing/evaluation of the
intermediate sub-expressions.]
10. Open-ended program: Design of various types of parsers
1. Consider the following regular expressions: a) (0 + 1)* + 01 b) (ab*c
+ (def)+ + a*d+e)+ c) ((a + b)(c + d))+ + abcd. write separate
programs for each of the regular expressions mentioned above.
a) (0 + 1)* + 01
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
/*
 * Decide whether `str` belongs to the language (0 + 1)* + 01.
 *
 * Returns true when the string consists solely of the characters '0' and
 * '1' (the empty string included), or when it is exactly "01".
 */
bool matches_a(const char *str) {
    /* (0 + 1)*: the leading run of binary digits must reach the terminator. */
    bool only_bits = (str[strspn(str, "01")] == '\0');

    /* Literal alternative "01". */
    bool is_zero_one = (strcmp(str, "01") == 0);

    return only_bits || is_zero_one;
}
/*
 * Reads one whitespace-delimited word from stdin and reports whether it
 * matches (0 + 1)* + 01.  Returns 1 if no word could be read.
 */
int main() {
    char input[100];
    printf("Enter a string to test against (0 + 1)* + 01: ");
    /* The field width keeps scanf inside the 100-byte buffer (99 chars + NUL). */
    if (scanf("%99s", input) != 1) {
        fprintf(stderr, "No input provided.\n");
        return 1;
    }
    if (matches_a(input)) {
        printf("The string matches the pattern.\n");
    } else {
        printf("The string does not match the pattern.\n");
    }
    return 0;
}
b) (ab*c + (def)+ + a*d+e)+
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
/*
 * Length of the single alternative of (ab*c | (def)+ | a*d+e) that starts at
 * `s`, or 0 when no alternative matches there.
 *
 * The three alternatives never compete for the same prefix, so no
 * backtracking is needed:
 *   - "def" must be taken as (def): reading it as d+e would leave an 'f',
 *     which cannot start any alternative;
 *   - after a single 'a', a 'b' or 'c' can only continue ab*c, while an
 *     'a' or 'd' can only continue a*d+e.
 */
static size_t match_b_item(const char *s) {
    /* (def)+ : further repetitions are absorbed by the enclosing '+'. */
    if (s[0] == 'd' && s[1] == 'e' && s[2] == 'f') {
        return 3;
    }

    /* ab*c */
    if (s[0] == 'a') {
        size_t k = 1;
        while (s[k] == 'b') {
            k++;
        }
        if (s[k] == 'c') {
            return k + 1;
        }
    }

    /* a*d+e : zero or more 'a', at least one 'd', then 'e'. */
    size_t k = 0;
    while (s[k] == 'a') {
        k++;
    }
    size_t first_d = k;
    while (s[k] == 'd') {
        k++;
    }
    if (k > first_d && s[k] == 'e') {
        return k + 1;
    }
    return 0;
}

/*
 * Decide whether `str` belongs to the language (ab*c + (def)+ + a*d+e)+,
 * i.e. one or more concatenated items, each being ab*c, def, or a*d+e.
 *
 * Returns false for the empty string.
 */
bool matches_b(const char *str) {
    if (*str == '\0') {
        return false; /* the outer '+' requires at least one item */
    }
    while (*str != '\0') {
        size_t used = match_b_item(str);
        if (used == 0) {
            return false;
        }
        str += used;
    }
    return true;
}
/*
 * Reads one whitespace-delimited word from stdin and reports whether it
 * matches (ab*c + (def)+ + a*d+e)+.  Returns 1 if no word could be read.
 */
int main() {
    char input[100];
    printf("Enter a string to test against (ab*c + (def)+ + a*d+e)+: ");
    /* The field width keeps scanf inside the 100-byte buffer (99 chars + NUL). */
    if (scanf("%99s", input) != 1) {
        fprintf(stderr, "No input provided.\n");
        return 1;
    }
    if (matches_b(input)) {
        printf("The string matches the pattern.\n");
    } else {
        printf("The string does not match the pattern.\n");
    }
    return 0;
}
c) ((a + b)(c + d))* + ab*c*d
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
/*
 * Decide whether `str` belongs to ((a + b)(c + d))* + ab*c*d.
 *
 * Returns true when the string is a (possibly empty) sequence of
 * two-character pairs whose first character is 'a' or 'b' and whose second
 * is 'c' or 'd', or when it is an 'a', any number of 'b', any number of 'c',
 * and a final 'd'.
 */
bool matches_c(const char *str) {
    size_t n = strlen(str);

    /* Alternative 1: ((a + b)(c + d))* -- even length, every pair well-formed. */
    bool pairs_ok = (n % 2 == 0);
    for (size_t k = 0; pairs_ok && k < n; k += 2) {
        bool first_ok = (str[k] == 'a' || str[k] == 'b');
        bool second_ok = (str[k + 1] == 'c' || str[k + 1] == 'd');
        pairs_ok = first_ok && second_ok;
    }
    if (pairs_ok) {
        return true;
    }

    /* Alternative 2: ab*c*d */
    if (str[0] != 'a') {
        return false;
    }
    const char *rest = str + 1;
    rest += strspn(rest, "b");
    rest += strspn(rest, "c");
    /* Exactly one character may remain, and it must be 'd'. */
    return rest[0] == 'd' && rest[1] == '\0';
}
/*
 * Reads one whitespace-delimited word from stdin and reports whether it
 * matches ((a + b)(c + d))* + ab*c*d.  Returns 1 if no word could be read.
 */
int main() {
    char input[100];
    printf("Enter a string to test against ((a + b)(c + d))* + ab*c*d: ");
    /* The field width keeps scanf inside the 100-byte buffer (99 chars + NUL). */
    if (scanf("%99s", input) != 1) {
        fprintf(stderr, "No input provided.\n");
        return 1;
    }
    if (matches_c(input)) {
        printf("The string matches the pattern.\n");
    } else {
        printf("The string does not match the pattern.\n");
    }
    return 0;
}
2. Design a Lexical analyzer for identifying different types of token used
in C language.
#include <stdio.h>
#include <ctype.h>
#include <string.h>
// Token types
enum {KEYWORD, IDENTIFIER, NUMBER, OPERATOR, PUNCTUATOR};

// C keywords (C99 set), so that e.g. "for" or "char" is not reported as an
// identifier.
const char *kws[] = {
    "auto", "break", "case", "char", "const", "continue", "default", "do",
    "double", "else", "enum", "extern", "float", "for", "goto", "if",
    "inline", "int", "long", "register", "restrict", "return", "short",
    "signed", "sizeof", "static", "struct", "switch", "typedef", "union",
    "unsigned", "void", "volatile", "while", "_Bool", "_Complex", "_Imaginary"
};

/*
 * Returns 1 if `s` is a C keyword listed in `kws`, 0 otherwise.
 * The comparison is exact and case-sensitive.
 */
int isKeyword(char *s) {
    /* Derive the count from the table so the two can never drift apart. */
    size_t count = sizeof kws / sizeof kws[0];
    for (size_t i = 0; i < count; i++) {
        if (strcmp(s, kws[i]) == 0) {
            return 1;
        }
    }
    return 0;
}
/*
 * Splits `src` into tokens and prints one line per token to stdout in the
 * form "<lexeme> (<CLASS>)", where CLASS is NUMBER, KEYWORD, IDENTIFIER,
 * OPERATOR or PUNCTUATOR.
 *
 * - A number is a digit followed by any run of digits and '.' characters.
 * - An identifier starts with a letter or '_' and continues with letters,
 *   digits or '_'; it is reported as KEYWORD when isKeyword() accepts it.
 * - Operators and punctuators are reported one character at a time.
 * - Whitespace and any unrecognised character are skipped silently.
 */
void analyze(char *src) {
    char token[32]; /* scratch copy used only for the keyword lookup */
    size_t len = strlen(src);
    size_t i = 0;

    while (i < len) {
        /* <ctype.h> functions require a value representable as unsigned char. */
        unsigned char c = (unsigned char)src[i];

        if (isspace(c)) {
            i++;
            continue;
        }

        if (isdigit(c)) {
            size_t start = i;
            while (i < len && (isdigit((unsigned char)src[i]) || src[i] == '.')) {
                i++;
            }
            /* Print straight from the source so long lexemes cannot overflow. */
            printf("%.*s (NUMBER)\n", (int)(i - start), src + start);
            continue;
        }

        if (isalpha(c) || c == '_') {
            size_t start = i;
            while (i < len && (isalnum((unsigned char)src[i]) || src[i] == '_')) {
                i++;
            }
            size_t n = i - start;
            int keyword = 0;
            /* A lexeme too long for the scratch buffer cannot be a keyword. */
            if (n < sizeof token) {
                memcpy(token, src + start, n);
                token[n] = '\0';
                keyword = isKeyword(token);
            }
            printf("%.*s (%s)\n", (int)n, src + start,
                   keyword ? "KEYWORD" : "IDENTIFIER");
            continue;
        }

        if (strchr("+-*/%=&|<>!", c)) {
            printf("%c (OPERATOR)\n", src[i++]);
            continue;
        }

        if (strchr(";,(){}[]", c)) {
            printf("%c (PUNCTUATOR)\n", src[i++]);
            continue;
        }

        i++; /* skip unknown characters */
    }
}
/* Runs the lexical analyser over a fixed sample line of C code. */
int main() {
    char sample[] = "int main() { int x = 5+3; return 0; }";

    analyze(sample);
    return 0;
}
3. Write a program which accepts a regular expression from the user
and generates a regular grammar which is equivalent to the R.E.
entered by the user. The grammar will be printed to a text file, with only
one production rule in each line. Also, make sure that all production
rules are displayed in compact form, e.g. the production rules S-->
aB, S--> cd, S--> PQ should be written as S--> aB | cd | PQ and not as
three different production rules. Also, there should not be any
repetition of production rules.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
/* Capacity of the `productions` table (one entry per left-hand side). */
#define MAX_PROD 100
/* Maximum number of alternatives stored for one left-hand side. */
#define MAX_RHS 50
/*
 * One grammar rule in compact form: a single-character left-hand side and
 * the list of its right-hand-side alternatives (each a NUL-terminated string
 * of at most 19 bytes).
 */
typedef struct {
char lhs;
char rhs[MAX_RHS][20];
int rhs_count; /* number of entries of `rhs` in use */
} Production;
/* Grammar under construction; entries [0, prod_count) are valid. */
Production productions[MAX_PROD];
int prod_count = 0;
/* non_terminals[c - 'A'] is non-zero once the letter c has been handed out. */
char non_terminals[26] = {0};
/* Start symbol of the generated grammar. */
char start_symbol = 'S';
void add_prod(char lhs, const char *rhs) {
for (int i = 0; i < prod_count; i++) {
if (productions[i].lhs == lhs) {
for (int j = 0; j < productions[i].rhs_count; j++)
if (strcmp(productions[i].rhs[j], rhs) == 0) return;
strcpy(productions[i].rhs[productions[i].rhs_count++], rhs);
return;
}
}
productions[prod_count].lhs = lhs;
strcpy(productions[prod_count].rhs[0], rhs);
productions[prod_count++].rhs_count = 1;
}
/*
 * Hands out the first upper-case letter that has not been used as a
 * non-terminal yet (and is not the start symbol), marking it as used.
 * Returns '\0' when all letters are taken.
 */
char new_nt() {
    for (int idx = 0; idx < 26; idx++) {
        char candidate = (char)('A' + idx);
        if (non_terminals[idx] || candidate == start_symbol) {
            continue;
        }
        non_terminals[idx] = 1;
        return candidate;
    }
    return '\0';
}
/*
 * Regular expression -> right-linear grammar.
 *
 * Every helper below has the same contract: generate productions such that
 * the non-terminal `from` derives  w to  for each word w of the given
 * sub-expression.  `to == '\0'` stands for "end of the word", in which case
 * `from` derives w itself.  Besides  A --> aB  and  A --> a  the generated
 * grammar uses unit productions  A --> B  and  A --> ε.
 *
 * Syntax understood: literal characters, grouping "( )", alternation "|",
 * and the postfix operators "*" and "+".  Upper-case letters in the input
 * are indistinguishable from non-terminals in the output file.
 */
static void rg_alt(const char *re, int s, int e, char from, char to);

/* Reports that the 26 available non-terminal letters are exhausted. */
static void rg_out_of_symbols(void) {
    fprintf(stderr, "Out of non-terminal symbols; grammar is incomplete.\n");
}

/* Adds  from --> to,  or  from --> ε  when `to` denotes the end of the word. */
static void rg_link(char from, char to) {
    if (to == '\0') {
        add_prod(from, "ε");
    } else if (from != to) { /* A --> A would be a useless production */
        char unit[2] = {to, '\0'};
        add_prod(from, unit);
    }
}

/* Index of the ')' matching the '(' at `open`, or e + 1 if it is never closed. */
static int rg_close(const char *re, int open, int e) {
    int depth = 0;
    for (int k = open; k <= e; k++) {
        if (re[k] == '(') {
            depth++;
        } else if (re[k] == ')' && --depth == 0) {
            return k;
        }
    }
    return e + 1;
}

/* Skips operators and closing parentheses that have nothing to apply to. */
static int rg_skip_stray(const char *re, int i, int e) {
    while (i <= e && (re[i] == '*' || re[i] == '+' || re[i] == ')')) {
        i++;
    }
    return i;
}

/* Atom: either the group contents re[s..e] or the single literal re[s]. */
static void rg_atom(const char *re, int s, int e, int is_group, char from, char to) {
    if (is_group) {
        rg_alt(re, s, e, from, to);
        return;
    }
    /* With to == '\0' this is the one-character string "c". */
    char rhs[3] = {re[s], to, '\0'};
    add_prod(from, rhs);
}

/* Factor: an atom with an optional closure ('*' wins over '+' if both occur). */
static void rg_factor(const char *re, int s, int e, int is_group,
                      int star, int plus, char from, char to) {
    if (!star && !plus) {
        rg_atom(re, s, e, is_group, from, to);
        return;
    }

    /* A fresh loop state keeps the repetition from leaking into `from`. */
    char loop = new_nt();
    if (loop == '\0') {
        rg_out_of_symbols();
        return;
    }

    if (star) {
        rg_link(from, loop);
        rg_atom(re, s, e, is_group, loop, loop);
        rg_link(loop, to);
    } else if (!is_group) {
        /* c+ :  from --> c L,  L --> c L,  L --> to */
        rg_atom(re, s, e, 0, from, loop);
        rg_atom(re, s, e, 0, loop, loop);
        rg_link(loop, to);
    } else {
        /* (r)+ : generate r once between a head and a tail state. */
        char tail = new_nt();
        if (tail == '\0') {
            rg_out_of_symbols();
            return;
        }
        rg_link(from, loop);
        rg_atom(re, s, e, 1, loop, tail);
        rg_link(tail, loop);
        rg_link(tail, to);
    }
}

/* Concatenation of factors in re[s..e]; an empty range derives ε. */
static void rg_seq(const char *re, int s, int e, char from, char to) {
    int i = rg_skip_stray(re, s, e);
    if (i > e) {
        rg_link(from, to);
        return;
    }

    char cur = from;
    while (i <= e) {
        int is_group = (re[i] == '(');
        int atom_s = i, atom_e = i, next = i + 1;
        if (is_group) {
            int close = rg_close(re, i, e);
            atom_s = i + 1;
            atom_e = close - 1;
            next = close + 1;
        }

        int star = 0, plus = 0;
        for (; next <= e && (re[next] == '*' || re[next] == '+'); next++) {
            if (re[next] == '*') {
                star = 1;
            } else {
                plus = 1;
            }
        }
        next = rg_skip_stray(re, next, e);

        /* The last factor continues to `to`; earlier ones need a fresh state. */
        char dest = to;
        if (next <= e) {
            dest = new_nt();
            if (dest == '\0') {
                rg_out_of_symbols();
                return;
            }
        }
        rg_factor(re, atom_s, atom_e, is_group, star, plus, cur, dest);
        cur = dest;
        i = next;
    }
}

/* Alternation: splits re[s..e] at every top-level '|'. */
static void rg_alt(const char *re, int s, int e, char from, char to) {
    int depth = 0;
    int start = s;
    for (int k = s; k <= e; k++) {
        if (re[k] == '(') {
            depth++;
        } else if (re[k] == ')') {
            if (depth > 0) {
                depth--;
            }
        } else if (re[k] == '|' && depth == 0) {
            rg_seq(re, start, k - 1, from, to);
            start = k + 1;
        }
    }
    rg_seq(re, start, e, from, to);
}

/*
 * Generates, via add_prod(), the productions of a right-linear grammar for
 * the regular expression re[s..e] with `lhs` as its start non-terminal.
 * Fresh non-terminals are obtained from new_nt().  Nothing is generated
 * when the range is empty (s > e).
 */
void re_to_grammar(const char *re, int s, int e, char lhs) {
    if (s > e) {
        return;
    }
    rg_alt(re, s, e, lhs, '\0');
}
void write_grammar(const char *fn) {
FILE *f = fopen(fn, "w");
if (!f) { printf("Error opening file!\n"); return; }
for (int i = 0; i < prod_count; i++) {
fprintf(f, "%c --> ", productions[i].lhs);
for (int j = 0; j < productions[i].rhs_count; j++)
fprintf(f, "%s%s", j ? " | " : "", productions[i].rhs[j]);
fprintf(f, "\n");
}
fclose(f);
printf("Grammar written to %s\n", fn);
}
/*
 * Reads a regular expression (one whitespace-delimited word), converts it to
 * a grammar rooted at `start_symbol`, and writes the result to grammar.txt.
 * Returns 1 if no expression could be read.
 */
int main() {
    char re[100];
    printf("Enter a regular expression: ");
    /* The field width keeps scanf inside the 100-byte buffer (99 chars + NUL). */
    if (scanf("%99s", re) != 1) {
        fprintf(stderr, "No regular expression provided.\n");
        return 1;
    }
    non_terminals[start_symbol - 'A'] = 1;
    re_to_grammar(re, 0, (int)strlen(re) - 1, start_symbol);
    write_grammar("grammar.txt");
    return 0;
}
4. Write a program to eliminate left recursion
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
/* Capacity of the `productions` table (one entry per left-hand side). */
#define MAX_PROD 20
/* Maximum number of alternatives stored for one left-hand side. */
#define MAX_RHS 10
/* Size in bytes of one stored alternative, terminator included. */
#define MAX_SYM 20
/*
 * One grammar rule in compact form: a single-character left-hand side and
 * the list of its right-hand-side alternatives.
 */
typedef struct {
char lhs;
char rhs[MAX_RHS][MAX_SYM];
int rhs_count; /* number of entries of `rhs` in use */
} Production;
/* The grammar being transformed; entries [0, prod_count) are valid. */
Production productions[MAX_PROD];
int prod_count = 0;
void add_production(char lhs, const char *rhs) {
for (int i = 0; i < prod_count; i++) {
if (productions[i].lhs == lhs) {
strcpy(productions[i].rhs[productions[i].rhs_count], rhs);
productions[i].rhs_count++;
return;
}
}
productions[prod_count].lhs = lhs;
strcpy(productions[prod_count].rhs[0], rhs);
productions[prod_count].rhs_count = 1;
prod_count++;
}
void print_grammar() {
printf("\nGrammar:\n");
for (int i = 0; i < prod_count; i++) {
printf("%c -> ", productions[i].lhs);
for (int j = 0; j < productions[i].rhs_count; j++) {
if (j > 0) printf(" | ");
printf("%s", productions[i].rhs[j]);
}
printf("\n");
}
}
/*
 * Returns non-zero when `c` is an upper-case letter, which this program
 * treats as a non-terminal, and 0 otherwise.
 */
int is_non_terminal(char c) {
    /* isupper() is only defined for unsigned-char values (or EOF); the bytes
       of "ε" are negative where plain char is signed. */
    return isupper((unsigned char)c);
}
/*
 * Returns an upper-case letter that occurs nowhere in the grammar (neither
 * as a left-hand side nor inside any alternative), or '\0' if none is free.
 */
static char fresh_non_terminal(void) {
    for (char c = 'A'; c <= 'Z'; c++) {
        int used = 0;
        for (int i = 0; i < prod_count && !used; i++) {
            if (productions[i].lhs == c) {
                used = 1;
            }
            for (int j = 0; j < productions[i].rhs_count && !used; j++) {
                if (strchr(productions[i].rhs[j], c) != NULL) {
                    used = 1;
                }
            }
        }
        if (!used) {
            return c;
        }
    }
    return '\0';
}

/*
 * Removes immediate left recursion from every rule of the grammar.
 *
 * A rule   A -> A a1 | ... | A am | b1 | ... | bn   is rewritten as
 *          A  -> b1 A' | ... | bn A'
 *          A' -> a1 A' | ... | am A' | ε
 * where A' is a letter that is not yet used anywhere in the grammar.
 *
 * Special cases: an alternative "A -> A" is simply dropped, a "ε" beta
 * becomes "A'" rather than "εA'", and a rule whose alternatives are all
 * left-recursive is left untouched with a warning (it derives no string).
 */
void eliminate_left_recursion() {
    /* Rules appended for the new non-terminals never need processing. */
    int original_count = prod_count;

    for (int i = 0; i < original_count; i++) {
        char A = productions[i].lhs;
        char alpha[MAX_RHS][MAX_SYM], beta[MAX_RHS][MAX_SYM];
        int alpha_count = 0, beta_count = 0;
        int has_left_recursion = 0;

        /* Split the alternatives into A-alpha (recursive) and beta (others). */
        for (int j = 0; j < productions[i].rhs_count; j++) {
            const char *rhs = productions[i].rhs[j];
            if (rhs[0] == A) {
                has_left_recursion = 1;
                if (rhs[1] != '\0') { /* "A -> A" contributes nothing */
                    strcpy(alpha[alpha_count++], rhs + 1);
                }
            } else {
                strcpy(beta[beta_count++], rhs);
            }
        }
        if (!has_left_recursion) {
            continue;
        }
        if (beta_count == 0) {
            fprintf(stderr, "Rule %c has only left-recursive alternatives; left unchanged.\n", A);
            continue;
        }

        /* Choose the new symbol while the original alternatives are still visible. */
        char A_prime = '\0';
        if (alpha_count > 0) {
            A_prime = fresh_non_terminal();
            if (A_prime == '\0') {
                fprintf(stderr, "No unused non-terminal left for rule %c; left unchanged.\n", A);
                continue;
            }
        }

        /* Rebuild the rule for A from scratch. */
        productions[i].rhs_count = 0;

        if (alpha_count == 0) {
            /* Only "A -> A" was recursive: keep the betas as they are. */
            for (int j = 0; j < beta_count; j++) {
                add_production(A, beta[j]);
            }
            continue;
        }

        for (int j = 0; j < beta_count; j++) {
            char new_rhs[MAX_SYM];
            int n;
            if (strcmp(beta[j], "ε") == 0) {
                n = snprintf(new_rhs, sizeof new_rhs, "%c", A_prime);
            } else {
                n = snprintf(new_rhs, sizeof new_rhs, "%s%c", beta[j], A_prime);
            }
            if (n < 0 || n >= (int)sizeof new_rhs) {
                fprintf(stderr, "Alternative too long after rewriting: %s\n", beta[j]);
                continue;
            }
            add_production(A, new_rhs);
        }

        for (int j = 0; j < alpha_count; j++) {
            char new_rhs[MAX_SYM];
            int n = snprintf(new_rhs, sizeof new_rhs, "%s%c", alpha[j], A_prime);
            if (n < 0 || n >= (int)sizeof new_rhs) {
                fprintf(stderr, "Alternative too long after rewriting: %s\n", alpha[j]);
                continue;
            }
            add_production(A_prime, new_rhs);
        }
        add_production(A_prime, "ε"); /* A' may stop the repetition */
    }
}
/*
 * Reads productions from stdin, one per line in the form "A->aB|b", until an
 * empty line or end of input.  The first character of a line is the
 * left-hand side; everything after the first '>' is split at '|' into
 * alternatives, which are stored with add_production().  Lines without a
 * '>' are reported on stderr and skipped.
 */
void input_grammar() {
    printf("Enter productions (one per line, empty line to stop):\n");
    printf("Format: A->aB|b\n");
    char line[100];
    /* Stopping on NULL prevents an endless loop at end of input. */
    while (fgets(line, sizeof(line), stdin) != NULL) {
        if (line[0] == '\n') {
            break;
        }
        char lhs = line[0];
        char *arrow = strchr(line, '>');
        if (arrow == NULL) {
            fprintf(stderr, "Ignoring malformed production (expected A->...): %s", line);
            continue;
        }
        /* Split the right-hand side into its alternatives. */
        char *token = strtok(arrow + 1, "|\n");
        while (token != NULL) {
            add_production(lhs, token);
            token = strtok(NULL, "|\n");
        }
    }
}
/*
 * Reads a grammar, prints it, removes immediate left recursion, and prints
 * the transformed grammar.
 */
int main() {
    input_grammar();
    print_grammar();

    eliminate_left_recursion();

    fputs("\nGrammar after eliminating left recursion:\n", stdout);
    print_grammar();
    return 0;
}
5. Write a program for Recursive Descent Calculator.
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
/* Text being parsed and the index of the next unread character. */
char *input;
int position = 0;

/* Reports a syntax error at the current position on stderr and exits with status 1. */
void error() {
    fprintf(stderr, "Syntax error at position %d\n", position);
    exit(1);
}

/* Returns the next unread character without consuming it. */
char peek() {
    return input[position];
}

/* Returns the next unread character and advances past it. */
char consume() {
    return input[position++];
}

/* Returns non-zero when the whole input has been consumed. */
int is_at_end() {
    return input[position] == '\0';
}

/* Advances past any whitespace at the current position. */
void skip_whitespace() {
    /* isspace() is only defined for unsigned-char values (or EOF). */
    while (isspace((unsigned char)peek())) consume();
}
/*
 * Consumes the run of decimal digits at the current position and returns
 * its value (0 if there is no digit).
 */
int number() {
    int value = 0;
    for (;;) {
        if (!isdigit(peek())) {
            break;
        }
        int digit = consume() - '0';
        value = value * 10 + digit;
    }
    return value;
}
int expression();

/*
 * factor := number | '(' expression ')'
 * Returns the value of the factor; calls error() on anything else.
 */
int factor() {
    skip_whitespace();

    if (isdigit(peek())) {
        return number();
    }
    if (peek() != '(') {
        error();
        return 0;
    }

    consume(); /* '(' */
    int inner = expression();
    skip_whitespace();
    if (peek() != ')') {
        error();
    }
    consume(); /* ')' */
    return inner;
}
/*
 * term := factor { ('*' | '/') factor }
 * Evaluates left to right with integer arithmetic; a zero divisor prints
 * "Division by zero" on stderr and exits with status 1.
 */
int term() {
    int acc = factor();

    /* skip_whitespace() runs once before the first check and after every operand. */
    for (skip_whitespace(); peek() == '*' || peek() == '/'; skip_whitespace()) {
        char op = consume();
        int rhs = factor();
        if (op == '*') {
            acc *= rhs;
            continue;
        }
        if (rhs == 0) {
            fprintf(stderr, "Division by zero\n");
            exit(1);
        }
        acc /= rhs;
    }
    return acc;
}
/*
 * expression := term { ('+' | '-') term }
 * Evaluates left to right and returns the result.
 */
int expression() {
    int total = term();

    /* skip_whitespace() runs once before the first check and after every operand. */
    for (skip_whitespace(); peek() == '+' || peek() == '-'; skip_whitespace()) {
        char op = consume();
        int rhs = term();
        total = (op == '+') ? total + rhs : total - rhs;
    }
    return total;
}
/*
 * Reads one line, evaluates it as an arithmetic expression, and prints the
 * result.  Trailing text after a complete expression is a syntax error.
 * Returns 1 if no line could be read.
 */
int main() {
    char buffer[256];
    printf("Enter an arithmetic expression: ");
    /* Without this check a failed read would parse an uninitialised buffer. */
    if (fgets(buffer, sizeof(buffer), stdin) == NULL) {
        fprintf(stderr, "No input provided\n");
        return 1;
    }
    input = buffer;
    int result = expression();
    if (!is_at_end()) {
        error();
    }
    printf("Result: %d\n", result);
    return 0;
}
6. Write a program that recognizes different types of English words
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdbool.h>
#define MAX_WORD_LENGTH 50

// Structure to hold word categories
typedef struct {
    char word[MAX_WORD_LENGTH];
    char category[20];
} WordEntry;

// Sample dictionary of words with their categories
WordEntry dictionary[] = {
    {"run", "verb"}, {"jump", "verb"}, {"write", "verb"}, {"read", "verb"},
    {"dog", "noun"}, {"cat", "noun"}, {"book", "noun"}, {"computer", "noun"},
    {"happy", "adjective"}, {"sad", "adjective"}, {"quick", "adjective"},
    {"slow", "adjective"},
    {"quickly", "adverb"}, {"slowly", "adverb"}, {"happily", "adverb"},
    {"sadly", "adverb"},
    {"the", "article"}, {"a", "article"}, {"an", "article"},
    {"in", "preposition"}, {"on", "preposition"}, {"at", "preposition"},
    {"with", "preposition"}
};
int dictionary_size = sizeof(dictionary) / sizeof(dictionary[0]);

/* True when the `len`-character string `word` ends with `suffix`. */
static bool has_suffix(const char *word, size_t len, const char *suffix) {
    size_t n = strlen(suffix);
    /* The length test keeps `word + len - n` inside the string. */
    return len >= n && strcmp(word + len - n, suffix) == 0;
}

/*
 * Returns the category of `word` as a static string.
 *
 * The word is first looked up (exact match) in `dictionary`.  Otherwise the
 * category is guessed from the ending, in this order:
 *   noun       -ion, -ity, -ment, -ness
 *   verb       -ed, -ing
 *   adjective  -ful, -able, -ible, -ic
 *   adverb     -ly (words of at least four letters)
 * "unknown" is returned when nothing applies.
 */
const char* get_word_category(const char *word) {
    for (int i = 0; i < dictionary_size; i++) {
        if (strcmp(dictionary[i].word, word) == 0) {
            return dictionary[i].category;
        }
    }

    size_t len = strlen(word);

    if (has_suffix(word, len, "ion") || has_suffix(word, len, "ity") ||
        has_suffix(word, len, "ment") || has_suffix(word, len, "ness")) {
        return "noun";
    }
    if (has_suffix(word, len, "ed") || has_suffix(word, len, "ing")) {
        return "verb";
    }
    if (has_suffix(word, len, "ful") || has_suffix(word, len, "able") ||
        has_suffix(word, len, "ible") || has_suffix(word, len, "ic")) {
        return "adjective";
    }
    if (len >= 4 && has_suffix(word, len, "ly")) {
        return "adverb";
    }
    return "unknown";
}
// Returns true when `c` can be part of a word: a letter or an apostrophe.
bool is_word_char(char c) {
    /* isalpha() is only defined for unsigned-char values (or EOF). */
    return isalpha((unsigned char)c) || c == '\'';
}
/*
 * Extracts every word from `sentence`, lower-cases it, and prints it with
 * the category returned by get_word_category(), one word per line under a
 * short heading.  Words are maximal runs of characters accepted by
 * is_word_char(); a word longer than MAX_WORD_LENGTH - 1 characters is
 * classified by its first MAX_WORD_LENGTH - 1 characters.
 */
void classify_words_in_sentence(const char *sentence) {
    char word[MAX_WORD_LENGTH];
    size_t pos = 0;

    printf("Word Classification:\n");
    printf("-------------------\n");

    while (sentence[pos] != '\0') {
        /* Skip separators. */
        while (sentence[pos] != '\0' && !is_word_char(sentence[pos])) {
            pos++;
        }

        /* Copy the word, never writing past the buffer. */
        size_t n = 0;
        while (sentence[pos] != '\0' && is_word_char(sentence[pos])) {
            if (n < MAX_WORD_LENGTH - 1) {
                word[n++] = (char)tolower((unsigned char)sentence[pos]);
            }
            pos++;
        }
        word[n] = '\0';

        if (n > 0) {
            printf("%-15s : %s\n", word, get_word_category(word));
        }
    }
}
/*
 * Reads one line from stdin and prints the classification of each word in
 * it.  Returns 1 if no line could be read.
 */
int main() {
    char sentence[256];
    printf("Enter an English sentence: ");
    /* Without this check a failed read would classify an uninitialised buffer. */
    if (fgets(sentence, sizeof(sentence), stdin) == NULL) {
        fprintf(stderr, "No input provided\n");
        return 1;
    }
    // Remove newline character if present
    size_t len = strlen(sentence);
    if (len > 0 && sentence[len-1] == '\n') {
        sentence[len-1] = '\0';
    }
    classify_words_in_sentence(sentence);
    return 0;
}
7. Consider the following grammar: S --> ABC A--> abA | ab B--> b |
BC C--> c | cC Following any suitable parsing technique (prefer top-
down), design a parser which accepts a string and tells whether the
string is accepted by above grammar or not.
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
/* String being parsed and the index of the next unread character. */
const char *input;
int position = 0;

// Function prototypes
bool parse_S();
bool parse_A();
bool parse_B();
bool parse_C();

// Helper functions

/* Returns the next unread character without consuming it. */
char peek() {
    return input[position];
}

/* Advances past the next character. */
void consume() {
    position++;
}

/* Consumes the next character if it equals `expected`; reports whether it did. */
bool match(char expected) {
    if (peek() == expected) {
        consume();
        return true;
    }
    return false;
}

// Grammar rule implementations

/* S -> A B C */
bool parse_S() {
    return parse_A() && parse_B() && parse_C();
}

/* A -> abA | ab : one or more repetitions of "ab". */
bool parse_A() {
    if (match('a') && match('b')) {
        if (peek() == 'a') {
            return parse_A(); // abA case
        }
        return true; // ab case
    }
    return false;
}

/*
 * B -> b | BC
 *
 * The rule is left-recursive, so it cannot be coded as written.  It is
 * equivalent to  B -> b B',  B' -> C B' | ε,  i.e. a 'b' followed by any
 * number of 'c'.  Because S -> A B C needs a C after B, a 'c' is taken here
 * only while another 'c' follows, which leaves the last one for parse_C().
 */
bool parse_B() {
    if (!match('b')) {
        return false;
    }
    while (peek() == 'c' && input[position + 1] == 'c') {
        consume();
    }
    return true;
}

/* C -> c | cC : one or more 'c'. */
bool parse_C() {
    if (match('c')) {
        if (peek() == 'c') {
            return parse_C(); // cC case
        }
        return true; // c case
    }
    return false;
}
/*
 * Reads one whitespace-delimited word and reports whether the grammar
 * S -> ABC accepts it, i.e. parse_S() succeeds and consumes the whole word.
 * Returns 1 if no word could be read.
 */
int main() {
    char buffer[100];
    printf("Enter a string to parse: ");
    /* The field width keeps scanf inside the 100-byte buffer (99 chars + NUL). */
    if (scanf("%99s", buffer) != 1) {
        fprintf(stderr, "No input provided.\n");
        return 1;
    }
    input = buffer;
    if (parse_S() && (size_t)position == strlen(input)) {
        printf("String is accepted by the grammar.\n");
    } else {
        printf("String is NOT accepted by the grammar.\n");
    }
    return 0;
}
8. Write a program which accepts a regular grammar with no left-
recursion, and no null-production rules, and then it accepts a string
and reports whether the string is accepted by the grammar or not.
#include <stdio.h>
#include <string.h>
#include <stdbool.h>
#include <ctype.h>
/* Capacity of the `productions` table (one entry per left-hand side). */
#define MAX_PROD 20
/* Maximum number of alternatives stored for one left-hand side. */
#define MAX_RHS 10
/* Size in bytes of one stored alternative, terminator included. */
#define MAX_SYM 20
/*
 * One grammar rule in compact form: a single-character left-hand side and
 * the list of its right-hand-side alternatives.
 */
typedef struct {
char lhs;
char rhs[MAX_RHS][MAX_SYM];
int rhs_count; /* number of entries of `rhs` in use */
} Production;
/* The grammar entered by the user; entries [0, prod_count) are valid. */
Production productions[MAX_PROD];
int prod_count = 0;
/* Left-hand side of the first production entered (set by input_grammar). */
char start_symbol;
void add_production(char lhs, const char *rhs) {
for (int i = 0; i < prod_count; i++) {
if (productions[i].lhs == lhs) {
strcpy(productions[i].rhs[productions[i].rhs_count], rhs);
productions[i].rhs_count++;
return;
}
}
productions[prod_count].lhs = lhs;
strcpy(productions[prod_count].rhs[0], rhs);
productions[prod_count].rhs_count = 1;
prod_count++;
}
/*
 * Reads productions from stdin, one per line in the form "A->aB|a", until an
 * empty line or end of input.  The first character of a line is the
 * left-hand side; the left-hand side of the first well-formed line becomes
 * `start_symbol`.  Everything after the first '>' is split at '|' into
 * alternatives, which are stored with add_production().  Lines without a
 * '>' are reported on stderr and skipped.
 */
void input_grammar() {
    printf("Enter productions (one per line, empty line to stop):\n");
    printf("Format: A->aB|a\n");
    char line[100];
    /* Stopping on NULL prevents an endless loop at end of input. */
    while (fgets(line, sizeof(line), stdin) != NULL) {
        if (line[0] == '\n') {
            break;
        }
        char lhs = line[0];
        char *arrow = strchr(line, '>');
        if (arrow == NULL) {
            fprintf(stderr, "Ignoring malformed production (expected A->...): %s", line);
            continue;
        }
        if (prod_count == 0) {
            start_symbol = lhs;
        }
        /* Split the right-hand side into its alternatives. */
        char *token = strtok(arrow + 1, "|\n");
        while (token != NULL) {
            add_production(lhs, token);
            token = strtok(NULL, "|\n");
        }
    }
}
/*
 * Decides whether `current_symbol` derives the suffix of `str` that starts
 * at index `str_pos`.
 *
 * Only two shapes of alternative are used:
 *   aB  - terminal followed by an upper-case non-terminal: the terminal must
 *         equal the current input character and B must derive the rest;
 *   a   - single terminal: must equal the current character, which must be
 *         the last one of the input.
 * Other alternatives are ignored.  Each recursive call advances by one
 * input character.
 */
bool recognize_string(const char *str, char current_symbol, int str_pos) {
    /* The grammar has no null productions, so nothing derives the empty rest. */
    if (str[str_pos] == '\0') {
        return false;
    }

    for (int i = 0; i < prod_count; i++) {
        if (productions[i].lhs != current_symbol) {
            continue;
        }
        for (int j = 0; j < productions[i].rhs_count; j++) {
            const char *rhs = productions[i].rhs[j];
            if (rhs[0] != str[str_pos]) {
                continue; /* leading terminal does not match the input */
            }
            size_t rhs_len = strlen(rhs);
            if (rhs_len > 1 && isupper((unsigned char)rhs[1])) {
                /* Case 1: terminal followed by non-terminal (aB) */
                if (recognize_string(str, rhs[1], str_pos + 1)) {
                    return true;
                }
            } else if (rhs_len == 1 && str[str_pos + 1] == '\0') {
                /* Case 2: single terminal (a) at the end of the string */
                return true;
            }
        }
    }
    return false;
}
/*
 * Reads a grammar, then one whitespace-delimited word, and reports whether
 * the start symbol derives that word.  Returns 1 if no word could be read.
 */
int main() {
    input_grammar();
    char str[100];
    printf("Enter a string to check: ");
    /* The field width keeps scanf inside the 100-byte buffer (99 chars + NUL). */
    if (scanf("%99s", str) != 1) {
        fprintf(stderr, "No input provided.\n");
        return 1;
    }
    if (recognize_string(str, start_symbol, 0)) {
        printf("String is accepted by the grammar.\n");
    } else {
        printf("String is NOT accepted by the grammar.\n");
    }
    return 0;
}
9. Design a parser which accepts a mathematical expression (containing
integers only). If the expression is valid, then evaluate the expression
else report that the expression is invalid. [Note: Design first the
Grammar and then implement using Shift-Reduce parsing technique.
Your program should generate an output file clearly showing each
step of parsing/evaluation of the intermediate sub-expressions.]
#include <stdio.h>
#include <stdlib.h>
#include <ctype.h>
#include <string.h>
#define MAX_STACK 100

/* One stack entry: an integer operand, an operator, or a parenthesis. */
typedef struct {
    int type; // 0: number, 1: operator, 2: parenthesis
    int value;
    char op;
} Token;

/* Fixed-capacity stack of tokens; `top` is the index of the last entry, -1 when empty. */
typedef struct {
    Token tokens[MAX_STACK];
    int top;
} Stack;

/* Makes `s` an empty stack. */
void init_stack(Stack *s) {
    s->top = -1;
}

/* Pushes `t`; prints "Stack overflow" and exits with status 1 when full. */
void push(Stack *s, Token t) {
    if (s->top + 1 >= MAX_STACK) {
        fprintf(stderr, "Stack overflow\n");
        exit(1);
    }
    s->top += 1;
    s->tokens[s->top] = t;
}

/* Removes and returns the top token; prints "Stack underflow" and exits when empty. */
Token pop(Stack *s) {
    if (s->top < 0) {
        fprintf(stderr, "Stack underflow\n");
        exit(1);
    }
    Token removed = s->tokens[s->top];
    s->top -= 1;
    return removed;
}

/* Returns the top token without removing it; the stack must not be empty. */
Token peek(Stack *s) {
    const Token *top_token = &s->tokens[s->top];
    return *top_token;
}

/* Returns 1 when the stack holds no tokens, 0 otherwise. */
int is_empty(Stack *s) {
    return (s->top == -1) ? 1 : 0;
}
/*
 * Writes the stack contents to `out` from bottom to top, each entry followed
 * by a space (numbers as integers, everything else as its character), and
 * ends the line.
 */
void print_stack(Stack *s, FILE *out) {
    const Token *cur = s->tokens;
    const Token *end = s->tokens + (s->top + 1);

    for (; cur < end; cur++) {
        if (cur->type != 0) {
            fprintf(out, "%c ", cur->op);
        } else {
            fprintf(out, "%d ", cur->value);
        }
    }
    fputc('\n', out);
}
/*
 * Binding strength of an operator: 1 for '+' and '-', 2 for '*' and '/',
 * and 0 for any other character.
 */
int precedence(char op) {
    if (op == '*' || op == '/') {
        return 2;
    }
    if (op == '+' || op == '-') {
        return 1;
    }
    return 0;
}
/*
 * Applies the binary operator `op` to `a` and `b` with integer arithmetic.
 * Division by zero prints "Division by zero" on stderr and exits with
 * status 1; an unknown operator yields 0.
 */
int apply_op(int a, int b, char op) {
    if (op == '+') {
        return a + b;
    }
    if (op == '-') {
        return a - b;
    }
    if (op == '*') {
        return a * b;
    }
    if (op != '/') {
        return 0;
    }
    if (b == 0) {
        fprintf(stderr, "Division by zero\n");
        exit(1);
    }
    return a / b;
}
void evaluate_expression(const char *expr, FILE *out) {
Stack values, ops;
init_stack(&values);
init_stack(&ops);
fprintf(out, "Parsing steps:\n");
fprintf(out, "--------------\n");
for (int i = 0; expr[i]; i++) {
if (expr[i] == ' ') continue;
if (expr[i] == '(') {
Token t = {2, 0, '('};
push(&ops, t);
fprintf(out, "Shift '(': ");
print_stack(&values, out);
}
else if (isdigit(expr[i])) {
int val = 0;
while (expr[i] && isdigit(expr[i])) {
val = val * 10 + (expr[i] - '0');
i++;
}
i--;
Token t = {0, val, 0};
push(&values, t);
fprintf(out, "Shift %d: ", val);
print_stack(&values, out);
}
else if (expr[i] == ')') {
fprintf(out, "Shift ')': ");
print_stack(&values, out);
while (!is_empty(&ops) && peek(&ops).op != '(') {
Token op = pop(&ops);
Token b = pop(&values);
Token a = pop(&values);
int res = apply_op(a.value, b.value, op.op);
Token result = {0, res, 0};
push(&values, result);
fprintf(out, "Reduce %d %c %d = %d: ", a.value, op.op, b.value, res);
print_stack(&values, out);
}
if (!is_empty(&ops) pop(&ops); // Pop '('
}
else {
while (!is_empty(&ops) && precedence(peek(&ops).op) >=
precedence(expr[i])) {
Token op = pop(&ops);
Token b = pop(&values);
Token a = pop(&values);
int res = apply_op(a.value, b.value, op.op);
Token result = {0, res, 0};
push(&values, result);
fprintf(out, "Reduce %d %c %d = %d: ", a.value, op.op, b.value, res);
print_stack(&values, out);
}
Token t = {1, 0, expr[i]};
push(&ops, t);
fprintf(out, "Shift '%c': ", expr[i]);
print_stack(&values, out);
}
}
while (!is_empty(&ops)) {
Token op = pop(&ops);
Token b = pop(&values);
Token a = pop(&values);
int res = apply_op(a.value, b.value, op.op);
Token result = {0, res, 0};
push(&values, result);
fprintf(out, "Reduce %d %c %d = %d: ", a.value, op.op, b.value, res);
print_stack(&values, out);
}
if (values.top == 0) {
fprintf(out, "\nFinal result: %d\n", values.tokens[0].value);
} else {
fprintf(out, "\nInvalid expression\n");
}
}
/*
 * Reads one expression line from stdin and writes the parsing steps and the
 * result to parsing_steps.txt.  Returns 1 if no line could be read or the
 * output file cannot be opened.
 */
int main() {
    char expr[100];
    printf("Enter a mathematical expression (integers only): ");
    /* Without this check a failed read would parse an uninitialised buffer. */
    if (fgets(expr, sizeof(expr), stdin) == NULL) {
        fprintf(stderr, "No input provided\n");
        return 1;
    }
    expr[strcspn(expr, "\n")] = 0; // Remove newline
    FILE *out = fopen("parsing_steps.txt", "w");
    if (!out) {
        perror("Failed to open output file");
        return 1;
    }
    evaluate_expression(expr, out);
    fclose(out);
    printf("Parsing complete. Results written to parsing_steps.txt\n");
    return 0;
}
10. Open-ended program: Design of various types of parsers
#include <stdio.h>
#include <ctype.h>
#include <stdbool.h>
// Recursive Descent Parser
bool rd_expr(const char *s, int *p);
bool rd_term(const char *s, int *p);
bool rd_factor(const char *s, int *p);
// Operator Precedence Parser
int op_expr(const char *s);
// Helper functions
int apply_op(int a, int b, char op);
int precedence(char op);

/*
 * Reads one expression line, validates it with the recursive-descent
 * parser and, if it is valid, evaluates it with the operator-precedence
 * parser.  Returns 1 if no line could be read.
 */
int main() {
    char expr[100];
    printf("Enter expression: ");
    /* Without this check a failed read would parse an uninitialised buffer. */
    if (fgets(expr, 100, stdin) == NULL) {
        fprintf(stderr, "No input provided\n");
        return 1;
    }
    // Recursive Descent check
    int pos = 0;
    /* The line may end with '\n' or, at end of input, directly with the terminator. */
    bool valid = rd_expr(expr, &pos) && (expr[pos] == '\n' || expr[pos] == '\0');
    printf("Recursive Descent: %s\n", valid ? "Valid" : "Invalid");
    // Operator Precedence evaluation
    if(valid) {
        int result = op_expr(expr);
        printf("Operator Precedence Result: %d\n", result);
    }
    return 0;
}
// Recursive Descent Implementation

/*
 * expr := term { ('+' | '-') term }
 * Advances *p past the expression starting at s[*p]; returns false as soon
 * as a term fails to parse.
 */
bool rd_expr(const char *s, int *p) {
    bool ok = rd_term(s, p);
    while (ok && (s[*p] == '+' || s[*p] == '-')) {
        ++*p;
        ok = rd_term(s, p);
    }
    return ok;
}
/*
 * term := factor { ('*' | '/') factor }
 * Advances *p past the term starting at s[*p]; returns false as soon as a
 * factor fails to parse.
 */
bool rd_term(const char *s, int *p) {
    bool ok = rd_factor(s, p);
    while (ok && (s[*p] == '*' || s[*p] == '/')) {
        ++*p;
        ok = rd_factor(s, p);
    }
    return ok;
}
/*
 * factor := digits | '(' expr ')'
 * Advances *p past the factor starting at s[*p]; returns false when neither
 * form is present or a closing parenthesis is missing.
 */
bool rd_factor(const char *s, int *p) {
    if (s[*p] != '(') {
        if (!isdigit(s[*p])) {
            return false;
        }
        do {
            (*p)++;
        } while (isdigit(s[*p]));
        return true;
    }

    (*p)++; /* '(' */
    if (!rd_expr(s, p) || s[*p] != ')') {
        return false;
    }
    (*p)++; /* ')' */
    return true;
}
// Operator Precedence Implementation

/*
 * Applies the binary operator `op` to `a` and `b` with integer arithmetic.
 * Division by zero is reported on stderr and yields 0; an unknown operator
 * also yields 0.
 */
int apply_op(int a, int b, char op) {
    switch(op) {
        case '+': return a + b;
        case '-': return a - b;
        case '*': return a * b;
        case '/':
            if (b == 0) {
                fprintf(stderr, "Division by zero\n");
                return 0;
            }
            return a / b;
        default: return 0;
    }
}

/* Binding strength: 1 for '+' and '-', 2 for '*' and '/', 0 otherwise. */
int precedence(char op) {
    if(op == '+' || op == '-') return 1;
    if(op == '*' || op == '/') return 2;
    return 0;
}

/* Capacity of the value and operator stacks used by op_expr(). */
enum { OP_STACK_MAX = 100 };

/*
 * Pops one operator and two values, applies the operator, and pushes the
 * result.  Returns false (changing nothing) when the operator stack is
 * empty or fewer than two values are available.
 */
static bool op_reduce(int *vals, int *vtop, const char *ops, int *otop) {
    if (*otop < 0 || *vtop < 1) {
        return false;
    }
    char op = ops[*otop];
    *otop -= 1;
    int b = vals[*vtop];
    int a = vals[*vtop - 1];
    *vtop -= 1;
    vals[*vtop] = apply_op(a, b, op);
    return true;
}

/*
 * Evaluates the infix expression `s` (non-negative integer literals,
 * + - * /, parentheses) by operator precedence, using separate value and
 * operator stacks.  Operators of equal precedence associate to the left.
 * Characters that are not part of an expression (such as the trailing
 * newline) are skipped.  The caller is expected to pass a well-formed
 * expression; 0 is returned when no value results or the expression needs
 * more than OP_STACK_MAX stack entries.
 */
int op_expr(const char *s) {
    int vals[OP_STACK_MAX];
    char ops[OP_STACK_MAX];
    int vtop = -1, otop = -1;
    const char *p = s;

    while (*p) {
        unsigned char c = (unsigned char)*p;
        if (isdigit(c)) {
            int val = 0;
            while (isdigit((unsigned char)*p)) {
                val = val * 10 + (*p++ - '0');
            }
            if (vtop + 1 >= OP_STACK_MAX) {
                fprintf(stderr, "Expression too complex\n");
                return 0;
            }
            vals[++vtop] = val;
        } else if (c == '(' || c == '+' || c == '-' || c == '*' || c == '/') {
            if (c != '(') {
                /* Reduce everything that binds at least as tightly first;
                   '(' has precedence 0 and therefore stops the loop. */
                while (otop >= 0 && precedence(ops[otop]) >= precedence((char)c)) {
                    if (!op_reduce(vals, &vtop, ops, &otop)) {
                        break;
                    }
                }
            }
            if (otop + 1 >= OP_STACK_MAX) {
                fprintf(stderr, "Expression too complex\n");
                return 0;
            }
            ops[++otop] = *p++;
        } else if (c == ')') {
            while (otop >= 0 && ops[otop] != '(') {
                if (!op_reduce(vals, &vtop, ops, &otop)) {
                    break;
                }
            }
            if (otop >= 0 && ops[otop] == '(') {
                otop--; /* remove the matching '(' */
            }
            p++;
        } else {
            p++;
        }
    }

    while (otop >= 0) {
        if (ops[otop] == '(') {
            otop--; /* unmatched '(' in malformed input */
            continue;
        }
        if (!op_reduce(vals, &vtop, ops, &otop)) {
            break;
        }
    }
    return vtop >= 0 ? vals[0] : 0;
}