CSE2012 DESIGN AND ANALYSIS OF
ALGORITHMS
LAB DIGITAL ASSIGNMENT 2
Name: Akshat Swaminath
Regd. No: 20BCE2231
Slot: L47+48
Submitted to: Mr. Sivanesan S Sir
Aim: To write an executable program for Naïve String-matching, together with its
pseudocode and flowchart.
Problem Analysis:
The Naïve String-matching Algorithm is the brute-force approach to string matching. It is
very easy to understand, as depicted below, but it is one of the least efficient
string-matching algorithms. Let us consider a text of length n. The main objective of a
string-searching algorithm is to find the position(s) of a given pattern, whose length is at
most n, in the minimum number of searches/iterations; the naïve algorithm simply tries
every possible alignment of the pattern against the text in turn.
Example of Naïve String-matching Algorithms
Flowchart:
Pseudocode:
/*
 * Naive (brute-force) pattern search.
 *
 * pat: NUL-terminated pattern to look for.
 * txt: NUL-terminated text to search in.
 *
 * Prints "Pattern found at index <i> " followed by a newline for every index
 * i at which pat occurs in txt (overlapping occurrences included).
 */
void search(char* pat, char* txt)
{
    int M = strlen(pat);
    int N = strlen(txt);

    /* A loop to slide pat[] one by one */
    for (int i = 0; i <= N - M; i++) {
        int j;

        /* For current index i, check for pattern match */
        for (j = 0; j < M; j++)
            if (txt[i + j] != pat[j])
                break;

        if (j == M) /* pat[0...M-1] == txt[i, i+1, ...i+M-1] */
            printf("Pattern found at index %d \n", i);
    }
}
Actual code:
#include <bits/stdc++.h>
using namespace std;
// Brute-force substring search.
//   pat - NUL-terminated pattern to look for
//   txt - NUL-terminated text to scan
// Writes one "Pattern found at index <i>" line to cout for every position
// in txt at which pat occurs; overlapping occurrences are all reported.
void search(char* pat, char* txt)
{
    const int patLen = strlen(pat);
    const int txtLen = strlen(txt);
    const int lastStart = txtLen - patLen; // last alignment that still fits

    for (int start = 0; start <= lastStart; ++start) {
        // Count how many leading characters agree at this alignment.
        int matched = 0;
        while (matched < patLen && txt[start + matched] == pat[matched])
            ++matched;

        if (matched != patLen)
            continue; // mismatch somewhere: try the next alignment

        cout << "Pattern found at index " << start << endl;
    }
}
// Driver Code
// Reads the text and then the pattern (one whitespace-delimited token each)
// from standard input and reports every occurrence of the pattern.
// Returns 1 if either token could not be read.
int main()
{
    char txt[100];
    char pat[100];

    // setw limits each extraction to sizeof(buffer) - 1 characters; without
    // it a longer token would be written past the end of the array.
    if (!(std::cin >> std::setw(static_cast<int>(sizeof txt)) >> txt
                   >> std::setw(static_cast<int>(sizeof pat)) >> pat))
        return 1;

    search(pat, txt);
    return 0;
}
Output screenshot:
Aim: Using KMP algorithm for pattern searching
Analysis:
The KMP matching algorithm exploits the fact that the same sub-patterns may appear
more than once within the pattern. The basic idea behind the KMP algorithm is:
whenever we detect a mismatch (after some matches), we already know some of the
characters in the text of the next window. We take advantage of this information to avoid
re-matching the characters that we know will match anyway.
Flowchart :
Pseudocode:
n ← length[Text]
m ← length[Pattern]
LPS ← COMPUTE-PREFIX-FUNCTION(Pattern)
q ← 0 // number of characters matched
for i ← 1 to n // scan Text from left to right
    do while q > 0 and Pattern[q + 1] ≠ Text[i]
        do q ← LPS[q] // next character does not match
    if Pattern[q + 1] = Text[i]
        then q ← q + 1 // next character matches
    if q = m // is all of Pattern matched?
        then print "Pattern occurs with shift" i - m
            q ← LPS[q] // look for the next match
Code
#include <bits/stdc++.h>
void computeLPSArray(char* pat, int M, int* lps);
// Prints every occurrence of pat[] in txt[] using the Knuth-Morris-Pratt
// algorithm.
//   pat - NUL-terminated pattern to look for
//   txt - NUL-terminated text to scan
// For each match (overlapping ones included) "Found pattern at index <i> "
// is written to stdout. Nothing is printed for an empty pattern or a pattern
// longer than the text.
void KMPSearch(char* pat, char* txt)
{
    const int M = static_cast<int>(std::strlen(pat));
    const int N = static_cast<int>(std::strlen(txt));

    // An empty pattern would index lps[-1] below, and a pattern longer than
    // the text can never match, so bail out early in both cases.
    if (M == 0 || M > N)
        return;

    // lps[k] = length of the longest proper prefix of pat[0..k] that is also
    // a suffix of it. A vector replaces the non-standard variable-length array.
    std::vector<int> lps(M);
    computeLPSArray(pat, M, lps.data());

    int i = 0; // index for txt[]
    int j = 0; // index for pat[] == number of characters currently matched
    while (i < N) {
        if (pat[j] == txt[i]) {
            ++i;
            ++j;
            if (j == M) {
                std::printf("Found pattern at index %d ", i - j);
                // Keep the longest border so overlapping matches are found.
                j = lps[j - 1];
            }
        }
        else if (j != 0) {
            // Mismatch after j matches: the first lps[j-1] characters are
            // known to match already, so resume comparing from there.
            j = lps[j - 1];
        }
        else {
            // Mismatch on the first pattern character: advance in the text.
            ++i;
        }
    }
}
// Fills lps[] for the given pattern pat[0..M-1].
//   pat - pattern characters (at least M of them)
//   M   - pattern length; nothing is written when M <= 0
//   lps - output array with room for M ints; on return lps[i] is the length
//         of the longest proper prefix of pat[0..i] that is also a suffix
//         of pat[0..i]
void computeLPSArray(char* pat, int M, int* lps)
{
    if (M <= 0)
        return; // no slot to fill; writing lps[0] would be out of bounds

    // length of the previous longest prefix suffix
    int len = 0;
    lps[0] = 0; // lps[0] is always 0

    // the loop calculates lps[i] for i = 1 to M-1
    int i = 1;
    while (i < M) {
        if (pat[i] == pat[len]) {
            // The current border can be extended by one character.
            ++len;
            lps[i] = len;
            ++i;
        }
        else if (len != 0) {
            // Fall back to the next shorter border and retry the same i.
            len = lps[len - 1];
        }
        else {
            // No border ends at position i.
            lps[i] = 0;
            ++i;
        }
    }
}
// Reads the text and then the pattern (one whitespace-delimited token each)
// from standard input and prints every KMP match.
// Returns 1 if either token could not be read.
int main()
{
    char txt[100];
    char pat[100];

    // Names are std::-qualified because this program has no using-directive.
    // setw limits each extraction to sizeof(buffer) - 1 characters so that a
    // long token cannot overflow the array.
    if (!(std::cin >> std::setw(static_cast<int>(sizeof txt)) >> txt
                   >> std::setw(static_cast<int>(sizeof pat)) >> pat))
        return 1;

    KMPSearch(pat, txt);
    return 0;
}
Output screenshots
AIM:
Using Rabin-Karp algorithm for pattern searching
Problem analysis:
Like the naïve algorithm, the Rabin-Karp algorithm slides the pattern over the text one
position at a time. Unlike the naïve algorithm, it first compares the hash value of the
pattern with the hash value of the current substring of the text, and only if the hash
values match does it start matching individual characters.
Flowchart:
Pseudocode :
Begin
patLen := pattern Length
strLen := string Length
patHash := 0 and strHash := 0, h := 1
maxChar := total number of characters in character set
for i := 1 to (patLen - 1), do
h := (h*maxChar) mod prime
done
for all character index i of pattern, do
patHash := (maxChar*patHash + pattern[i]) mod prime
strHash := (maxChar*strHash + text[i]) mod prime
done
for i := 0 to (strLen - patLen), do
if patHash = strHash, then
for charIndex := 0 to patLen -1, do
if text[i+charIndex] ≠ pattern[charIndex], then
break the loop
done
if charIndex = patLen, then
print the location i as pattern found at i position.
if i < (strLen - patLen), then
strHash := (maxChar*(strHash – text[i]*h)+text[i+patLen]) mod prime
if strHash < 0, then
strHash := strHash + prime
done
End
CODE :
#include <bits/stdc++.h>
using namespace std;
// Radix of the rolling hash: the number of distinct character values.
#define d 256

// Rabin-Karp pattern search.
//   pat - NUL-terminated pattern to look for
//   txt - NUL-terminated text to scan
//   q   - modulus of the rolling hash (ideally a prime); must be positive
// Writes one "Pattern found at index <i>" line to cout for every position in
// txt at which pat occurs (overlapping occurrences included). Nothing is
// printed when q <= 0, when pat is empty, or when pat is longer than txt.
void search(char pat[], char txt[], int q)
{
    const int M = static_cast<int>(std::strlen(pat));
    const int N = static_cast<int>(std::strlen(txt));

    // q <= 0 would make '%' undefined or meaningless, and hashing the first M
    // characters of a shorter text would read past its terminator.
    if (q <= 0 || M == 0 || M > N)
        return;

    // 64-bit arithmetic keeps radix * (value < q) from overflowing for any int q.
    const long long mod = q;
    const long long radix = d;

    // h = pow(radix, M-1) % mod: weight of the leading character of a window.
    long long h = 1 % mod;
    for (int i = 0; i < M - 1; i++)
        h = (h * radix) % mod;

    // Hash of the pattern and of the first window of the text. Characters are
    // read as unsigned char so bytes above 127 do not produce negative hashes.
    long long p = 0;
    long long t = 0;
    for (int i = 0; i < M; i++) {
        p = (radix * p + static_cast<unsigned char>(pat[i])) % mod;
        t = (radix * t + static_cast<unsigned char>(txt[i])) % mod;
    }

    for (int i = 0; i <= N - M; i++) {
        // Equal hashes only suggest a match; confirm it character by character.
        if (p == t && std::memcmp(txt + i, pat, static_cast<std::size_t>(M)) == 0)
            std::cout << "Pattern found at index " << i << '\n';

        if (i < N - M) {
            // Roll the window: drop txt[i], append txt[i + M].
            t = (radix * (t - static_cast<unsigned char>(txt[i]) * h)
                 + static_cast<unsigned char>(txt[i + M])) % mod;
            if (t < 0)
                t += mod; // '%' may yield a negative remainder
        }
    }
}
/* Driver code */
// Reads the text, the pattern (one whitespace-delimited token each) and the
// hash modulus q from standard input, then prints every Rabin-Karp match.
// Returns 1 if the input is incomplete or q is not a positive integer.
int main()
{
    char txt[100];
    char pat[100];

    // setw limits each extraction to sizeof(buffer) - 1 characters so that a
    // long token cannot overflow the array.
    if (!(std::cin >> std::setw(static_cast<int>(sizeof txt)) >> txt
                   >> std::setw(static_cast<int>(sizeof pat)) >> pat))
        return 1;

    // The modulus is used as the right-hand side of '%', so it must be > 0.
    int q = 0;
    if (!(std::cin >> q) || q <= 0)
        return 1;

    search(pat, txt, q);
    return 0;
}
Output:
Result analysis:
Complexity analysis table
| Name of technique | Name of sample outcome | Time complexity (in Big O notation) | Space complexity | Ranking based on complexities* |
|---|---|---|---|---|
| Naive String matching Algorithm (Brute force) | Naive String matching Algorithm | O(m*n) | O(n) | 1 |
| KMP algorithm | KMP algorithm | O(n) | O(n) | 2 |
*one being the least complex
Optimised code
#include <bits/stdc++.h>
using namespace std;
// Naive pattern search with a skip-ahead optimisation.
//   pat - pattern to look for
//   txt - text to scan
// Writes one "Pattern found at index <i>" line to cout for every position in
// txt at which pat occurs. Nothing is printed for an empty pattern or a
// pattern longer than the text.
//
// When all characters of pat are different, a window whose first j characters
// matched cannot contain another start of pat inside those j characters, so
// the window is moved forward by j instead of by 1. That shortcut is wrong
// for patterns with repeated characters (e.g. "AAB" in "AAAB"), so for those
// the window is moved one position at a time.
void search(std::string pat, std::string txt)
{
    const int M = static_cast<int>(pat.size());
    const int N = static_cast<int>(txt.size());

    // An empty pattern would advance the window by 0 and never terminate.
    if (M == 0 || M > N)
        return;

    // Decide once whether the skip-ahead shortcut is safe for this pattern.
    bool seen[256] = {};
    bool allDistinct = true;
    for (unsigned char c : pat) {
        if (seen[c]) {
            allDistinct = false;
            break;
        }
        seen[c] = true;
    }

    int i = 0;
    while (i <= N - M) {
        // j = number of leading characters that match at alignment i.
        int j = 0;
        while (j < M && txt[i + j] == pat[j])
            ++j;

        if (j == M) // pat[0...M-1] == txt[i, i+1, ...i+M-1]
            std::cout << "Pattern found at index " << i << '\n';

        // Slide by j when that is provably safe, otherwise by one.
        i += (allDistinct && j > 0) ? j : 1;
    }
}
// Reads the text and then the pattern (one whitespace-delimited token each)
// from standard input and reports every occurrence of the pattern.
// Returns 1 if either token could not be read.
int main()
{
    // search() takes std::string, so read straight into strings: there is no
    // fixed-size buffer to overflow and no 99-character limit on the input.
    std::string txt;
    std::string pat;
    if (!(std::cin >> txt >> pat))
        return 1;

    search(pat, txt);
    return 0;
}
Output screenshot
Conclusion:
We have written more optimised code for Naive String-matching Algorithm for a specific
case i.e., when all the characters of the pattern are different.
We have analysed and studied the KMP algorithm and Rabin-Karp algorithm.