Algorithm Implementation/String searching/Knuth-Morris-Pratt pattern matcher
< Algorithm Implementation < String searchingPascal
// COPY, COMPILE, AND RUN ON DEV-PASCAL // DL LINK: http://download.cnet.com/Bloodshed-Dev-Pascal/3000-2069_4-10019919.html // PROGRAM BY : AMMAR QADRI // FROM : WOBURN COLLEGIATE INSTITUTE
{$H+}
uses crt,Dos;
var
h1,h2,m1,m2,se1,se2,ms1,ms2:word;
b,comparisons:longint;
t,matchfound:array[0..1000000] of longint;
s,f:string;
procedure START;
begin
GETTIME(h1,m1,se1,ms1);
end;
procedure FINISH;
begin
GETTIME(h2,m2,se2,ms2);
writeln;
writeln('**************************************************************');
writeln('**************************************************************');
writeln('************************ COMPARISONS : ',comparisons:4,' ******************');
writeln('**************************************************************');
writeln('************************ TEXT LENGTH : ',length(f):4,' ******************');
writeln('**************************************************************');
writeln('***************************** RUNTIME ************************');
writeln('*************************** ',((se2-se1)+(m2-m1)*60+(h2-h1)*60*60+(ms2-ms1)/100):0:3,' SECONDS ********************');
writeln('**************************************************************');
writeln('************** IMPLEMENTATION # 001 - KMP AGLORITHM **********');
writeln('**************************************************************');
readln;
end;
procedure maketable;
var
x,y:longint;
begin
t[0]:=0; {empty string}
t[1]:=0; {single letter}
x:=1; {match of characters found plus 1}
y:=2; {position in the pattern string starts off as 2}
{since t[0] and t[1] are already set to 0 as default}
while y <= length(s) do begin
if s[x] = s[y] then begin {found another match}
t[y]:=x;
inc(x);
inc(y);
end
else begin
if x <> 0 then begin {did not find match so trying for a smaller one}
x:=t[x];
end;
if x = 0 then begin {smallest match found (empty string)}
t[y]:=x;
inc(x);
inc(y);
end;
end;
end;
end;
procedure printtable;
var
aaa:longint;
begin
writeln('TABLE : ');
for aaa:= 0 to length(s) do begin
writeln(aaa:4,' : ',t[aaa]:4);
end;
writeln;
end;
procedure findall;
var
x,y:longint;
begin
x:=0; {match so far (empty string)}
y:=1; {position in text (looking for a place to begin the search)}
while y+length(s)-1 <= length(f) do begin
if s[x+1] = f[y+x] then begin
while s[x+1] = f[y+x] do begin
{write(upcase(s[x+1])); }
inc(comparisons);
inc(x);
if x = length(s) then begin
inc(b);
matchfound[b]:=y;
break;
end;
end;
{ writeln;
if x <> length(s) then
writeln(y+x,' ',s[x+1]); }
y:=y+x-t[x];
x:=t[x];
end
}
inc(comparisons);
x:=0;
inc(y);
end;
end;
end;
procedure printall;
var
aaa:longint;
begin
writeln('MATCHES : ');
for aaa:= 1 to b do begin
writeln(aaa:4,' : ',matchfound[aaa]:4);
end;
if b = 0 then writeln('NONE =P');
end;
begin
//read in the two strings...the text and the pattern
write('TEXT : ');
readln(f); {text}
write('PATTERN: ');
readln(s); {pattern}
writeln;
START;{note start time of program}
maketable;{makes the table}
printtable;{prints the table}
b:=0; {initialize matchfound array}
comparisons:=0;
findall;{finds the matches}
printall;{prints the matches}
FINISH;{note finish time of program and print}
end.
Python
# Knuth-Morris-Pratt string matching
# David Eppstein, UC Irvine, 1 Mar 2002
#from http://code.activestate.com/recipes/117214/
def KnuthMorrisPratt(text, pattern):
'''Yields all starting positions of copies of the pattern in the text.
Calling conventions are similar to string.find, but its arguments can be
lists or iterators, not just strings, it returns all matches, not just
the first one, and it does not need the whole text in memory at once.
Whenever it yields, it will have read the text exactly up to and including
the match that caused the yield.'''
# allow indexing into pattern and protect against change during yield
pattern = list(pattern)
# build table of shift amounts
shifts = [1] * (len(pattern) + 1)
shift = 1
for pos in range(len(pattern)):
while shift <= pos and pattern[pos] != pattern[pos-shift]:
shift += shifts[pos-shift]
shifts[pos+1] = shift
# do the actual search
startPos = 0
matchLen = 0
for c in text:
while matchLen == len(pattern) or \
matchLen >= 0 and pattern[matchLen] != c:
startPos += shifts[matchLen]
matchLen -= shifts[matchLen]
matchLen += 1
if matchLen == len(pattern):
yield startPos
Ada
The following Ada implementation contains both the algorithms as well as a test program to test correctness of implementation.
Ada Programming/Algorithms/Knuth-Morris-Pratt pattern matcher
C++
The following C++ implementation contains only the algorithms without a test program to test correctness of implementation.
#include <iostream>
#include <vector>
using namespace std;
//----------------------------
//Returns a vector containing the zero based index of
// the start of each match of the string K in S.
// Matches may overlap
//----------------------------
vector<int> KMP(string S, string K)
{
vector<int> T(K.size() + 1, -1);
vector<int> matches;
if(K.size() == 0)
{
matches.push_back(0);
return matches;
}
for(int i = 1; i <= K.size(); i++)
{
int pos = T[i - 1];
while(pos != -1 && K[pos] != K[i - 1]) pos = T[pos];
T[i] = pos + 1;
}
int sp = 0;
int kp = 0;
while(sp < S.size())
{
while(kp != -1 && (kp == K.size() || K[kp] != S[sp])) kp = T[kp];
kp++;
sp++;
if(kp == K.size()) matches.push_back(sp - K.size());
}
return matches;
}
C and Java
C and Java implementations can be found in the history of the Wikipedia page on the same topic. The correctness and licensing status of these implementations are not clear at this time.
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
void computeLPSArray(char *pat, int M, int *lps);
void KMPSearch(char *pat, char *txt)
{
int M = strlen(pat);
int N = strlen(txt);
// create lps[] that will hold the longest prefix suffix values for pattern
int *lps = (int *)malloc(sizeof(int)*M);
int j = 0; // index for pat[]
// Preprocess the pattern (calculate lps[] array)
computeLPSArray(pat, M, lps);
int i = 0; // index for txt[]
while(i < N)
{
if(pat[j] == txt[i])
{
j++;
i++;
}
if (j == M)
{
printf("Found pattern at index %d \n", i-j);
j = lps[j-1];
}
// mismatch after j matches
else if(pat[j] != txt[i])
{
// Do not match lps[0..lps[j-1]] characters,
// they will match anyway
if(j != 0)
j = lps[j-1];
else
i = i+1;
}
}
free(lps); // to avoid memory leak
}
void computeLPSArray(char *pat, int M, int *lps)
{
int len = 0; // lenght of the previous longest prefix suffix
int i;
lps[0] = 0; // lps[0] is always 0
i = 1;
// the loop calculates lps[i] for i = 1 to M-1
while(i < M)
{
if(pat[i] == pat[len])
{
len++;
lps[i] = len;
i++;
}
else // (pat[i] != pat[len])
{
if( len != 0 )
{
// This is tricky. Consider the example AAACAAAA and i = 7.
len = lps[len-1];
// Also, note that we do not increment i here
}
else // if (len == 0)
{
lps[i] = 0;
i++;
}
}
}
}
// Driver program to test above function
int main()
{
char *txt = "apurba mandal loves ayoshi loves";
char *pat = "loves";
KMPSearch(pat, txt);
return 0;
}
Go
func computeLPS(pat string) (lps []int) {
M := len(pat)
lps = make([]int, M) // lps[0] = 0
l := 0 // length of the previous longest prefix suffix
// the loop calculates lps[i] for i = 1 to M-1
for i := 1; i < M; i++ {
for {
if pat[i] == pat[l] {
l++
break
}
if l == 0 {
break
}
l = lps[l-1]
}
lps[i] = l
}
return lps
}
func KMPSearch(pat, txt string) {
M, N := len(pat), len(txt)
// Preprocess the pattern that will hold the longest prefix suffix values for pattern
lps := computeLPS(pat)
for i, j := 0, 0; i < N; i++ {
for {
if pat[j] == txt[i] {
j++
if j == M {
fmt.Printf("Found pattern at index %d \n", i-j+1)
j = lps[j-1]
}
break
}
if j > 0 {
j = lps[j-1]
} else {
break
}
}
}
}
Lua
This implementation requires Lua version 5.1 or better.
-- Implementation of the Knuth Morris Pratt algorithm to find
-- substrings within strings.
-- Sean Brewer
-- Berry College CS 320
-- March 25, 2008
-- Generate the failure table using the substring and the length
-- of the substring
function generate_fail_table(substring,sublen)
comparisons = 0
fail = {0}
for i=2,sublen do
temp = fail[i - 1]
while temp > 0 and string.sub(substring,temp,temp) ~= string.sub(substring,i - 1,i - 1) do
comparisons = comparisons + 1
temp = fail[temp]
end
fail[i] = temp + 1
end
return {fail, comparisons + 1}
end
--The actual kmp algorithm which takes
--the substring and text as arguments
function kmp_algorithm(substring,text)
--starting positions...
subloc = 1
textloc = 1
--get the length of substring and text..
sublen = string.len(substring)
textlen = string.len(text)
--generate the fail links...
failure = generate_fail_table(substring,sublen)
--store failure comparisons
comparisons = failure[2]
--store fail table
fail = failure[1]
--the main loop..
while textloc <= textlen and subloc <= sublen do
if subloc == 0 or string.sub(text,textloc,textloc) == string.sub(substring,subloc,subloc) then
comparisons = comparisons + 1
textloc = textloc + 1
subloc = subloc + 1
else --no match found so follow fail link
subloc = fail[subloc]
end
end
if subloc > sublen then
return {textloc - sublen, comparisons}
else
return {0, comparisons}
end
end
This article is issued from Wikibooks. The text is licensed under Creative Commons - Attribution - Sharealike. Additional terms may apply for the media files.