|
|
|
The shift amount when there is a mismatch with a non-empty prefix:
Suppose mismatch happens at X and the matched prefix is found at pos k: T = . . . . . . . . . . . . . X y y y . . . . . P = . . y y y . . . x y y y ^ ^ ^ | | <---->| s k pre m <------------------> m-k New state: T = . . . . . . . . . . . . . X y y y . . . . . P = . . y y y . . . x y y y ^ | s' = s + m - k - len(pre) |
The shift amount for edge cases:
Suppose mismatch happens at X and a portion P' is found at start: T = . . . . . . . . . . . . . X a b c . . . . . P = b c . . . . . . x a b c ^ ^ | | <-> s P' <-------------------> m - len(P') New state: T = . . . . . . . . . . . . . X a b c . . . . . P = b c . . . . . . x a b c ^ | s' = s + m - len(P') |
int goodSuffixShift(string P, int pos)
{
string matchedSuffix = P.substr(pos+1, P.length());
int len = matchedSuffix.length();
// (1) Check string P[k] .. P[k+len-1] == matchedSuffix
for ( int k = pos; k >= 0; k-- )
{
string s1 = P.substr(k, len);
if ( s1 == matchedSuffix )
return P.length() - k - matchedSuffix.length();
}
// (2) Check edge cases
for ( int k = matchedSuffix.length()-1; k >= 1; k-- )
{
if ( P.substr(0,k) == P.substr(P.length()-k, k) )
return P.length() - k;
}
// (3) No suffix in P --> shift P.length()
return P.length();
}
|
int bm(string T, string P, int s) // s = start position
{
int n = T.length(), m = P.length();
int j = m-1; // Start match with character pos = m-1
while ( s+(m-1) < n ) // Last char pos in P is valid character in T
{
if ( P[j] == T[s+j] )
{
if ( j == 0 )
return(s); // Pattern found at pos s
j--; // Continue with next character
}
else
{
int shift = 1; // Default shift
if ( j < m-1 ) // Make sure len(suffix) >= 1
shift = goodSuffixShift(P, j); // Find suffix P[j+1]..P[m-1]
s += shift;
j = m-1; // reset to 1st char in pattern to match
}
}
return -1; // Pattern not found
}
|
DEMO: Progs/bm-2.cc