# Did I discover a new string search algorithm?

• March 5th, 2010, 01:36 PM
Cliff Huylebroeck
Did I discover a new string search algorithm?
I wrote a string search algorithm.

It has a few limitations:
The length of the word is limited to 255 characters.
(This can be overcome by using numbers of a bigger type.)
It was written for search and replace, so it doesn't find overlapping words.
If it finds the word then the shift is equal to the length of the word.
• March 5th, 2010, 01:38 PM
Cliff Huylebroeck
Re: Did I discover a new string search algorithm?
StringSearch.h
Code:

```#pragma once typedef unsigned char UInt8; typedef signed short SInt16; typedef signed long SInt32; typedef UInt8 Array_256_of_UInt8[256]; int main(); void StringSearch(); bool IsEqual(const UInt8 *tooFar,             const UInt8 *a,             const UInt8 *b); void CalculateShift(const UInt8 *x,                     UInt8 m,                     UInt8 i,                     UInt8 c,                     UInt8 &shift); void Preprocess(const UInt8 *x,                 UInt8 m,                 Array_256_of_UInt8 *shift); bool Search(const UInt8 *x,             UInt8 m,             const UInt8 *y,             SInt32 n,             SInt32 &character_comparisons);```
• March 5th, 2010, 01:40 PM
Cliff Huylebroeck
Re: Did I discover a new string search algorithm?
StringSearch.cp
Code:

```#include "StringSearch.h" int main() {        StringSearch();     return 0; } void StringSearch() {     const UInt8* const x=reinterpret_cast<UInt8*>("gcagagag");     const UInt8* const y=reinterpret_cast<UInt8*>("gcatcgcagagagtatacagtacg");     SInt32 character_comparisons;     if (!Search(x,                 8,                 y,                 24,                 character_comparisons))         {         cout << "Search fails." << endl;         return;         }     cout << "Character comparisons : " << character_comparisons << endl; } bool IsEqual(const UInt8* const tooFar,             const UInt8 *a,             const UInt8 *b) {     while (a<tooFar)         {         if (*a!=*b)             {             return false;             }         a++;         b++;         }     return true; } void CalculateShift(const UInt8* const x,                     const UInt8 m,                     const UInt8 i,                     const UInt8 c,                     UInt8 &shift) {     // 1. We search c+x[i+1..m-1] at a position starting from i-1 in the direction of 0.     const UInt8* const x_plus_1=x+1;     for (SInt16 j=static_cast<SInt16>(i-1);j>=0;j--)         {         const UInt8 x_j=x[j];         if (c!=x_j)             {             continue;             }         if (IsEqual(x+m,                     x_plus_1+i,                     x_plus_1+j))             {             shift=static_cast<UInt8>(i-j);             return;             }         }     // 2. We check whether x starts with an end of x[i+1..m-1].     for (SInt16 j=static_cast<SInt16>(i+1);j<m;j++)         {         if (IsEqual(x+m,                     x+j,                     x))             {             shift=static_cast<UInt8>(j);             return;             }         }     // 3. Nothing found.     shift=m; } void Preprocess(const UInt8* const x,                 const UInt8 m,                 Array_256_of_UInt8* const shift) {     // rem: shift[0] is unused.     for (SInt16 j=0;j<256;j++)         {         const UInt8 c=static_cast<UInt8>(j);         shift[0][c]=0;         }     for (UInt8 i=1;i<m;i++)         {         const UInt8 x_i=x[i];         for (SInt16 j=0;j<256;j++)             {             const UInt8 c=static_cast<UInt8>(j);             if (c==x_i)                 {                 shift[i][c]=0;                 continue;                 }             // rem: we find c on position i and that's wrong.             CalculateShift(x,                           m,                           i,                           c,                           shift[i][c]);             }         } } bool Search(const UInt8* const x,             const UInt8 m,             const UInt8* const y,             const SInt32 n,             SInt32 &character_comparisons) {     Array_256_of_UInt8* const shift=new Array_256_of_UInt8[m];     if (!shift)         {         cout << "Out of memory." << endl;         return false;         }     Preprocess(x,               m,               shift);     character_comparisons=0;     const UInt8 m_minus_1=m-1;     const SInt32 last=n-m;     SInt32 j=0;     while (j<=last)         {         SInt16 i=m_minus_1;         while (i>=0)             {             character_comparisons++;             if (x[i]!=y[i+j])                 {                 break;                 }             i--;             }         if (i<0)             {             cout << "Found at position " << j << endl;             j+=m;             }         else             {             j+=shift[i][y[i+j]];             }         }     delete [] shift;     return true; }```
• March 5th, 2010, 01:41 PM
Cliff Huylebroeck
Re: Did I discover a new string search algorithm?
When searching gcagagag in the Bible it was 5.5% faster than Boyer-Moore.
Is it right?
Is it new?
I didn't find it in the book of Professor Lecrocq.
http://www-igm.univ-mlv.fr/~lecroq/string/node1.html
• March 8th, 2010, 09:08 AM
Cliff Huylebroeck
Re: Did I discover a new string search algorithm?
The shift[0][c] have to be initialized like the rest of the table. Otherwise the program can run in an infinite loop.

Code:

```void Preprocess(const UInt8* const x,                 const UInt8 m,                 Array_256_of_UInt8* const shift) {     for (UInt8 i=0;i<m;i++)         {         const UInt8 x_i=x[i];         for (SInt16 j=0;j<256;j++)             {             const UInt8 c=static_cast<UInt8>(j);             if (c==x_i)                 {                 shift[i][c]=0;                 continue;                 }             CalculateShift(x,                           m,                           i,                           c,                           shift[i][c]);             }         } }```
It can also be further improved:
1) x+m and x_plus_1+i are constant expressions.
2) shift[i][c]=0 is not necessary because these values are never used.
3) I can use a pointer p instead of an index j. Then I can replace y[i+j] with p[i].
• March 12th, 2010, 05:18 AM
Cliff Huylebroeck
Re: Did I discover a new string search algorithm?
Something like this existed already. It's described here:

http://www-igm.univ-mlv.fr/~lecroq/articles/cl2008.pdf