CodeGuru Home VC++ / MFC / C++ .NET / C# Visual Basic VB Forums Developer.com
Results 1 to 6 of 6
  1. #1
    Join Date
    Mar 2010
    Posts
    11

    Did I discover a new string search algorithm?

    I wrote a string search algorithm.

    It has a few limitations:
    The length of the word is limited to 255 characters.
    (This can be overcome by using numbers of a bigger type.)
    It was written for search and replace, so it doesn't find overlapping words.
    If it finds the word then the shift is equal to the length of the word.

  2. #2
    Join Date
    Mar 2010
    Posts
    11

    Re: Did I discover a new string search algorithm?

    StringSearch.h
    Code:
    #pragma once
    
    typedef unsigned char UInt8;
    typedef signed short SInt16;
    typedef signed long SInt32;
    
    typedef UInt8 Array_256_of_UInt8[256];
    
    int main();
    
    void StringSearch();
    
    bool IsEqual(const UInt8 *tooFar,
                 const UInt8 *a,
                 const UInt8 *b);
    
    void CalculateShift(const UInt8 *x,
                        UInt8 m,
                        UInt8 i,
                        UInt8 c,
                        UInt8 &shift);
    
    void Preprocess(const UInt8 *x,
                    UInt8 m,
                    Array_256_of_UInt8 *shift);
    
    bool Search(const UInt8 *x,
                UInt8 m,
                const UInt8 *y,
                SInt32 n,
                SInt32 &character_comparisons);

  3. #3
    Join Date
    Mar 2010
    Posts
    11

    Re: Did I discover a new string search algorithm?

    StringSearch.cp
    Code:
    #include "StringSearch.h"
    
    int main()
    {    
        StringSearch();
        return 0;
    }
    
    void StringSearch()
    {
        const UInt8* const x=reinterpret_cast<UInt8*>("gcagagag");
        const UInt8* const y=reinterpret_cast<UInt8*>("gcatcgcagagagtatacagtacg");
        SInt32 character_comparisons;
        if (!Search(x,
                    8,
                    y,
                    24,
                    character_comparisons))
            {
            cout << "Search fails." << endl;
            return;
            }
        cout << "Character comparisons : " << character_comparisons << endl;
    }
    
    bool IsEqual(const UInt8* const tooFar,
                 const UInt8 *a,
                 const UInt8 *b)
    {
        while (a<tooFar)
            {
            if (*a!=*b)
                {
                return false;
                }
            a++;
            b++;
            }
        return true;
    }
    
    void CalculateShift(const UInt8* const x,
                        const UInt8 m,
                        const UInt8 i,
                        const UInt8 c,
                        UInt8 &shift)
    {
        // 1. We search c+x[i+1..m-1] at a position starting from i-1 in the direction of 0.
        const UInt8* const x_plus_1=x+1;
        for (SInt16 j=static_cast<SInt16>(i-1);j>=0;j--)
            {
            const UInt8 x_j=x[j];
            if (c!=x_j)
                {
                continue;
                }
            if (IsEqual(x+m,
                        x_plus_1+i,
                        x_plus_1+j))
                {
                shift=static_cast<UInt8>(i-j);
                return;
                }
            }
    
        // 2. We check whether x starts with an end of x[i+1..m-1].
        for (SInt16 j=static_cast<SInt16>(i+1);j<m;j++)
            {
            if (IsEqual(x+m,
                        x+j,
                        x))
                {
                shift=static_cast<UInt8>(j);
                return;
                }
            }
    
        // 3. Nothing found.
        shift=m;
    }
    
    void Preprocess(const UInt8* const x,
                    const UInt8 m,
                    Array_256_of_UInt8* const shift)
    {
        // rem: shift[0] is unused.
        for (SInt16 j=0;j<256;j++)
            {
            const UInt8 c=static_cast<UInt8>(j);
            shift[0][c]=0;
            }
        for (UInt8 i=1;i<m;i++)
            {
            const UInt8 x_i=x[i];
            for (SInt16 j=0;j<256;j++)
                {
                const UInt8 c=static_cast<UInt8>(j);
                if (c==x_i)
                    {
                    shift[i][c]=0;
                    continue;
                    }
                // rem: we find c on position i and that's wrong.
                CalculateShift(x,
                               m,
                               i,
                               c,
                               shift[i][c]);
                }
            }
    }
    
    bool Search(const UInt8* const x,
                const UInt8 m,
                const UInt8* const y,
                const SInt32 n,
                SInt32 &character_comparisons)
    {
        Array_256_of_UInt8* const shift=new Array_256_of_UInt8[m];
        if (!shift)
            {
            cout << "Out of memory." << endl;
            return false;
            }
        Preprocess(x,
                   m,
                   shift);
        character_comparisons=0;
        const UInt8 m_minus_1=m-1;
        const SInt32 last=n-m;
        SInt32 j=0;
        while (j<=last)
            {
            SInt16 i=m_minus_1;
            while (i>=0)
                {
                character_comparisons++;
                if (x[i]!=y[i+j])
                    {
                    break;
                    }
                i--;
                }
            if (i<0)
                {
                cout << "Found at position " << j << endl;
                j+=m;
                }
            else
                {
                j+=shift[i][y[i+j]];
                }
            }
        delete [] shift;
        return true;
    }

  4. #4
    Join Date
    Mar 2010
    Posts
    11

    Re: Did I discover a new string search algorithm?

    When searching gcagagag in the Bible it was 5.5% faster than Boyer-Moore.
    Is it right?
    Is it new?
    I didn't find it in the book of Professor Lecrocq.
    http://www-igm.univ-mlv.fr/~lecroq/string/node1.html

  5. #5
    Join Date
    Mar 2010
    Posts
    11

    Re: Did I discover a new string search algorithm?

    The shift[0][c] have to be initialized like the rest of the table. Otherwise the program can run in an infinite loop.

    Code:
    void Preprocess(const UInt8* const x,
                    const UInt8 m,
                    Array_256_of_UInt8* const shift)
    {
        for (UInt8 i=0;i<m;i++)
            {
            const UInt8 x_i=x[i];
            for (SInt16 j=0;j<256;j++)
                {
                const UInt8 c=static_cast<UInt8>(j);
                if (c==x_i)
                    {
                    shift[i][c]=0;
                    continue;
                    }
                CalculateShift(x,
                               m,
                               i,
                               c,
                               shift[i][c]);
                }
            }
    }
    It can also be further improved:
    1) x+m and x_plus_1+i are constant expressions.
    2) shift[i][c]=0 is not necessary because these values are never used.
    3) I can use a pointer p instead of an index j. Then I can replace y[i+j] with p[i].

  6. #6
    Join Date
    Mar 2010
    Posts
    11

    Re: Did I discover a new string search algorithm?

    Something like this existed already. It's described here:

    http://www-igm.univ-mlv.fr/~lecroq/articles/cl2008.pdf

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  





Click Here to Expand Forum to Full Width

Featured