CodeGuru Home VC++ / MFC / C++ .NET / C# Visual Basic VB Forums Developer.com
Results 1 to 5 of 5
  1. #1
    Join Date
    Jul 2002
    Posts
    30

    Finding a string in a text file

    What's the fastest way to find a string in a text file (among hundreds of words)? I'm developing an app that needs to filter some "Text Messages" using the words stored in a text file.

    Any help will be very much appreciated!

    Thanks.

  2. #2
    Join Date
    Jun 2002
    Location
    Letchworth, UK
    Posts
    1,020
    Depends on how much memory you have and how big the file is. The quickest way is to read the whole file into memory and then search for the word in memory.

    Another way would be to read in one line at a time and perform the search on each line. Uses a lot less memory but is a lot slower (if you are running on a 25MHz 386).

    If it is continuous text and it is not possible to read one line at a time, then go for a double buffering scheme.
    Succinct is verbose for terse

  3. #3
    Join Date
    Aug 2000
    Location
    New Jersey
    Posts
    968
    Here's one method:

    #include <cstdio>
    #include <cstdlib>
    #include <cstring>
    #include <fstream>
    #include <vector>

    using namespace std;


    // Scans an already-opened stream, from its current read position, for the
    // first occurrence of a byte sequence.
    //
    // A window of SizeOfKeyWord bytes is slid over the stream one byte at a
    // time and compared against KeyWord after every step.
    //
    // OpenedFile     stream to read from; consumed up to the end of the match,
    //                or left in a failed/EOF state when nothing is found.
    // KeyWord        bytes to look for.
    // SizeOfKeyWord  number of bytes in KeyWord.
    // PositionFound  on success receives the offset of the match, counted in
    //                bytes read from the stream's starting position; left
    //                untouched when the function returns false.
    //
    // Returns true when the key was found, false otherwise (including when the
    // stream is unreadable or shorter than the key).
    bool ReadUntilKeyIsFound(std::ifstream &OpenedFile, const unsigned char* KeyWord, int SizeOfKeyWord, int &PositionFound)
    {
        if (SizeOfKeyWord < 0)
        {
            return false;               // a negative length cannot describe a key
        }
        if (SizeOfKeyWord == 0)
        {
            PositionFound = 0;          // an empty key trivially matches at the start
            return true;
        }
        if (KeyWord == 0)
        {
            return false;
        }

        // The vector owns the sliding window, so every return path releases it.
        std::vector<unsigned char> Window(static_cast<std::vector<unsigned char>::size_type>(SizeOfKeyWord));
        OpenedFile.read(reinterpret_cast<char*>(&Window[0]), SizeOfKeyWord);

        int Position = 0;               // offset of the first byte currently in Window

        // Test the whole stream state, not just eof(): a stream that failed to
        // open (or hit an I/O error) sets failbit/badbit without eofbit, and a
        // loop on !eof() would then spin forever.
        while (OpenedFile)
        {
            if (std::memcmp(KeyWord, &Window[0], static_cast<std::size_t>(SizeOfKeyWord)) == 0)
            {
                PositionFound = Position;
                return true;
            }
            // Drop the oldest byte and append the next one from the stream.
            std::memmove(&Window[0], &Window[0] + 1, static_cast<std::size_t>(SizeOfKeyWord - 1));
            OpenedFile.read(reinterpret_cast<char*>(&Window[0]) + SizeOfKeyWord - 1, 1);
            ++Position;
        }
        return false;
    }

    // Path of the file that main() scans for the keyword.
    const char* filename = "C:\\TenforeFeed.trc";

    // Demo driver: opens `filename`, searches it for the word "include" and
    // prints the offset of the first match (or a not-found message).
    // Returns 0 after a completed search, 1 when the file cannot be opened.
    int main(int, char**)
    {
        std::ifstream is (filename);
        if (!is)
        {
            // Without this check the search would run on a failed stream.
            std::printf("Couldn't open %s\n", filename);
            return 1;
        }

        const char* SearchStr = "include";
        int PosFound = 0;
        const bool Found = ReadUntilKeyIsFound(is,
                                               reinterpret_cast<const unsigned char*>(SearchStr),
                                               static_cast<int>(std::strlen(SearchStr)),
                                               PosFound);
        if (Found)
        {
            std::printf("Found Keyword at position %i\n", PosFound);
        }
        else
        {
            std::printf("Didn't find %s", SearchStr);
        }

        // Keeps the console window open (Windows "pause" shell command).
        std::system("pause");
        return 0;
    }
    David Maisonave
    Author of Policy Based Synchronized Smart Pointer
    http://axter.com/smartptr


    Top ten member of C++ Expert Exchange.
    C++ Topic Area

  4. #4

    There is a problem with the suggestion....

    You are reading in the size of the keyword. What if the keyword is split between two buffers that you read in?

    If you don't want to load up the whole file in memory, you have to read line by line or a single char at a time. Otherwise - the keyword could overlap buffers.

    Your method would work fine if every word was the same length - but it's not.

    I worked on this little binary find program a long time ago - can't remember if it works properly or not. But it might give you some ideas. I didn't finish it, I planned on making it use another thread to do it, and create a window (you can see by code that's not implemented but declared) - but didn't and just called the function I was going to use as the thread one directly.

    #include <windows.h>
    #include <process.h>
    #include <stdlib.h>
    #include <stdio.h>

    // Describes one search request: which file to scan, where to write the
    // results, and the value to look for together with how to interpret it.
    //
    // The struct is named directly (rather than typedef'ing an unnamed struct)
    // because an unnamed struct that is given a typedef name may not declare
    // member functions in newer C++ standards.
    struct BIN_FIND_DATA {
        // Selects how szSearchParam is converted into the bytes searched for.
        enum SearchType { AnsiString = 1,SignedChar,UnsignedChar,
            SignedShort,UnsignedShort,SignedLong,UnsignedLong,
            SignedInt64,UnsignedInt64 };
        char szSearchFile[256];     // path of the file to scan
        char szResultsFile[256];    // path of the file that receives match offsets
        char szSearchParam[128];    // search value, always supplied as text
        unsigned long dwParamType;  // one of the SearchType values
        // Heap-allocated instances come from the process heap. The size
        // parameter must be size_t: an allocation function taking unsigned int
        // is ill-formed wherever size_t is a different type (e.g. 64-bit builds).
        // NOTE(review): HeapAlloc reports failure by returning NULL, while this
        // operator new is not declared non-throwing - confirm callers cope.
        static void * __cdecl operator new(size_t nSize) { return ::HeapAlloc(::GetProcessHeap(),0,nSize); }
        static void __cdecl operator delete(void * pBuf) { ::HeapFree(::GetProcessHeap(),0,pBuf); }
    };

    // Window procedure prototype. Only declared here; no definition appears in
    // the code shown.
    int __stdcall BinFindWndProc(HWND hWnd,unsigned int message,WPARAM wParam,LPARAM lParam) throw();
    // Runs a complete search described by a BIN_FIND_DATA passed through lpData.
    // Has a thread-entry style signature, but is called directly by WinMain.
    unsigned long __stdcall BinFindThreadProc(void * lpData) throw();

    // Application instance handle; never assigned in the code shown.
    HINSTANCE g_hInstance = 0;
    // Owner window passed to the message boxes; defaults to the desktop.
    HWND g_hMainWnd = HWND_DESKTOP;
    // Handle intended for the search thread; unused in the code shown.
    HANDLE g_hFindThread = 0;

    // Program entry point. Builds a hard-coded search request (an ANSI string
    // search for "009") and runs the search synchronously on the calling
    // thread. The entry-point arguments are not used. Always returns 0.
    extern "C" int __stdcall WinMain(HINSTANCE hInstance,HINSTANCE hPrevInstance,char * pCmdLine,int nShowCmd) throw()
    {
        BIN_FIND_DATA request;

        // Input and output files.
        ::lstrcpy(request.szSearchFile,"d:\\computer.txt");
        ::lstrcpy(request.szResultsFile,"d:\\gone.saf-unsigned short-3765.txt");

        // What to look for and how to interpret it.
        ::lstrcpy(request.szSearchParam,"009");
        request.dwParamType = BIN_FIND_DATA::AnsiString;

        BinFindThreadProc(&request);
        return 0;
    }

    bool BinFindInFile(HANDLE,unsigned long,const BIN_FIND_DATA *,unsigned long&,unsigned long&) throw();

    // Searches hFile, starting at byte offset dwStart, for the first occurrence
    // of the value described by pFindData.
    //
    // hFile             file handle opened for reading; its file pointer is moved.
    // dwStart           absolute offset at which the search begins.
    // pFindData         search value (as text) and the type used to convert it
    //                   into the byte pattern that is searched for.
    // dwFoundFileIndex  on success, absolute offset of the match.
    // dwFoundLength     on success, length of the matched pattern in bytes.
    //
    // Returns true when a match was found. Returns false when there is no
    // match, the parameter type is unknown, the search text cannot be parsed as
    // the requested numeric type, the pattern is empty, or a file operation
    // fails. The output references are only written on success.
    //
    // Offsets are passed to SetFilePointer without a high part, matching the
    // original code, so only the low 32 bits of the file position are handled.
    bool BinFindInFile(HANDLE hFile,unsigned long dwStart,const BIN_FIND_DATA * pFindData,unsigned long & dwFoundFileIndex,unsigned long & dwFoundLength) throw()
    {
        const unsigned long dwFileSize = ::SetFilePointer(hFile,0,0,FILE_END);
        if (dwFileSize == 0xFFFFFFFF || dwStart > dwFileSize)
        {
            // Seek failure, or a start offset past the end (which would
            // otherwise underflow the remaining-length computation below).
            return false;
        }
        const unsigned long dwLength = dwFileSize - dwStart;

        // Typed storage for numeric search values. Parsing into a properly
        // typed member and then viewing its bytes avoids writing through a
        // casted pointer into an unaligned byte array.
        union
        {
            char c;
            unsigned char uc;
            short s;
            unsigned short us;
            long l;
            unsigned long ul;
            __int64 i64;
            unsigned __int64 u64;
        } numeric;
        numeric.u64 = 0;

        const unsigned char * pSearch = reinterpret_cast<const unsigned char *>(&numeric);
        unsigned long dwSizeOfParam = 0;

        switch(pFindData->dwParamType)
        {
        case BIN_FIND_DATA::AnsiString:
            pSearch = reinterpret_cast<const unsigned char *>(pFindData->szSearchParam);
            dwSizeOfParam = ::lstrlen(pFindData->szSearchParam);
            break;
        case BIN_FIND_DATA::SignedChar:
            numeric.c = static_cast<char>(::atoi(pFindData->szSearchParam));
            dwSizeOfParam = sizeof(char);
            break;
        case BIN_FIND_DATA::UnsignedChar:
            numeric.uc = static_cast<unsigned char>(::atol(pFindData->szSearchParam));
            dwSizeOfParam = sizeof(unsigned char);
            break;
        case BIN_FIND_DATA::SignedShort:
            if (::sscanf(pFindData->szSearchParam,"%hd",&numeric.s) != 1) return false;
            dwSizeOfParam = sizeof(short);
            break;
        case BIN_FIND_DATA::UnsignedShort:
            if (::sscanf(pFindData->szSearchParam,"%hu",&numeric.us) != 1) return false;
            dwSizeOfParam = sizeof(unsigned short);
            break;
        case BIN_FIND_DATA::SignedLong:
            if (::sscanf(pFindData->szSearchParam,"%ld",&numeric.l) != 1) return false;
            dwSizeOfParam = sizeof(long);
            break;
        case BIN_FIND_DATA::UnsignedLong:
            if (::sscanf(pFindData->szSearchParam,"%lu",&numeric.ul) != 1) return false;
            dwSizeOfParam = sizeof(unsigned long);
            break;
        case BIN_FIND_DATA::SignedInt64:
            // The 64-bit size prefix of the Microsoft CRT is an upper-case
            // "I64"; "%i64d" parses a plain int followed by the literal "64d".
            if (::sscanf(pFindData->szSearchParam,"%I64d",&numeric.i64) != 1) return false;
            dwSizeOfParam = sizeof(__int64);
            break;
        case BIN_FIND_DATA::UnsignedInt64:
            if (::sscanf(pFindData->szSearchParam,"%I64u",&numeric.u64) != 1) return false;
            dwSizeOfParam = sizeof(unsigned __int64);
            break;
        default:
            return false;
        }

        // Large enough for the longest possible pattern (the text parameter).
        unsigned char window[sizeof(pFindData->szSearchParam)];
        if (dwSizeOfParam == 0 || dwSizeOfParam > sizeof(window) || dwSizeOfParam > dwLength)
        {
            return false;
        }

        // Try every candidate offset at which the whole pattern still fits.
        // Each candidate is addressed absolutely, so the file position can never
        // drift away from the loop counter.
        const unsigned long dwLastCandidate = dwLength - dwSizeOfParam;
        for (unsigned long x = 0; x <= dwLastCandidate; ++x)
        {
            if (::SetFilePointer(hFile,dwStart + x,0,FILE_BEGIN) == 0xFFFFFFFF)
            {
                return false;
            }

            unsigned long dwBytesRead = 0;
            if (!::ReadFile(hFile,window,dwSizeOfParam,&dwBytesRead,0) || dwBytesRead != dwSizeOfParam)
            {
                return false;   // read error or unexpected end of file
            }

            // Compare as unsigned bytes so values >= 0x80 match correctly.
            unsigned long j = 0;
            while (j < dwSizeOfParam && window[j] == pSearch[j])
            {
                ++j;
            }
            if (j == dwSizeOfParam)
            {
                dwFoundFileIndex = dwStart + x;
                dwFoundLength = dwSizeOfParam;
                return true;
            }
        }
        return false;
    }

    // Runs one complete search: opens the search file and the results file
    // named in the BIN_FIND_DATA passed through lpData, writes the offset of
    // every match to the results file (one "<offset> \r\n" line per match),
    // and reports the outcome in a message box. Always returns 0.
    unsigned long __stdcall BinFindThreadProc(void * lpData) throw()
    {
        BIN_FIND_DATA * pRequest = reinterpret_cast<BIN_FIND_DATA *>(lpData);

        HANDLE hInput = ::CreateFile(pRequest->szSearchFile,GENERIC_READ,FILE_SHARE_READ,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
        if (hInput == INVALID_HANDLE_VALUE)
        {
            ::MessageBoxA(::g_hMainWnd,"Couldn't open search file...","BINFIND - ERROR",MB_ICONERROR | MB_OK);
            return 0;
        }

        HANDLE hOutput = ::CreateFile(pRequest->szResultsFile,GENERIC_WRITE,FILE_SHARE_READ,0,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,0);
        if (hOutput == INVALID_HANDLE_VALUE)
        {
            ::CloseHandle(hInput);
            ::MessageBoxA(::g_hMainWnd,"Couldn't create results file...","BINFIND - ERROR",MB_ICONERROR | MB_OK);
            return 0;
        }

        char szLine[256];
        char szLineFormat[] = "%lu \r\n";
        unsigned long dwSearchFrom = 0;   // offset at which the next search starts
        unsigned long dwHitOffset = 0;
        unsigned long dwHitLength = 0;
        unsigned long dwWritten = 0;

        // Keep searching until no further match is reported; each new search
        // resumes immediately after the previous match.
        while (::BinFindInFile(hInput,dwSearchFrom,pRequest,dwHitOffset,dwHitLength) == true)
        {
            ::wsprintf(szLine,szLineFormat,dwHitOffset);
            ::WriteFile(hOutput,szLine,::lstrlen(szLine),&dwWritten,0);
            dwSearchFrom = dwHitOffset + dwHitLength;
        }

        ::CloseHandle(hOutput);
        ::CloseHandle(hInput);
        ::MessageBoxA(::g_hMainWnd,"Done searching file...","BINFIND",MB_OK);
        return 0;
    }
    Last edited by JamesSchumacher; September 19th, 2002 at 05:57 PM.

  5. #5
    Join Date
    Jul 2002
    Posts
    30
    That's it, guys! Your info was very useful, as usual!

    I decided to read the file line by line instead of loading the whole file into memory, and it's been working fine so far. I'm not sure how it will perform when my "database" gets bigger; we'll see.

    Thanks again..

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  





Click Here to Expand Forum to Full Width

Featured