|
-
September 19th, 2002, 08:11 AM
#1
Finding a string in a text file
What's the fastest way to find a string in a text file (among hundreds of words)? I'm developing an app that needs to filter some "Text Messages" using the words stored in a text file.
Any help will be very much appreciated!
Thanks.
-
September 19th, 2002, 08:31 AM
#2
Depends on how much memory you have and how big the file is. The quickest way is to read the whole file into memory and then search for the word in memory.
Another way would be to read in one line at a time and perform the search on each line. Uses a lot less memory but is a lot slower (if you are running on a 25MHz 386).
If it is continuous text and it is not possible to read one line at a time, then go for a double buffering scheme.
Succinct is verbose for terse
-
September 19th, 2002, 02:14 PM
#3
Here's one method:
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <vector>
using namespace std;
// Scans an already-opened stream for the first occurrence of a byte sequence.
//
// The stream is consumed from its current read position using a sliding
// window that is exactly SizeOfKeyWord bytes wide, so a match can never be
// missed because it straddles two reads.
//
// Parameters:
//   OpenedFile    - stream to read from; it is left positioned just past the
//                   last byte that was read.
//   KeyWord       - bytes to look for (need not be NUL-terminated).
//   SizeOfKeyWord - number of bytes in KeyWord.
//   PositionFound - on success, receives the offset of the match counted in
//                   characters delivered by the stream since the call began.
//                   Left untouched when nothing is found.
//
// Returns true if the key was found. Returns false when the key is absent,
// when KeyWord is null, when SizeOfKeyWord is negative, or when the stream
// cannot be read (for example, the file failed to open).
// An empty key (SizeOfKeyWord == 0) matches at position 0 of a usable stream.
bool ReadUntilKeyIsFound(std::ifstream &OpenedFile, const unsigned char* KeyWord, int SizeOfKeyWord, int &PositionFound)
{
    if (KeyWord == 0 || SizeOfKeyWord < 0)
    {
        return false;
    }
    if (SizeOfKeyWord == 0)
    {
        if (!OpenedFile.good())
        {
            return false;
        }
        PositionFound = 0;
        return true;
    }

    const std::size_t KeySize = static_cast<std::size_t>(SizeOfKeyWord);

    // The vector owns the window, so every return path releases it.
    std::vector<unsigned char> Window(KeySize);
    unsigned char* const Buff = &Window[0];

    OpenedFile.read(reinterpret_cast<char*>(Buff), SizeOfKeyWord);

    // Test the whole stream state, not just eof(): a stream that failed to
    // open has failbit set but never reaches EOF, which would loop forever.
    int WindowStart = 0;
    while (OpenedFile)
    {
        if (std::memcmp(KeyWord, Buff, KeySize) == 0)
        {
            PositionFound = WindowStart;
            return true;
        }

        // Slide the window one byte to the right and pull in the next byte.
        std::memmove(Buff, Buff + 1, KeySize - 1);
        OpenedFile.read(reinterpret_cast<char*>(Buff + KeySize - 1), 1);
        ++WindowStart;
    }
    return false;
}
// Path of the file that main() searches.
const char* filename = "C:\\TenforeFeed.trc";

// Demo driver: opens `filename`, looks for the text "include" with
// ReadUntilKeyIsFound and prints where it was found.
// Returns 0 after a completed search (found or not) and 1 if the file could
// not be opened.
int main(int, char**)
{
    std::ifstream is(filename);
    if (!is.is_open())
    {
        // Bail out early rather than hand a dead stream to the search routine.
        printf("Couldn't open %s\n", filename);
        system("pause");
        return 1;
    }

    const char* SearchStr = "include";
    int PosFound = 0;
    const bool Found = ReadUntilKeyIsFound(
        is,
        reinterpret_cast<const unsigned char*>(SearchStr),
        static_cast<int>(strlen(SearchStr)),
        PosFound);

    if (Found)
    {
        printf("Found Keyword at position %i\n", PosFound);
    }
    else
    {
        printf("Didn't find %s", SearchStr);
    }
    system("pause");
    return 0;
}
-
September 19th, 2002, 05:23 PM
#4
There is a problem with the suggestion....
You are reading in the size of the keyword. What if the keyword is split between two buffers that you read in?
If you don't want to load up the whole file in memory, you have to read line by line or a single char at a time. Otherwise - the keyword could overlap buffers.
Your method would work fine if every word was the same length - but it's not.
I worked on this little binary find program a long time ago - can't remember if it works properly or not. But it might give you some ideas. I didn't finish it, I planned on making it use another thread to do it, and create a window (you can see by code that's not implemented but declared) - but didn't and just called the function I was going to use as the thread one directly.
#include <windows.h>
#include <process.h>
#include <stdlib.h>
#include <stdio.h>
// Parameters describing one search run: which file to scan, where to write
// the hit offsets, and what value to look for.
//
// The struct is named directly (rather than via an unnamed typedef) because
// it declares member functions.
struct BIN_FIND_DATA {
    // How szSearchParam is interpreted when the search pattern is built:
    // either as the literal characters of the string, or as the text of a
    // number that is converted to the given integer type and searched for in
    // its in-memory binary form.
    enum SearchType { AnsiString = 1,SignedChar,UnsignedChar,
        SignedShort,UnsignedShort,SignedLong,UnsignedLong,
        SignedInt64,UnsignedInt64 };

    char szSearchFile[256];     // path of the file to scan
    char szResultsFile[256];    // path of the file that receives the offsets
    char szSearchParam[128];    // search value, always supplied as text
    unsigned long dwParamType;  // one of the SearchType values

    // Instances created with new/delete live on the process heap.
    // The size parameter must be size_t (unsigned int is only the same type
    // on 32-bit targets). HeapAlloc reports failure by returning NULL, so the
    // allocator is declared non-throwing; a failed new-expression then
    // yields a null pointer.
    static void * __cdecl operator new(size_t nSize) throw() { return ::HeapAlloc(::GetProcessHeap(),0,nSize); }
    static void __cdecl operator delete(void * pBuf) throw() { ::HeapFree(::GetProcessHeap(),0,pBuf); }
};
// Window procedure for a results window. Only declared here; no definition
// appears in this listing.
int __stdcall BinFindWndProc(HWND hWnd,unsigned int message,WPARAM wParam,LPARAM lParam) throw();
// Search worker written with a thread-entry style signature; lpData is cast
// to a BIN_FIND_DATA pointer. WinMain currently calls it directly.
unsigned long __stdcall BinFindThreadProc(void * lpData) throw();
// Application instance handle. Never assigned in this listing.
HINSTANCE g_hInstance = 0;
// Owner window passed to the MessageBoxA calls in BinFindThreadProc.
HWND g_hMainWnd = HWND_DESKTOP;
// Presumably meant to hold the worker thread handle; unused in this listing.
HANDLE g_hFindThread = 0;
// Program entry point. Fills in a hard-coded search request (the ANSI string
// "009" in d:\computer.txt) and runs the search synchronously on the calling
// thread. Always returns 0.
extern "C" int __stdcall WinMain(HINSTANCE hInstance,HINSTANCE hPrevInstance,char * pCmdLine,int nShowCmd) throw()
{
    BIN_FIND_DATA myData;
    myData.dwParamType = BIN_FIND_DATA::AnsiString;

    // The struct fields are plain char arrays, so use the explicit ANSI
    // entry points. The generic lstrcpy macro maps to the wide-character
    // version when UNICODE is defined and would not compile here.
    ::lstrcpyA(myData.szSearchParam,"009");
    ::lstrcpyA(myData.szSearchFile,"d:\\computer.txt");
    ::lstrcpyA(myData.szResultsFile,"d:\\gone.saf-unsigned short-3765.txt");

    BinFindThreadProc(&myData);
    return 0;
}
bool BinFindInFile(HANDLE,unsigned long,const BIN_FIND_DATA *,unsigned long&,unsigned long&) throw();

// Searches hFile, starting at byte offset dwStart, for the first occurrence
// of the value described by pFindData.
//
// Parameters:
//   hFile            - file handle opened for synchronous reading; its file
//                      pointer is moved by this call.
//   dwStart          - byte offset at which the search begins.
//   pFindData        - search request. szSearchParam is used either as the
//                      literal bytes to find (AnsiString) or as the text of a
//                      number that is converted to the requested integer type
//                      and searched for in its in-memory binary form.
//   dwFoundFileIndex - on success, absolute file offset of the match.
//   dwFoundLength    - on success, length of the match in bytes.
//
// Returns true when a match is found. Returns false when there is no match,
// the file size cannot be determined, dwStart is at or beyond the end of the
// file, the parameter type is unknown, the numeric text cannot be parsed, or
// the pattern is empty. The output parameters are only written on success.
//
// Offsets are 32-bit, as in the rest of this program.
bool BinFindInFile(HANDLE hFile,unsigned long dwStart,const BIN_FIND_DATA * pFindData,unsigned long & dwFoundFileIndex,unsigned long & dwFoundLength) throw()
{
    const unsigned long dwFileSize = ::SetFilePointer(hFile,0,0,FILE_END);
    if (dwFileSize == 0xFFFFFFFF || dwStart >= dwFileSize)
    {
        // Unknown size, or nothing left to scan. This also prevents the
        // remaining length from wrapping around.
        return false;
    }
    if (::SetFilePointer(hFile,dwStart,0,FILE_BEGIN) == 0xFFFFFFFF)
    {
        return false;
    }

    // Typed storage for the numeric search kinds. The pattern pointer is
    // aimed at whichever one is filled in, so the bytes searched for are
    // exactly the object representation of the parsed value.
    char cValue = 0;
    unsigned char ucValue = 0;
    short sValue = 0;
    unsigned short usValue = 0;
    long lValue = 0;
    unsigned long ulValue = 0;
    __int64 i64Value = 0;
    unsigned __int64 ui64Value = 0;

    const unsigned char * pSearch = 0;  // bytes to look for
    unsigned long dwSizeOfParam = 0;    // number of bytes at pSearch

    switch (pFindData->dwParamType)
    {
    case BIN_FIND_DATA::AnsiString:
        pSearch = reinterpret_cast<const unsigned char *>(pFindData->szSearchParam);
        dwSizeOfParam = ::lstrlenA(pFindData->szSearchParam);
        break;
    case BIN_FIND_DATA::SignedChar:
        cValue = static_cast<char>(::atoi(pFindData->szSearchParam));
        pSearch = reinterpret_cast<const unsigned char *>(&cValue);
        dwSizeOfParam = sizeof(cValue);
        break;
    case BIN_FIND_DATA::UnsignedChar:
        ucValue = static_cast<unsigned char>(::atol(pFindData->szSearchParam));
        pSearch = &ucValue;
        dwSizeOfParam = sizeof(ucValue);
        break;
    case BIN_FIND_DATA::SignedShort:
        if (::sscanf(pFindData->szSearchParam,"%hd",&sValue) != 1)
        {
            return false;
        }
        pSearch = reinterpret_cast<const unsigned char *>(&sValue);
        dwSizeOfParam = sizeof(sValue);
        break;
    case BIN_FIND_DATA::UnsignedShort:
        if (::sscanf(pFindData->szSearchParam,"%hu",&usValue) != 1)
        {
            return false;
        }
        pSearch = reinterpret_cast<const unsigned char *>(&usValue);
        dwSizeOfParam = sizeof(usValue);
        break;
    case BIN_FIND_DATA::SignedLong:
        if (::sscanf(pFindData->szSearchParam,"%ld",&lValue) != 1)
        {
            return false;
        }
        pSearch = reinterpret_cast<const unsigned char *>(&lValue);
        dwSizeOfParam = sizeof(lValue);
        break;
    case BIN_FIND_DATA::UnsignedLong:
        if (::sscanf(pFindData->szSearchParam,"%lu",&ulValue) != 1)
        {
            return false;
        }
        pSearch = reinterpret_cast<const unsigned char *>(&ulValue);
        dwSizeOfParam = sizeof(ulValue);
        break;
    case BIN_FIND_DATA::SignedInt64:
        // The 64-bit size prefix is an upper-case "I64". With a lower-case
        // "i" the format is read as "%i" followed by literal text, which
        // stores only an int.
        if (::sscanf(pFindData->szSearchParam,"%I64d",&i64Value) != 1)
        {
            return false;
        }
        pSearch = reinterpret_cast<const unsigned char *>(&i64Value);
        dwSizeOfParam = sizeof(i64Value);
        break;
    case BIN_FIND_DATA::UnsignedInt64:
        if (::sscanf(pFindData->szSearchParam,"%I64u",&ui64Value) != 1)
        {
            return false;
        }
        pSearch = reinterpret_cast<const unsigned char *>(&ui64Value);
        dwSizeOfParam = sizeof(ui64Value);
        break;
    default:
        return false;
    }

    // An empty pattern would "match" everywhere without advancing the
    // caller's start offset. An over-long one would not fit the carry-over
    // area of the buffer below.
    if (dwSizeOfParam == 0 || dwSizeOfParam > sizeof(pFindData->szSearchParam))
    {
        return false;
    }

    // Read the file in chunks. The last (pattern length - 1) bytes of each
    // chunk are carried to the front of the buffer so that a match spanning
    // two chunks is still seen in one piece.
    const unsigned long kChunkSize = 4096;
    unsigned char ucBuffer[kChunkSize + sizeof(pFindData->szSearchParam)];
    unsigned long dwCarried = 0;             // bytes kept from the previous chunk
    unsigned long dwBufferOffset = dwStart;  // file offset of ucBuffer[0]

    for (;;)
    {
        unsigned long dwBytesRead = 0;
        if (!::ReadFile(hFile,ucBuffer + dwCarried,kChunkSize,&dwBytesRead,0) || dwBytesRead == 0)
        {
            return false;  // read error or end of file: no match
        }

        const unsigned long dwAvailable = dwCarried + dwBytesRead;
        if (dwAvailable < dwSizeOfParam)
        {
            // Not enough data for even one comparison yet; keep accumulating.
            dwCarried = dwAvailable;
            continue;
        }

        const unsigned long dwLastCandidate = dwAvailable - dwSizeOfParam;
        for (unsigned long i = 0; i <= dwLastCandidate; ++i)
        {
            // Both sides are unsigned char, so bytes >= 0x80 compare correctly.
            unsigned long j = 0;
            while (j < dwSizeOfParam && ucBuffer[i + j] == pSearch[j])
            {
                ++j;
            }
            if (j == dwSizeOfParam)
            {
                dwFoundFileIndex = dwBufferOffset + i;
                dwFoundLength = dwSizeOfParam;
                return true;
            }
        }

        // Move the unscanned tail to the front. dwDrop is at least 1, so this
        // forward copy never overwrites a byte before it has been read.
        const unsigned long dwKeep = dwSizeOfParam - 1;
        const unsigned long dwDrop = dwAvailable - dwKeep;
        for (unsigned long k = 0; k < dwKeep; ++k)
        {
            ucBuffer[k] = ucBuffer[dwDrop + k];
        }
        dwBufferOffset += dwDrop;
        dwCarried = dwKeep;
    }
}
// Runs a complete search described by lpData (a BIN_FIND_DATA pointer).
//
// Opens the search file, creates or overwrites the results file, then calls
// BinFindInFile repeatedly, resuming just past each hit. The file offset of
// every hit is written to the results file as a decimal number followed by a
// space and CR/LF. A message box reports completion or the first failure.
//
// Always returns 0.
unsigned long __stdcall BinFindThreadProc(void * lpData) throw()
{
    const BIN_FIND_DATA * pFindData = static_cast<const BIN_FIND_DATA *>(lpData);

    // The paths are plain char arrays, so call the ANSI entry points
    // explicitly. The generic macros map to the wide-character versions when
    // UNICODE is defined.
    HANDLE hSearchFile = ::CreateFileA(pFindData->szSearchFile,GENERIC_READ,FILE_SHARE_READ,0,OPEN_EXISTING,FILE_ATTRIBUTE_NORMAL,0);
    if (hSearchFile == INVALID_HANDLE_VALUE)
    {
        ::MessageBoxA(::g_hMainWnd,"Couldn't open search file...","BINFIND - ERROR",MB_ICONERROR | MB_OK);
        return 0;
    }

    HANDLE hResultsFile = ::CreateFileA(pFindData->szResultsFile,GENERIC_WRITE,FILE_SHARE_READ,0,CREATE_ALWAYS,FILE_ATTRIBUTE_NORMAL,0);
    if (hResultsFile == INVALID_HANDLE_VALUE)
    {
        ::CloseHandle(hSearchFile);
        ::MessageBoxA(::g_hMainWnd,"Couldn't create results file...","BINFIND - ERROR",MB_ICONERROR | MB_OK);
        return 0;
    }

    bool bWriteOk = true;
    unsigned long dwStart = 0;
    unsigned long dwFoundIndex = 0;
    unsigned long dwFoundLength = 0;

    while (::BinFindInFile(hSearchFile,dwStart,pFindData,dwFoundIndex,dwFoundLength))
    {
        char szOutputString[256];
        // wsprintfA returns the number of characters stored, not counting
        // the terminating NUL, which is the byte count to write.
        const int nLen = ::wsprintfA(szOutputString,"%lu \r\n",dwFoundIndex);

        unsigned long dwBytesWritten = 0;
        if (!::WriteFile(hResultsFile,szOutputString,nLen,&dwBytesWritten,0) ||
            dwBytesWritten != static_cast<unsigned long>(nLen))
        {
            // Stop instead of silently producing a truncated results file.
            bWriteOk = false;
            break;
        }

        // Resume immediately after this hit.
        dwStart = dwFoundIndex + dwFoundLength;
    }

    ::CloseHandle(hResultsFile);
    ::CloseHandle(hSearchFile);

    if (bWriteOk)
    {
        ::MessageBoxA(::g_hMainWnd,"Done searching file...","BINFIND",MB_OK);
    }
    else
    {
        ::MessageBoxA(::g_hMainWnd,"Couldn't write to results file...","BINFIND - ERROR",MB_ICONERROR | MB_OK);
    }
    return 0;
}
Last edited by JamesSchumacher; September 19th, 2002 at 05:57 PM.
-
September 19th, 2002, 08:55 PM
#5
That's it, guys! Your info was very useful, as usual!
I decided to read line by line instead of loading the whole file into memory, and it's been working fine so far. I'm not sure about the performance when my "database" gets bigger; we'll see.
Thanks again..
Posting Permissions
- You may not post new threads
- You may not post replies
- You may not post attachments
- You may not edit your posts
-
Forum Rules
|
Click Here to Expand Forum to Full Width
|