CodeGuru Home VC++ / MFC / C++ .NET / C# Visual Basic VB Forums Developer.com
Page 10 of 11 FirstFirst ... 7891011 LastLast
Results 136 to 150 of 156
  1. #136
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,822

    Re: Read binary file with line delimeter

    _itoa_s doesn't return a pointer to the buffer like _itoa does. So you can't do something like

    Code:
    sub += _itoa(CONVDEC(i), num, 10);
    you have to do it in two parts

    Code:
    _itoa_s(CONVDEC(i), num, 10, 10);
    sub += num;
    The way I'm using _itoa is OK but I'll change usage of _itoa to the above form to make changing it to _itoa_s easy if required.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++23 Compiler: Microsoft VS2022 (17.6.5)

  2. #137
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,822

    Re: Read binary file with line delimeter

    Changed code as per your post for the mapping.

    Have some more fun!

    Code:
    #include <iostream>
    #include <fstream>
    #include <string>
    #include <ctime>
    #include <cstdlib>
    using namespace std;
    
    // Windows-style integer aliases used throughout this program.
    typedef unsigned char BYTE;
    typedef unsigned short int WORD;
    typedef unsigned long int DWORD;
    
    // LOBYTE(w): w converted to WORD, masked to its low 8 bits, as a BYTE.
    // Guarded so that a definition already supplied by another header is kept.
    #ifndef LOBYTE
    	#define LOBYTE(w)	((BYTE)((WORD)(w) & 0xff))
    #endif
    
    // HIBYTE(w): w converted to WORD, shifted right by 8, as a BYTE.
    #ifndef HIBYTE
    	#define HIBYTE(w)	((BYTE)((WORD)(w) >> 8))
    #endif
    
    // CONVDEC(num): numeric value (0-255) of the two hex characters found at
    // cx[c + num] and cx[c + num + 1].
    // NOTE: the macro refers to the identifiers `cx` and `c`, which must be in
    // scope where it is expanded. It performs no bounds check on cx and is only
    // meaningful for the characters '0'-'9' and 'A'-'F' (see convh).
    #define CONVDEC(num)	(convh[cx[c + (num)] - '0'] * 16 + convh[cx[c + (num) + 1] - '0'])
    
    // hconv: nibble value (0-15) -> upper-case hex character.
    const char hconv[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
    // convh: inverse of hconv, indexed by (character - '0'). The seven zero
    // entries at indices 10-16 cover the ASCII characters between '9' and 'A'.
    const int convh[23] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15};
    // aindx: indexed by (sub-block type character - '0'); main() uses the result
    // as an index into its 10-element sblk array. Types '0','1','2','3','4','6',
    // '7','A','B' map to slots 0,1,3,4,8,6,7,2,5; every other entry maps to 9.
    const int aindx[23] = {0, 1, 3, 4, 8, 9, 6, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 5, 9, 9, 9, 9};
    // SEPAR: default two-byte delimiter for getBlock()/getField(); getField()
    // matches HIBYTE(SEPAR) followed by LOBYTE(SEPAR) in the file.
    const WORD SEPAR = 0xFF77;
    // SBLOCK: hex text that main() searches for inside each block.
    const char SBLOCK[] = "FF79";
    
    // Sequential reader for a binary file made of delimiter-separated fields.
    // The stream is opened with open(); getField() returns the bytes up to the
    // next delimiter as hex text, getBlock() additionally decodes the fixed-size
    // prefix that precedes each block's data.
    class FileFields
    {
    public:
    	// Starts with no file attached.
    	FileFields() : opened(false) {}
    
    	// Closes the stream if open() succeeded earlier.
    	~FileFields()
    	{
    		if (opened) {
    			ifs.close();
    		}
    	}
    
    	// Opens `name` in binary mode; true on success.
    	bool open(const char* name);
    
    	// Reads the block prefix into `number` / `firstpart` and the rest of the
    	// block (as hex text) into `field`.
    	bool getBlock(string& field, DWORD& number, string& firstpart, WORD delim = SEPAR);
    
    	// Reads bytes up to the next `delim` into `field` as hex text.
    	bool getField(string& field, WORD delim = SEPAR);
    
    private:
    	ifstream	ifs;		// underlying input stream
    	bool		opened;		// result of the last open() call
    };
    
    // Opens `name` for binary input and records the outcome in `opened`.
    // Returns true when the stream is open afterwards.
    bool FileFields::open(const char* name)
    {
    	ifs.open(name, ios::binary);
    	opened = ifs.is_open();
    	return opened;
    }
    
    // Reads one block from the stream.
    //
    // Bytes consumed, in order:
    //   *  3 bytes: block number, most significant byte first -> `number`
    //   * 16 bytes: two groups of 8 bytes, decoded into `firstpart`
    //   * everything up to the next `delim` -> `field` (hex text, via getField)
    //
    // `firstpart` becomes "|<group1>|<group2>". Within a group each byte
    // contributes its high then low nibble as a hex character; the first nibble
    // equal to 0xF ends that group.
    //
    // Returns false when the stream is not open / not good, or when the 19-byte
    // prefix cannot be read completely (`number` is then 0 and no uninitialised
    // bytes are decoded). Otherwise returns the result of getField().
    //
    // Fixes over the previous version: `delim` is now forwarded to getField()
    // instead of being silently replaced by the default, and both prefix reads
    // are checked before their buffers are used.
    bool FileFields::getBlock(string& field, DWORD& number, string& firstpart, WORD delim)
    {
    	number = 0;
    	firstpart = "|";
    
    	if (!opened || !ifs.good())
    		return false;
    
    	BYTE	num[3];
    	ifs.read((char*)num, 3);
    	if (!ifs.good())
    		return false;
    
    	number = ((DWORD)num[0] << 16) + ((DWORD)num[1] << 8) + num[2];
    
    	BYTE	first[16];
    	ifs.read((char*)first, 16);
    	if (!ifs.good())
    		return false;
    
    	for (int group = 0; group < 2; group++) {
    		// The two groups are separated by a single '|'.
    		if (group == 1)
    			firstpart += '|';
    
    		const int begin = group * 8;
    		for (int i = begin; i < begin + 8; i++) {
    			const BYTE hi = first[i] >> 4;
    			if (hi == 0xf)
    				break;			// 0xF nibble terminates the group
    			firstpart += hconv[hi];
    
    			const BYTE lo = first[i] & 0x0f;
    			if (lo == 0xf)
    				break;
    			firstpart += hconv[lo];
    		}
    	}
    
    	return getField(field, delim);
    }
    
    // Reads bytes from the stream into `field` as upper-case hex text (two
    // characters per byte) until the two-byte delimiter `delim` or end of input.
    // The delimiter itself is consumed but not stored.
    //
    // Returns false (with `field` emptied) if the stream is not open or not
    // good on entry; returns true otherwise, even when no byte could be read.
    bool FileFields::getField(string& field, WORD delim)
    {
    	field = "";
    
    	if (!opened || !ifs.good())
    		return false;
    
    	const BYTE	delimHi = HIBYTE(delim);
    	const BYTE	delimLo = LOBYTE(delim);
    	char		ch;
    
    	while (ifs.get(ch)) {
    		const BYTE cur = (BYTE)ch;
    
    		// The high byte was just read; the low byte is only peeked at so that
    		// a non-matching byte is still processed normally on the next pass.
    		if (cur == delimHi && (BYTE)ifs.peek() == delimLo) {
    			ifs.get(ch);		// swallow the delimiter's low byte
    			break;
    		}
    
    		field += hconv[cur >> 4];
    		field += hconv[cur & 0xf];
    	}
    
    	return true;
    }
    
    // Program entry point: reads a hard-coded binary file block by block and
    // writes one '|'-separated text record per block to stdout, followed by the
    // elapsed time in seconds.
    //
    // For each block, the hex text after the first SBLOCK marker is searched for
    // "05". What follows that "05" is scanned two characters at a time for
    // sub-blocks of the form  '9' <type char> <length byte> <payload>.
    // Decoded sub-blocks are stored in sblk[] (slot chosen through aindx); they
    // are only printed when a type '4' sub-block was found, which also ends the
    // scan of that block.
    //
    // Fixes over the previous version:
    //   * `num` now holds any 32-bit int written by _itoa (a 10-byte buffer is
    //     overflowed by a 10-digit or negative value).
    //   * a sub-block is decoded only if every character it reads lies inside
    //     cx; truncated data previously caused out-of-range reads.
    //   * the 4-byte value is accumulated unsigned to avoid signed overflow; the
    //     printed value is unchanged.
    //   * the unused block counter was removed.
    //
    // Returns 1 if the file cannot be opened, 0 otherwise.
    int main()
    {
    	FileFields	ff;
    
    	//if (!ff.open("d:\\philidor\\bin2g")) {
    	if (!ff.open("d:\\philidor\\binsmall")) {
    		cout << "Cannot open file!" << endl;
    		return 1;
    	}
    
    	// Everything before the first delimiter is read and then ignored.
    	string	header;
    	ff.getField(header);
    
    	// Work buffers are reserved once and reused for every block.
    	string	block;
    	block.reserve(7000);
    
    	string	preliminar;
    	preliminar.reserve(7000);
    
    	string	cx;
    	cx.reserve(7000);
    
    	string	sub;
    	sub.reserve(7000);
    
    	DWORD	number;
    
    	// "-2147483648" is 11 characters, plus the terminating NUL.
    	char	num[12];
    
    	const time_t timest = time(NULL);
    
    	while (ff.getBlock(block, number, preliminar)) {
    		bool	got4 = false;
    		string	sblk[10];
    
    		const size_t ff79 = block.find(SBLOCK);
    		const size_t five = (ff79 != string::npos) ? block.find("05", ff79) : string::npos;
    
    		if (five != string::npos) {
    			cx = block.substr(five + 2);
    			for (size_t c = 0; c < cx.size() && !got4; c += 2) {
    				// Characters available from position c to the end of cx.
    				const size_t avail = cx.size() - c;
    
    				// A sub-block header needs 4 characters: '9', type, 2-char length.
    				if (avail < 4 || cx[c] != '9')
    					continue;
    
    				const char styp = cx[c + 1];
    				if (!((styp >= '0' && styp <= '7') || styp == 'A' || styp == 'B'))
    					continue;
    
    				const int slen = CONVDEC(2) * 2;	// payload length in hex chars
    				sub = "";
    
    				if (styp == '4') {
    					// Type 4: every payload byte as a decimal number, comma separated.
    					if (avail >= (size_t)slen + 4) {
    						got4 = true;
    						for (int i = 4; i < slen + 4; i += 2) {
    							_itoa(CONVDEC(i), num, 10);
    							sub += num;
    							if (i != slen + 2)
    								sub += ',';
    						}
    						sblk[aindx[styp - '0']] = sub;
    					}
    				} else if (styp != '5') {
    					// Other types read fixed offsets up to c + 33 (c + 35 when slen is 32).
    					const size_t need = (slen == 32) ? 36 : 34;
    					if (avail >= need) {
    						_itoa(CONVDEC(6), num, 10);
    						sub += num;
    						sub += ',';
    
    						// Four bytes, most significant first.
    						unsigned int dec = 0;
    						for (int s = 8; s < 16; s += 2)
    							dec = (dec << 8) + CONVDEC(s);
    
    						_itoa((int)dec, num, 10);
    						sub += num;
    						sub += ',';
    
    						// Up to 16 hex characters copied verbatim, stopping at the first 'F'.
    						for (size_t s = c + 16; s < c + 32; s++) {
    							if (cx[s] == 'F')
    								break;
    							sub += cx[s];
    						}
    
    						sub += ',';
    						_itoa(CONVDEC(32), num, 10);
    						sub += num;
    						if (slen == 32) {
    							sub += ',';
    							_itoa(CONVDEC(34), num, 10);
    							sub += num;
    						}
    						sblk[aindx[styp - '0']] = sub;
    					}
    				}
    
    				// Together with the loop's own c += 2 this skips header + payload.
    				c += slen + 2;
    			}
    		}
    
    		if (got4) {
    			for (int a = 0; a <= 8; a++) {
    				preliminar += '|';
    				preliminar += sblk[a];
    			}
    		} else {
    			preliminar += "|||||||||";
    		}
    
    		cout << number << preliminar << endl;
    	}
    
    	cout << "Time taken: " << time(NULL) - timest << endl;
    	return 0;
    }
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++23 Compiler: Microsoft VS2022 (17.6.5)

  3. #138
    Join Date
    Oct 2013
    Posts
    63

    Re: Read binary file with line delimeter

    Hello 2kaud,

    Thank you. I haven't tested yet, only would like to understand better.

    For example, what 2nd y 3rd array you define are for? And why in 2nd array you define some elements with zeros
    And why in 3rd array you define some elements with 9's?

  4. #139
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,822

    Re: Read binary file with line delimeter

    convh is used to convert a hex char to its decimal value. A hex digit is a char '0' to '9' or 'A' to 'F'. In ASCII there are 7 other characters between '9' and 'A', hence the 7 0s in the middle of the array. The first char is '0', so subtracting '0' from the char gives an index into convh that returns the decimal number. Using an array like this removes the need for an if statement for '0' to '9' and another for 'A' to 'F'.

    aindx is for the mapping. Valid values for '9X' are 0 - 4, 6, 7, 10 (A), 11 (B). Again, subtracting '0' from the 'X' code gives an index into this array. The value returned is the element position in sblk (starting at 0) - so '92' maps to 3. Where X isn't defined I used 9 to map to the last position of sblk (which has 10 elements) in case one of these values happens to occur in some file.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++23 Compiler: Microsoft VS2022 (17.6.5)

  5. #140
    Join Date
    Apr 1999
    Posts
    27,449

    Re: Read binary file with line delimeter

    Quote Originally Posted by Philidor View Post
    Thank you. I haven't tested yet, only would like to understand better.
    Well, this goes back by learning to use the debugger and stepping through the program to see what each line does.
    For example, what 2nd y 3rd array you define are for? And why in 2nd array you define some elements with zeros
    And why in 3rd array you define some elements with 9's?
    Code:
    sblk[aindx[styp - '0']] = sub;
    Those arrays are used in this line of code. It should be obvious that the inner array aindx returns an index that will be used by the sblk array. The " - '0'" part is the 'C' way of turning an ASCII digit into a number.

    If you knew nothing else of the program, then you take this limited knowledge and run the program under the debugger to see what index is being returned and how doing so works.

    Regards,

    Paul McKenzie

  6. #141
    Join Date
    Oct 2013
    Posts
    63

    Re: Read binary file with line delimeter

    Hello 2kaud,

    Thank you, is very close now the output, only I found that in attached file (bin1MB.txt) some blocks, beginning in block number 9128 (0023A8 in hex) is printing not the correct substrings, because in this block there is more than one "05" and maybe is because of that the confusion.

    There is 0590.. (in red), then another "05" in blue and the correct 05 (in green) that is previous to 910f in this case.

    Code:
    ff770023a8532064013451187f65142654973fffff0015000a4800015a000442
    0001330001360001370001660001650001770001690001790000930001220000
    2100010900010a00012600010800012b00002c00002d00002e00005500005600
    072a00002f0000300000310000ff7900901932c90600000000a0e0ca0e540091
    06504113ffffffffff0005900935c90600000000000090193cc9060000000090
    e0ca0e54009106504113ffffffffff000505910f01020000000d9149526905ff
    fff009310010c0000000d9149526905ffffff0101960f010c0000000d9149526
    9559fffff00940e01020102010001ffffff02010201850600000000000007902
    ec918059191495269555fffffff009191495269555fffff000105ca05001a66c
    b0a00000000000000000000cc0101
    for reference, the byte after 9X is between 10 and 141 (in hex 0A to 8D) and the next byte is between 1 to 10 (01 to 0A). With this references could be avoided to print other substrings.
    Attached Files Attached Files
    Last edited by Philidor; October 30th, 2013 at 03:37 PM.

  7. #142
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,822

    Re: Read binary file with line delimeter

    Currently, the search for subblocks starts straight after the first 05 following an ff79. It was assumed in the absence of information to the contrary that there was only one 05 block following the ff79. This new condition regarding 05 will be tricky as it's not a straight search. I'll have a look sometime and see what can be done.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++23 Compiler: Microsoft VS2022 (17.6.5)

  8. #143
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,822

    Re: Read binary file with line delimeter

    I've tried to simplify the extra test as much as possible to try to keep the code 'simple' and to keep the run time down. Try this and let me know.

    Code:
    // Program entry point: reads a hard-coded binary file block by block and
    // writes one '|'-separated text record per block to stdout, followed by the
    // elapsed time in seconds.
    //
    // For each block, the hex text after the first SBLOCK marker is searched for
    // a "059" whose characters at offsets +6/+7 are '0' followed by a hex digit
    // with value <= 10. What follows the "05" is scanned two characters at a
    // time for sub-blocks of the form  '9' <type char> <length byte> <payload>.
    // Decoded sub-blocks are stored in sblk[] (slot chosen through aindx); they
    // are only printed when a type '4' sub-block was found, which also ends the
    // scan of that block.
    //
    // Fixes over the previous version:
    //   * the "059" candidate test no longer reads block[five + 6 / + 7] past
    //     the end of the block.
    //   * after a rejected candidate the search resumes right behind it instead
    //     of creeping forward 3 characters at a time and re-finding the same
    //     match (same result, fewer searches).
    //   * `num` now holds any 32-bit int written by _itoa (a 10-byte buffer is
    //     overflowed by a 10-digit or negative value).
    //   * a sub-block is decoded only if every character it reads lies inside
    //     cx; truncated data previously caused out-of-range reads.
    //   * the 4-byte value is accumulated unsigned to avoid signed overflow; the
    //     printed value is unchanged.
    //   * the unused block counter was removed.
    //
    // Returns 1 if the file cannot be opened, 0 otherwise.
    int main()
    {
    	FileFields	ff;
    
    	//if (!ff.open("d:\\philidor\\bin2g")) {
    	//if (!ff.open("d:\\philidor\\binsmall")) {
    	if (!ff.open("d:\\philidor\\bin1mb.txt")) {
    		cout << "Cannot open file!" << endl;
    		return 1;
    	}
    
    	// Everything before the first delimiter is read and then ignored.
    	string	header;
    	ff.getField(header);
    
    	// Work buffers are reserved once and reused for every block.
    	string	block;
    	block.reserve(7000);
    
    	string	preliminar;
    	preliminar.reserve(7000);
    
    	string	cx;
    	cx.reserve(7000);
    
    	string	sub;
    	sub.reserve(7000);
    
    	DWORD	number;
    
    	// "-2147483648" is 11 characters, plus the terminating NUL.
    	char	num[12];
    
    	const time_t timest = time(NULL);
    
    	while (ff.getBlock(block, number, preliminar)) {
    		bool	got4 = false;
    		string	sblk[10];
    
    		const size_t ff79 = block.find(SBLOCK);
    		size_t five = string::npos;
    
    		if (ff79 != string::npos) {
    			// "059" cannot overlap itself, so the next candidate starts at five + 3 or later.
    			for (size_t from = ff79; (five = block.find("059", from)) != string::npos; from = five + 3) {
    				// The test below reads up to block[five + 7]; no later match can fit either.
    				if (five + 8 > block.size()) {
    					five = string::npos;
    					break;
    				}
    
    				if (block[five + 6] == '0' && convh[block[five + 7] - '0'] <= 10)
    					break;
    			}
    		}
    
    		if (five != string::npos) {
    			cx = block.substr(five + 2);
    			for (size_t c = 0; c < cx.size() && !got4; c += 2) {
    				// Characters available from position c to the end of cx.
    				const size_t avail = cx.size() - c;
    
    				// A sub-block header needs 4 characters: '9', type, 2-char length.
    				if (avail < 4 || cx[c] != '9')
    					continue;
    
    				const char styp = cx[c + 1];
    				if (!((styp >= '0' && styp <= '7') || styp == 'A' || styp == 'B'))
    					continue;
    
    				const int slen = CONVDEC(2) * 2;	// payload length in hex chars
    				sub = "";
    
    				if (styp == '4') {
    					// Type 4: every payload byte as a decimal number, comma separated.
    					if (avail >= (size_t)slen + 4) {
    						got4 = true;
    						for (int i = 4; i < slen + 4; i += 2) {
    							_itoa(CONVDEC(i), num, 10);
    							sub += num;
    							if (i != slen + 2)
    								sub += ',';
    						}
    						sblk[aindx[styp - '0']] = sub;
    					}
    				} else if (styp != '5') {
    					// Other types read fixed offsets up to c + 33 (c + 35 when slen is 32).
    					const size_t need = (slen == 32) ? 36 : 34;
    					if (avail >= need) {
    						_itoa(CONVDEC(6), num, 10);
    						sub += num;
    						sub += ',';
    
    						// Four bytes, most significant first.
    						unsigned int dec = 0;
    						for (int s = 8; s < 16; s += 2)
    							dec = (dec << 8) + CONVDEC(s);
    
    						_itoa((int)dec, num, 10);
    						sub += num;
    						sub += ',';
    
    						// Up to 16 hex characters copied verbatim, stopping at the first 'F'.
    						for (size_t s = c + 16; s < c + 32; s++) {
    							if (cx[s] == 'F')
    								break;
    							sub += cx[s];
    						}
    
    						sub += ',';
    						_itoa(CONVDEC(32), num, 10);
    						sub += num;
    						if (slen == 32) {
    							sub += ',';
    							_itoa(CONVDEC(34), num, 10);
    							sub += num;
    						}
    						sblk[aindx[styp - '0']] = sub;
    					}
    				}
    
    				// Together with the loop's own c += 2 this skips header + payload.
    				c += slen + 2;
    			}
    		}
    
    		if (got4) {
    			for (int a = 0; a <= 8; a++) {
    				preliminar += '|';
    				preliminar += sblk[a];
    			}
    		} else {
    			preliminar += "|||||||||";
    		}
    
    		cout << number << preliminar << endl;
    	}
    
    	cout << "Time taken: " << time(NULL) - timest << endl;
    	return 0;
    }
    Last edited by 2kaud; October 30th, 2013 at 05:29 PM.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++23 Compiler: Microsoft VS2022 (17.6.5)

  9. #144
    Join Date
    Oct 2013
    Posts
    63

    Re: Read binary file with line delimeter

    Hello 2kaud,

    Thank you for your help. I've tested the last main function and it seems to print the correct output and is handling the issue mentioned in previous post. Besides that it seems to be very fast.

    I've tested with a 20 MB and works fine, but with a 2GB file I get "segmentation fault" and stops around the block 425,000.

    I saw the segment of the binary when stops, I saw 2 blocks before and they seems to be normal blocks, the only what I see is that the block number is not consecutive between the last 2 blocks and the last within the binary, for son reason. I'm not sure if the fact that 2 consecutive blocks don't have 2 consecutive block number can cause this issue.

    I get this:
    Code:
    Temporary breakpoint 2 ("C:/Read_blocks/main.cpp:133") pending.
    Child process PID: 12792
    Program received signal SIGSEGV, Segmentation fault.
    In ?? () ()

    Best regards

  10. #145
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,822

    Re: Read binary file with line delimeter

    Before the latest code change, did it process all the blocks (with some incorrect output) for the 2GB file? I suspect there are some non-sub-blocks that initially look like sub-blocks but aren't. I've added some more tests to try to catch this. If the code below still gives the segmentation fault, can you extract a few records from the 2GB file before and after the rogue block, then zip and attach them so I can investigate further? The more tests I have to add, though, the slower and more complex the code becomes.

    Code:
    // Program entry point: reads a hard-coded binary file block by block and
    // writes one '|'-separated text record per block to stdout, followed by the
    // elapsed time in seconds.
    //
    // For each block, the hex text after the first SBLOCK marker is searched for
    // a "059" that still has at least `smlblk` characters behind it and whose
    // characters at offsets +6/+7 are '0' followed by a hex digit with value
    // <= 10. What follows the "05" is scanned two characters at a time for
    // sub-blocks of the form  '9' <type char> <length byte> <payload>.
    // Decoded sub-blocks are stored in sblk[] (slot chosen through aindx); they
    // are only printed when a type '4' sub-block was found, which also ends the
    // scan of that block.
    //
    // Fixes over the previous version:
    //   * after a rejected "059" candidate the search resumes right behind it
    //     instead of creeping forward 3 characters at a time and re-finding the
    //     same match (same result, fewer searches).
    //   * `num` now holds any 32-bit int written by _itoa (a 10-byte buffer is
    //     overflowed by a 10-digit or negative value).
    //   * a sub-block is decoded only if every character it reads lies inside
    //     cx; the `smlblk` test only protects the first sub-block.
    //   * the 4-byte value is accumulated unsigned to avoid signed overflow; the
    //     printed value is unchanged.
    //   * the unused block counter was removed.
    //
    // Returns 1 if the file cannot be opened, 0 otherwise.
    int main()
    {
    	FileFields	ff;
    
    	//if (!ff.open("d:\\philidor\\bin2g")) {
    	//if (!ff.open("d:\\philidor\\binsmall")) {
    	if (!ff.open("d:\\philidor\\bin1mb.txt")) {
    		cout << "Cannot open file!" << endl;
    		return 1;
    	}
    
    	// Everything before the first delimiter is read and then ignored.
    	string	header;
    	ff.getField(header);
    
    	// Work buffers are reserved once and reused for every block.
    	string	block;
    	block.reserve(7000);
    
    	string	preliminar;
    	preliminar.reserve(7000);
    
    	string	cx;
    	cx.reserve(7000);
    
    	string	sub;
    	sub.reserve(7000);
    
    	DWORD	number;
    
    	// "-2147483648" is 11 characters, plus the terminating NUL.
    	char	num[12];
    
    	const time_t timest = time(NULL);
    
    	// Smallest number of hex characters accepted after a "059" candidate.
    	const int smlblk = 68;
    
    	while (ff.getBlock(block, number, preliminar)) {
    		bool	got4 = false;
    		string	sblk[10];
    
    		const size_t ff79 = block.find(SBLOCK);
    		size_t five = string::npos;
    
    		if (ff79 != string::npos && (ff79 + smlblk + 4 < block.size())) {
    			// "059" cannot overlap itself, so the next candidate starts at five + 3 or later.
    			for (size_t from = ff79; (five = block.find("059", from)) != string::npos; from = five + 3) {
    				// Too close to the end of the block: neither this nor any later match qualifies.
    				if (five + smlblk >= block.size()) {
    					five = string::npos;
    					break;
    				}
    
    				if (block[five + 6] == '0' && convh[block[five + 7] - '0'] <= 10)
    					break;
    			}
    		}
    
    		if (five != string::npos) {
    			cx = block.substr(five + 2);
    			for (size_t c = 0; c < cx.size() && !got4; c += 2) {
    				// Characters available from position c to the end of cx.
    				const size_t avail = cx.size() - c;
    
    				// A sub-block header needs 4 characters: '9', type, 2-char length.
    				if (avail < 4 || cx[c] != '9')
    					continue;
    
    				const char styp = cx[c + 1];
    				if (!((styp >= '0' && styp <= '7') || styp == 'A' || styp == 'B'))
    					continue;
    
    				const int slen = CONVDEC(2) * 2;	// payload length in hex chars
    				sub = "";
    
    				if (styp == '4') {
    					// Type 4: every payload byte as a decimal number, comma separated.
    					if (avail >= (size_t)slen + 4) {
    						got4 = true;
    						for (int i = 4; i < slen + 4; i += 2) {
    							_itoa(CONVDEC(i), num, 10);
    							sub += num;
    							if (i != slen + 2)
    								sub += ',';
    						}
    						sblk[aindx[styp - '0']] = sub;
    					}
    				} else if (styp != '5') {
    					// Other types read fixed offsets up to c + 33 (c + 35 when slen is 32).
    					const size_t need = (slen == 32) ? 36 : 34;
    					if (avail >= need) {
    						_itoa(CONVDEC(6), num, 10);
    						sub += num;
    						sub += ',';
    
    						// Four bytes, most significant first.
    						unsigned int dec = 0;
    						for (int s = 8; s < 16; s += 2)
    							dec = (dec << 8) + CONVDEC(s);
    
    						_itoa((int)dec, num, 10);
    						sub += num;
    						sub += ',';
    
    						// Up to 16 hex characters copied verbatim, stopping at the first 'F'.
    						for (size_t s = c + 16; s < c + 32; s++) {
    							if (cx[s] == 'F')
    								break;
    							sub += cx[s];
    						}
    
    						sub += ',';
    						_itoa(CONVDEC(32), num, 10);
    						sub += num;
    						if (slen == 32) {
    							sub += ',';
    							_itoa(CONVDEC(34), num, 10);
    							sub += num;
    						}
    						sblk[aindx[styp - '0']] = sub;
    					}
    				}
    
    				// Together with the loop's own c += 2 this skips header + payload.
    				c += slen + 2;
    			}
    		}
    
    		if (got4) {
    			for (int a = 0; a <= 8; a++) {
    				preliminar += '|';
    				preliminar += sblk[a];
    			}
    		} else {
    			preliminar += "|||||||||";
    		}
    
    		cout << number << preliminar << endl;
    	}
    
    	cout << "Time taken: " << time(NULL) - timest << endl;
    	return 0;
    }
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++23 Compiler: Microsoft VS2022 (17.6.5)

  11. #146
    Join Date
    Oct 2013
    Posts
    63

    Re: Read binary file with line delimeter

    Hello 2kaud,

    With the previous code before last changes I didn't detected incorrect output for 2GB file.

    Regarding the issue of segmentation fault I get, is as follow.
    I have 2 types of files and I get segmentation error in file type2:
    Type1: With SEPAR=FF77, SBLOCK=FF79, and substrings of the form "05 + 9X + .. + 940E + 14bytes"
    Type2: With SEPAR=FF32, SBLOCK=FF34, and substrings of the form "03 + 8X + .. + 840E + 14bytes"

    Both kind of files follows the same rules, only have differences mentioned above, so, I thougth if
    it works for file of type1, changing only the SEPAR to FF32, SBLOCK to FF34 and intead "05" use "03" and
    instead "9X" use 8X would work. Actually is working for almost all cases, but for some reason, for
    the block 425987 (068003 in hex) in the attached file (Bin1KB.txt), I get segmentation fault and I don't see
    much difference between this block an others. Only what I see is that has the SBLOCK string (FF34) but
    don't exist the substring "03 + 8X + .. + 840E + 14bytes".

    Why "smlblk" is initialized with "68"?

    I was trying to see how "smlblk" works with "SBLOCK" in order to fix the issue with files of type2, but I
    haven't understand yet, maybe in that part could be the solution.

    PS1: The attached sample file is of type2 and only contains 6 blocks.
    PS2: When I run the code ove Bin1KB.txt from IDE I don't get error, but block 425987 is not printed, I get error
    when I run the built program.exe in command line, like this "program Bin1KB.txt"

    Thanks in advance for your help.
    Attached Files Attached Files

  12. #147
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,822

    Re: Read binary file with line delimeter

    Why "smlblk" is initialized with "68"?
    68 should be the minimum size that could be a correct sub-block. Any block with a size less than this can't be valid so skip it.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++23 Compiler: Microsoft VS2022 (17.6.5)

  13. #148
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,822

    Re: Read binary file with line delimeter

    I've added a couple of defines at the beginning so that you can more easily select the file format you are dealing with. Just comment in/out the define you don't want. Another way would be to accept a file name and type option from the command line. If you can find a way of identifying the type of the file from the header, then the program could auto-detect the file type.

    The code below parses bin1kb without any runtime errors.

    However, I think the time is fast approaching when you need to take ownership of this program and understand it so that you can maintain it as required in the future. I'm not in a position where I can keep maintaining this program. This needs to become your responsibility. With a small file like bin1kb I suggest that you walk through the code on paper and use the debugger to see how it works against looking at a hex display of the data at the same time (if you don't know the debugger very well now is a good time to become familiar with it as using it is an essential skill). The big problem with parsing this data is to decide whether something that looks valid at the start is actually valid or not. The code is fairly simple and is the type of program I would be handing to a junior/trainee c++ programmer to maintain. Have a close look at it and try to understand. If there's anything you can't get just ask.

    Code:
    #include <iostream>
    #include <fstream>
    #include <string>
    #include <ctime>
    #include <cstdlib>
    using namespace std;
    
    // Windows-style integer aliases used throughout this program.
    typedef unsigned char BYTE;
    typedef unsigned short int WORD;
    typedef unsigned long int DWORD;
    
    // LOBYTE(w): w converted to WORD, masked to its low 8 bits, as a BYTE.
    // Guarded so that a definition already supplied by another header is kept.
    #ifndef LOBYTE
    	#define LOBYTE(w)	((BYTE)((WORD)(w) & 0xff))
    #endif
    
    // HIBYTE(w): w converted to WORD, shifted right by 8, as a BYTE.
    #ifndef HIBYTE
    	#define HIBYTE(w)	((BYTE)((WORD)(w) >> 8))
    #endif
    
    // CONVDEC(num): numeric value (0-255) of the two hex characters found at
    // cx[c + num] and cx[c + num + 1].
    // NOTE: the macro refers to the identifiers `cx` and `c`, which must be in
    // scope where it is expanded. It performs no bounds check on cx and is only
    // meaningful for the characters '0'-'9' and 'A'-'F' (see convh).
    #define CONVDEC(num)	(convh[cx[c + (num)] - '0'] * 16 + convh[cx[c + (num) + 1] - '0'])
    
    // hconv: nibble value (0-15) -> upper-case hex character.
    const char hconv[16] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
    // convh: inverse of hconv, indexed by (character - '0'). The seven zero
    // entries at indices 10-16 cover the ASCII characters between '9' and 'A'.
    const int convh[23] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 0, 0, 0, 0, 0, 0, 10, 11, 12, 13, 14, 15};
    // aindx: indexed by (sub-block type character - '0'); main() uses the result
    // as an index into its 10-element sblk array. Types '0','1','2','3','4','6',
    // '7','A','B' map to slots 0,1,3,4,8,6,7,2,5; every other entry maps to 9.
    const int aindx[23] = {0, 1, 3, 4, 8, 9, 6, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 2, 5, 9, 9, 9, 9};
    
    // File format selection: enable exactly one of FTYPE1 / FTYPE2.
    //#define FTYPE1
    #define FTYPE2
    
    // Fail with a clear message instead of missing or duplicate definitions of
    // SEPAR / SBLOCK / SFIND / SCHAR further down.
    #if defined(FTYPE1) && defined(FTYPE2)
    	#error "Define only one of FTYPE1 and FTYPE2"
    #elif !defined(FTYPE1) && !defined(FTYPE2)
    	#error "Define FTYPE1 or FTYPE2 to select the file format"
    #endif
    
    // Per-format constants:
    //   SEPAR  - default two-byte delimiter for getBlock()/getField()
    //   SBLOCK - hex text main() searches for inside each block
    //   SFIND  - hex text that introduces the sub-block area
    //   SCHAR  - first hex character of every sub-block header
    #ifdef FTYPE1
    	const WORD SEPAR = 0xFF77;
    	const char SBLOCK[] = "FF79";
    	const char SFIND[] = "059";
    	const char SCHAR = '9';
    #endif
    
    #ifdef FTYPE2
    	const WORD SEPAR = 0xFF32;
    	const char SBLOCK[] = "FF34";
    	const char SFIND[] = "038";
    	const char SCHAR = '8';
    #endif
    
    // Sequential reader for a binary file made of delimiter-separated fields.
    // The stream is opened with open(); getField() returns the bytes up to the
    // next delimiter as hex text, getBlock() additionally decodes the fixed-size
    // prefix that precedes each block's data.
    class FileFields
    {
    public:
    	// Starts with no file attached.
    	FileFields() : opened(false) {}
    
    	// Closes the stream if open() succeeded earlier.
    	~FileFields()
    	{
    		if (opened) {
    			ifs.close();
    		}
    	}
    
    	// Opens `name` in binary mode; true on success.
    	bool open(const char* name);
    
    	// Reads the block prefix into `number` / `firstpart` and the rest of the
    	// block (as hex text) into `field`.
    	bool getBlock(string& field, DWORD& number, string& firstpart, WORD delim = SEPAR);
    
    	// Reads bytes up to the next `delim` into `field` as hex text.
    	bool getField(string& field, WORD delim = SEPAR);
    
    private:
    	ifstream	ifs;		// underlying input stream
    	bool		opened;		// result of the last open() call
    };
    
    // Opens `name` for binary input and records the outcome in `opened`.
    // Returns true when the stream is open afterwards.
    bool FileFields::open(const char* name)
    {
    	ifs.open(name, ios::binary);
    	opened = ifs.is_open();
    	return opened;
    }
    
    // Reads one block from the stream.
    //
    // Bytes consumed, in order:
    //   *  3 bytes: block number, most significant byte first -> `number`
    //   * 16 bytes: two groups of 8 bytes, decoded into `firstpart`
    //   * everything up to the next `delim` -> `field` (hex text, via getField)
    //
    // `firstpart` becomes "|<group1>|<group2>". Within a group each byte
    // contributes its high then low nibble as a hex character; the first nibble
    // equal to 0xF ends that group.
    //
    // Returns false when the stream is not open / not good, or when the 19-byte
    // prefix cannot be read completely (`number` is then 0 and no uninitialised
    // bytes are decoded). Otherwise returns the result of getField().
    //
    // Fixes over the previous version: `delim` is now forwarded to getField()
    // instead of being silently replaced by the default, and both prefix reads
    // are checked before their buffers are used.
    bool FileFields::getBlock(string& field, DWORD& number, string& firstpart, WORD delim)
    {
    	number = 0;
    	firstpart = "|";
    
    	if (!opened || !ifs.good())
    		return false;
    
    	BYTE	num[3];
    	ifs.read((char*)num, 3);
    	if (!ifs.good())
    		return false;
    
    	number = ((DWORD)num[0] << 16) + ((DWORD)num[1] << 8) + num[2];
    
    	BYTE	first[16];
    	ifs.read((char*)first, 16);
    	if (!ifs.good())
    		return false;
    
    	for (int group = 0; group < 2; group++) {
    		// The two groups are separated by a single '|'.
    		if (group == 1)
    			firstpart += '|';
    
    		const int begin = group * 8;
    		for (int i = begin; i < begin + 8; i++) {
    			const BYTE hi = first[i] >> 4;
    			if (hi == 0xf)
    				break;			// 0xF nibble terminates the group
    			firstpart += hconv[hi];
    
    			const BYTE lo = first[i] & 0x0f;
    			if (lo == 0xf)
    				break;
    			firstpart += hconv[lo];
    		}
    	}
    
    	return getField(field, delim);
    }
    
    // Reads bytes from the stream into `field` as upper-case hex text (two
    // characters per byte) until the two-byte delimiter `delim` or end of input.
    // The delimiter itself is consumed but not stored.
    //
    // Returns false (with `field` emptied) if the stream is not open or not
    // good on entry; returns true otherwise, even when no byte could be read.
    bool FileFields::getField(string& field, WORD delim)
    {
    	field = "";
    
    	if (!opened || !ifs.good())
    		return false;
    
    	const BYTE	delimHi = HIBYTE(delim);
    	const BYTE	delimLo = LOBYTE(delim);
    	char		ch;
    
    	while (ifs.get(ch)) {
    		const BYTE cur = (BYTE)ch;
    
    		// The high byte was just read; the low byte is only peeked at so that
    		// a non-matching byte is still processed normally on the next pass.
    		if (cur == delimHi && (BYTE)ifs.peek() == delimLo) {
    			ifs.get(ch);		// swallow the delimiter's low byte
    			break;
    		}
    
    		field += hconv[cur >> 4];
    		field += hconv[cur & 0xf];
    	}
    
    	return true;
    }
    
    // Program entry point.
    //
    // Opens the input file, reads the leading header field, then processes the
    // file one block at a time with FileFields::getBlock(). For every block it
    // prints `number` followed by `preliminar` and the '|'-separated sub-block
    // values collected in sblk[0..8] (or nine empty columns when no type '4'
    // sub-block was found). Finally prints the elapsed wall-clock seconds.
    //
    // Returns 1 if the file cannot be opened, 0 otherwise.
    int main()
    {
    FileFields	ff;

    	//if (!ff.open("d:\\philidor\\bin2g")) {
    	//if (!ff.open("d:\\philidor\\binsmall")) {
    	//if (!ff.open("d:\\philidor\\bin1mb.txt")) {
    	if (!ff.open("d:\\philidor\\bin1kb.txt")) {
    		cout << "Cannot open file!" << endl;
    		return 1;
    	}

    // The header field is read only to move past it; its content is not used.
    string	header;
    	ff.getField(header);

    string	block;
    	block.reserve(7000);

    string preliminar;
    	preliminar.reserve(7000);

    string cx;
    	cx.reserve(7000);

    string sub;
    	sub.reserve(7000);

    DWORD	number;

    // Scratch buffer for _itoa() in radix 10. An int can need up to 10 digits plus
    // a sign plus the terminating NUL, i.e. 12 chars. `dec` below is assembled from
    // four bytes, so it really can reach that width; the previous 10-char buffer
    // overflowed (and trips "length < sizeInTChars" when _itoa_s is used).
    // If switching to _itoa_s, pass sizeof(num) as the buffer size.
    char num[12];

    time_t timest = time(NULL);

    const int smlblk = 68;

    	for (DWORD blk = 1; ff.getBlock(block, number, preliminar); blk++) {
    		size_t ff79;
    		bool got4 = false;
    		string sblk[10];	// one output column per sub-block slot, all initially empty

    		if ((ff79 = block.find(SBLOCK)) != string::npos && (ff79 + smlblk + 4 < block.size())) {
    			size_t five;

    			// Look for an SFIND marker that leaves at least smlblk characters after it
    			// and passes the check on the two characters at offsets 6 and 7.
    			while ((five = block.find(SFIND, ff79)) != string::npos) {
    				if (five + smlblk >= block.size()) {
    					five = string::npos;
    					break;
    				}

    				if (block[five + 6] == '0' && convh[block[five + 7] - '0'] <= 10)
    					break;
    				else
    					ff79 += 3;
    			}

    			//((five = block.find("059", ff79)) != string::npos) {
    			if (five != string::npos) {
    				cx = block.substr(five + 2);
    				// NOTE(review): CONVDEC is defined elsewhere in the file; presumably it
    				// decodes characters of cx relative to c - confirm that c + offset
    				// always stays inside cx for truncated input.
    				for (size_t c = 0; c < cx.size() && !got4; c+= 2) {
    					char styp;
    					sub = "";
    					if (cx[c] == SCHAR && (((styp = cx[c + 1]) >= '0' && styp <= '7') || styp == 'A' || styp == 'B')) {
    						const int slen = CONVDEC(2) * 2;
    						// Intentional assignment: a type '4' sub-block also ends the scan.
    						if ((got4 = (styp == '4'))) {
    							// Type '4': comma-separated list of every value in the sub-block.
    							for (int i = 4; i < slen + 4; i += 2) {
    								_itoa(CONVDEC(i), num, 10);
    								sub += num;
    								if (i != slen + 2)
    									sub += ',';
    							}
    							sblk[aindx[styp - '0']] = sub;
    						} else
    							if (cx[c + 1] != '5') {
    								_itoa(CONVDEC(6), num, 10);
    								sub += num;
    								sub += ',';
    								// Combine the four values at offsets 8..14 into one number,
    								// most significant first. Values of 2^31 and above do not
    								// fit in a positive int and print as negative.
    								int dec = 0;
    								for (int s = 8; s < 16; s += 2)
    									dec = (dec << 8) + CONVDEC(s);

    								_itoa(dec, num, 10);
    								sub += num;
    								sub += ',';
    								// Copy up to 16 characters verbatim, stopping at the first 'F'.
    								for (size_t s = c + 16; s < c + 32; s++)
    									if (cx[s] != 'F')
    										sub += cx[s];
    									else
    										break;

    								sub += ',';
    								_itoa(CONVDEC(32), num, 10);
    								sub += num;
    								if (slen == 32) {
    									sub += ',';
    									_itoa(CONVDEC(34), num, 10);
    									sub += num;
    								}
    								sblk[aindx[styp - '0']] = sub;
    							}

    						// Skip the rest of this sub-block (the loop itself adds 2 more).
    						c += slen + 2;
    					}
    				}
    			}
    		}
    		if (got4)
    			for (int a = 0; a <= 8; a++)
    				preliminar += "|" + sblk[a];
    		else
    			preliminar += "|||||||||";

    		cout << number << preliminar << endl;
    	}

    	cout << "Time taken: " << time(NULL) - timest << endl;
    	return 0;
    }
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++23 Compiler: Microsoft VS2022 (17.6.5)

  14. #149
    Join Date
    Oct 2013
    Posts
    63

    Re: Read binary file with line delimeter

    Quote Originally Posted by 2kaud View Post
    However, I think the time is fast approaching when you need to take ownership of this program and understand it so that you can maintain it as required in the future. I'm not in a position where I can keep maintaining this program. This needs to become your responsibility.
    Hello 2kaud,

    Many thanks for all the help in this case. Of course I need to understand it to be able to do some change, I don't want to
    bother you anymore, too much help so far

    I'll look into using the debugger to step through each part of the code. If you want to comment the code a little to give a better idea of how it works, that would be great; if not, I understand and that is fine.

    Last question: Which compiler and IDE are you using? I tested Bin1KB.txt with your last code using Code::Blocks with the GNU GCC compiler and I continue to receive a segmentation fault. With Visual Studio 2012 the compilation finishes without errors, but when I run the program the following appears:
    Code:
    "Debug assertion failed"
    File: ff:\dd\vctools\crt_bld\self_x86\crt\xtoa.c
    Expression: length < sizeInTChars
    The only change I did to your last code is that I changed _itoa() with _itoa_s() as you suggested me in post #136, because without that change I get error in compilation in Visual Studio.

    Thanks so much again for such help!
    Last edited by Philidor; November 2nd, 2013 at 02:20 PM.

  15. #150
    Join Date
    Apr 1999
    Posts
    27,449

    Re: Read binary file with line delimeter

    Quote Originally Posted by Philidor View Post
    Last question: Which compiler and IDE are you using? I tested Bin1KB.txt with your last code using Code::Blocks with the GNU GCC compiler and I continue to receive a segmentation fault. With Visual Studio 2012 the compilation finishes without errors, but when I run the program the following appears:
    Code:
    "Debug assertion failed"
    File: ff:\dd\vctools\crt_bld\self_x86\crt\xtoa.c
    Expression: length < sizeInTChars
    The only change I did to your last code is that I changed _itoa() with _itoa_s() as you suggested me in post #136, because without that change I get error in compilation in Visual Studio.

    Thanks so much again for such help!
    The assertion is telling you what the problem is. If you ran the program under the debugger, the assert() function tests to see if the expression is true. If it isn't true, the program stops. You then use the debugger to see what the call stack is that led to the function call, what the values of the variables are, etc.
    Code:
    length < sizeInTChars
    The code stopped because that expression is false, when the program logic asserts that it must evaluate to true before proceeding.

    Regards,

    Paul McKenzie

Page 10 of 11 FirstFirst ... 7891011 LastLast

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  





Click Here to Expand Forum to Full Width

Featured