dcsimg
CodeGuru Home VC++ / MFC / C++ .NET / C# Visual Basic VB Forums Developer.com
Page 2 of 2 FirstFirst 12
Results 16 to 29 of 29

Thread: [RESOLVED] Filter a txt file based on a grid of data

  1. #16
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,038

    Re: Filter a txt file based on a grid of data

    Try this C++ code for the Only_Zero VB filter. You may need to change the file names to suit your system. For me, it produces the required output file of the expected 44 lines. Let me know how it goes before I try the Groups_of_30 VB filter.

    If this works for the Only_Zero filter, what are your comparative times for your large (4- 5 million) file(s)?

    Code:
    #import "c:\\Program Files (x86)\\Common Files\\microsoft shared\\OFFICE14\\mso.dll" \
    	rename( "RGB", "MSORGB" ) \
    	rename( "DocumentProperties", "MSDocument") \
    	rename( "SearchPath", "MSSearch")
    
    #import "c:\\Program Files (x86)\\Common Files\\microsoft shared\\VBA\\VBA6\\VBE6EXT.OLB"
    
    #import "c:\\Program Files (x86)\Microsoft Office\\Office14\\excel.exe" \
    	rename( "DialogBox", "ExcelDialogBox" ) \
    	rename( "RGB", "ExcelRGB" ) \
    	rename( "CopyFile", "ExcelCopyFile" ) \
    	rename( "ReplaceText", "ExcelReplaceText" )
    
    
    #include <iostream>
    #include <iomanip>
    #include <string>
    #include <fstream>
    
    using namespace Excel;
    using namespace std;
    
    const string sname = "c:\\MyProgs\\Level_1.xlsm";	// Complete path name to Excel filter file
    const string pname = "ToPurgeFile.txt";			// Input file name
    const string oname = "OutputFile.txt";			// Output file name (over written)
    
    int main()
    {
    	const auto hr {CoInitializeEx(0, COINIT_MULTITHREADED)};
    
    	if (FAILED(hr))
    		return (cout << "Failed to initialize COM library. Error code = 0x" << hex << hr << endl), 1;
    
    	_ApplicationPtr pXL;
    
    	if (FAILED(pXL.CreateInstance("Excel.Application")))
    		return (cout << "Failed to initialize Excel::_Application!" << endl), 2;
    
    	_WorkbookPtr pBook;
    
    	try {
    		pBook = pXL->Workbooks->Open(sname.c_str());
    	}
    	catch (...) {
    		pXL->Quit();
    		return (cout << "Cannot open spreadsheet file " << sname << endl), 3;
    	}
    
    	pXL->PutVisible(0, FALSE);
    
    	const _WorksheetPtr pWksheet {pXL->ActiveSheet};
    	const RangePtr pRange {pWksheet->Cells};
    
    	const int vSize {3};		// Size of vmin/vmax
    	const int nRange {17};	        // Number of ranges
    	const int rRow {15};		// Number of rows per range
    	const int rCol {3};		// Number of cols per range
    	const int rcStart {8};		// H for starting col of first range
    	const int rrStart {10};	        // Starting row for the ranges
    	const int nval {6};		// Number of ints per line in file
    
    	int vMin[vSize] {0};
    	int vMax[vSize] {0};
    	int rData[nRange][rRow][rCol] {0};	// Contains the data for the various required ranges from the spreadsheet
    
    	bool bad {false};	// Has data read failed
    
    	int vpar;
    	int vTotalMax;
    
    	try {
    		vpar = (int)pRange->Item[8][25];	// Y8
    		vTotalMax = (int)pRange->Item[7][52];	// AZ7
    
    		vMin[0] = (int)pRange->Item[4][48];	// AV4
    		vMin[1] = (int)pRange->Item[6][48];	// AV6
    		vMin[2] = (int)pRange->Item[8][48];	// AV8
    
    		vMax[0] = (int)pRange->Item[4][50];	// AX4
    		vMax[1] = (int)pRange->Item[6][50];	// AX6
    		vMax[2] = (int)pRange->Item[8][50];	// AX8
    
    		// Obtain the range data
    		for (int r {0}; r < nRange; ++r)
    			for (int cl {0}; cl < rCol; ++cl)
    				for (int rw {0}; rw < rRow; ++rw)
    					rData[r][rw][cl] = (int)pRange->Item[rrStart + rw][rcStart + (r * rCol) + cl];
    	}
    	catch (...) {
    		bad = true;
    	}
    
    	pWksheet->Release();
    	pBook->Release();
    	pXL->Quit();
    
    	if (bad)
    		return (cout << "Cannot read the data!" << endl), 4;
    
    	ifstream ifs(pname);
    
    	if (!ifs.is_open())
    		return (cout << "Cannot open input file " << pname << endl), 5;
    
    	ofstream ofs(oname);
    
    	if (!ofs.is_open())
    		return (cout << "Cannot open output file " << oname << endl), 6;
    
    	int buf[nval] {0};	// One line of int data
    
    	// Try to read first number on line. If this fails, at EOF
    	while (ifs >> buf[0]) {
    		// Read rest of line of int data
    		for (int v {1}; v < nval; ++v)
    			ifs >> buf[v];
    
    		int cnt3 {0};
    
    		for (int n {0}; n < vSize; ++n) {
    			int cnt1 {0};
    
    			for (int i {n * 4 + 2}, ie {n * 4 + 5}; i <= ie; ++i) {	// Check <= what's with 4, 2, 5??
    				int cnt2 {0};
    
    				for (int j {0}; j < nval; ++j) {
    					// cnt2 = cnt2 + countif()
    					for (int rw {0}; rw < rRow; ++rw)
    						for (int cl {0}; cl < rCol; ++cl)
    							if (rData[i][rw][cl] == buf[j])
    								++cnt2;
    				}
    
    				if (cnt2 == vpar)
    					++cnt1;
    			}
    
    			if ((cnt1 >= vMin[n]) && (cnt1 <= vMax[n]))
    				cnt3 += cnt1;
    		}
    
    		if ((cnt3 > 0) && (cnt3 <= vTotalMax))
    			// Output line of data
    			for (int d {0}; d < nval; ++d)
    				ofs << buf[d] << ((d == (nval - 1)) ? '\n' : ' ');
    	}
    
    	ifs.close();
    	ofs.close();
    }
    Last edited by 2kaud; February 22nd, 2020 at 07:45 AM.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++17 Compiler: Microsoft VS2019 (16.5.1)

  2. #17
    Join Date
    Feb 2020
    Posts
    14

    Re: Filter a txt file based on a grid of data

    Good morning 2kaud

    good news ... I downloaded and installed VS2019 community, changed some paths to adapt the code to my system, I compiled the code to produce the executable ... the output file produced by the filtering process is exactly the same product as the vb code of the macro in the xlsm file .. excellent !!

    Even more interesting is the aspect of the analysis times..as I expected on a 76.306 KB file (3.999.563 rows) the Vb macro of the .xlsm file took 39 'to produce the filtered file (375.026 rows) the executable program produced by your code 2 '!! :-)

    Thank you for your support and congratulations for the approach taken to solve the 'problem'.

    Here attach the link to download the file of about four million lines in case you want to check the result and the processing time yourself.

    https://we.tl/t-ubpweA2azC

    regards
    RIC63

  3. #18
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,038

    Re: Filter a txt file based on a grid of data

    On my system, processing the 74.5 Mb (3,999,564 lines) took 30 seconds and produced the output file of 375,026 lines (6.63Mb).

    As you can compile/run the C++ code in post #16, I'll now 'convert' the Groups_of_30 VB code.

    Using the same topurgefile.txt from post #1 and the same level_1.xlsm, would you post the expected outputfile.txt using the Groups_of_30 VB code.
    Last edited by 2kaud; February 23rd, 2020 at 06:21 AM.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++17 Compiler: Microsoft VS2019 (16.5.1)

  4. #19
    Join Date
    Feb 2020
    Posts
    14

    Re: Filter a txt file based on a grid of data

    .. I guess you have a good pc .. :-)
    I appreciate
    but I remind you that the Groups_of_30 VB code is developed for Level_2.xlsm which has a slightly different layout than Level_1.xlsm
    However
    I am attaching the output produced by Level_2.xlsm using the same purgefile.txt (the result is from 578 to 515 rows)

    thanks 2kaud
    Attached Files Attached Files

  5. #20
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,038

    Re: Filter a txt file based on a grid of data

    Try this for the groups_of_30 VB. On my system it produces the expected output as per your attachment in post #19.

    Code:
    import "c:\\Program Files (x86)\\Common Files\\microsoft shared\\OFFICE14\\mso.dll" \
    	rename( "RGB", "MSORGB" ) \
    	rename( "DocumentProperties", "MSDocument") \
    	rename( "SearchPath", "MSSearch")
    
    #import "c:\\Program Files (x86)\\Common Files\\microsoft shared\\VBA\\VBA6\\VBE6EXT.OLB"
    
    #import "c:\\Program Files (x86)\Microsoft Office\\Office14\\excel.exe" \
    	rename( "DialogBox", "ExcelDialogBox" ) \
    	rename( "RGB", "ExcelRGB" ) \
    	rename( "CopyFile", "ExcelCopyFile" ) \
    	rename( "ReplaceText", "ExcelReplaceText" )
    
    
    #include <iostream>
    #include <iomanip>
    #include <string>
    #include <fstream>
    
    using namespace Excel;
    using namespace std;
    
    const string sname = "c:\\MyProgs\\Level_2.xlsm";	// Complete path name to Excel filter file
    const string pname = "ToPurgeFile.txt";			// Input file name
    const string oname = "OutputFile.txt";			// Output file name (over written)
    
    int main()
    {
    	const auto hr {CoInitializeEx(0, COINIT_MULTITHREADED)};
    
    	if (FAILED(hr))
    		return (cout << "Failed to initialize COM library. Error code = 0x" << hex << hr << endl), 1;
    
    	_ApplicationPtr pXL;
    
    	if (FAILED(pXL.CreateInstance("Excel.Application")))
    		return (cout << "Failed to initialize Excel::_Application!" << endl), 2;
    
    	_WorkbookPtr pBook;
    
    	try {
    		pBook = pXL->Workbooks->Open(sname.c_str());
    	}
    	catch (...) {
    		pXL->Quit();
    		return (cout << "Cannot open spreadsheet file " << sname << endl), 3;
    	}
    
    	pXL->PutVisible(0, FALSE);
    
    	const _WorksheetPtr pWksheet {pXL->ActiveSheet};
    	const RangePtr pRange {pWksheet->Cells};
    
    	const int vSize {8};	// Size of vmin/vmax
    	const int nRange {17};	// Number of ranges
    	const int rRow {22};	// Number of rows per range
    	const int rCol {3};	// Number of cols per range
    	const int rcStart {8};	// H for starting col of first range
    	const int rrStart {10};	// Starting row for the ranges
    	const int nval {6};	// Number of ints per line in file
    
    	int vMin[vSize] {0};
    	int vMax[vSize] {0};
    	int vPar[vSize] {0};
    	int rData[nRange][rRow][rCol] {0};	// Contains the data for the various required ranges from the spreadsheet
    
    	bool bad {false};	// Has data read failed
    
    	try {
    		for (int r = 1; r <= 8; ++r) {	// Rows 1 to 8
    			vMin[r - 1] = (int)pRange->Item[r][48];	// AV col
    			vPar[r - 1] = (int)pRange->Item[r][25];	// Y col
    			vMax[r - 1] = (int)pRange->Item[r][50];	// AX col
    		}
    
    		// Obtain the range data
    		for (int r {0}; r < nRange; ++r)
    			for (int cl {0}; cl < rCol; ++cl)
    				for (int rw {0}; rw < rRow; ++rw)
    					rData[r][rw][cl] = (int)pRange->Item[rrStart + rw][rcStart + (r * rCol) + cl];
    	}
    	catch (...) {
    		bad = true;
    	}
    
    	pWksheet->Release();
    	pBook->Release();
    	pXL->Quit();
    
    	if (bad)
    		return (cout << "Cannot read the range data!" << endl), 4;
    
    	ifstream ifs(pname);
    
    	if (!ifs.is_open())
    		return (cout << "Cannot open input file " << pname << endl), 5;
    
    	ofstream ofs(oname);
    
    	if (!ofs.is_open())
    		return (cout << "Cannot open output file " << oname << endl), 6;
    
    	int buf[nval] {0};	// One line of int data
    
    	// Try to read first number on line. If this fails, at EOF
    	while (ifs >> buf[0]) {
    		int cntl[vSize] {0};
    
    		// Read rest of line of int data
    		for (int v {1}; v < nval; ++v)
    			ifs >> buf[v];
    
    		for (int i {0}; i < nRange; ++i) {
    			int cnt2 {0};
    
    			for (int j {0}; j < nval; ++j)
    				// cnt2 = cnt2 + countif()
    				for (int rw {0}; rw < rRow; ++rw)
    					for (int cl {0}; cl < rCol; ++cl)
    						if (rData[i][rw][cl] == buf[j])
    							++cnt2;
    
    			// Select Case
    			for (int v {0}; v < vSize; ++v)
    				if (vPar[v] == cnt2) {
    					++cntl[v];
    					break;
    				}
    		}
    
    		bool out {true};
    
    		// Compound if statement
    		for (int v {0}; v < vSize; ++v)
    			if ((cntl[v] < vMin[v]) || (cntl[v] > vMax[v])) {
    				out = false;
    				break;
    			}
    
    		// Output line of data
    		if (out)
    			for (int d {0}; d < nval; ++d)
    				ofs << buf[d] << ((d == (nval - 1)) ? '\n' : ' ');
    	}
    
    	ifs.close();
    	ofs.close();
    }
    For the big data file, it takes 55 seconds.
    Last edited by 2kaud; February 23rd, 2020 at 11:40 AM.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++17 Compiler: Microsoft VS2019 (16.5.1)

  6. #21
    Join Date
    Feb 2020
    Posts
    14

    Re: Filter a txt file based on a grid of data

    Hi 2kaud

    I tried your last post code # 20 but I get different results probably I made some mistakes myself
    to avoid confusion I use an xlsm file with a new name, can you please check it out? I also attach the output generated by it (6 lines) always using the 578-line ToPurgeFile.txt file as the basis.

    the output file generated by your code in post # 20 gives me the same 578 lines as the starting ToPurgeFile.txt file

    https://we.tl/t-uix8nuzteR

    thank you

  7. #22
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,038

    Re: Filter a txt file based on a grid of data

    Using your level_3_30.xlsm file (which has format of level_1) with the latest ToPurgeFile.txt (both from the zip file in post #21) and the level 1 code from post #16, I get an output file of 5.75kb with 325 lines. What do you expect?
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++17 Compiler: Microsoft VS2019 (16.5.1)

  8. #23
    Join Date
    Feb 2020
    Posts
    14

    Re: Filter a txt file based on a grid of data

    I expect just 6 rows, also with an alignment of code - see attached picture- the results is different ...
    Attached Images Attached Images  
    Attached Images Attached Images
    Last edited by RIC63; February 25th, 2020 at 10:02 AM.

  9. #24
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,038

    Re: Filter a txt file based on a grid of data

    The VB code in the given .pdf file in post #23 is not the same VB code as given in the attachment in post #1. There are 2 differences. For the VB code in the .pdf file in post #23, the converted C++ code is:

    Code:
    #import "c:\\Program Files (x86)\\Common Files\\microsoft shared\\OFFICE14\\mso.dll" \
    	rename( "RGB", "MSORGB" ) \
    	rename( "DocumentProperties", "MSDocument") \
    	rename( "SearchPath", "MSSearch")
    
    #import "c:\\Program Files (x86)\\Common Files\\microsoft shared\\VBA\\VBA6\\VBE6EXT.OLB"
    
    #import "c:\\Program Files (x86)\Microsoft Office\\Office14\\excel.exe" \
    	rename( "DialogBox", "ExcelDialogBox" ) \
    	rename( "RGB", "ExcelRGB" ) \
    	rename( "CopyFile", "ExcelCopyFile" ) \
    	rename( "ReplaceText", "ExcelReplaceText" )
    
    
    #include <iostream>
    #include <iomanip>
    #include <string>
    #include <fstream>
    
    using namespace Excel;
    using namespace std;
    
    const string sname = "c:\\MyProgs\\Level_3_30.xlsm";	// Complete path name to Excel filter file
    const string pname = "ToPurgeFile.txt";					// Input file name
    const string oname = "OutputFile.txt";					// Output file name (over written)
    
    int main()
    {
    	const auto hr {CoInitializeEx(0, COINIT_MULTITHREADED)};
    
    	if (FAILED(hr))
    		return (cout << "Failed to initialize COM library. Error code = 0x" << hex << hr << endl), 1;
    
    	_ApplicationPtr pXL;
    
    	if (FAILED(pXL.CreateInstance("Excel.Application")))
    		return (cout << "Failed to initialize Excel::_Application!" << endl), 2;
    
    	_WorkbookPtr pBook;
    
    	try {
    		pBook = pXL->Workbooks->Open(sname.c_str());
    	}
    	catch (...) {
    		pXL->Quit();
    		return (cout << "Cannot open spreadsheet file " << sname << endl), 3;
    	}
    
    	pXL->PutVisible(0, FALSE);
    
    	const _WorksheetPtr pWksheet {pXL->ActiveSheet};
    	const RangePtr pRange {pWksheet->Cells};
    
    	const int vSize {3};		// Size of vmin/vmax
    	const int nRange {17};		// Number of ranges
    	const int rRow {15};		// Number of rows per range
    	const int rCol {3};			// Number of cols per range
    	const int rcStart {8};		// H for starting col of first range
    	const int rrStart {10};		// Starting row for the ranges
    	const int nval {6};			// Number of ints per line in file
    
    	int vMin[vSize] {0};
    	int vMax[vSize] {0};
    	int rData[nRange][rRow][rCol] {0};	// Contains the data for the various required ranges from the spreadsheet
    
    	bool bad {false};	// Has data read failed
    
    	int vpar;
    	int vTotalMax;
    
    	try {
    		vpar = (int)pRange->Item[8][25];		// Y8
    		vTotalMax = (int)pRange->Item[7][52];	// AZ7
    
    		vMin[0] = (int)pRange->Item[4][48];		// AV4
    		vMin[1] = (int)pRange->Item[6][48];		// AV6
    		vMin[2] = (int)pRange->Item[8][48];		// AV8
    
    		vMax[0] = (int)pRange->Item[4][50];		// AX4
    		vMax[1] = (int)pRange->Item[6][50];		// AX6
    		vMax[2] = (int)pRange->Item[8][50];		// AX8
    
    		// Obtain the range data
    		for (int r {0}; r < nRange; ++r)
    			for (int cl {0}; cl < rCol; ++cl)
    				for (int rw {0}; rw < rRow; ++rw)
    					rData[r][rw][cl] = (int)pRange->Item[rrStart + rw][rcStart + (r * rCol) + cl];
    	}
    	catch (...) {
    		bad = true;
    	}
    
    	pWksheet->Release();
    	pBook->Release();
    	pXL->Quit();
    
    	if (bad)
    		return (cout << "Cannot read the data!" << endl), 4;
    
    	ifstream ifs(pname);
    
    	if (!ifs.is_open())
    		return (cout << "Cannot open input file " << pname << endl), 5;
    
    	ofstream ofs(oname);
    
    	if (!ofs.is_open())
    		return (cout << "Cannot open output file " << oname << endl), 6;
    
    	int buf[nval] {0};	// One line of int data
    
    	// Try to read first number on line. If this fails, at EOF
    	while (ifs >> buf[0]) {
    		// Read rest of line of int data
    		for (int v {1}; v < nval; ++v)
    			ifs >> buf[v];
    
    		int cnt3 {0};
    
    		for (int n {0}; n < vSize; ++n) {
    			int cnt1 {0};
    
    			//for (int i {n * 4 + 2}, ie {n * 4 + 5}; i <= ie; ++i) {	// Check <= what's with 4, 2, 5??
    			for (int i {n * 5 + 1}, ie {n * 5 + 5}; i <= ie; ++i) {
    				int cnt2 {0};
    
    				for (int j {0}; j < nval; ++j) {
    					// cnt2 = cnt2 + countif()
    					for (int rw {0}; rw < rRow; ++rw)
    						for (int cl {0}; cl < rCol; ++cl)
    							if (rData[i][rw][cl] == buf[j])
    								++cnt2;
    				}
    
    				if (cnt2 == vpar)
    					++cnt1;
    			}
    
    			if ((cnt1 >= vMin[n]) && (cnt1 <= vMax[n]))
    				cnt3 += cnt1;
    		}
    
    		//if ((cnt3 > 0) && (cnt3 <= vTotalMax))
    		if (cnt3 == vTotalMax)
    			// Output line of data
    			for (int d {0}; d < nval; ++d)
    				ofs << buf[d] << ((d == (nval - 1)) ? '\n' : ' ');
    	}
    
    	ifs.close();
    	ofs.close();
    }
    If a speadsheet has different VB code, then the given C++ code will have to be modified appropriately. If you use other .xlsm files, then for each you will need to review the VB code and if different from the 3 already given and converted, then you will need to amend one of these to match the revised VB code.

    The code above gives the same output as your supplied test outputfile.txt file of 6 lines for level_3_30.xlsm.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++17 Compiler: Microsoft VS2019 (16.5.1)

  10. #25
    Join Date
    Feb 2020
    Posts
    14

    Re: Filter a txt file based on a grid of data

    I was confident to have highlighted to you the differences between the two type of spreadsheet in post #19 however i exscuse me for this inconvenient and confirm that now the code give the expected results; i tried it also on 74,5 Mb file and compared the output ( 45746 rows ) with the one producede by vb code : same results, all its ok

    thanks

    I put here a last spreadsheet with a more simple code, this represent the fourth -and last- level of check that i must perform on the data base, if you can give it a check surely the code will do what it must do..different if i try to write down le code myself...
    it produce no validated rows for the ToPurgeFile of 11KB and 12292 rows for the file of 74,5Mb.

    in any case thanks for all support and your patience
    Attached Files Attached Files

  11. #26
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,038

    Re: Filter a txt file based on a grid of data

    This is the C++ converted VB code for the level_4_60.xlsm file. For the test data it produces no output as expected.

    Code:
    #import "c:\\Program Files (x86)\\Common Files\\microsoft shared\\OFFICE14\\mso.dll" \
    	rename( "RGB", "MSORGB" ) \
    	rename( "DocumentProperties", "MSDocument") \
    	rename( "SearchPath", "MSSearch")
    
    #import "c:\\Program Files (x86)\\Common Files\\microsoft shared\\VBA\\VBA6\\VBE6EXT.OLB"
    
    #import "c:\\Program Files (x86)\Microsoft Office\\Office14\\excel.exe" \
    	rename( "DialogBox", "ExcelDialogBox" ) \
    	rename( "RGB", "ExcelRGB" ) \
    	rename( "CopyFile", "ExcelCopyFile" ) \
    	rename( "ReplaceText", "ExcelReplaceText" )
    
    
    #include <iostream>
    #include <iomanip>
    #include <string>
    #include <fstream>
    
    using namespace Excel;
    using namespace std;
    
    const string sname = "c:\\MyProgs\\Level_4_60.xlsm";	// Complete path name to Excel filter file
    const string pname = "ToPurgeFile.txt";			// Input file name
    const string oname = "OutputFile.txt";			// Output file name (over written)
    
    int main()
    {
    	const auto hr {CoInitializeEx(0, COINIT_MULTITHREADED)};
    
    	if (FAILED(hr))
    		return (cout << "Failed to initialize COM library. Error code = 0x" << hex << hr << endl), 1;
    
    	_ApplicationPtr pXL;
    
    	if (FAILED(pXL.CreateInstance("Excel.Application")))
    		return (cout << "Failed to initialize Excel::_Application!" << endl), 2;
    
    	_WorkbookPtr pBook;
    
    	try {
    		pBook = pXL->Workbooks->Open(sname.c_str());
    	}
    	catch (...) {
    		pXL->Quit();
    		return (cout << "Cannot open spreadsheet file " << sname << endl), 3;
    	}
    
    	pXL->PutVisible(0, FALSE);
    
    	const _WorksheetPtr pWksheet {pXL->ActiveSheet};
    	const RangePtr pRange {pWksheet->Cells};
    
    	const int nRange {12};		// Number of ranges
    	const int rRow {15};		// Number of rows per range
    	const int rCol {4};		// Number of cols per range
    	const int rcStart {8};		// H for starting col of first range
    	const int rrStart {10};		// Starting row for the ranges
    	const int nval {6};		// Number of ints per line in file
    
    	int rData[nRange][rRow][rCol] {0};	// Contains the data for the various required ranges from the spreadsheet
    
    	bool bad {false};	// Has data read failed
    
    	int vPar6;
    	int vMin6;
    	int vMax6;
    
    	try {
    		vPar6 = (int)pRange->Item[8][25];		// Y8
    		vMin6 = (int)pRange->Item[8][48];		// AV8
    		vMax6 = (int)pRange->Item[8][50];		// AX8
    
    		// Obtain the range data
    		for (int r {0}; r < nRange; ++r)
    			for (int cl {0}; cl < rCol; ++cl)
    				for (int rw {0}; rw < rRow; ++rw)
    					rData[r][rw][cl] = (int)pRange->Item[rrStart + rw][rcStart + (r * rCol) + cl];
    	}
    	catch (...) {
    		bad = true;
    	}
    
    	pWksheet->Release();
    	pBook->Release();
    	pXL->Quit();
    
    	if (bad)
    		return (cout << "Cannot read the data!" << endl), 4;
    
    	ifstream ifs(pname);
    
    	if (!ifs.is_open())
    		return (cout << "Cannot open input file " << pname << endl), 5;
    
    	ofstream ofs(oname);
    
    	if (!ofs.is_open())
    		return (cout << "Cannot open output file " << oname << endl), 6;
    
    	int buf[nval] {0};	// One line of int data
    
    	// Try to read first number on line. If this fails, at EOF
    	while (ifs >> buf[0]) {
    		// Read rest of line of int data
    		for (int v {1}; v < nval; ++v)
    			ifs >> buf[v];
    
    		int cntlf {0};
    
    		for (int i {0}; i < nRange; ++i) {
    			int cnt2 {0};
    
    			for (int j {0}; j < nval; ++j) {
    				// cnt2 = cnt2 + countif()
    				for (int rw {0}; rw < rRow; ++rw)
    					for (int cl {0}; cl < rCol; ++cl)
    						if (rData[i][rw][cl] == buf[j])
    							++cnt2;
    			}
    
    			if (cnt2 == vPar6)
    				++cntlf;
    		}
    
    		if ((cntlf >= vMin6) && (cntlf <= vMax6))
    			// Output line of data
    			for (int d {0}; d < nval; ++d)
    				ofs << buf[d] << ((d == (nval - 1)) ? '\n' : ' ');
    	}
    
    	ifs.close();
    	ofs.close();
    }
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++17 Compiler: Microsoft VS2019 (16.5.1)

  12. #27
    Join Date
    Feb 2020
    Posts
    14

    Re: Filter a txt file based on a grid of data

    Ok 2kaud

    everything works fine

    many thanks

  13. #28
    2kaud's Avatar
    2kaud is offline Super Moderator Power Poster
    Join Date
    Dec 2012
    Location
    England
    Posts
    7,038

    Re: [RESOLVED] Filter a txt file based on a grid of data

    What difference in timings are you getting between the VB and the C++ versions? I'd be interested - especially for the larger files - as you mentioned an hour for VB in some cases.
    Last edited by 2kaud; February 26th, 2020 at 10:53 AM.
    All advice is offered in good faith only. All my code is tested (unless stated explicitly otherwise) with the latest version of Microsoft Visual Studio (using the supported features of the latest standard) and is offered as examples only - not as production quality. I cannot offer advice regarding any other c/c++ compiler/IDE or incompatibilities with VS. You are ultimately responsible for the effects of your programs and the integrity of the machines they run on. Anything I post, code snippets, advice, etc is licensed as Public Domain https://creativecommons.org/publicdomain/zero/1.0/ and can be used without reference or acknowledgement. Also note that I only provide advice and guidance via the forums - and not via private messages!

    C++17 Compiler: Microsoft VS2019 (16.5.1)

  14. #29
    Join Date
    Feb 2020
    Posts
    14

    Re: [RESOLVED] Filter a txt file based on a grid of data

    Hi 2kaud
    the processing times with reference to the 74.5 mb file vary according to the type of xlsm files taken as input but in any case it ranges from 38 'on my office PC (Hp 800 G4) to 55' and more on mine old Hp 8760 i7 notebook from 2010 ... this using Vb macros from within Excel
    instead using the versions compiled in c ++ it takes about 2 'max which is a great time for me especially if I consider that I have to do 5 levels of filtering ...: 5x55' = 275 'against 5x2' = 10 '!!!

    ;-)

Page 2 of 2 FirstFirst 12

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  


Windows Mobile Development Center


Click Here to Expand Forum to Full Width




On-Demand Webinars (sponsored)