|
-
August 3rd, 2010, 09:19 AM
#1
Text parser
I'm having a little problem with a text-parsing program. I want to strip the numbers out of a text file and write them to another text file, separated by tabs or commas for easy importing into Excel. Here's my code:
#include <iostream>
#include <fstream>
#include <string>
#include <sstream>
#include <Windows.h>
using namespace std;
int main(int argc, char* argv[]){
char* inFile;
char* outFile;
string line;
int firstMarkPos = 0;
int secondMarkPos = 0;
string busNum;
int pos = 0;
bool check4PosZero = false;
int posPos = 0;
int posZero = 0;
float pos1,pos2,pos3;
float zero1,zero2,zero3;
int temp = 0;
string stringTemp;
//MARKER DECLARATIONS!!!
/******************************************/
/**/ char firstMarker[] = "]"; /**/
/**/ char secondMarker[] = "AREA"; /**/
/******************************************/
if(argc < 3){//Check argc. If too few arguments inform user of syntax.
cout << "Syntax: text_parser.exe <readFile> <writeFile>" << endl;
return 1;
}else{//if you got enough parameters
cout << "\n\nYou input the right amount of arguments!\n\n" << endl;
inFile = argv[1];
outFile = argv[2];
}
//declare input and output file streams
ifstream inputF;
ofstream outputF;
//declare stringstream
stringstream ssp;
stringstream ssz;
//open input file user specified
cout << "Opening " << inFile << "..." << endl;
inputF.open(inFile);
if(inputF.is_open()){
cout << inFile << " opened successfully!" << endl;
Beep(500,100);
Sleep(2000);
cout << "Closing for now...\n\n\n";
inputF.close();
}else{
cout << "Failed to open file: " << inFile << "\nThe program will quit.\n\n" << endl;
Beep(200,1000);
Sleep(3000);
return 1;
}
//open output file user specified
cout << "Opening " << outFile << "..." << endl;
outputF.open(outFile,ios::trunc);
if(outputF.is_open()){
cout << outFile << " opened successfully!" << endl;
Beep(500,100);
Sleep(2000);
cout << "Closing for now...\n\n\n";
outputF.close();
}else{
cout << "Failed to open file: " << outFile << "\nThe program will quit\n\n" << endl;
Beep(200,1000);
Sleep(3000);
return 1;//kill the process
}
cout << "About to enter main while loop...\n\n";
cout << "Opening input file: " << inFile;
inputF.open(inFile);//keep file open
if(inputF.is_open()){
cout << "SUCCESS!" << endl;
//Sleep(2000);
}else{
cout << "Failed" << endl;
Beep(1000,2000);
return 2;
}
cout << "Opening output file: " << outFile << "...";
outputF.open(outFile,ios::app);
if(outputF.is_open()){
cout << "SUCCESS\n\n\n\n" << endl;
}else{
cout << "Failed" << endl;
Beep(1000,2000);
return 2;
}
while(!inputF.eof()){
//grab a line from the text file
getline(inputF,line);
//output it to the screen
cout << line << endl;
if(check4PosZero){
//used for finding float values in the text file
posPos = line.find("POSITIVE");
temp = line.find("NEGATIVE");
posZero = line.find("ZERO");
if(posPos != string::npos&&posZero != string::npos){
if(posPos < posZero){
//PROBLEM AREA!!!!!
ssp << line.substr(posPos + 8, temp - posPos - 8);
ssp >> pos1 >> pos2 >> pos3;
cout << "POSITIVE: ";
cout << pos1<<" "<< pos2 <<" "<< pos3 << endl;
Sleep(2000);
cout << "'" << line.substr(posPos + 8, temp - posPos - 8) <<"'"<< endl;
Sleep(2000);
outputF << pos1 << "\t" << pos2 << "\t";
Sleep(5000);
ssz << line.substr(posZero + 4,line.size() - posZero - 4);
ssz >> zero1 >> zero2 >> zero3;
cout << "ZERO: ";
cout << zero1 <<" "<< zero2 <<" "<< zero3 << endl;
Sleep(2000);
cout <<"'"<< line.substr(posZero + 4,line.size() - posZero - 4) <<"'"<< endl;
Sleep(2000);
outputF << zero1 << "\t" << zero2 << endl;
check4PosZero = false;//set flag false to stop looking for "POSITIVE" and "ZERO"
//END PROBLEM AREA!!!!
}else{
//"ZERO" occurred before "POSITIVE", ignore
}
}else{
//"POSITIVE" and "ZERO" both present in current line
}
}
//get the position of first and second marker
firstMarkPos = line.find(firstMarker);
secondMarkPos = line.find(secondMarker);
if(firstMarkPos != string::npos && secondMarkPos != string::npos){//If both markers are detected...
if(firstMarkPos < secondMarkPos){
/* IMPORTANT!
At this point both markers have been detected and the first marker
precedes the second; however, it is still possible that the data
in between the two is not just the bus number we want it to be!
*/
//cout << "\n\n\n\t\t\t***The first marker precedes the second.***\n\n\n";
//Sleep(5000);
busNum = line.substr(firstMarkPos + 1, secondMarkPos - firstMarkPos - 1);
//remove white spaces...
//cout << "Preparing to perform whitespace removal..." << endl;
while(1){
pos = busNum.find_first_not_of("0123456789");
if(pos != string::npos){
//Found the position of the first non-number
//cout << "Non-number in busNum: '";
//cout << busNum[pos] << "'...";
busNum.erase(pos,1);//erase that character
//cout << "Erased!" << endl;
pos = 0;//reinitialize position integer
}else{
break;
}
}
cout << busNum << endl;
outputF << busNum << "\t";//tab character
//cout << "Contents of busNum written to file, restarting while loop...\n";
//now you need to grab the two floats after 'POSITIVE' and 'ZERO'
check4PosZero = true;
}else{
//cout << "\nThis line's markers are out of order." << endl;
//Sleep(1000);
}
}else{
//only one marker or no markers detected
//cout << "\n\nGrabbed a line where one or less markers were detected." << endl;
//Sleep(1000);
}
}//main while loop; breaks at the end of file
//close both files
cout << "Closing " << inFile << "...";
inputF.close();
cout << "done." << endl;
cout << "Closing " << outFile << "...";
outputF.close();
cout << "done." << endl;
//allow user to see that the program is complete before exiting
system("pause");
return 0;
}
The text file this program has to read from has MANY lines, but I'm only interested in lines like these two:
--AT BUS [CALN HHG000 138.00] 77904 AREA 7 (KV L-G) V+: / 0.000/ 0.00 (KV L-G) VA: / 0.000/ 0.00 V0: / 42.250/ 178.86
AND
--THEV. R, X, X/R: POSITIVE 0.03296 0.13488 4.093 NEGATIVE 0.03297 0.13493 4.092 ZERO 0.08720 0.30104 3.452
From the first type of line I want to extract the "77904", using the ']' and "AREA" as markers to locate that integer. This part of the code is working just fine. I am, however, having trouble extracting the three floats following "POSITIVE" and "ZERO" in the second type of line. The code correctly grabs the first two floats after "POSITIVE" and writes them to a formatted text file, but I am having trouble with the second pair of floats; the output looks like this:
77904 0.03296 0.13488 0.0872 0.30104
77919 0.02726 0.08762 0.0872 0.30104
77923 0.01739 0.04625 0.0872 0.30104
77922 0.00182 0.03204 0.0872 0.30104
7853 0.00275 0.10278 0.0872 0.30104
77917 0.01744 0.04707 0.0872 0.30104
There are about a thousand of these, so I'll only put up a few. As you can see, it's formatted well and the numbers line up, except that the last two floats on every line are the same: they are equal to the first values they were assigned. I think this has to do with the stringstreams I declared. Can someone tell me what I'm doing wrong? Thanks for all your help!
Last edited by rylar; August 3rd, 2010 at 09:53 AM.
Reason: CODE TAGS!
Posting Permissions
- You may not post new threads
- You may not post replies
- You may not post attachments
- You may not edit your posts
-
Forum Rules
|
Click Here to Expand Forum to Full Width
|