Re: Determining the number of unique words in a .txt file
Ok, heres my revised code -
Code:
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
#include <iomanip>
using namespace std;
int const wordLength = 21;
int const Num = 100;
int const fileSize = 255;
struct words
{
char word[wordLength];
int count;
};
int storeFile( char [], words []);
void wordSearchSetup(char[], int, words[]);
int wordSearch(char[], int, words[]);
void main ()
{
int count;
char fileName[fileSize];
cout << "Please enter the name of the file you wish to open: "<< endl;
cin.getline(fileName,fileSize);
words array[Num];
count = storeFile (fileName, array);
cin.ignore();
}
int storeFile (char fileName[], words array[] )
{
int count = 0;
int i = 0;
ifstream inFile;
char line [Num];
inFile.open(fileName);
while (inFile.getline(line,Num))
{
strncpy(array[i].word, line, wordLength);
count++;
i++;
}
inFile.close();
wordSearchSetup( fileName, count, array);
return count;
}
void wordSearchSetup(char fileName[], int count, words array[])
{
char line[Num];
int i = 0;
int size;
ifstream inFile;
inFile.open(fileName);
while (inFile.getline(line,Num))
size = wordSearch(line, count, array);
cout << size << endl;
}
int wordSearch( char line[], int count, words array[])
{
int i = count;
while (i)
{
if (strcmp(line, array[i-1].word) == 0)
{
array[i-1].count++;
return count;
}
i-- ;
}
strcpy(array[count].word, line) ;
array[count].count = 1 ;
return count+1;
}
My search functions still arent giving me the results I want, it just returns the number of words, not unique words.
Re: Determining the number of unique words in a .txt file
It'll probably be easier if you don't attempt to do anything except search within the search function. You seem to be trying to extend the array in there, which is bad for two reasons:
1) It does more than the function name claims it does, always bad design, and
2) It's not always the appropriate thing to do depending on whether the search succeeds or not.
Re: Determining the number of unique words in a .txt file
Ok thanks, now I'm trying to determine the average occurence of each words, I'm starting out by finding out how many of each word there is, heres my function-
Code:
void averageOccurrence(words array[], int array_length)
{
int n;
char cmp_array[wordLength];
for( int i= 0; i< array_length; i++)
{
strcpy(cmp_array, array[i].word);
for (int j=1; j<array_length; j++)
{
n = (strcmp(array[j].word, cmp_array));
if(n == 0)
array[i].count++;
}
}
}
It just gives me a large count like 150077, or 150079.
Re: Determining the number of unique words in a .txt file
First off, wordLength had better be a compile-time constant or that's not valid code. If it is, you should indicate that in some way in the name; one common convention is to make constants ALL CAPS.
Second, there's no need have a cmp_array at all; you can compare array[i].word to array[j].word directly. Always best to eliminate the middleman when possible.
Second, you probably don't want to start j at 1 each time through. 0 or i+1 might be a better choice. That's dependent on exactly what behavior you want though.
Third, this approach in no way ensures that you only bother counting on the first occurrence of each word. I'd suggest setting array[j].count to -1 or something like that at the same time you increment array[i].count, so that you can easily skip those later.
Fourth, have you added code to ensure that the count members are 0 to start with? A constructor in the struct could ensure that easily.
Re: Determining the number of unique words in a .txt file
Thanks a lot, that helps a lot. I'm on my last stat, I have to find the most commonly occuring word(s). Heres my function-
Code:
void commonWord(double count[], int array_length, words array[])
{
int commonCount[Num];
int max;
max = count[0];
int j = 0;
int i = 0;
int k = 0;
for(i = 1; i<array_length; i++)
{
if(count[i] > max)
max = count[i];
else if( count[i] == max)
{
commonCount[j] = i;
j++;
}
}
cout<< "The most commonly occuring words are: "<< endl;
for( k = 0; k<array_length; k++)
cout<< array[commonCount[k]].word<< endl;
}
I dont get a compile-time error. But when I run the program, I get a message telling me the .exe file stopped working.
Re: Determining the number of unique words in a .txt file
It's good practice to avoid using the word "max" as a variable name or function in your program, because Microsoft stupidly decided to make max() and min() be *macros* in Visual Studio; any code with that word in it may thus behave unexpectedly if you try to compile it there. Just don't use the word "max" or "min" on its own anywhere. "countmax" would be fine.
That function seems overly complex for the task. All you need to do is loop through to find the max count (which you're doing), and then loop through a second time to find all the counts equal to the max and print out their words.
Also, "stopped working" isn't specific enough to make a diagnosis; you need to tell us what the message was and what line the debugger indicates the failure occurred on.
Re: Determining the number of unique words in a .txt file
I'm now trying to write my stats to a text file, but its only writing two of the stats, and those stats come from the same function.
Heres the code -
Code:
#include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
#include <iomanip>
using namespace std;
int const wordLength = 21;
int const Num = 100;
int const fileSize = 255;
struct words
{
char word[wordLength];
int count;
};
void storeFile(char[], char [], words []);
void displayFile(char[], char[], words[]);
int wordSearch(char word[], int array_size, words []);
void sortSetup(char[], words [], int);
int sort (words [], int, int);
void averageLength(char[], words[], int);
void Occurrence(char[], words[], int);
void commonWord(char[], double[], int, words []);
void averageOccurrence(char[], words[], int);
int main ()
{
words array[Num];
char fileName[fileSize];
char out_file_name[fileSize];
cout << "Please enter the name of the file you wish to open: "<< endl;
cin.getline(fileName,fileSize);
if (!cin.good() ) {
cout << "Error reading cin..." << endl ;
return -1 ;
}
cout<<"Please enter the name of the file you wish to send the data too" << endl;
cin.getline(out_file_name,fileSize);
displayFile(out_file_name, fileName, array);
storeFile(out_file_name, fileName, array);
cin.ignore();
}
void storeFile (char out_file_name[], char fileName[], words array[] )
{
ofstream outFile;
outFile.open(out_file_name);
int i = 0;
ifstream inFile;
char line [Num];
int array_size = 0 ;
inFile.open(fileName);
while (inFile.getline(line,Num))
{
array_size = wordSearch(line, array_size, array);
i++;
}
inFile.close();
outFile<< "The number of unique words are: "<< array_size << endl;
outFile<< "total number of words are: " << i << endl;
outFile<< endl;
}
int wordSearch( char line[], int array_size, words array[])
{
int i = array_size ;
while (i && array_size > 0)
{
if (strcmp(line, array[i-1].word) == 0)
{
array[i-1].count++;
return array_size ;
}
i-- ;
}
strcpy(array[array_size].word, line) ;
array[array_size].count = 1 ;
return array_size+1;
}
void displayFile (char out_file_name[], char fileName[], words array[] )
{
int i = 0;
char line [Num];
ifstream inFile;
inFile.open(fileName);
while (inFile.getline(line,Num))
{
strncpy(array[i].word, line, wordLength);
i++;
}
sortSetup(out_file_name, array, i);
averageLength(out_file_name, array, i);
Occurrence(out_file_name, array, i);
averageOccurrence(out_file_name, array, i);
inFile.close();
}
void sortSetup (char out_file_name[], words array [], int array_length)
{
ofstream outFile;
outFile.open(out_file_name);
char temp[wordLength];
int position;
int i = array_length;
for (int loop = 0; loop < array_length - 1; loop++)
{
position = sort (array, loop, array_length - 1);
if (position != loop)
{
strcpy(temp, array[position].word);
strcpy(array[position].word, array[loop].word);
strcpy(array[loop].word, temp);
}
}
outFile << "The words in alphabetical order are:"<< endl;
for (int j = 0; j< array_length; j++)
{
if(strcmp(array[j].word,array[j-1].word)!=0)
outFile << array[j].word << endl;
}
outFile << endl;
}
int sort (words array[], int start, int stop)
{
int n;
int loc = start;
for (int pos = start + 1; pos <= stop; pos++)
{
n = (strcmp(array[pos].word, array[loc].word));
if (n < 0)
loc = pos;
}
return loc;
}
void averageLength(char out_file_name[], words array[], int i)
{
ofstream outFile;
outFile.open(out_file_name);
double average = 0;
for(int j = 0; j<i; j++)
average = average + strlen(array[j].word);
average = average/i;
outFile << "The average length of the words are: " << average <<endl;
outFile << endl;
}
void Occurrence(char out_file_name[], words array[], int array_length)
{
ofstream outFile;
outFile.open(out_file_name);
int n;
char cmp_array[wordLength];
double count[Num];
for( int i= 0; i< array_length; i++)
{
strcpy(cmp_array, array[i].word);
count[i] = 0;
for (int j=0; j<array_length; j++)
{
n = (strcmp(array[j].word, cmp_array));
if(n == 0)
count[i]++;
}
}
outFile<<"The unique words and the number of times they appear in the text file appears asthe following:"<< endl;
outFile<<"word/times it appears:" << endl;
outFile<< endl;
for (int k = 0; k< array_length; k++)
{
if(strcmp(array[k].word,array[k-1].word)!=0)
outFile <<array[k].word << " / " << count[k] << endl;
}
outFile<<endl;
commonWord(out_file_name, count, array_length, array);
}
void commonWord(char out_file_name[], double count[], int array_length, words array[])
{
ofstream outFile;
outFile.open(out_file_name);
int count_max;
count_max = count[0];
int j = 0;
int i = 0;
for(i = 1; i<array_length; i++)
{
if(count[i] > count_max)
count_max = count[i];
}
outFile<< "The word(s) that occur the most are: "<< endl;
for( j = 0; j<array_length; j++)
{
if(strcmp(array[j].word,array[j-1].word)!=0)
{
if(count[j] == count_max)
outFile << array[j].word<< endl;
}
}
outFile<< endl;
}
void averageOccurrence(char out_file_name[], words array[], int array_length)
{
ofstream outFile;
outFile.open(out_file_name);
int n;
char cmp_array[wordLength];
double count[Num];
for( int i= 0; i< array_length; i++)
{
strcpy(cmp_array, array[i].word);
count[i] = 0;
for (int j=0; j<array_length; j++)
{
n = (strcmp(array[j].word, cmp_array));
if(n == 0)
count[i]++;
}
}
outFile<<"The average occurence of a word appears as the following:" << endl; outFile <<"word/average appearence:" << endl;
outFile<< endl;
for (int k = 0; k< array_length; k++)
{
if(strcmp(array[k].word,array[k-1].word)!=0)
outFile <<array[k].word << " / " << count[k]/array_length << endl;
}
outFile<<endl;
}#include <iostream>
#include <fstream>
#include <cstdlib>
#include <string>
#include <iomanip>
using namespace std;
int const wordLength = 21;
int const Num = 100;
int const fileSize = 255;
struct words
{
char word[wordLength];
int count;
};
void storeFile(char[], char [], words []);
void displayFile(char[], char[], words[]);
int wordSearch(char word[], int array_size, words []);
void sortSetup(char[], words [], int);
int sort (words [], int, int);
void averageLength(char[], words[], int);
void Occurrence(char[], words[], int);
void commonWord(char[], double[], int, words []);
void averageOccurrence(char[], words[], int);
int main ()
{
words array[Num];
char fileName[fileSize];
char out_file_name[fileSize];
cout << "Please enter the name of the file you wish to open: "<< endl;
cin.getline(fileName,fileSize);
if (!cin.good() ) {
cout << "Error reading cin..." << endl ;
return -1 ;
}
cout<<"Please enter the name of the file you wish to send the data too" << endl;
cin.getline(out_file_name,fileSize);
displayFile(out_file_name, fileName, array);
storeFile(out_file_name, fileName, array);
cin.ignore();
}
void storeFile (char out_file_name[], char fileName[], words array[] )
{
ofstream outFile;
outFile.open(out_file_name);
int i = 0;
ifstream inFile;
char line [Num];
int array_size = 0 ;
inFile.open(fileName);
while (inFile.getline(line,Num))
{
array_size = wordSearch(line, array_size, array);
i++;
}
inFile.close();
outFile<< "The number of unique words are: "<< array_size << endl;
outFile<< "total number of words are: " << i << endl;
outFile<< endl;
}
int wordSearch( char line[], int array_size, words array[])
{
int i = array_size ;
while (i && array_size > 0)
{
if (strcmp(line, array[i-1].word) == 0)
{
array[i-1].count++;
return array_size ;
}
i-- ;
}
strcpy(array[array_size].word, line) ;
array[array_size].count = 1 ;
return array_size+1;
}
void displayFile (char out_file_name[], char fileName[], words array[] )
{
int i = 0;
char line [Num];
ifstream inFile;
inFile.open(fileName);
while (inFile.getline(line,Num))
{
strncpy(array[i].word, line, wordLength);
i++;
}
sortSetup(out_file_name, array, i);
averageLength(out_file_name, array, i);
Occurrence(out_file_name, array, i);
averageOccurrence(out_file_name, array, i);
inFile.close();
}
void sortSetup (char out_file_name[], words array [], int array_length)
{
ofstream outFile;
outFile.open(out_file_name);
char temp[wordLength];
int position;
int i = array_length;
for (int loop = 0; loop < array_length - 1; loop++)
{
position = sort (array, loop, array_length - 1);
if (position != loop)
{
strcpy(temp, array[position].word);
strcpy(array[position].word, array[loop].word);
strcpy(array[loop].word, temp);
}
}
outFile << "The words in alphabetical order are:"<< endl;
for (int j = 0; j< array_length; j++)
{
if(strcmp(array[j].word,array[j-1].word)!=0)
outFile << array[j].word << endl;
}
outFile << endl;
}
int sort (words array[], int start, int stop)
{
int n;
int loc = start;
for (int pos = start + 1; pos <= stop; pos++)
{
n = (strcmp(array[pos].word, array[loc].word));
if (n < 0)
loc = pos;
}
return loc;
}
void averageLength(char out_file_name[], words array[], int i)
{
ofstream outFile;
outFile.open(out_file_name);
double average = 0;
for(int j = 0; j<i; j++)
average = average + strlen(array[j].word);
average = average/i;
outFile << "The average length of the words are: " << average <<endl;
outFile << endl;
}
void Occurrence(char out_file_name[], words array[], int array_length)
{
ofstream outFile;
outFile.open(out_file_name);
int n;
char cmp_array[wordLength];
double count[Num];
for( int i= 0; i< array_length; i++)
{
strcpy(cmp_array, array[i].word);
count[i] = 0;
for (int j=0; j<array_length; j++)
{
n = (strcmp(array[j].word, cmp_array));
if(n == 0)
count[i]++;
}
}
outFile<<"The unique words and the number of times they appear in the text file appears asthe following:"<< endl;
outFile<<"word/times it appears:" << endl;
outFile<< endl;
for (int k = 0; k< array_length; k++)
{
if(strcmp(array[k].word,array[k-1].word)!=0)
outFile <<array[k].word << " / " << count[k] << endl;
}
outFile<<endl;
commonWord(out_file_name, count, array_length, array);
}
void commonWord(char out_file_name[], double count[], int array_length, words array[])
{
ofstream outFile;
outFile.open(out_file_name);
int count_max;
count_max = count[0];
int j = 0;
int i = 0;
for(i = 1; i<array_length; i++)
{
if(count[i] > count_max)
count_max = count[i];
}
outFile<< "The word(s) that occur the most are: "<< endl;
for( j = 0; j<array_length; j++)
{
if(strcmp(array[j].word,array[j-1].word)!=0)
{
if(count[j] == count_max)
outFile << array[j].word<< endl;
}
}
outFile<< endl;
}
void averageOccurrence(char out_file_name[], words array[], int array_length)
{
ofstream outFile;
outFile.open(out_file_name);
int n;
char cmp_array[wordLength];
double count[Num];
for( int i= 0; i< array_length; i++)
{
strcpy(cmp_array, array[i].word);
count[i] = 0;
for (int j=0; j<array_length; j++)
{
n = (strcmp(array[j].word, cmp_array));
if(n == 0)
count[i]++;
}
}
outFile<<"The average occurence of a word appears as the following:" << endl; outFile <<"word/average appearence:" << endl;
outFile<< endl;
for (int k = 0; k< array_length; k++)
{
if(strcmp(array[k].word,array[k-1].word)!=0)
outFile <<array[k].word << " / " << count[k]/array_length << endl;
}
outFile<<endl;
}
The "storeFile" function is the only one that prints to the file.
BTW I know this code is unorganized and not the best way to do it, but this project is due tomorrow (12-10) so I'm just trying to turn in what I have.