I'm currently trying to write an application that counts the number of occurrences of each word in an ASCII file (ignoring punctuation and whitespace). The application must store each word and its count in a data structure, which will ultimately be sorted in descending order of count and then written to a CSV file.
I started working on this program, but when I tried to save a new word, I ran into a segmentation fault. Here is my code (I know that this is not an ideal implementation; I plan to refine it):
#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>
#define TRUE 1
#define FALSE 0
/* One entry of the word table: a word and how many times it was seen. */
struct word {
    char *str;  /* the word's text, NUL-terminated */
    int wc;     /* occurrence counter for this word */
};

/* Dynamically allocated table of words; NULL until the first allocation. */
struct word *warray = NULL;

/* Input stream, opened by main() from the first command-line argument. */
FILE *infile;

/* Output stream; declared here but not assigned anywhere in the visible code. */
FILE *outfile;

/* Forward declarations of the routines defined further down in this file. */
void printFile(void);
void getWords(void);
void save(char *input);
/*
 * Scan infile from the beginning and hand every word to save().
 *
 * A word is a run of alphabetic characters, folded to lowercase.  Whitespace
 * ends the current word; every other non-alphabetic character (digits,
 * punctuation) is skipped without ending it.  Words longer than the buffer
 * are truncated to 99 characters.  Empty words are never passed to save().
 */
void getWords(void)
{
    char cw[100];
    size_t len = 0;
    int c;

    rewind(infile);

    while ((c = fgetc(infile)) != EOF) {
        if (isalpha(c)) {
            /* Keep one byte free for the terminator; surplus letters of an
             * over-long word are dropped instead of overrunning cw. */
            if (len < sizeof cw - 1) {
                cw[len++] = (char)tolower(c);
            }
        } else if (isspace(c) && len > 0) {
            /* Whitespace closes a non-empty word; runs of whitespace are
             * collapsed because len is already 0 for the later ones. */
            cw[len] = '\0';
            save(cw);
            len = 0;
        }
    }

    /* The file may end without trailing whitespace: flush the last word. */
    if (len > 0) {
        cw[len] = '\0';
        save(cw);
    }
}
/*
 * Echo infile to stdout in lowercase, from the stream's current position
 * to end-of-file.  Punctuation and digits are omitted; every other
 * character (letters, whitespace, ...) is printed after tolower().
 * The stream is left at EOF.
 */
void printFile(void)
{
    int c;

    printf("Printing the file to be counted in lowercase...\n");

    while ((c = fgetc(infile)) != EOF) {
        if (!ispunct(c) && !isdigit(c)) {
            putchar(tolower(c));
        }
    }
}
void save(char *input)
{
int exists = FALSE, i = 0;
int elements = sizeof(warray)/sizeof(struct word);
if(!warray)
{
warray = malloc(sizeof(struct word));
printf("Made array.\n");
}
else
{
printf("New.\n");
warray = realloc(warray, (elements++)*sizeof(struct word));
}
while(i < elements)
{
printf("in while loop\n");
if(strcmp(input, warray[i].str) == 0)
{
warray[i].wc++;
}
else
{
++i;
}
}
printf("Out while loop\n");
if(strcmp(input, warray[i].str) == 1)
{
printf("Inside save if statement\n");
warray[elements].str = malloc(strlen(input)+1);
strcpy(warray[elements].str, input);
warray[elements].wc = 1;
elements++;
}
}
/*
 * Entry point.
 *
 * Expects two arguments: the input filename and the desired output
 * filename.  Only the input file (argv[1]) is opened here; argv[2] is
 * required by the argument check but not used yet.
 *
 * Returns 0 on success, 1 if the arguments are missing or the input file
 * cannot be opened.
 */
int main (int argc, char *argv[])
{
    if (argc < 3) {
        puts("Please supply the input filename and desired output filename as arguments.");
        return 1;
    }

    infile = fopen(argv[1], "r");
    if (infile == NULL) {
        printf("File failed to open. Error: %d\n", errno);
        return 1;
    }

    puts("File opened successfully.");
    printFile();
    getWords();

    /* Release the input stream; clear the global so it cannot be reused. */
    fclose(infile);
    infile = NULL;

    return 0;
}
I set up several print statements to try to isolate the problem, and it seems to occur in this part of the function save(char *input):
if(strcmp(input, warray[i].str) == 1)
{
printf("Inside save if statement\n");
warray[elements].str = malloc(strlen(input)+1);
strcpy(warray[elements].str, input);
warray[elements].wc = 1;
elements++;
}
, strcmp , value == 1, , , , , .
, - , !