Word Counting Application - C

I'm currently trying to write an application to count the number of occurrences of words in an ASCII file (with punctuation and ignoring spaces). The application must store the word and the number of words in the data structure, which will ultimately be sorted in descending order, and then printed in a CSV file.

I started working on this program, but when I tried to save a new word, I ran into the problem of segmentation. Here is my code (I know that this is not an ideal implementation, I plan to clarify it):

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdlib.h>
#include <errno.h>

#define TRUE 1
#define FALSE 0

/* This program is designed to take an ASCII input file, count the occurrences of words in it
 * and write an output file displaying the data. I intend for it to convert uppercase to 
 * lowercase, so as not to generate duplicate words in the data structure. It should also 
 * ignore whitespace and punctuation.
*/

void getWords(void);
void printFile(void);
void save(char *input);

struct word {
    char *str;
    int wc;
};

struct word *warray = NULL;

FILE *infile;
FILE *outfile;

void getWords(void)
{

    rewind(infile);
    char cw[100]; // Current word storage
    int i = 0, j = 0, c;

    while((c = fgetc(infile)) != EOF)
    {
        if(isalpha(c))
        {
            if(isupper(c))
            {
                cw[i] = tolower(c);
                ++i;
            }
            else
            {
                cw[i] = c;
                ++i;
            }
        }
        else
        {
            if(c == '\n' || c == '\t' || c == ' ')
            {
                cw[i] = '\0';
                i = 0;
                save(cw);

                for(j = 0; j < cw[99]; j++)
                {
                    printf("%c", cw[j]);
                }
            }
        }

    }

}

void printFile(void)
{

    int i, c;

    printf("Printing the file to be counted in lowercase...\n");
    for(i = 0; (c = fgetc(infile)) != EOF; i++)
    {
        if(ispunct(c) || isdigit(c))
        {
            ++i;
        }
        else
        {
            putchar(tolower(c));
        }

    }
}

void save(char *input)
{

    int exists = FALSE, i = 0;
    int elements = sizeof(warray)/sizeof(struct word);

    if(!warray)
    {
        warray = malloc(sizeof(struct word));
        printf("Made array.\n");
    }
    else
    {
        printf("New.\n");
        warray = realloc(warray, (elements++)*sizeof(struct word));
    }

    while(i < elements)
    {
        printf("in while loop\n");
        if(strcmp(input, warray[i].str) == 0)
        {
            warray[i].wc++;
        }
        else
        {
            ++i;
        }

    }
    printf("Out while loop\n");

    if(strcmp(input, warray[i].str) == 1)
    {
        printf("Inside save if statement\n");

        warray[elements].str = malloc(strlen(input)+1);

        strcpy(warray[elements].str, input);

        warray[elements].wc = 1;

        elements++;
    }


}

int main (int argc, char *argv[])
{


    if (argc < 3)
    {
        puts("Please supply the input filename and desired output filename as arguments.");
        return 1;
    }

    infile = fopen(argv[1], "r");
    if(infile == NULL)
    {
        printf("File failed to open. Error: %d\n", errno);
        return 1;
    }
    else
    {
        puts("File opened successfully.");
        printFile();
        getWords();
    }

    return 0;

}

I set up several print statements to try to isolate the problem, and it seems to work in this problem inside the function save(char *input):

if(strcmp(input, warray[i].str) == 1)
{
    printf("Inside save if statement\n");

    warray[elements].str = malloc(strlen(input)+1);

    strcpy(warray[elements].str, input);

    warray[elements].wc = 1;

    elements++;
}

, strcmp , value == 1, , , , , .

, - , !

+3
3

. , :

  • , warray . , .
  • , , . , .
  • , .

.

if(!warray)
{
    warray = malloc(sizeof(struct word));
    printf("Made array.\n");
}

.

else
{
    printf("New.\n");
    warray = realloc(warray, (elements++)*sizeof(struct word));
}

. , , .

while(i < elements)
{
    printf("in while loop\n");
    if(strcmp(input, warray[i].str) == 0)
    {
        warray[i].wc++;
    }
    else
    {
        ++i;
    }
}

. , warray[i].wc++;. .

if(strcmp(input, warray[i].str) == 1)
{
    printf("Inside save if statement\n");
    warray[elements].str = malloc(strlen(input)+1);
    strcpy(warray[elements].str, input);
    warray[elements].wc = 1;
    elements++;
}

. i elements. 0 elements-1. , warray[i] warray[elements] . ( elements warray = realloc(warray, (elements++)*sizeof(struct word));)

: for(j = 0; j < cw[99]; j++) getwords .

. .

warray = realloc(warray, (++elements)*sizeof(struct word));

warray = realloc(warray, (elements++)*sizeof(struct word));

Chronos.

+2

, , . !

, getWords, for-loop ( "for(j = 0;..." ) "j < cw[99]"... , "j < 100". , c [99], , REACHED !

-, save else- , warray ... , POST- elements, . PRE-increment elements , .

warray = realloc(warray, (++elements)*sizeof(struct word));

-, save, , , ... , .

, , , , .

, ...

+2

, :

int elements = sizeof(warray)/sizeof(struct word);

sizeof(warray) , . sizeof(struct word) sizeof(pointer)+padding+sizeof(int), sizeof(pointer) / (sizeof(pointer)+padding+sizeof(int)), 4 / (4+0+4) 4/8 . - elements 0 , save, malloc(0), undefined. NULL, , warray[i], segfault. NULL, .

save .

Also, your string reallocis incorrect. When performing elements++, you say that if the number of elements was earlier than 1, you should select only 1, and elementsincrease for a while to the next point in the sequence. You want ++elementsone that increases the number of elements before performing the distribution (for example, you have 1 and now you want 2).

There may be other mistakes, but those were the ones I noticed.

+1
source

All Articles