S_strip (s, del) in C, is there a more optimized version than this?

This is my first post here (I'm sorry that I did not discover this great community earlier).

Anyway, I wrote a C function that removes from the string s any character contained in the string del. I was wondering if there is room for improvement in speed, especially in the part that searches for the characters contained in del inside the for loop (I used strpbrk(), but pmg wisely suggested strchr()).

Bug hunters are welcome too! I find it reliable, but you never know.

Here's the code (thanks in advance for any answers) ...

Current version

// Delete from s, in place, every character that also occurs in del.
// Method: a read cursor walks over s while a write cursor trails behind it
// and receives only the characters that are to be kept; the shortened
// string is then NUL-terminated. Returns s.

char *s_strip(char *s, const char *del)
{
    char *dst = s;                          // next slot for a kept character

    for (const char *src = s; *src != '\0'; ++src) {
        if (strchr(del, *src) == NULL) {    // *src is not a delete character
            *dst = *src;
            ++dst;
        }
    }

    *dst = '\0';                            // terminate the shortened string
    return s;
}

Original version

// Strip from s, in place, every character found in del; returns s.
// Membership is tested with strpbrk(): it returns the scan position itself
// only when the character at that position belongs to del.
char *s_strip(char *s, const char *del)
{
    char *scan = s;                 // read position
    char *keep = s;                 // write position for kept characters

    while (*scan != '\0') {
        if (strpbrk(scan, del) != scan)     // *scan is not in del: keep it
            *keep++ = *scan;
        scan++;
    }

    *keep = '\0';                   // terminate the shortened string
    return s;
}
+3
source share
3 answers

. 8- , :

// Remove from s, in place, every character that occurs in del, using a
// 256-entry lookup table indexed by the character's unsigned char value.
// Returns a pointer to the terminating NUL of the shortened string (not
// to its start).
char *s_strip(char *s, const char *del)
{
    char is_deleted[256] = { 0 };
    unsigned char *out = (unsigned char *)s;

    // Flag every character listed in del.
    for (const unsigned char *d = (const unsigned char *)del; *d != '\0'; d++)
        is_deleted[*d] = 1;

    // Compact s, keeping only unflagged characters.
    for (const unsigned char *in = (const unsigned char *)s; *in != '\0'; in++) {
        if (!is_deleted[*in])
            *out++ = *in;
    }

    *out = '\0';
    return (char *)out;
}

(strpbrk() strchr()) . , .

, strlen(). , , - , , , , s_strip() . ( , strcpy() strcat() .)

'unsigned char' , . , (, s to up3 `up2 ).

, , , , , :

// Zero-initialized 256-entry table that backs the map.
char realmap[256] = { 0 };
// map points at the middle of realmap, so indices -128..+127 all land inside realmap.
char *map = &realmap[128];

With that, you can index map[-128] .. map[+127], and every access stays within realmap.


​​ - , . , , , , . (Mac Mini 2 GHZ Intel Core 2 Duo, MacOS X 10.6.7, GCC 4.1.2).

- . 10 . - ; 22 (6 , 4 ). , , ASCII, , , , . , - "null" , .

size     map        strchr     strpbrk    null       micro1     micro2
   2     0.000542   0.002292   0.001009   0.000106   0.000639   0.000707
   8     0.000654   0.004125   0.017524   0.000106   0.001012   0.000966
  32     0.001667   0.015815   0.063314   0.000196   0.002549   0.002247
 128     0.006385   0.064513   0.313749   0.000171   0.008455   0.007188
 512     0.022231   0.257910   1.293040   0.000282   0.013284   0.011829
2048     0.089066   1.035052   5.297966   0.000819   0.043391   0.037597

, strchr() strpbrk() ( 5-10 , strchr(), 5-50 , strpbrk()), . ( - .)

"micro1" "micro2" , AShelly. (- 128 512 - ), - , .


(. ), timer.c, timer.h. , .

#include <string.h>

// Prototypes of the three variants being compared:
// s_strip1 uses strpbrk(), s_strip2 uses strchr(), s_strip3 uses a lookup table.
extern char *s_strip1(char *s, const char *del);
extern char *s_strip2(char *s, const char *del);
extern char *s_strip3(char *s, const char *del);

// Lookup-table variant: removes from s, in place, every character present
// in del. Returns a pointer to the terminating NUL of the result.
char *s_strip3(char *s, const char *del)
{
    char drop[256] = { 0 };
    const unsigned char *d = (const unsigned char *)del;
    const unsigned char *in = (const unsigned char *)s;
    unsigned char *out = (unsigned char *)s;

    // Mark each delete character in the table.
    for ( ; *d != '\0'; d++)
        drop[*d] = 1;

    // Copy forward only the characters that are not marked.
    while (*in != '\0')
    {
        const unsigned char c = *in++;
        if (!drop[c])
            *out++ = c;
    }
    *out = '\0';

    return (char *)out;
}

// strchr() variant: removes from s, in place, every character present in
// del, testing each character of s against del with strchr(). Returns s.
char *s_strip2(char *s, const char *del)
{
    char *out = s;

    for (const char *in = s; *in != '\0'; in++) {
        if (strchr(del, *in) != NULL)
            continue;               // character is listed in del: drop it
        *out++ = *in;
    }

    *out = '\0';
    return s;
}

// strpbrk() variant: removes from s, in place, every character present in
// del. A character is dropped when strpbrk(), started at that character,
// reports a match at that very position. Returns s.
char *s_strip1(char *s, const char *del)
{
    char *out = s;
    char *in = s;

    while (*in != '\0') {
        const char *hit = strpbrk(in, del);
        if (hit != in) {
            *out = *in;
            out++;
        }
        in++;
    }

    *out = '\0';
    return s;
}

#include <stdio.h>
#include "timer.h"
#include "timer.c"

// Number of times test() invokes the function under test per timed measurement.
enum { NUM_REPEATS = 10000 };
// Common signature of the strip functions handed to test().
typedef char *(*Function)(char *str, const char *del);

// Fill buffer with exactly buflen bytes by repeating a fixed pattern of
// letters, digits and punctuation. No NUL terminator is written.
static void fill_bytes(char *buffer, size_t buflen)
{
    static const char source[] =
        "abcdefghijklmnopqrstuvwxyz"
        "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
        "0123456789[]{}\\|,./?><;:'\"=+-_)(*&^%$#@!";
    const size_t pattern_len = sizeof(source) - 1;  // exclude the literal's NUL
    size_t remaining = buflen;
    char *out = buffer;

    while (remaining > 0)
    {
        // Copy a whole pattern, or just the tail that still fits.
        size_t chunk = (remaining < pattern_len) ? remaining : pattern_len;
        memmove(out, source, chunk);
        out += chunk;
        remaining -= chunk;
    }
}

/*
 * Time NUM_REPEATS calls of f on a freshly restored numbytes-character
 * string, then print the label, the size, the elapsed time and the first
 * 64 characters of the final buffer.
 *
 * f        - function to time; when null, only the per-iteration buffer
 *            restore is timed
 * fn       - label printed at the start of the result line
 * del      - set of characters passed to f for removal
 * numbytes - length of the test string, not counting its terminating NUL
 */
static void test(Function f, const char *fn, const char *del, size_t numbytes)
{
    Clock clk;
    char refbuf[numbytes + 1];      // +1 leaves room for the terminating NUL
    char buffer[numbytes + 1];
    char clkbuf[32];

    // fill_bytes() writes raw bytes only, so terminate explicitly: strcpy()
    // and the strip functions all require a NUL-terminated string.
    fill_bytes(refbuf, numbytes);
    refbuf[numbytes] = '\0';
    strcpy(buffer, refbuf);

    clk_init(&clk);
    clk_start(&clk);
    for (size_t i = 0; i < NUM_REPEATS; i++)
    {
        // Restore the pristine string (terminator included) before each call.
        memmove(buffer, refbuf, sizeof(buffer));
        if (f)
            (*f)(buffer, del);
    }
    clk_stop(&clk);
    // %zu is the conversion that matches size_t.
    printf("%-17s (%4zu) = %10s (%.64s)\n", fn, numbytes,
           clk_elapsed_us(&clk, clkbuf, sizeof(clkbuf)), buffer);
}

// Benchmark driver: for string sizes 2, 8, 32, 128, 512 and 2048, run each
// variant (and the null baseline) ten times with the same delete set.
int main(void)
{
    static const char del[] = "AJQRSTajqrst234567=+[]";

    for (int size = 2; size <= 2048; size *= 4) {
        for (int round = 0; round < 10; round++) {
            test(s_strip1, "s_strip1:strpbrk:", del, size);
            test(s_strip2, "s_strip2:strchr:",  del, size);
            test(s_strip3, "s_strip3:map",      del, size);
            test(0,        "s_strip4:null",     del, size);
        }
    }
    return 0;
}

Micro-optimizations

// Prototypes of the two branch-free lookup-table variants defined below.
extern char *s_strip4(char *s, const char *del);
extern char *s_strip5(char *s, const char *del);

// Branch-free lookup-table variant: removes from s, in place, every
// character present in del. The table holds 1 for characters to keep and
// 0 for characters to drop, so each character is always written and the
// write pointer advances by the table value. Returns a pointer to the
// terminating NUL of the result.
char *s_strip5(char *s, const char *del)
{
    char advance[256];
    unsigned char *out = (unsigned char *)s;

    // Start with "keep everything", then zero the entries listed in del.
    memset(advance, 1, sizeof(advance));
    for (const unsigned char *d = (const unsigned char *)del; *d != '\0'; d++)
        advance[*d] = 0;

    for (const unsigned char *in = (const unsigned char *)s; *in != '\0'; in++)
    {
        const unsigned char c = *in;
        *out = c;               // unconditional store; overwritten later if dropped
        out += advance[c];
    }
    *out = '\0';

    return (char *)out;
}

// Branch-free lookup-table variant: removes from s, in place, every
// character present in del. The table holds 1 for characters to drop;
// each character is always written and the write pointer advances by the
// logical negation of the table value. Returns a pointer to the
// terminating NUL of the result.
char *s_strip4(char *s, const char *del)
{
    char dropped[256] = { 0 };
    const unsigned char *d = (const unsigned char *)del;
    const unsigned char *in = (const unsigned char *)s;
    unsigned char *out = (unsigned char *)s;

    // Flag the characters listed in del.
    for ( ; *d != '\0'; d++)
        dropped[*d] = 1;

    while (*in != '\0')
    {
        const unsigned char c = *in++;
        *out = c;               // unconditional store; overwritten later if dropped
        out += !dropped[c];
    }
    *out = '\0';

    return (char *)out;
}
+2

Why not use strchr() instead of strpbrk()?

After all, you are only checking 1 character at a time.

/* ... */
/* copy *cp1 through cp2 only when strchr() does not find it in del */
if (!strchr(del, *cp1)) *cp2++ = *cp1;
/* ... */
+3

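Note that when you call strpbrk at every position, each call scans ahead until it finds a character from del. For a 100-character string with no "bad" characters, you end up examining 100 + 99 + 98 + 97 +... + 1 characters.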

strpbrk? null, . , strpbrk.

- :

 dest=src=s;
 bad = strpbrk(src,del);
 if (!bad) return s;  //early exit if there is no work.
 while (bad)
 {
    while (src<bad) *dst++=*src++;
    bad = strpbrk(++src,del);
 }
 while (*src) *dst++=*src++; //copy the remainder
 *dst='\0';
 return s;
+2