For example:
input(string): foo $$ foo ## foo []
search(string): foo
output(array): $$ ,## ,[]
I tried it:
/* Input text and the delimiter string that separates the wanted tokens. */
char * str = "foo $$ foo ## foo []";
char * s = "foo";
/* Number of characters copied into tmpbuf since the last full delimiter match. */
int buf_len = 0;
int len = strlen(s);
/* Number of delimiter characters matched so far; only reset after a full match. */
int i = 0;
/* Array of token pointers. The argument to malloc is a byte count, not an element count. */
char ** buffer = malloc(MAX_BUFFER_SIZE);
/* Single scratch area that receives every non-delimiter character. */
char * tmpbuf = malloc(MAX_BUFFER_SIZE);
/* p reads the input, buf is the next free slot in buffer, tbuf the next free byte in tmpbuf. */
char * p = str;
char ** buf = buffer;
char * tbuf = tmpbuf;
while(*p)
{
if(*p == *s)
{
/* Advance through the input for as long as it keeps matching the delimiter. */
/* NOTE(review): once i == len, *(s + i) is '\0'; if *p is '\0' too the loop keeps going. */
while(*p == *(s + i))
{
i++;
p++;
}
if(i == len)
{
/*
 * Stores the current write position inside tmpbuf, not a copy of the
 * token, and no '\0' is written between tokens. Each stored pointer
 * therefore sees everything that is appended to tmpbuf afterwards.
 */
*buf ++ = tbuf;
/* Zeroes buf_len bytes starting at tbuf, i.e. ahead of the data already written. */
memset(tbuf,0,buf_len);
i = buf_len = 0;
}
}
else
{
/* Not the first delimiter character: append it to the scratch area. */
*tbuf ++= *p;
buf_len ++;
}
/* Executed after a delimiter match as well, so the character following it is skipped. */
p++;
}
/* NULL sentinel marks the end of the pointer array for the print loop below. */
*buf ++= NULL;
int x;
for(x = 0; buffer[x]; x++)
{
printf("%s\n", buffer[x]);
}
free(buffer);
free(tmpbuf);
That shows the following output:
$$ ## []
## []
[]
But the expected output is:
$$
##
[]
How can I fix this?
It is because you do not copy the contents of tbuf
to buf
when you say:
*buf ++ = tbuf;
What you do is save a reference to the current position in tbuf
(or tmpbuf
if you like).
tmpbuf
gets filled with everything but the delimiter.
It is something like, at end of loop:
01234567 <- offset
tmpbuf = "$$ ## []"
buf[0] = tmpbuf+0;
buf[1] = tmpbuf+3;
buf[2] = tmpbuf+6;
Or very simplified memory table:
memory
address value
tmpbuf -> 0x01 [ $] <- buffer[0] points here
0x02 [ $]
0x03 [ ]
0x04 [ #] <- buffer[1] points here
0x05 [ #]
0x06 [ ]
0x07 [ [] <- buffer[2] points here
0x08 [ ]]
0x09 [ ]
...
buffer -> 0x3A [0x01]
0x3B [0x04]
0x3C [0x07]
0x3D [ ]
0x3E [ ]
...
EDIT
For the fun of it: a pointer-based, dynamic approach that does not use strstr()
.
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * Report whether the string starting at `hay` begins with `needle`.
 *
 * Every character of `needle` is compared, including the first one, and the
 * scan never moves past the terminator of either string. An empty `needle`
 * is a prefix of every string.
 *
 * Returns 1 when `needle` is a prefix of `hay`, 0 otherwise.
 */
int is_needle(char *hay, char *needle)
{
    while (*needle != '\0') {
        /* Also catches `hay` ending early: its '\0' cannot equal *needle here. */
        if (*hay != *needle)
            return 0;
        ++hay;
        ++needle;
    }
    return 1;
}
/*
 * Locate the first occurrence of `needle` inside `hay`.
 *
 * Returns a pointer to the start of the match, or a pointer to the
 * terminating '\0' of `hay` when there is no match, so callers test the
 * result with `*result` rather than against NULL.
 */
char *find(char *hay, char *needle)
{
    char *cursor;

    for (cursor = hay; *cursor != '\0'; ++cursor) {
        /* Cheap first-character filter before the full comparison. */
        if (*cursor != *needle)
            continue;
        if (is_needle(cursor, needle))
            return cursor;
    }
    return cursor;
}
/*
 * Append a copy of the first `slen` bytes of `val` to the NULL-terminated
 * string vector `*vs`, which currently holds `*vslen` strings.
 *
 * The copy is NUL-terminated. The vector is grown by one slot, the new last
 * slot is set to NULL, and `*vslen` is incremented. `*vs` may be moved by
 * realloc, which is why it is passed by address.
 *
 * Returns the new number of strings. On allocation failure the function
 * prints a message with perror() and terminates the process with exit(1).
 */
int pushstr(char ***vs, size_t *vslen, char *val, size_t slen)
{
    /* The slot at index *vslen is the one that receives the new string. */
    char **slot = &(*vs)[*vslen];
    char *copy = realloc(*slot, slen + 1);
    char **grown;

    *slot = copy;
    if (copy == NULL) {
        perror("pushstr.1");
        exit(1);
    }
    memcpy(copy, val, slen);
    copy[slen] = '\0';

    /* One more string stored; make room for it plus the NULL sentinel. */
    *vslen += 1;
    grown = realloc(*vs, sizeof(char*) * (*vslen + 1));
    *vs = grown;
    if (grown == NULL) {
        perror("pushstr.2");
        exit(1);
    }
    grown[*vslen] = NULL;
    return *vslen;
}
/*
 * Demo driver: split a fixed sample string on the delimiter "foo", collect
 * the non-empty pieces in a NULL-terminated vector, print them, and release
 * everything again.
 */
int main(void)
{
    char *hay = "foo $$ foo ## foo [] fox @@ foo ??";
    char *needle = "foo";
    size_t nlen = strlen(needle);
    size_t vslen = 0;
    size_t idx;
    char *np;
    char **vs = malloc(sizeof(char*));

    if (vs == NULL) {
        perror("main");
        return 1;
    }
    /* An empty vector is just the NULL sentinel. */
    vs[0] = NULL;

    for (;;) {
        np = find(hay, needle);
        /* find() points at the terminating '\0' when nothing is left. */
        if (*np == '\0')
            break;
        /* Only store a piece when there is text in front of the delimiter. */
        if (np != hay)
            pushstr(&vs, &vslen, hay, np - hay);
        hay = np + nlen;
    }
    /* Text after the last delimiter, if any. */
    if (np != hay)
        pushstr(&vs, &vslen, hay, np - hay);

    for (idx = 0; vs[idx] != NULL; idx++)
        printf("V: '%s'\n", vs[idx]);

    for (idx = 0; vs[idx] != NULL; idx++)
        free(vs[idx]);
    free(vs);
    return 0;
}
Here's a function for splitting a string into an array of strings:
#include <assert.h>
#include <string.h>
/*
 * Break @str into the pieces that lie between occurrences of @delim.
 *
 * The bytes of every piece are stored back to back in @bytes_out, each
 * followed by a NUL terminator, and the start of each piece is recorded in
 * @pointers_out, which thereby becomes an array of C strings. Empty pieces
 * (leading, trailing or adjacent delimiters) are kept.
 *
 * No bounds are checked: the caller must supply buffers large enough for
 * every piece. @delim must not be empty.
 *
 * Returns the number of entries stored in @pointers_out (always >= 1).
 */
size_t explode(const char *delim, const char *str,
               char **pointers_out, char *bytes_out)
{
    const size_t step = strlen(delim);
    size_t emitted = 0;
    const char *hit;

    assert(step > 0);

    /* One piece per delimiter found ... */
    while ((hit = strstr(str, delim)) != NULL) {
        const size_t span = (size_t)(hit - str);

        pointers_out[emitted++] = bytes_out;
        memcpy(bytes_out, str, span);
        bytes_out[span] = '\0';

        /* Next piece starts right after this one's terminator. */
        bytes_out += span + 1;
        /* Resume scanning just past the delimiter. */
        str = hit + step;
    }

    /* ... plus the remainder of the input as the final piece. */
    pointers_out[emitted++] = bytes_out;
    strcpy(bytes_out, str);
    return emitted;
}
Usage:
#include <stdio.h>
/* ... */
#define BIG_ENOUGH 1000
/*
 * Demo driver: split a sample string on "foo" with explode() and print each
 * resulting piece wrapped in double quotes, one per line.
 */
int main(void)
{
    char item_bytes[BIG_ENOUGH];   /* storage for the piece contents */
    char *items[BIG_ENOUGH];       /* start of each piece inside item_bytes */
    size_t count = explode("foo", "foo $$ foo ## foo []", items, item_bytes);
    size_t i = 0;

    while (i < count) {
        printf("\"%s\"\n", items[i]);
        i++;
    }
    return 0;
}
Output:
""
" $$ "
" ## "
" []"
This does not produce the exact output you asked for, as I'm not sure how you want to handle surrounding spaces and occurrences of the item delimiter (in your example, "foo"
) at the beginning of the string. Instead, I mimicked PHP's explode function.
I'd like to point out how my explode
function punts on memory management. It is up to the caller to ensure the buffers are big enough. This is fine for a quick script, but might be annoying in a more serious program, where you'll have to do some math to use this function correctly. I could have written a more "robust" implementation that performs its own allocation, but:
That would clutter the implementation.
It doesn't give the caller the option of using their own memory allocator.
So implementing explode
the way I did is "bad" because it is hard to use correctly, and worse, easy to use incorrectly. On the other hand, it is "good" in that it separates the concerns of functionality and memory management.
This is a task for strstr()
. I changed your code a little bit to make use of it.
/*
 * Store a NUL-terminated heap copy of the first `len` bytes of `str` in the
 * slot `**buf` and advance `*buf` to the next slot.
 *
 * Empty pieces (len == 0) are skipped. The copy is allocated with one extra
 * byte and explicitly terminated, so it can be used as an ordinary C string
 * (e.g. printed with "%s"). The caller owns the copy and must free() it.
 *
 * Returns 1 when a string was stored, 0 when nothing was stored (empty
 * piece or allocation failure); in the latter case `*buf` is left untouched.
 */
int add_to_buf(char *str, size_t len, char ***buf)
{
    char *copy;

    if (len == 0)
        return 0;

    copy = malloc(len + 1);
    if (copy == NULL)
        return 0;

    memcpy(copy, str, len);
    copy[len] = '\0';

    **buf = copy;
    ++*buf;
    return 1;
}
/*
 * Demo driver: split a sample string on the delimiter "foo" using strstr(),
 * store the non-empty pieces in a NULL-terminated pointer array, then print
 * and release them.
 */
int main(void)
{
    char *str = "foo $$ foo ## foo []";
    char *s = "foo";
    char **buffer = malloc(MAX_BUFFER_SIZE * sizeof(*buffer));
    char **buf = buffer;
    char *start, *end;
    size_t s_len = strlen(s);

    if (buffer == NULL) {
        perror("malloc");
        return 1;
    }

    start = str;
    end = strstr(str, s);
    while (end) {
        /* Text between the previous delimiter and this one. */
        add_to_buf(start, end - start, &buf);
        start = end + s_len;
        end = strstr(start, s);
    }
    /* Whatever follows the last delimiter. */
    add_to_buf(start, strlen(start), &buf);
    *buf = NULL;

    for (buf = buffer; *buf; ++buf) {
        printf("%s\n", *buf);
        /* Each piece was allocated by add_to_buf and is owned here. */
        free(*buf);
    }
    free(buffer);
    return 0;
}
You are using too many pointers for a simple program, and the way you use them makes the code hard to understand. One straightforward bug I see is that you are using buffer**
(an array of strings) but you are only allocating a single string. You are using this array of strings to store the tokens, which will cause a memory violation somewhere.
Since you want to print the tokens, you don't need to store them in a separate array. This will do:
#include<stdio.h>
#include<string.h>
/*
 * Split a sample string on spaces with strtok() and print every token that
 * is not equal to the search word, one per line.
 */
int main(int ac, char*argv[]) {
    /* strtok() modifies its input, so the text lives in a writable array. */
    char str[] = "foo $$ foo ## foo []";
    char * s = "foo";
    char *tok;

    for (tok = strtok(str, " "); tok != NULL; tok = strtok(NULL, " ")) {
        /* Skip tokens that are exactly the search word. */
        if (strcmp(tok, s) == 0)
            continue;
        printf("%s\n", tok);
    }
    return 0;
}
Note that here the delimiter is whitespace which makes it easier here.
The strtok function was designed for this task:
#include <string.h>
...
char *token;
/*
 * strtok() overwrites delimiter characters in its input with '\0', so the
 * text must be a writable array; passing a string literal is undefined
 * behavior.
 */
char line[] = "LINE TO BE SEPARATED";
char *search = " ";
/* Token will point to "LINE". */
token = strtok(line, search);
/* Token will point to "TO". */
token = strtok(NULL, search);
strtok
's delimiter string considers each of its individual characters as valid delimiters. Using "foo"
as a strtok
delimiter string would split an input like "f $$ f ## f []"
into three tokens, which isn't what the OP wants - Matt Eckert 2012-04-03 20:35
strstr
and strcpy
is going to make the task a lot clearer - Karl Bielefeldt 2012-04-03 20:46