In my open-source plain C code I use this simple structure to read and parse data from a string buffer:
/* Read cursor over an in-memory byte buffer. */
typedef struct lts_LoadState
{
const unsigned char * pos; /* next byte to read; ltsLS_readline sets it to NULL once the input is exhausted */
size_t unread; /* bytes not yet consumed; ltsLS_readline decrements it for every byte it advances pos */
} lts_LoadState;
The buffer is accessed with this simple API:
/* Initialize buffer: attach ls to the len bytes starting at data.
   (Presumably sets ls->pos = data and ls->unread = len -- the body is not shown here.) */
void ltsLS_init(lts_LoadState * ls,const unsigned char * data, size_t len);
/* Do we have something to read? (Actually a macro.)
   ltsLS_readline loops while this is true. */
BOOL ltsLS_good(ls);
/* How much do we have to read? (Actually a macro.)
   ltsLS_readline only tests the result against zero. */
size_t ltsLS_unread(ls);
/* Eat given number of characters, return pointer to beginning of eaten data.
   NOTE(review): behaviour when fewer than len bytes remain is not visible here -- confirm. */
const unsigned char * ltsLS_eat(lts_LoadState * ls, size_t len);
Note: ltsLS_unread
may be replaced with return (ltsLS_good(ls)) ? SIZE_MAX : 0
without breaking the current implementation.
This code is used to load some data in a custom format from a string buffer. (This may be a better illustration.)
Now I need to load data not from a string buffer, but from a FILE
pointer.
I would hate to copy-paste the implementation, and would like to reuse existing code instead. (I'm OK with refactoring/adapting it, of course.)
This is textbook stuff in C++, but how can I do it in plain C without incurring runtime overhead?
Here is an example function that uses the lts_LoadState
API and that is not to be copy-pasted (but may be changed, of course, to support both string buffer and FILE *
):
/*
 * Consume bytes from ls up to and including the next '\n'.
 *
 * On success, stores the start of the line in *dest and its length in *len
 * (excluding the '\n', and also excluding a '\r' immediately before it),
 * and returns LUATEXTS_ESUCCESS.
 *
 * If the input runs out before a '\n' is seen, the state is marked exhausted
 * (unread = 0, pos = NULL), *dest and *len are left untouched, and
 * LUATEXTS_ECLIPPED is returned.
 */
static int ltsLS_readline(
    lts_LoadState * ls,
    const unsigned char ** dest,
    size_t * len
  )
{
  const unsigned char * line_start = ls->pos;
  unsigned char previous = 0;
  size_t consumed = 0;

  for (; ltsLS_good(ls); )
  {
    unsigned char current;

    if (ltsLS_unread(ls) == 0)
    {
      /* Nothing left: invalidate the state so the loop condition fails. */
      ls->unread = 0;
      ls->pos = NULL;
      continue;
    }

    current = *ls->pos++;
    ls->unread--;

    if (current != '\n')
    {
      previous = current;
      consumed++;
      continue;
    }

    /* End of line found; drop a trailing '\r' from the reported length. */
    *dest = line_start;
    if (previous == '\r')
    {
      *len = consumed - 1;
    }
    else
    {
      *len = consumed;
    }
    return LUATEXTS_ESUCCESS;
  }

  return LUATEXTS_ECLIPPED;
}
lts_LoadState
and its access functions - Alexander Gladysh 2012-04-04 20:25
It sounds like you want function pointers, which you would pass as parameters. C supports them, but the syntax isn't very pretty.
There is probably a bit of runtime overhead from the indirect call, but not much.
How do you pass a function as a parameter in C?
I hate to open this back up but this is something I was thinking about today and I don't think this has a great answer yet.
I think to implement duck typing in C what you're after is a global vtable. Every struct (object) should have the vtable as the first element in the struct. Basically whenever there's a behaviour you want to access through duck typing you would add it to this global vtable; then you can call it no matter what object is passed to your function, you'd be able to cast the object to the table, look to the location the behaviour should be, check it's non-null, and call it.
//Would be declared in some global.h or similar
/*
 * Table of behaviours reachable through duck typing. Every object that wants
 * to take part must embed this table as its FIRST member, so that a pointer
 * to the object can be converted to a pointer to the table.
 * A NULL entry means "this object does not implement that behaviour".
 */
typedef struct global_v_table_t
{
    char * (*toString)(void);
    //... other functions being accessed through duck typing go here
} global_v_table_t;
//--------------------
//In some other files:
//Then we create some objects:
typedef struct bicycle_t
{
    global_v_table_t global_v_table; /* must remain the first member */
    void (*ride)(void);
} bicycle_t;

/* Defined below; declared here so the initialiser can install it. */
char * BIKE_toString(void);

//When we initialise a bicycle
/*
 * Allocate a bicycle whose vtable implements toString only.
 * Returns NULL if allocation fails; the caller owns the result and
 * releases it with free().
 */
bicycle_t * bicycyle_init(void)
{
    bicycle_t * bike = malloc(sizeof *bike);
    if (NULL == bike)
    {
        return NULL;
    }
    //Req'd basically for every object within the project:
    //start with every behaviour (and every other pointer) set to NULL.
    //Assigning an initialised value yields real null pointers; unspecified
    //trailing members are zero-initialised too.
    *bike = (bicycle_t){ { NULL }, NULL };
    //Set the behaviours that this object exhibits:
    bike->global_v_table.toString = BIKE_toString;
    return bike;
}
/* An array rather than a pointer to a string literal, so that handing it
   out through a non-const char * is safe. */
static char bikeString[] = "I'm a bike!";
/* toString behaviour for bicycles: returns a pointer to static storage. */
char * BIKE_toString(void)
{
    return bikeString;
}
//----------------
//Now anyone can ask that an object provide its toString:
//The example uses an error logging function:
/*
 * Print obj's toString() result to stdout if obj implements toString.
 * obj must be NULL or point to an object whose first member is a
 * global_v_table_t; NULL objects and NULL toString entries are ignored.
 */
void logError(void * obj)
{
    char * (*toStringMethod)(void);
    if (NULL == obj)
    {
        return;
    }
    toStringMethod = ((global_v_table_t *)obj)->toString;
    if (NULL != toStringMethod)
    {//As long as the object implements the toString behaviour:
        //Never pass object-supplied text as the format string itself.
        printf("%s", toStringMethod()); //Print the object's toString.
    }
}
//Will tidy this code up a bit later, but this is what I'm thinking.
//Hopefully it is at least partly understandable. The obvious drawback
//of this implementation is that every object carries this massive
//v_table, which will be mostly NULLs as the number of behaviours grows.
//If you want to make C behave like other languages, though, you have
//to expect some sort of penalty, I guess...
I had a similar need in my postscript interpreter for the token
operator to work the same whether reading from a FILE*
or a string. It appears that you've done the first step, at least partly, of separating the parsing logic from the data access through a get/unget pair. If you can write string versions which match the prototypes of the library FILE*
functions, that simplifies the implementation.
For mine, I had a main entry point which takes function-pointers for the get/unget accessors.
/*
 * Tokenizer entry point shared by the file and string front ends.
 *
 *   ctx    - interpreter context, passed through to the accessors
 *   src    - source object (a file or a string); passed by pointer, and the
 *            string accessors in this file modify it as input is consumed
 *   next   - accessor that returns the next character from src
 *            (the string version returns EOF when the source is empty)
 *   back   - accessor that pushes character c back onto src
 *   retval - receives the scanned object; the callers in this file treat an
 *            object of type nulltype as "no token found"
 *
 * The callers in this file treat a nonzero return value as an error code
 * and propagate it unchanged.
 */
int toke (Xpost_Context *ctx,
Xpost_Object *src,
int (*next)(Xpost_Context *ctx, Xpost_Object *src),
void (*back)(Xpost_Context *ctx, int c, Xpost_Object *src),
Xpost_Object *retval);
The normal operator execution handles calling an appropriate interface function depending on the type. So the file version calls toke
and implements the two actions in lower-level terms.
/* file token token true
false
read token from file */
/* "next" accessor for file sources: fetch one character from the file
   object F via xpost_file_getc and return it. */
static
int Fnext(Xpost_Context *ctx,
          Xpost_Object *F)
{
    int ch;

    ch = xpost_file_getc(xpost_file_get_file_pointer(ctx->lo, *F));
    return ch;
}
/* "back" accessor for file sources: push character c back onto the stream
   behind file object F. The result of ungetc is deliberately ignored. */
static
void Fback(Xpost_Context *ctx,
           int c,
           Xpost_Object *F)
{
    FILE *stream = xpost_file_get_file_pointer(ctx->lo, *F);

    (void)ungetc(c, stream);
}
/*
 * token operator, file form.
 *
 * Returns ioerror if xpost_file_get_status reports false for F, or the
 * nonzero code returned by toke. Otherwise returns 0 after pushing onto the
 * operand stack either the scanned token followed by true, or just false
 * when toke produced an object of type nulltype.
 */
static
int Ftoken (Xpost_Context *ctx,
            Xpost_Object F)
{
    Xpost_Object token;
    int status;

    if (!xpost_file_get_status(ctx->lo, F))
        return ioerror;

    status = toke(ctx, &F, Fnext, Fback, &token);
    if (status != 0)
        return status;

    if (xpost_object_get_type(token) == nulltype) {
        /* Nothing scanned: report false only. */
        xpost_stack_push(ctx->lo, ctx->os, xpost_bool_cons(0));
        return 0;
    }

    xpost_stack_push(ctx->lo, ctx->os, token);
    xpost_stack_push(ctx->lo, ctx->os, xpost_bool_cons(1));
    return 0;
}
And the string version uses string implementations for the two actions.
/* string token substring token true
false
read token from string */
/*
 * "next" accessor for string sources.
 *
 * Returns EOF when the string view S is empty (comp_.sz == 0). Otherwise
 * returns the first byte of the view and shrinks the view by one byte from
 * the front (comp_.off is incremented, comp_.sz is decremented).
 *
 * The byte is converted through unsigned char, as fgetc() does, so that a
 * byte with the high bit set can never be returned as a negative value and
 * be mistaken for EOF.
 * NOTE(review): this matters if xpost_string_get_pointer returns plain
 * char * on a platform where char is signed; the cast is a no-op if it
 * already returns unsigned char * -- confirm against its declaration.
 */
static
int Snext(Xpost_Context *ctx,
          Xpost_Object *S)
{
    int ret;
    if (S->comp_.sz == 0) return EOF;
    ret = (unsigned char)xpost_string_get_pointer(ctx, *S)[0];
    ++S->comp_.off;
    --S->comp_.sz;
    return ret;
}
/*
 * "back" accessor for string sources: undo one Snext by growing the view S
 * by one byte at the front and storing c there.
 *
 * Pushing back EOF is ignored, mirroring ungetc() (used by the file
 * accessor): Snext does not advance when it returns EOF, so rewinding in
 * that case would step the view backwards and overwrite a byte of the string.
 * NOTE(review): no check is made that comp_.off is nonzero before rewinding;
 * callers are assumed to push back at most what they have read -- confirm.
 */
static
void Sback(Xpost_Context *ctx,
           int c,
           Xpost_Object *S)
{
    if (c == EOF) return;
    --S->comp_.off;
    ++S->comp_.sz;
    xpost_string_get_pointer(ctx, *S)[0] = c;
}
/*
 * token operator, string form.
 *
 * Returns the nonzero code from toke on failure. Otherwise returns 0 after
 * pushing onto the operand stack either the remaining substring, the scanned
 * token and true (in that order), or just false when toke produced an
 * object of type nulltype.
 */
static
int Stoken (Xpost_Context *ctx,
            Xpost_Object S)
{
    Xpost_Object token;
    int status;

    status = toke(ctx, &S, Snext, Sback, &token);
    if (status != 0)
        return status;

    if (xpost_object_get_type(token) == nulltype) {
        /* Nothing scanned: report false only. */
        xpost_stack_push(ctx->lo, ctx->os, xpost_bool_cons(0));
        return 0;
    }

    /* S was advanced in place by the accessors, so it is now the unread tail. */
    xpost_stack_push(ctx->lo, ctx->os, S);
    xpost_stack_push(ctx->lo, ctx->os, token);
    xpost_stack_push(ctx->lo, ctx->os, xpost_bool_cons(1));
    return 0;
}
This is from the xpost postscript interpreter in the file src/lib/xpost_op_token.c .