All-inclusive Buffer for C
Experimental string buffer featuring
- SSO (small string optimization): inline short data into the type.
- views (or 'slices'): no-copy references to sub-strings.
- refcount (reference counting): account for views before mutation or release.
- automated allocations
Aims at security with decent speed.
Todo: thread safety
Buffet is a tagged union with 4 modes.
// Hard values show 64-bit
union Buffet {
    struct ptr {
        char*   data
        size_t  len
        size_t  off:62, tag:2 // tag = OWN|SSV|VUE
    }
    struct sso {
        char    data[22]
        uint8_t refcnt
        uint8_t len:6, tag:2  // tag = SSO
    }
}
sizeof(Buffet) == 24
The tag sets a Buffet's mode :
- OWN: co-owning slice of a store
- SSO: embedded char array
- SSV: (small string view) view on an SSO
- VUE: non-owning view on any data
If OWN, Buffet.data points into an allocated heap store :
struct Store {
    size_t   cap    // store capacity
    size_t   len    // store length
    uint32_t refcnt // number of views on store
    uint32_t canary // invalidates store if modified
    char     data[] // buffer data, shared by owning views
}
#include "../buffet.h"
int main() {
    
// SHARED OWN =================
    char large[] = "DATA STORE IS HEAP ALLOCATION.";
    Buffet own1 = bft_memcopy(large, sizeof(large)-1);
    // Now own1 owns a store housing a copy of `large`
    bft_dbg(&own1); 
    //-> OWN 30 "DATA STORE ..."
    // View "STORE" in own1 :
    Buffet own2 = bft_view(&own1, 5, 5);
    // Now own1 and own2 share the store, whose refcount is 2
    bft_dbg(&own2); 
    //-> OWN 5 "STORE"
// SSO & SSV =================
    char small[] = "SMALL STRING";
    Buffet sso1 = bft_memcopy(small, sizeof(small)-1);
    bft_dbg(&sso1); 
    //-> SSO 12 "SMALL STRING"
    // View "STRING" in sso1 :
    Buffet ssv1 = bft_view(&sso1, 6, 6);
    bft_dbg(&ssv1); 
    //-> SSV 6 "STRING"
// VUE =======================
    char any[] = "SOME BYTES";
    // View "BYTES" in `any` :
    Buffet vue1 = bft_memview(any+5, 5);
    bft_dbg(&vue1); 
    //-> VUE 5 "BYTES"
    return 0;
}
make && make check
While extensive, unit tests may not yet cover all cases.
Buffet aims at preventing memory faults, including from user.
(Except of course losing scope and such.)
// (pseudo code)
// overflow
buf = new(8)
append(buf, large_str) // Done
// invalid ref
buf = memcopy(short_str) // SSO
view = view(buf)
append(buf, large_str) // would mutate SSO to OWN
// => abort & warn "Append would invalidate views on SSO"
// double-free
bft_free(buf)
bft_free(buf) // OK
// use-after-free
bft_free(buf)
append(buf, "foo") // Done. Now buf is "foo".
// aliasing
alias = buf // should be `alias = bft_dup(buf)`
bft_free(buf)
bft_free(alias) // OK. Possible warning "Bad canary. Double free ?"
// Etc...
To this end, operations like view() or free() may check the store's header.
If wrong, the operation aborts and returns an empty buffet.
Checks are enabled by #define MEMCHECK or building with
MEMCHECK=1 make
Warnings are enabled by #define DEBUG or building with
DEBUG=1 make
NB: Even with checks, some aliasing can be fatal.
own = memcopy(large_str)
view = view(own)
alias = view
bft_free(view)
bft_free(own) // refcnt == 0, free(store) !
// alias now points into freed memory...
See src/check.c unit-tests and warnings output.
make && make bench (requires libbenchmark-dev)
NB: The lib is not much optimized and the bench may be amateurish.
On a weak Core i3 :
MEMVIEW_cpp/8            0.609 ns
MEMVIEW_buffet/8         6.36 ns
MEMCOPY_c/8              16.7 ns
MEMCOPY_buffet/8         11.9 ns
MEMCOPY_c/32             15.3 ns
MEMCOPY_buffet/32        26.3 ns
MEMCOPY_c/128            16.8 ns
MEMCOPY_buffet/128       29.8 ns
MEMCOPY_c/512            24.9 ns
MEMCOPY_buffet/512       39.3 ns
MEMCOPY_c/2048           94.1 ns
MEMCOPY_buffet/2048      109 ns
MEMCOPY_c/8192           196 ns
MEMCOPY_buffet/8192      282 ns
APPEND_cpp/8/4           10.9 ns
APPEND_buffet/8/4        16.3 ns
APPEND_cpp/8/16          36.5 ns
APPEND_buffet/8/16       30.2 ns
APPEND_cpp/24/4          49.0 ns
APPEND_buffet/24/4       30.1 ns
APPEND_cpp/24/32         48.1 ns
APPEND_buffet/24/32      28.8 ns
SPLITJOIN_c              2782 ns
SPLITJOIN_cpp            3317 ns
SPLITJOIN_buffet         1397 ns
bft_new
bft_memcopy
bft_memview
bft_copy
bft_copyall
bft_view
bft_dup  (don't alias buffets, use this)
bft_append
bft_split
bft_splitstr
bft_join
bft_free
bft_cmp
bft_cap
bft_len
bft_data
bft_cstr
bft_export
Buffet bft_new (size_t cap)
Create a new empty Buffet of minimum capacity cap.
Buffet buf = bft_new(40);
bft_dbg(&buf); 
// OWN 0 ""
Buffet bft_memcopy (const char *src, size_t len)
Create a new Buffet by copying len bytes from src.
Buffet copy = bft_memcopy("Bonjour", 3);
// SSO 3 "Bon"
Buffet bft_memview (const char *src, size_t len)
Create a new Buffet viewing len bytes from src.
You get a window into src without copy or allocation.
NB: You shouldn't directly memview a Buffet's data. Use view()
char src[] = "Eat Buffet!";
Buffet view = bft_memview(src+4, 6);
// VUE 6 "Buffet"
Buffet bft_copy (const Buffet *src, ptrdiff_t off, size_t len)
Copy len bytes at offset off from Buffet src into a new Buffet.
Buffet src = bft_memcopy("Bonjour", 7);
Buffet cpy = bft_copy(&src, 3, 4);
// SSO 4 "jour"
Buffet bft_copyall (const Buffet *src)
Copy all bytes from Buffet src into a new Buffet.
Buffet bft_view (Buffet *src, ptrdiff_t off, size_t len)
View len bytes of Buffet src, starting at off.
You get a window into src without copy or allocation.
The return internal type depends on src type :
- view(SSO) -> SSV(refcounted)
- view(SSV) -> SSV on src's target
- view(OWN) -> OWN(as refcounted store co-owner)
- view(VUE) -> VUE on src's target
If the return is OWN, the target store won't be released before either
- the return is discarded by bft_free
- the return is detached by e.g. appending to it.
#include "../buffet.h"
int main() {
    char text[] = "Bonjour monsieur Buddy. Already speaks french!";
    // view sso
    Buffet sso = bft_memcopy(text, 16); // "Bonjour monsieur"
    Buffet ssv = bft_view(&sso, 0, 7);
    bft_dbg(&ssv);
    // view ssv
    Buffet Bon = bft_view(&ssv, 0, 3);
    bft_dbg(&Bon);
    // view own
    Buffet own = bft_memcopy(text, sizeof(text));
    Buffet ownview = bft_view(&own, 0, 7);
    bft_dbg(&ownview);
    // detach view
    bft_append (&ownview, "!", 1);
    // bft_free(&ownview); 
    bft_free(&own); // Done
    // view vue
    Buffet vue = bft_memview(text+8, 8); // "monsieur"
    Buffet mon = bft_view(&vue, 0, 3);
    bft_dbg(&mon);
    return 0;
}
$ cc view.c libbuffet.a -o view && ./view
SSV 7 data:"Bonjour"
SSV 3 data:"Bon"
OWN 7 data:"Bonjour"
VUE 3 data:"mon"
Buffet bft_dup (const Buffet *src)
Create a shallow copy of src.
Use this instead of aliasing a Buffet.
Buffet src = bft_memcopy("Hello", 5);
Buffet cpy = src; // BAD
Buffet cpy = bft_dup(&src); // GOOD
bft_dbg(&cpy);
// SSO 5 "Hello"
Rem: aliasing would mostly work but mess up refcounting (without crash if store protections are enabled) :
Buffet alias = sso; //ok if sso was not viewed
Buffet alias = own; //not refcounted
Buffet alias = vue; //ok
void bft_free (Buffet *buf)
Discards buf.
- aborts if buf is an SSO with views.
- otherwise, buf is zeroed into an empty SSO.
- if buf was a view, its target refcount is decremented.
- if buf was the last view on a store, the store is released.
Security:
- the zeroing makes double-free harmless.
- the only problematic use-after-free would be of an OWN alias (not recommended), but the store management prevents stale memory access.
#include "../buffet.h"
int main() {
    char text[] = "Le grand orchestre de Patato Valdez";
    Buffet own = bft_memcopy(text, sizeof(text));
    Buffet ref = bft_view(&own, 9, 9); // "orchestre"
    bft_free(&own); // A bit soon but ok, --refcnt
    bft_dbg(&own);  // SSO 0 ""
    bft_free(&ref); // Was last co-owner, store is released
    Buffet sso = bft_memcopy(text, 8); // "Le grand"
    Buffet ref2 = bft_view(&sso, 3, 5); // "grand"
    bft_free(&sso); // WARN line:328 bft_free: SSO has views on it
    bft_free(&ref2);
    bft_free(&sso); // OK now
    bft_dbg(&sso);  // SSO 0 ""
    return 0;
}
$ valgrind  --leak-check=full ./bin/ex/free
All heap blocks were freed -- no leaks are possible
size_t bft_cat (Buffet *dst, const Buffet *buf, const char *src, size_t len)
Concatenates buf and len bytes of src into resulting dst.
Returns total length or 0 on error.
Buffet buf = bft_memcopy("abc", 3);
Buffet dst;
size_t totlen = bft_cat(&dst, &buf, "def", 3);
bft_dbg(&dst);
// SSO 6 "abcdef"
size_t bft_append (Buffet *dst, const char *src, size_t len)
Appends len bytes from src to dst.
Returns new length or 0 on error.
Buffet buf = bft_memcopy("abc", 3); 
size_t newlen = bft_append(&buf, "def", 3);
bft_dbg(&buf);
// SSO 6 "abcdef"
NB: returns failure if buf has views and would mutate from SSO to OWN to increase capacity, invalidating the views :
Buffet foo = bft_memcopy("short foo ", 10);
Buffet view = bft_view(&foo, 0, 5);
// would mutate to OWN :
size_t rc = bft_append(&foo, "now too long for SSO", 20);
assert(rc==0); // meaning aborted
To prevent this, release views before appending to a small buffet.
Buffet* bft_split (const char* src, size_t srclen, const char* sep, size_t seplen, 
int *outcnt)
Splits src along separator sep into a Buffet Vue list of length *outcnt.
Being made of views, you can free(list) without leak provided no element was made an owner by e.g. appending to it.
Buffet* bft_splitstr (const char *src, const char *sep, int *outcnt);
Convenient split using strlen internally.
int cnt;
Buffet *parts = bft_splitstr("Split me", " ", &cnt);
for (int i=0; i<cnt; ++i)
    bft_print(&parts[i]);
// VUE 5 "Split"
// VUE 2 "me"
free(parts);
Buffet bft_join (Buffet *list, int cnt, const char* sep, size_t seplen);
Joins list on separator sep into a new Buffet.
int cnt;
Buffet *parts = bft_splitstr("Split me", " ", &cnt);
Buffet back = bft_join(parts, cnt, " ", 1);
bft_dbg(&back);
// SSO 8 'Split me'
int bft_cmp (const Buffet *a, const Buffet *b)
Compare two buffets' data using memcmp.
size_t bft_cap (Buffet *buf)
Get current capacity.
size_t bft_len (Buffet *buf)
Get current length.
const char* bft_data (const Buffet *buf)
Get current data pointer.
To ensure null-termination at buf.len, use bft_cstr.
const char* bft_cstr (const Buffet *buf, bool *mustfree)
Get current data as a null-terminated C string of max length buf.len.
If needed (when buf is a view), the data is copied into a new C string that must be freed if mustfree is set.
char* bft_export (const Buffet *buf)
Copies data up to buf.len into a new C string that must be freed.
void bft_print (const Buffet *buf)
Prints data up to buf.len.
void bft_dbg (Buffet *buf)
Prints buf state.
Buffet buf = bft_memcopy("foo", 3);
bft_dbg(&buf);
// SSO 3 "foo"
- ! views : decide clearly if r/o + CoW or writable
- store.len :  not updated when tailing view is detached..
 We lose future in-place optimization.
 Maybe record second to last range ?
- store checks : too many ?
 Decide user responsibility on mis-handling.
 Add #define ENABLE_MEMCHECKS
- SSO auto-release like own ?
- test 32-bit and small RAM
- write(), sprintf()
- resize()