From 8d8ce19da80e5c51d470870e7fdf7e5b3926f47a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Viktor=20S=C3=B6derqvist?= Date: Wed, 5 Feb 2025 13:15:28 +0100 Subject: [PATCH] Embed keys and hash fields as SDS type 5 (#1613) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now, these embedded strings were SDS type 8, because they were copied as-is from EMBSTR encoded string objects, which always use SDS type 8. This change allows them to be embedded as SDS type 5, saving two bytes. The implementation of embeddeding SDS strings in other structures is refactored. `sdswrite()` is a new function to create an SDS representation into a buffer provided by the caller. `sdscopytobuffer()`, which just copied the layout of an existing sds, is deleted. The DEBUG SDSLEN command output is changed slightly, to account for correct allocation sizes of embedded strings and not report the same memory twice. Fixes #1567 --------- Signed-off-by: Viktor Söderqvist --- src/debug.c | 12 ++++--- src/object.c | 62 +++++++++++++++-------------------- src/sds.c | 66 ++++++++++++++++---------------------- src/sds.h | 22 ++++++++++--- src/server.c | 4 --- src/t_hash.c | 14 ++++---- tests/unit/type/string.tcl | 27 ++++++++++++++++ 7 files changed, 114 insertions(+), 93 deletions(-) diff --git a/src/debug.c b/src/debug.c index b7f8df04fa..fb9c77faa3 100644 --- a/src/debug.c +++ b/src/debug.c @@ -684,12 +684,16 @@ void debugCommand(client *c) { if (val->type != OBJ_STRING || !sdsEncodedObject(val)) { addReplyError(c, "Not an sds encoded string."); } else { + /* Report the complete robj allocation size as the key's allocation + * size. Report 0 as allocation size for embedded values. */ + size_t obj_alloc = zmalloc_usable_size(val); + size_t val_alloc = val->encoding == OBJ_ENCODING_RAW ? sdsAllocSize(val->ptr) : 0; addReplyStatusFormat(c, - "key_sds_len:%lld, key_sds_avail:%lld, key_zmalloc: %lld, " - "val_sds_len:%lld, val_sds_avail:%lld, val_zmalloc: %lld", - (long long)sdslen(key), (long long)sdsavail(key), (long long)sdsAllocSize(key), + "key_sds_len:%lld key_sds_avail:%lld obj_alloc:%lld " + "val_sds_len:%lld val_sds_avail:%lld val_alloc:%lld", + (long long)sdslen(key), (long long)sdsavail(key), (long long)obj_alloc, (long long)sdslen(val->ptr), (long long)sdsavail(val->ptr), - (long long)getStringObjectSdsUsedMemory(val)); + (long long)val_alloc); } } else if (!strcasecmp(c->argv[1]->ptr, "listpack") && c->argc == 3) { robj *o; diff --git a/src/object.c b/src/object.c index 94c2985edb..16370026de 100644 --- a/src/object.c +++ b/src/object.c @@ -52,16 +52,18 @@ * and expire fields can be omitted by passing NULL and -1, respectively. */ robj *createObjectWithKeyAndExpire(int type, void *ptr, const sds key, long long expire) { /* Calculate sizes */ + int has_embkey = key != NULL; int has_expire = (expire != -1 || - (key != NULL && sdslen(key) >= KEY_SIZE_TO_INCLUDE_EXPIRE_THRESHOLD)); - size_t key_sds_size = 0; + (has_embkey && sdslen(key) >= KEY_SIZE_TO_INCLUDE_EXPIRE_THRESHOLD)); + size_t key_sds_len = has_embkey ? sdslen(key) : 0; + char key_sds_type = has_embkey ? sdsReqType(key_sds_len) : 0; + size_t key_sds_size = has_embkey ? sdsReqSize(key_sds_len, key_sds_type) : 0; size_t min_size = sizeof(robj); if (has_expire) { min_size += sizeof(long long); } - if (key != NULL) { + if (has_embkey) { /* Size of embedded key, incl. 1 byte for prefixed sds hdr size. */ - key_sds_size = sdscopytobuffer(NULL, 0, key, NULL); min_size += 1 + key_sds_size; } /* Allocate and set the declared fields. */ @@ -72,18 +74,18 @@ robj *createObjectWithKeyAndExpire(int type, void *ptr, const sds key, long long o->ptr = ptr; o->refcount = 1; o->lru = 0; - o->hasembkey = (key != NULL); + o->hasembkey = has_embkey; /* If the allocation has enough space for an expire field, add it even if we * don't need it now. Then we don't need to realloc if it's needed later. */ - if (key != NULL && !has_expire && bufsize >= min_size + sizeof(long long)) { + if (has_embkey && !has_expire && bufsize >= min_size + sizeof(long long)) { has_expire = 1; min_size += sizeof(long long); } o->hasexpire = has_expire; /* The memory after the struct where we embedded data. */ - unsigned char *data = (void *)(o + 1); + char *data = (void *)(o + 1); /* Set the expire field. */ if (o->hasexpire) { @@ -93,8 +95,8 @@ robj *createObjectWithKeyAndExpire(int type, void *ptr, const sds key, long long /* Copy embedded key. */ if (o->hasembkey) { - sdscopytobuffer(data + 1, key_sds_size, key, data); - data += 1 + key_sds_size; + *data++ = sdsHdrSize(key_sds_type); + sdswrite(data, key_sds_size, key_sds_type, key, key_sds_len); } return o; @@ -146,18 +148,19 @@ static robj *createEmbeddedStringObjectWithKeyAndExpire(const char *val_ptr, const sds key, long long expire) { /* Calculate sizes */ - size_t key_sds_size = 0; - size_t min_size = sizeof(robj); + int has_embkey = (key != NULL); + size_t key_sds_len = has_embkey ? sdslen(key) : 0; + char key_sds_type = has_embkey ? sdsReqType(key_sds_len) : 0; + size_t key_sds_size = has_embkey ? sdsReqSize(key_sds_len, key_sds_type) : 0; + size_t val_sds_size = sdsReqSize(val_len, SDS_TYPE_8); + size_t min_size = sizeof(robj) + val_sds_size; if (expire != -1) { min_size += sizeof(long long); } - if (key != NULL) { + if (has_embkey) { /* Size of embedded key, incl. 1 byte for prefixed sds hdr size. */ - key_sds_size = sdscopytobuffer(NULL, 0, key, NULL); min_size += 1 + key_sds_size; } - /* Size of embedded value (EMBSTR) including \0 term. */ - min_size += sizeof(struct sdshdr8) + val_len + 1; /* Allocate and set the declared fields. */ size_t bufsize = 0; @@ -167,7 +170,7 @@ static robj *createEmbeddedStringObjectWithKeyAndExpire(const char *val_ptr, o->refcount = 1; o->lru = 0; o->hasexpire = (expire != -1); - o->hasembkey = (key != NULL); + o->hasembkey = has_embkey; /* If the allocation has enough space for an expire field, add it even if we * don't need it now. Then we don't need to realloc if it's needed later. */ @@ -177,7 +180,7 @@ static robj *createEmbeddedStringObjectWithKeyAndExpire(const char *val_ptr, } /* The memory after the struct where we embedded data. */ - unsigned char *data = (void *)(o + 1); + char *data = (void *)(o + 1); /* Set the expire field. */ if (o->hasexpire) { @@ -187,26 +190,13 @@ static robj *createEmbeddedStringObjectWithKeyAndExpire(const char *val_ptr, /* Copy embedded key. */ if (o->hasembkey) { - sdscopytobuffer(data + 1, key_sds_size, key, data); - data += 1 + key_sds_size; - } - - /* Copy embedded value (EMBSTR). */ - struct sdshdr8 *sh = (void *)data; - sh->flags = SDS_TYPE_8; - sh->len = val_len; - size_t capacity = bufsize - (min_size - val_len); - sh->alloc = capacity; - serverAssert(capacity == sh->alloc); /* Overflow check. */ - if (val_ptr == SDS_NOINIT) { - sh->buf[val_len] = '\0'; - } else if (val_ptr != NULL) { - memcpy(sh->buf, val_ptr, val_len); - sh->buf[val_len] = '\0'; - } else { - memset(sh->buf, 0, val_len + 1); + *data++ = sdsHdrSize(key_sds_type); + sdswrite(data, key_sds_size, key_sds_type, key, key_sds_len); + data += key_sds_size; } - o->ptr = sh->buf; + + /* Copy embedded value (EMBSTR) always as SDS TYPE 8. */ + o->ptr = sdswrite(data, val_sds_size, SDS_TYPE_8, val_ptr, val_len); return o; } diff --git a/src/sds.c b/src/sds.c index d956f834e5..2f40c9dc9c 100644 --- a/src/sds.c +++ b/src/sds.c @@ -41,7 +41,7 @@ const char *SDS_NOINIT = "SDS_NOINIT"; -static inline int sdsHdrSize(char type) { +int sdsHdrSize(char type) { switch (type & SDS_TYPE_MASK) { case SDS_TYPE_5: return sizeof(struct sdshdr5); case SDS_TYPE_8: return sizeof(struct sdshdr8); @@ -52,7 +52,8 @@ static inline int sdsHdrSize(char type) { return 0; } -static inline char sdsReqType(size_t string_size) { +/* Returns the minimum SDS type required to store a string of the given length. */ +char sdsReqType(size_t string_size) { if (string_size < 1 << 5) return SDS_TYPE_5; if (string_size <= (1 << 8) - sizeof(struct sdshdr8) - 1) return SDS_TYPE_8; if (string_size <= (1 << 16) - sizeof(struct sdshdr16) - 1) return SDS_TYPE_16; @@ -64,6 +65,7 @@ static inline char sdsReqType(size_t string_size) { #endif } +/* The maximum length of a string that can be stored with the given SDS type. */ static inline size_t sdsTypeMaxSize(char type) { if (type == SDS_TYPE_5) return (1 << 5) - 1; if (type == SDS_TYPE_8) return (1 << 8) - 1; @@ -99,29 +101,33 @@ static inline int adjustTypeIfNeeded(char *type, int *hdrlen, size_t bufsize) { * \0 characters in the middle, as the length is stored in the sds header. */ sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) { void *sh; - sds s; char type = sdsReqType(initlen); /* Empty strings are usually created in order to append. Use type 8 * since type 5 is not good at this. */ if (type == SDS_TYPE_5 && initlen == 0) type = SDS_TYPE_8; int hdrlen = sdsHdrSize(type); - unsigned char *fp; /* flags pointer. */ - size_t bufsize, usable; + size_t bufsize; assert(initlen + hdrlen + 1 > initlen); /* Catch size_t overflow */ sh = trymalloc ? s_trymalloc_usable(hdrlen + initlen + 1, &bufsize) : s_malloc_usable(hdrlen + initlen + 1, &bufsize); if (sh == NULL) return NULL; - if (init == SDS_NOINIT) - init = NULL; - else if (!init) - memset(sh, 0, hdrlen + initlen + 1); adjustTypeIfNeeded(&type, &hdrlen, bufsize); - usable = bufsize - hdrlen - 1; + return sdswrite(sh, bufsize, type, init, initlen); +} - s = (char *)sh + hdrlen; - fp = ((unsigned char *)s) - 1; +/* Writes an sds with type `type` into a buffer `buf` of size `bufsize`. Returns + * an sds handle to the string within the buffer. Use `sdsReqType(length)` to + * compute the type and `sdsReqSize(length, type)` to compute the required + * buffer size. You can use a larger `bufsize` than required, but usable size + * can't be greater than `sdsTypeMaxSize(type)`. */ +sds sdswrite(char *buf, size_t bufsize, char type, const char *init, size_t initlen) { + assert(bufsize >= sdsReqSize(initlen, type)); + int hdrlen = sdsHdrSize(type); + size_t usable = bufsize - hdrlen - 1; + sds s = buf + hdrlen; + unsigned char *fp = ((unsigned char *)s) - 1; /* flags pointer. */ switch (type) { case SDS_TYPE_5: { @@ -161,6 +167,10 @@ sds _sdsnewlen(const void *init, size_t initlen, int trymalloc) { break; } } + if (init == SDS_NOINIT) + init = NULL; + else if (!init) + memset(s, 0, initlen); if (initlen && init) memcpy(s, init, initlen); s[initlen] = '\0'; return s; @@ -191,25 +201,6 @@ sds sdsdup(const_sds s) { return sdsnewlen(s, sdslen(s)); } -/* - * This method returns the minimum amount of bytes required to store the sds (header + data + NULL terminator). - */ -static inline size_t sdsminlen(const_sds s) { - return sdslen(s) + sdsHdrSize(s[-1]) + 1; -} - -/* This method copies the sds `s` into `buf` which is the target character buffer. */ -size_t sdscopytobuffer(unsigned char *buf, size_t buf_len, const_sds s, uint8_t *hdr_size) { - size_t required_keylen = sdsminlen(s); - if (buf == NULL) { - return required_keylen; - } - assert(buf_len >= required_keylen); - memcpy(buf, sdsAllocPtr(s), required_keylen); - *hdr_size = sdsHdrSize(s[-1]); - return required_keylen; -} - /* Free an sds string. No operation is performed if 's' is NULL. */ void sdsfree(sds s) { if (s == NULL) return; @@ -267,7 +258,7 @@ sds _sdsMakeRoomFor(sds s, size_t addlen, int greedy) { void *sh, *newsh; size_t avail = sdsavail(s); size_t len, newlen, reqlen; - char type, oldtype = s[-1] & SDS_TYPE_MASK; + char type, oldtype = sdsType(s); int hdrlen; size_t bufsize, usable; int use_realloc; @@ -353,7 +344,7 @@ sds sdsRemoveFreeSpace(sds s, int would_regrow) { * SDS_TYPE_5, which is desired when the sds is likely to be changed again. */ sds sdsResize(sds s, size_t size, int would_regrow) { void *sh, *newsh = NULL; - char type, oldtype = s[-1] & SDS_TYPE_MASK; + char type, oldtype = sdsType(s); int hdrlen, oldhdrlen = sdsHdrSize(oldtype); size_t len = sdslen(s); sh = (char *)s - oldhdrlen; @@ -433,7 +424,7 @@ sds sdsResize(sds s, size_t size, int would_regrow) { * 4) The implicit null term. */ size_t sdsAllocSize(const_sds s) { - char type = s[-1] & SDS_TYPE_MASK; + char type = sdsType(s); /* SDS_TYPE_5 header doesn't contain the size of the allocation */ if (type == SDS_TYPE_5) { return s_malloc_usable_size(sdsAllocPtr(s)); @@ -445,7 +436,7 @@ size_t sdsAllocSize(const_sds s) { /* Return the pointer of the actual SDS allocation (normally SDS strings * are referenced by the start of the string buffer). */ void *sdsAllocPtr(const_sds s) { - return (void *)(s - sdsHdrSize(s[-1])); + return (void *)(s - sdsHdrSize(sdsType(s))); } /* Increment the sds length and decrements the left free space at the @@ -472,12 +463,11 @@ void *sdsAllocPtr(const_sds s) { * sdsIncrLen(s, nread); */ void sdsIncrLen(sds s, ssize_t incr) { - unsigned char flags = s[-1]; size_t len; - switch (flags & SDS_TYPE_MASK) { + switch (sdsType(s)) { case SDS_TYPE_5: { unsigned char *fp = ((unsigned char *)s) - 1; - unsigned char oldlen = SDS_TYPE_5_LEN(flags); + unsigned char oldlen = SDS_TYPE_5_LEN(*fp); assert((incr > 0 && oldlen + incr < 32) || (incr < 0 && oldlen >= (unsigned int)(-incr))); *fp = SDS_TYPE_5 | ((oldlen + incr) << SDS_TYPE_BITS); len = oldlen + incr; diff --git a/src/sds.h b/src/sds.h index d6db8d19c2..137ccaf262 100644 --- a/src/sds.h +++ b/src/sds.h @@ -88,12 +88,16 @@ struct __attribute__((__packed__)) sdshdr64 { #define SDS_TYPE_BITS 3 #define SDS_HDR_VAR(T, s) struct sdshdr##T *sh = (void *)((s) - (sizeof(struct sdshdr##T))); #define SDS_HDR(T, s) ((struct sdshdr##T *)((s) - (sizeof(struct sdshdr##T)))) -#define SDS_TYPE_5_LEN(f) ((f) >> SDS_TYPE_BITS) +#define SDS_TYPE_5_LEN(f) ((unsigned char)(f) >> SDS_TYPE_BITS) -static inline size_t sdslen(const_sds s) { +static inline unsigned char sdsType(const_sds s) { unsigned char flags = s[-1]; - switch (flags & SDS_TYPE_MASK) { - case SDS_TYPE_5: return SDS_TYPE_5_LEN(flags); + return flags & SDS_TYPE_MASK; +} + +static inline size_t sdslen(const_sds s) { + switch (sdsType(s)) { + case SDS_TYPE_5: return SDS_TYPE_5_LEN(s[-1]); case SDS_TYPE_8: return SDS_HDR(8, s)->len; case SDS_TYPE_16: return SDS_HDR(16, s)->len; case SDS_TYPE_32: return SDS_HDR(32, s)->len; @@ -188,7 +192,7 @@ sds sdstrynewlen(const void *init, size_t initlen); sds sdsnew(const char *init); sds sdsempty(void); sds sdsdup(const_sds s); -size_t sdscopytobuffer(unsigned char *buf, size_t buf_len, const_sds s, uint8_t *hdr_size); +sds sdswrite(char *buf, size_t bufsize, char type, const char *init, size_t initlen); void sdsfree(sds s); void sdsfreeVoid(void *s); sds sdsgrowzero(sds s, size_t len); @@ -233,6 +237,8 @@ typedef sds (*sdstemplate_callback_t)(const_sds variable, void *arg); sds sdstemplate(const char *template, sdstemplate_callback_t cb_func, void *cb_arg); /* Low level functions exposed to the user API */ +int sdsHdrSize(char type); +char sdsReqType(size_t string_size); sds sdsMakeRoomFor(sds s, size_t addlen); sds sdsMakeRoomForNonGreedy(sds s, size_t addlen); void sdsIncrLen(sds s, ssize_t incr); @@ -241,6 +247,12 @@ sds sdsResize(sds s, size_t size, int would_regrow); size_t sdsAllocSize(const_sds s); void *sdsAllocPtr(const_sds s); +/* Returns the minimum required size to store an sds string of the given length + * and type. */ +static inline size_t sdsReqSize(size_t len, char type) { + return len + sdsHdrSize(type) + 1; +} + /* Export the allocator used by SDS to the program using SDS. * Sometimes the program SDS is linked to, may use a different set of * allocators, but may want to allocate or free things that SDS will diff --git a/src/server.c b/src/server.c index 8f15d8d653..68c8ecd4bf 100644 --- a/src/server.c +++ b/src/server.c @@ -389,10 +389,6 @@ int hashtableSdsKeyCompare(const void *key1, const void *key2) { return sdslen(sds1) != sdslen(sds2) || sdscmp(sds1, sds2); } -size_t dictSdsEmbedKey(unsigned char *buf, size_t buf_len, const void *key, uint8_t *key_offset) { - return sdscopytobuffer(buf, buf_len, (sds)key, key_offset); -} - /* A case insensitive version used for the command lookup table and other * places where case insensitive non binary-safe comparison is needed. */ int dictSdsKeyCaseCompare(const void *key1, const void *key2) { diff --git a/src/t_hash.c b/src/t_hash.c index b347ecf31f..bb75318d0e 100644 --- a/src/t_hash.c +++ b/src/t_hash.c @@ -36,24 +36,26 @@ struct hashTypeEntry { sds value; - unsigned char field_offset; - unsigned char field_data[]; + char field_offset; + char field_data[]; }; /* takes ownership of value, does not take ownership of field */ hashTypeEntry *hashTypeCreateEntry(sds field, sds value) { - size_t field_size = sdscopytobuffer(NULL, 0, field, NULL); - + size_t field_len = sdslen(field); + char field_sds_type = sdsReqType(field_len); + size_t field_size = sdsReqSize(field_len, field_sds_type); size_t total_size = sizeof(hashTypeEntry) + field_size; hashTypeEntry *entry = zmalloc(total_size); entry->value = value; - sdscopytobuffer(entry->field_data, field_size, field, &entry->field_offset); + entry->field_offset = sdsHdrSize(field_sds_type); + sdswrite(entry->field_data, field_size, field_sds_type, field, field_len); return entry; } sds hashTypeEntryGetField(const hashTypeEntry *entry) { - const unsigned char *field = entry->field_data + entry->field_offset; + const char *field = entry->field_data + entry->field_offset; return (sds)field; } diff --git a/tests/unit/type/string.tcl b/tests/unit/type/string.tcl index bbfb30b60d..e7b6cc7ae7 100644 --- a/tests/unit/type/string.tcl +++ b/tests/unit/type/string.tcl @@ -755,4 +755,31 @@ if {[string match {*jemalloc*} [s mem_allocator]]} { assert_encoding "int" bar lappend res [r get bar] } {12 12} + + test {Memory usage of embedded string value} { + # Check that we can fit 9 bytes of key + value into a 32 byte + # allocation, including the serverObject itself. + r set quux xyzzy + assert_lessthan_equal [r memory usage quux] 32 + + # Check that the SDS overhead of the embedded key and value is 6 bytes + # (sds5 + sds8). This is the memory layout: + # + # +--------------+--------------+---------------+----------------+ + # | serverObject | | sds5 key | sds8 value | + # | header | key-hdr-size | hdr "quux" \0 | hdr "xyzzy" \0 | + # | 16 bytes | 1 | 1 + 4 + 1 | 3 + 5 + 1 | + # +--------------+--------------+---------------+----------------+ + # + set content_size [expr {[string length quux] + [string length xyzzy]}] + regexp {robj:(\d+)} [r debug structsize] _ obj_header_size + set debug_sdslen [r debug sdslen quux] + regexp {key_sds_len:4 key_sds_avail:0 obj_alloc:(\d+)} $debug_sdslen _ obj_alloc + regexp {val_sds_len:5 val_sds_avail:(\d+) val_alloc:(\d+)} $debug_sdslen _ avail val_alloc + set sds_overhead [expr {$obj_alloc + $val_alloc - $obj_header_size - 1 - $content_size - $avail}] + assert_equal 6 $sds_overhead + + # Check that DEBUG SDSLEN reported allocation sizes matching MEMORY USAGE. + assert_equal [r memory usage quux] [expr {$obj_alloc + $val_alloc}] + } {} {needs:debug} }