Redis系列(九):数据结构Hash(ZipList、HashTable)源码解析和H
发布时间:2020-12-16 04:38:03 所属栏目:安全 来源:网络整理
导读:? ?2.源码解析 1.相关命令如下: { " hset " ,hsetCommand, 4 ,1)">wmF 0 ,NULL,1)">1 ,1)">0 },{ hsetnx hget 3 ,1)">rF hmset wm hmget r hincrby hincrbyfloat hdel wF hlen 2 ,1)">hstrlen hkeys rS hvals hgetall hexists hscan rR 0 }, 2.ziplist数据
? ?2.源码解析 1.相关命令如下: {"hset",hsetCommand,4,1)">wmF0,NULL,1)">1,1)">0},{hsetnxhget3,1)">rFhmsetwmhmgetrhincrbyhincrbyfloathdelwFhlen2,1)">hstrlenhkeysrShvalshgetallhexistshscanrR0}, 2.ziplist数据结构 /* We use this function to receive information about a ziplist entry. * Note that this is not how the data is actually encoded,is just what we * get filled by a function in order to operate more easily. */ typedef struct zlentry { unsigned int prevrawlensize; Bytes used to encode the previous entry len unsigned int prevrawlen; Previous entry len. int lensize; Bytes used to encode this entry type/len. For example strings have a 1,2 or 5 bytes header. Integers always use a single byte.int len; Bytes used to represent the actual entry. For strings this is just the string length while for integers it is 1,2,3,4,8 or 0 (for 4 bit immediate) depending on the number range. int headersize; prevrawlensize + lensize. char encoding; Set to ZIP_STR_* or ZIP_INT_* depending on the entry encoding. However for 4 bits immediate integers this can assume a range of values and must be range-checked. char *p; Pointer to the very start of the entry,that is,this points to prev-entry-len field. } zlentry; 3.hashtable数据结构 typedef dictEntry { void *key; union { val; uint64_t u64; int64_t s64; double d; } v; struct dictEntry *next; } dictEntry; typedef dictType { uint64_t (*hashFunction)(const key); void *(*keyDup)(void *privdata,void *(*valDup)(obj); int (*keyCompare)(void *key1,1)">key2); void (*keyDestructor)(void (*valDestructor)(obj); } dictType; This is our hash table structure. Every dictionary has two of this as we * implement incremental rehashing,for the old to the new table. dictht { dictEntry **table; unsigned long size; unsigned sizemask; unsigned used; } dictht; typedef dict { dictType *type; privdata; dictht ht[2]; long rehashidx; rehashing not in progress if rehashidx == -1 long iterators; number of iterators currently running } dict; hset // t_hash.c,set key field value void hsetCommand(client *c) { int update; robj *o; 1. 查找hash的key是否存在,不存在则新建一个,然后在其上进行数据操作 if ((o = hashTypeLookupWriteOrCreate(c,c->argv[1])) == NULL) return; 2. 检查2-3个参数是否需要将简单版(ziplist)hash表转换为复杂的hash表,转换后的表通过 o->ptr 体现 hashTypeTryConversion(o,c->argv,1)">3); 3. 添加kv到 o 的hash表中 update = hashTypeSet(o,1)">2]->ptr,1)">3]->ptr,HASH_SET_COPY); addReply(c,update ? shared.czero : shared.cone); 变更命令传播 signalModifiedKey(c->db,1)">1]); notifyKeyspaceEvent(NOTIFY_HASH,1],c->db->id); server.dirty++; } 1. 获取db外部的key,即整体hash数据实例 t_hash.c robj *hashTypeLookupWriteOrCreate(client *c,robj *key) { robj *o = lookupKeyWrite(c->db,key); if (o == NULL) { 此处创建的hashObject是以 ziplist 形式的 o = createHashObject(); dbAdd(c->else { 不是hash类型的键已存在,不可覆盖,返回错误 if (o->type != OBJ_HASH) { addReply(c,shared.wrongtypeerr); NULL; } } o; } object.c,创建hashObject,以 ziplist 形式创建 robj *createHashObject(void) { unsigned char *zl = ziplistNew(); robj *o = createObject(OBJ_HASH,zl); o->encoding = OBJ_ENCODING_ZIPLIST; ziplist.c static unsigned char *createList() { unsigned ziplistNew(); zl = ziplistPush(zl,(unsigned char*)foo,ZIPLIST_TAIL); zl = ziplistPush(zl,1)">quux4hello51024 zl; } 2. 检查参数,是否需要将 ziplist 形式的hash表转换为真正的hash表 /* Check the length of a number of objects to see if we need to convert a * ziplist to a real hash. Note that we only check string encoded objects * as their string length can be queried in constant time. */ void hashTypeTryConversion(robj *o,robj **argv,1)">int start,1)"> end) { i; if (o->encoding != OBJ_ENCODING_ZIPLIST) ; for (i = start; i <= end; i++) { 参数大于设置的 hash_max_ziplist_value (默认: 64)时,会直接将 ziplist 转换为 ht OBJ_ENCODING_RAW,OBJ_ENCODING_EMBSTR 循环检查参数,只要发生了一次转换就结束检查(没必要继续了) if (sdsEncodedObject(argv[i]) && sdslen(argv[i]->ptr) > server.hash_max_ziplist_value) { 这个转换过程很有意思,我们深入看看 hashTypeConvert(o,OBJ_ENCODING_HT); break; } } } void hashTypeConvert(robj *o,1)"> enc) { if (o->encoding == OBJ_ENCODING_ZIPLIST) { 此处我们只处理这种情况 hashTypeConvertZiplist(o,enc); } else OBJ_ENCODING_HT) { serverPanic(Not implemented"); } { serverPanic(Unknown hash encoding); } } void hashTypeConvertZiplist(robj *o,1)"> enc) { serverAssert(o->encoding == OBJ_ENCODING_ZIPLIST); if (enc == Nothing to do... } OBJ_ENCODING_HT) { hashTypeIterator *hi; dict *dict; ret; 迭代器创建 hi = hashTypeInitIterator(o); 一个hash的数据结构就是一个 dict,从这个级别来说,hash 与 db 是一个级别的 dict = dictCreate(&hashDictType,NULL); 依次迭代 o,赋值到 hi->fptr,hi->vptr 依次添加到 dict 中 while (hashTypeNext(hi) != C_ERR) { sds key,value; 从 hi->fptr 中获取key 从 hi->vptr 中获取value key = hashTypeCurrentObjectNewSds(hi,OBJ_HASH_KEY); value = 添加到 dict 中 ret = dictAdd(dict,value); if (ret != DICT_OK) { serverLogHexDump(LL_WARNING,1)">ziplist with dup elements dumpptr)); serverPanic(Ziplist corruption detected); } } 释放迭代器 hashTypeReleaseIterator(hi); zfree(o->ptr); 将变更反映到o对象上返回 o->encoding = OBJ_ENCODING_HT; o->ptr = dict; } 2.1. 迭代ziplist元素 Move to the next entry in the hash. Return C_OK when the next entry * could be found and C_ERR when the iterator reaches the end. int hashTypeNext(hashTypeIterator *hi) { if (hi->encoding == OBJ_ENCODING_ZIPLIST) { unsigned zl; unsigned char *fptr,*vptr; 每次都是基于原始字符器进行计算偏移 迭代的是 fptr,vptr zl = hi->subject->ptr; fptr = hi->fptr; vptr = hi-> 第一次查找时使用index查找,后续则使用 fptr,vptr 进行迭代 if (fptr == NULL) { Initialize cursor serverAssert(vptr == NULL); fptr = ziplistIndex(zl,1)">); } { Advance cursor serverAssert(vptr != NULL); fptr = ziplistNext(zl,vptr); } if (fptr == NULL) C_ERR; Grab pointer to the value (fptr points to the field) vptr = NULL); fptr,vptr now point to the first or next pair hi->fptr = fptr; hi->vptr = vptr; } OBJ_ENCODING_HT) { if ((hi->de = dictNext(hi->di)) == NULL) C_ERR; } ); } C_OK; } ziplist.c,查找 index 的元素 Returns an offset to use for iterating with ziplistNext. When the given * index is negative,the list is traversed back to front. When the list * doesn't contain an element at the provided index,NULL is returned. unsigned char *ziplistIndex(unsigned char *zl,1)"> index) { unsigned p; unsigned int prevlensize,prevlen = if (index < 小于0时,反向查找 index = (-index)-; p = ZIPLIST_ENTRY_TAIL(zl); if (p[0] != ZIP_END) { ZIP_DECODE_PREVLEN(p,prevlensize,prevlen); while (prevlen > 0 && index--) { p -= prevlen; ZIP_DECODE_PREVLEN(p,prevlen); } } } { p = ZIPLIST_ENTRY_HEAD(zl); while (p[0] != ZIP_END && index--) { p += zipRawEntryLength(p); } } 迭代完成还没找到元素 p[0]=ZIP_END index 超出整体ziplist大小则遍历完成后 index>0 return (p[0] == ZIP_END || index > 0) ? NULL : p; } Return pointer to next entry in ziplist. * * zl is the pointer to the ziplist * p is the pointer to the current element * * The element after 'p' is returned,otherwise NULL if we are at the end. char *ziplistNext(unsigned p) { (() zl); "p" could be equal to ZIP_END,caused by ziplistDelete,* and we should return NULL. Otherwise,we should return NULL * when the *next* element is ZIP_END (there is no next entry). */ 0] == ZIP_END) { NULL; } 当前指针偏移当前元素长度(根据ziplist协议),即到下一元素指针位置 p += zipRawEntryLength(p); NULL; } p; } Return the total number of bytes used by the entry pointed to by 'p'. int zipRawEntryLength(unsigned p) { unsigned prevlensize,encoding,lensize,len; ZIP_DECODE_PREVLENSIZE(p,prevlensize); ZIP_DECODE_LENGTH(p +return prevlensize + lensize + len; } 2.2. t_hash.c,获取 hashTypeIterator 的具体值,写入 vstr,vlen 中 Return the key or value at the current iterator position as a new * SDS string. sds hashTypeCurrentObjectNewSds(hashTypeIterator *hi,1)"> what) { unsigned vstr; unsigned vlen; long vll; hashTypeCurrentObject(hi,what,&vstr,&vlen,&vll); if (vstr) sdsnewlen(vstr,vlen); sdsfromlonglong(vll); } Higher level function of hashTypeCurrent*() that returns the hash value * at current iterator position. * * The returned element is returned by reference in either *vstr and *vlen if * it's returned in string form,or stored in *vll if it's returned as * a number. * * If *vll is populated *vstr is set to NULL,so the caller * can always check the function return by checking the return value * type checking if vstr == NULL. void hashTypeCurrentObject(hashTypeIterator *hi,1)">int what,1)">char **vstr,1)">int *vlen,1)">long *vll) { OBJ_ENCODING_ZIPLIST) { *vstr = NULL; hashTypeCurrentFromZiplist(hi,vstr,vlen,vll); } OBJ_ENCODING_HT) { sds ele = hashTypeCurrentFromHashTable(hi,what); *vstr = (unsigned char*) ele; *vlen = sdslen(ele); } ); } } Get the field or value at iterator cursor,for an iterator on a hash value * encoded as a ziplist. Prototype is similar to `hashTypeGetFromZiplist`. void hashTypeCurrentFromZiplist(hashTypeIterator *hi,1)"> what,unsigned char **vstr,1)">int *vlen,vll) { ret; serverAssert(hi->encoding == OBJ_ENCODING_ZIPLIST); OBJ_HASH_KEY 从 fptr 中获取,否则从 vptr 中获取 if (what & OBJ_HASH_KEY) { ret = ziplistGet(hi->fptr,vll); serverAssert(ret); } { ret = ziplistGet(hi->vptr,vll); serverAssert(ret); } } Get entry pointed to by 'p' and store in either '*sstr' or 'sval' depending * on the encoding of the entry. '*sstr' is always set to NULL to be able * to find out whether the string pointer or the integer value was set. * Return 0 if 'p' points to the end of the ziplist,1 otherwise. int ziplistGet(unsigned char *p,1)">char **sstr,1)">int *slen,1)">sval) { zlentry entry; if (p == NULL || p[0] == ZIP_END) return if (sstr) *sstr = NULL; 按照ziplist的编码协议,获取头部信息 zipEntry(p,1)">entry); if (ZIP_IS_STR(entry.encoding)) { (sstr) { *slen = entry.len; *sstr = p+entry.headersize; } } (sval) { *sval = zipLoadInteger(p+entry.headersize,entry.encoding); } } ; } Return a struct with all information about an entry. static void zipEntry(unsigned e) { prevrawlen); 指向下一位置偏移,按照ziplist的编码协议,依次读取 encoding,len ZIP_DECODE_LENGTH(p + e->prevrawlensize,e->encoding,e->lensize,1)">len); 除去header得到 body偏移 e->headersize = e->prevrawlensize + e->lensize; e->p = p; } header ziplist.c Decode the length of the previous element,from the perspective of the entry * pointed to by 'ptr'. #define ZIP_DECODE_PREVLEN(ptr,prevlen) do { 解析第1个字符为 prevlensize ZIP_DECODE_PREVLENSIZE(ptr,prevlensize); if ((prevlensize) == ) { (prevlen) = (ptr)[]; } ) { assert(sizeof((prevlensize)) == ); 当ptr[0]>254时,代表内容有点大,需要使用 5个字符保存上一字符长度 memcpy(&(prevlen),((char*)(ptr)) + ); memrev32ifbe(&prevlen); } } while(); Decode the number of bytes required to store the length of the previous * element,from the perspective of the entry pointed to by 'ptr'. #define ZIP_DECODE_PREVLENSIZE(ptr,prevlensize) do { if ((ptr)[0] < ZIP_BIGLEN) { (prevlensize) = ; } { (prevlensize) = ; } } Decode the length encoded in 'ptr'. The 'encoding' variable will hold the * entries encoding,the 'lensize' variable will hold the number of bytes * required to encode the entries length,and the 'len' variable will hold the * entries length. #define ZIP_DECODE_LENGTH(ptr,len) do { 解析第1个字符为 编码格式 &ZIP_STR_MASK=0xc0 ZIP_ENTRY_ENCODING((ptr),(encoding)); if ((encoding) < ZIP_STR_MASK) { 0 << 6 =0 具体解析如下代码, if ((encoding) == ZIP_STR_06B) { (lensize) = ; (len) = (ptr)[0] & 0x3f; } 1 << 6 =64 ZIP_STR_14B) { (lensize) = ; (len) = (((ptr)[0x3f) << 8) | (ptr)[]; } 2 << 6 =128 if (encoding == ZIP_STR_32B) { (lensize) = ; (len) = ((ptr)[1] << 24) | ((ptr)[2] << 16) |3] << 8) |]); } { assert(NULL); } } { 超过 0xc0 的长度了,直接使用 1,4 表示len (lensize) = ; (len) = zipIntSize(encoding); } } Extract the encoding from the byte pointed by 'ptr' and set it into * 'encoding'. #define ZIP_ENTRY_ENCODING(ptr,encoding) do { (encoding) = (ptr[]); if ((encoding) < ZIP_STR_MASK) (encoding) &= ZIP_STR_MASK; } ) Different encoding/length possibilities #define ZIP_STR_MASK 0xc0 #define ZIP_INT_MASK 0x30 #define ZIP_STR_06B (0 << 6) 0x00 #define ZIP_STR_14B (1 << 6) 0x40 #define ZIP_STR_32B (2 << 6) 0x80 #define ZIP_INT_16B (0xc0 | 0<<4) 0xc0 #define ZIP_INT_32B (0xc0 | 1<<4) 0xd0 #define ZIP_INT_64B (0xc0 | 2<<4) 0xe0 #define ZIP_INT_24B (0xc0 | 3<<4) 0xf0 #define ZIP_INT_8B 0xfe 0xfe 添加kv到对应的key实例中: 3. 添加kv到 hash表中,稍微复杂 int hashTypeSet(robj *o,sds field,sds value,1)"> flags) { int update = 针对ziplist 的添加,与 ht 编码的添加,自然是分别处理 vptr; zl = o->ptr; 找到ziplist 的头节点指针 fptr = ziplistIndex(zl,ZIPLIST_HEAD); if (fptr != 尝试查找该 field 对应的元素(从1开始),如果找到则先删除原值,然后统一添加 fptr = ziplistFind(fptr,1)">char*)field,sdslen(field),1)">); NULL) { */ value 不可以为null,否则 ziplist 将无法工作 vptr = NULL); update = ; Delete value 先删除旧的 value,再以插入的形式更新,后续讲删除时再详解 zl = ziplistDelete(zl,1)">vptr); Insert new value 重点,将value添加到 ziplist 中 zl = ziplistInsert(zl,vptr,1)">)value,sdslen(value)); } } 没有找到对应元素,则直接将元素添加到尾部即可 if (!update) { Push new field/value pair onto the tail of the ziplist zl = ziplistPush(zl,1)">)field,ZIPLIST_TAIL); zl = ziplistPush(zl,sdslen(value),ZIPLIST_TAIL); } o->ptr = zl; Check if the ziplist needs to be converted to a hash table */ 大于设置的阀值后,转换ziplist为ht(默认: 512) if (hashTypeLength(o) > server.hash_max_ziplist_entries) hashTypeConvert(o,OBJ_ENCODING_HT); } OBJ_ENCODING_HT) { dictEntry *de = dictFind(o-> (de) { sdsfree(dictGetVal(de)); if (flags & HASH_SET_TAKE_VALUE) { dictGetVal(de) = value; value = NULL; } { dictGetVal(de) = sdsdup(value); } update = ; } { sds f,v; HASH_SET_TAKE_FIELD) { f = field; field = { f = sdsdup(field); } HASH_SET_TAKE_VALUE) { v = { v = sdsdup(value); } dictAdd(o->); } Free SDS strings we did not referenced elsewhere if the flags * want this function to be responsible. if (flags & HASH_SET_TAKE_FIELD && field) sdsfree(field); if (flags & HASH_SET_TAKE_VALUE && value) sdsfree(value); update; } 3.1. 使用ziplist进行保存 field -> value Find pointer to the entry equal to the specified entry. Skip 'skip' entries * between every comparison. Returns NULL when the field could not be found. char *ziplistFind(unsigned char *vstr,1)">int vlen,1)"> skip) { int skipcnt = ; unsigned char vencoding = long vll = ZIP_END) { unsigned q; 解析整个字符串p的 prevlensize,len ZIP_DECODE_PREVLENSIZE(p,prevlensize); ZIP_DECODE_LENGTH(p + lensize; 传入1,代表要跳过一个元素,比如: 查找key时,跳过1个v,然后继续迭代 跳过了n个元素后,再从此开始key的比对过程 if (skipcnt == ) { Compare current entry with specified entry */ 针对不同的编码使用不同的比较方式 (ZIP_IS_STR(encoding)) { 找到相应的元素,直接返回 p 指针 if (len == vlen && memcmp(q,vlen) == ) { p; } } { Find out if the searched field can be encoded. Note that * we do it only the first time,once done vencoding is set * to non-zero and vll is set to the integer value. if (vencoding == if (!zipTryEncoding(vstr,&vll,1)">vencoding)) { If the entry can't be encoded we set it to * UCHAR_MAX so that we don't retry again the next * time. vencoding = UCHAR_MAX; } Must be non-zero by now assert(vencoding); } Compare current entry with specified entry,do it only * if vencoding != UCHAR_MAX because if there is no encoding * possible for the field it can't be a valid integer. if (vencoding != UCHAR_MAX) { long ll = zipLoadInteger(q,encoding); if (ll == vll) { p; } } } Reset skip count 查找一次,跳过skip次 skipcnt = skip; } Skip entry skipcnt--; } Move to next entry p = q + len; } NULL; } zl:ziplist实例,p:要插入的key字串,s:要插入的value字串,len:要插入的value的长度 Insert an entry at "p". char *ziplistInsert(unsigned char *s,1)"> slen) { __ziplistInsert(zl,p,s,slen); } Insert item at "p". char *__ziplistInsert(unsigned slen) { size_t curlen = intrev32ifbe(ZIPLIST_BYTES(zl)),reqlen; unsigned ; size_t offset; int nextdiff = char encoding = long value = 123456789; initialized to avoid warning. Using a value that is easy to see if for some reason we use it uninitialized. zlentry tail; Find out prevlen for the entry that is inserted. ZIP_END) { ZIP_DECODE_PREVLEN(p,prevlen); } { unsigned char *ptail =if (ptail[ ZIP_END) { prevlen = zipRawEntryLength(ptail); } } See if the entry can be encoded if (zipTryEncoding(s,slen,&value,1)">encoding)) { 'encoding' is set to the appropriate integer encoding reqlen = zipIntSize(encoding); } 'encoding' is untouched,however zipEncodeLength will use the * string length to figure out how to encode it. slen; } We need space for both the length of the previous entry and * the length of the payload. 加上prevlen,slen 的长度,以计算value的存放位置 reqlen += zipPrevEncodeLength(NULL,prevlen); reqlen += zipEncodeLength(NULL,slen); When the insert position is not equal to the tail,we need to * make sure that the next entry can hold this entry's length in * its prevlen field. nextdiff = (p[0] != ZIP_END) ? zipPrevLenByteDiff(p,reqlen) : Store offset because a realloc may change the address of zl. 存储当前偏移位置,以便在扩容之后,还能找到相应位置 p = p -zl + zl offset = p-zl; zl = ziplistResize(zl,curlen+reqlen+nextdiff); p = zl+offset; Apply memory move when necessary and update tail offset. Subtract one because of the ZIP_END bytes 字符拷贝 memmove(p+reqlen,p-nextdiff,curlen-offset-1+nextdiff); Encode this entry's raw length in the next entry. zipPrevEncodeLength(p+reqlen,reqlen); Update offset for tail ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+reqlen); When the tail contains more than one entry,we need to take * "nextdiff" in account as well. Otherwise,a change in the * size of prevlen doesn't have an effect on the *tail* offset. zipEntry(p+reqlen,1)">tail); if (p[reqlen+tail.headersize+tail.len] != ZIP_END) { ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(intrev32ifbe(ZIPLIST_TAIL_OFFSET(zl))+nextdiff); } } This element will be the new tail. ZIPLIST_TAIL_OFFSET(zl) = intrev32ifbe(p-zl); } When nextdiff != 0,the raw length of the next entry has changed,so * we need to cascade the update throughout the ziplist if (nextdiff != 如果本次更新后数据位置变化,则需要更新后续的元素位置 offset = p-zl; zl = __ziplistCascadeUpdate(zl,p+reqlen); p = zl+offset; } Write the entry 将 value 写入 p 中,即写入了 ziplist 中 p += zipPrevEncodeLength(p,prevlen); p += zipEncodeLength(p,slen); (ZIP_IS_STR(encoding)) { memcpy(p,slen); } { zipSaveInteger(p,value,encoding); } ZIPLIST_INCR_LENGTH(zl,1)"> zl; } 另外,如果没有旧的元素值时,直接在hash表的末尾添加对应的field->value 即可 char *ziplistPush(unsigned int slen,1)">int wherep; p = (where == ZIPLIST_HEAD) ? ZIPLIST_ENTRY_HEAD(zl) : ZIPLIST_ENTRY_END(zl); |