erlang node_name phash 冲突坑

概述 在线上遇到了因节点名哈希值冲突导致的部分机器无负载问题。10台机器中,冲突的机器达到了4台之多。假设哈希的概率是平均的。10台机器中,不存在冲突的概率接近

>>> 1 - (1.0 / (2 ** 32)) * 10 0.9999999976716936

实际上,10台中哈希值冲突了6台。于是看源码找答案。
过程 先从phash2 api入手 erlang 的 api调用方式和 linux有相似之处。通过函数指针数组。
erl_bif_list.h 中看到,我们调用的phash2对应phash2_2。
BIF_LIST(am_erlang,am_phash,2,phash_2,phash_2,37) BIF_LIST(am_erlang,am_phash2,1,phash2_1,phash2_1,38) BIF_LIST(am_erlang,am_phash2,2,phash2_2,phash2_2,39)

bif.c:4905
BIF_RETTYPE phash2_2(BIF_ALIST_2) { Uint32 hash; Uint32 final_hash; Uint32 range; Eterm trap_state = THE_NON_VALUE; /* Check for special case 2^32 */ if (term_equals_2pow32(BIF_ARG_2)) { range = 0; } else { Uint u; if (!term_to_Uint(BIF_ARG_2, &u) || ((u >> 16) >> 16) != 0 || !u) { BIF_ERROR(BIF_P, BADARG); } range = (Uint32) u; } hash = trapping_make_hash2(BIF_ARG_1, &trap_state, BIF_P); if (trap_state != THE_NON_VALUE) { BIF_TRAP2(&bif_trap_export[BIF_phash2_2], BIF_P, trap_state, BIF_ARG_2); } if (range) { final_hash = hash % range; /* [0..range-1] */ } else { final_hash = hash; } /* * Return either a small or a big. Use the heap for bigs if there is room. */ #if defined(ARCH_64) BIF_RET(make_small(final_hash)); #else if (IS_USMALL(0, final_hash)) { BIF_RET(make_small(final_hash)); } else { Eterm* hp = HAlloc(BIF_P, BIG_UINT_HEAP_SIZE); BIF_RET(uint_to_big(final_hash, hp)); } #endif }

通过调试可以确认
1298{ (gdb) bt #0make_hash2_helper (term_param=203, can_trap=1, state_mref_write_back=0x7fd2b7bfc598, p=0x7fd2b9900b88) at beam/utils.c:1298 #10x0000558545a0a5eb in trapping_make_hash2 (term=203, state_mref_write_back=0x7fd2b7bfc598, p=0x7fd2b9900b88) at beam/utils.c:1983 #20x0000558545a284ad in phash2_1 (A__p=0x7fd2b9900b88, BIF__ARGS=0x7fd2ba200200, A__I=0x7fd2b7d5ef60) at beam/bif.c:4897 #30x00005585459455b2 in process_main (x_reg_array=0x7fd2ba200200, f_reg_array=0x7fd2ba202240) at x86_64-unknown-linux-gnu/opt/smp/beam_cold.h:121 #40x00005585459613ac in sched_thread_func (vesdp=0x7fd2b89440c0) at beam/erl_process.c:8498 #50x0000558545c47a63 in thr_wrapper (vtwd=0x7ffcd94d55a0) at pthread/ethread.c:122 #60x00007fd2fb2896db in start_thread (arg=0x7fd2b7bff700) at pthread_create.c:463 #70x00007fd2fadaa71f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95

phash2 对atom的处理 erts/emulator/beam/utils.c:1418
直接用atom的值找到erts_atom_table对应的哈希值。
if (primary_tag(term) == TAG_PRIMARY_IMMED1) { switch (term & _TAG_IMMED1_MASK) { case _TAG_IMMED1_IMMED2: switch (term & _TAG_IMMED2_MASK) { case _TAG_IMMED2_ATOM: /* Fast, but the poor hash value should be mixed. */ return atom_tab(atom_val(term))->slot.bucket.hvalue; }

可以看到,erts_atom_table是全局的,上限为1024*1024哈希表。这也是为什么原子最大只能存储1048576。
erts/emulator/beam/atom.h:atom_tab
atom_tab(Uint i) { return (Atom *) erts_index_lookup(&erts_atom_table, i); }

【erlang node_name phash 冲突坑】erts/emulator/beam/index.h:erts_index_lookup
seg_table是一个 1024*1024 的二维数组
ERTS_GLB_INLINE IndexSlot* erts_index_lookup(IndexTable* t, Uint ix) { return t->seg_table[ix>>INDEX_PAGE_SHIFT][ix&INDEX_PAGE_MASK]; }

搜索erts_atom_table从代码可以看到,是生成原子时,在修改erts_atom_table时,计算的哈希值。问题的关键在于原子的哈希值是如何计算的。
atom hash 从atom.c:init_atom_table中可以看到。原子的哈希值是通过init时赋值的函数指针计算的,具体逻辑如下:
atom.c:atom_hash
static HashValue atom_hash(Atom *obj) { byte *p = obj->name; int len = obj->len; HashValue h = 0, g; byte v; while (len--) { v = *p++; /* latin1 clutch for r16 */ if (len && (v & 0xFE) == 0xC2 && (*p & 0xC0) == 0x80) { v = (v << 6) | (*p & 0x3F); p++; len--; } /* normal hashpjw follows for v */ h = (h << 4) + v; if ((g = h & 0xf0000000)) { h ^= (g >> 24); h ^= g; } } return h; }

调试确认,节点名atom的计算,也是走这里,那么为什么会冲突呢?
Thread 6 "2_scheduler" hit Breakpoint 1, atom_hash (obj=0x7f39bfef92d0) at beam/atom.c:131 131byte* p = obj->name; (gdb) p obj->name $19 = (byte *) 0x7f3a08580180 "test1@ubuntu" (gdb) p obj->len $20 = 12 (gdb) bt #0atom_hash (obj=0x7f39bfef92d0) at beam/atom.c:131 #10x000055c20bd115ab in hash_fetch (h=0x55c20c1d8040 , tmpl=0x7f39bfef92d0, hash=0x55c20bd12a67 , cmp=0x55c20bd12b48 ) at beam/hash.h:133 #20x000055c20bd11d27 in hash_get (h=0x55c20c1d8040 , tmpl=0x7f39bfef92d0) at beam/hash.c:228 #30x000055c20bd124bc in index_get (t=0x55c20c1d8040 , tmpl=0x7f39bfef92d0) at beam/index.c:109 #40x000055c20bd12fb7 in erts_atom_put_index (name=0x7f3a08580180 "test1@ubuntu", len=12, enc=ERTS_ATOM_ENC_UTF8, trunc=1) at beam/atom.c:299 #50x000055c20bd13115 in erts_atom_put (name=0x7f3a08580180 "test1@ubuntu", len=12, enc=ERTS_ATOM_ENC_UTF8, trunc=1) at beam/atom.c:350 #60x000055c20bc57596 in list_to_atom_1 (A__p=0x7f39bf203588, BIF__ARGS=0x7f39c67c4280, A__I=0x7f39be3d9590) at beam/bif.c:2913 #70x000055c20bb65087 in process_main (x_reg_array=0x7f39c67c4280, f_reg_array=0x7f39c67c6300) at x86_64-unknown-linux-gnu/opt/smp/beam_hot.h:331 #80x000055c20bb973ac in sched_thread_func (vesdp=0x7f39c4f4e480) at beam/erl_process.c:8498 #90x000055c20be7da63 in thr_wrapper (vtwd=0x7ffc7f688b60) at pthread/ethread.c:122 #10 0x00007f3a078816db in start_thread (arg=0x7f39bfefc700) at pthread_create.c:463 #11 0x00007f3a073a271f in clone () at ../sysdeps/unix/sysv/linux/x86_64/clone.S:95(gdb) printf "%d,%s\n", h, obj->name 1385189,test1@ubuntu (gdb) printf "%d,%s\n", h, obj->name 1385013,test4@ubuntu (gdb) printf "%d,%s\n", h, obj->name 1384965,test7@ubuntu

用python写了同样的算法,发现冲突概率真的很高,参考结论里的链接。
def atom_hash(s): h = 0 g = 0 for v in s: h = (h << 4) + ord(v) g = h & 0xf0000000 if g: h ^= (g >> 24) h ^= g return hfor num in range(10): value = "https://www.it610.com/article/test{0}@ubuntu".format(num) hash_val = atom_hash(value) print(value, hash_val)test0@ubuntu 1385205 test1@ubuntu 1385189 test2@ubuntu 1385173 test3@ubuntu 1385157 test4@ubuntu 1385013 test5@ubuntu 1384997 test6@ubuntu 1384981 test7@ubuntu 1384965 test8@ubuntu 1385077 test9@ubuntu 1385061

结论
  • 原子的哈希值和原子的字符串展示相关,即同样的原子,在不同的节点上(同样的erlang vm版本,同样的哈希算法),那么该原子的哈希值是一样的。
  • atom_hash使用了hashpjw算法,该算法,即erlang原子哈希值的生成算法很容易冲突。没找到更权威的资料了:https://blog.csdn.net/iteye_1...

    推荐阅读