浅析布隆过滤器及实现demo
? 布隆过滤器
空间效率
布隆过滤器基础
简单的Python实现
1 from bitarray import bitarray 2 3 # 3rd party 4 import mmh3 5 6 7 class BloomFilter(set): 8 9 def __init__(self,size,hash_count): 10 super(BloomFilter,self).__init__() 11 self.bit_array = bitarray(size) 12 self.bit_array.setall(0) 13 self.size = size 14 self.hash_count = hash_count 15 16 def __len__(self): 17 return self.size 18 19 def __iter__(self): 20 return iter(self.bit_array) 21 22 def add(self,item): 23 for ii in range(self.hash_count): 24 index = mmh3.hash(item,ii) % self.size 25 self.bit_array[index] = 1 26 27 return self 28 29 def __contains__(self,item): 30 out = True 31 for ii in range(self.hash_count): 32 index = mmh3.hash(item,ii) % self.size 33 if self.bit_array[index] == 0: 34 out = False 35 36 return out 37 38 39 def main(): 40 bloom = BloomFilter(100,10) 41 animals = [‘dog‘,‘cat‘,‘giraffe‘,‘fly‘,‘mosquito‘,‘horse‘,‘eagle‘,42 ‘bird‘,‘bison‘,‘boar‘,‘butterfly‘,‘ant‘,‘anaconda‘,‘bear‘,43 ‘chicken‘,‘dolphin‘,‘donkey‘,‘crow‘,‘crocodile‘] 44 # First insertion of animals into the bloom filter 45 for animal in animals: 46 bloom.add(animal) 47 48 # Membership existence for already inserted animals 49 # There should not be any false negatives 50 for animal in animals: 51 if animal in bloom: 52 print(‘{} is in bloom filter as expected‘.format(animal)) 53 else: 54 print(‘Something is terribly went wrong for {}‘.format(animal)) 55 print(‘FALSE NEGATIVE!‘) 56 57 # Membership existence for not inserted animals 58 # There could be false positives 59 other_animals = [‘badger‘,‘cow‘,‘pig‘,‘sheep‘,‘bee‘,‘wolf‘,‘fox‘,60 ‘whale‘,‘shark‘,‘fish‘,‘turkey‘,‘duck‘,‘dove‘,61 ‘deer‘,‘elephant‘,‘frog‘,‘falcon‘,‘goat‘,‘gorilla‘,62 ‘hawk‘ ] 63 for other_animal in other_animals: 64 if other_animal in bloom: 65 print(‘{} is not in the bloom,but a false positive‘.format(other_animal)) 66 else: 67 print(‘{} is not in the bloom filter as expected‘.format(other_animal)) 68 69 70 if __name__ == ‘__main__‘: 71 main() ? 输出结果如下所示: dog is in bloom filter as expected cat is in bloom filter as expected giraffe is in bloom filter as expected fly is in bloom filter as expected mosquito is in bloom filter as expected horse is in bloom filter as expected eagle is in bloom filter as expected bird is in bloom filter as expected bison is in bloom filter as expected boar is in bloom filter as expected butterfly is in bloom filter as expected ant is in bloom filter as expected anaconda is in bloom filter as expected bear is in bloom filter as expected chicken is in bloom filter as expected dolphin is in bloom filter as expected donkey is in bloom filter as expected crow is in bloom filter as expected crocodile is in bloom filter as expected badger is not in the bloom filter as expected cow is not in the bloom filter as expected pig is not in the bloom filter as expected sheep is not in the bloom,but a false positive bee is not in the bloom filter as expected wolf is not in the bloom filter as expected fox is not in the bloom filter as expected whale is not in the bloom filter as expected shark is not in the bloom,but a false positive fish is not in the bloom,but a false positive turkey is not in the bloom filter as expected duck is not in the bloom filter as expected dove is not in the bloom filter as expected deer is not in the bloom filter as expected elephant is not in the bloom,but a false positive frog is not in the bloom filter as expected falcon is not in the bloom filter as expected goat is not in the bloom filter as expected gorilla is not in the bloom filter as expected hawk is not in the bloom filter as expected
误报
在使用布隆过滤器时,需要考虑误报的潜在影响。 确定性
缺点
布隆过滤器的容量
布隆过滤器的构造和检索
无法返回元素本身
删除某个元素
(编辑:李大同) 【声明】本站内容均来自网络,其相关言论仅代表作者个人观点,不代表本站立场。若无意侵犯到您的权利,请及时与联系站长删除相关内容! |