13 years ago · d46261638d
--- a/bloom_filter_mod.py
+++ b/bloom_filter_mod.py
@@ -23,15 +23,14 @@ import random
 
				 # p is the desired error rate when full - we call this error_rate_p
			
 
				 
			
 
				 
			
 
				-def get_index_bitmask_seed_rnd(bloom_filter, key):
			
 
				+def get_bitno_seed_rnd(bloom_filter, key):
			
 
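				-	'''Apply num_probes_k hash functions to key.  Generate the array index and bitmask corresponding to each result'''
			
 
				+	'''Apply num_probes_k hash functions to key.  Generate the bit number (reduced modulo num_bits_m) corresponding to each result'''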
			
 
				 
			
 
				 	# We're using key as a seed to a pseudorandom number generator
			
 
				 	hasher = random.Random(key).randrange
			
 
				-	for _ in range(bloom_filter.num_probes_k):
			
 
				-		array_index = hasher(bloom_filter.num_words)
			
 
				-		bit_within_word_index = hasher(32)
			
 
				-		yield array_index, 1 << bit_within_word_index
			
 
				+	for dummy in range(bloom_filter.num_probes_k):
			
 
				+		bitno = hasher(bloom_filter.num_bits_m)
			
 
				+		yield bitno % bloom_filter.num_bits_m
			
 
				 
			
 
				 
			
 
				 MERSENNES1 = [ 2 ** x - 1 for x in [ 17, 31, 127 ] ]
			
@@ -56,7 +55,7 @@ def hash2(int_list):
 
				 	return simple_hash(int_list, MERSENNES2[0], MERSENNES2[1], MERSENNES2[2])
			
 
				 
			
 
				 
			
 
				-def get_index_bitmask_lin_comb(bloom_filter, key):
			
 
				+def get_bitno_lin_comb(bloom_filter, key):
			
 
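				-	'''Apply num_probes_k hash functions to key.  Generate the array index and bitmask corresponding to each result'''
			
 
				+	'''Apply num_probes_k hash functions to key.  Generate the bit number (reduced modulo num_bits_m) corresponding to each result'''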
			
 
				 
			
 
				 	# This one assumes key is either bytes or str (or other list of integers)
			
@@ -81,16 +80,14 @@ def get_index_bitmask_lin_comb(bloom_filter, key):
 
				 	# We're using linear combinations of hash_value1 and hash_value2 to obtain num_probes_k hash functions
			
 
				 	for probeno in range(1, bloom_filter.num_probes_k + 1):
			
 
				 		bit_index = hash_value1 + probeno * hash_value2
			
 
				-		bit_within_word_index = bit_index % 32
			
 
				-		array_index = (bit_index // 32) % bloom_filter.num_words
			
 
				-		yield array_index, 1 << bit_within_word_index
			
 
				+		yield bit_index % bloom_filter.num_bits_m
			
 
				 
			
 
				 
			
 
				 class Bloom_filter:
			
 
				 	'''Probabilistic set membership testing for large sets'''
			
 
				 
			
 
				-	#def __init__(self, ideal_num_elements_n, error_rate_p, probe_offsetter=get_index_bitmask_seed_rnd):
			
 
				+	#def __init__(self, ideal_num_elements_n, error_rate_p, probe_bitnoer=get_bitno_seed_rnd):
			
 
				-	def __init__(self, ideal_num_elements_n, error_rate_p, probe_offsetter=get_index_bitmask_lin_comb):
			
 
				+	def __init__(self, ideal_num_elements_n, error_rate_p, probe_bitnoer=get_bitno_lin_comb):
			
 
				 		if ideal_num_elements_n <= 0:
			
 
				 			raise ValueError('ideal_num_elements_n must be > 0')
			
 
				 		if not (0 < error_rate_p < 1):
			
@@ -107,8 +104,7 @@ class Bloom_filter:
 
				 		real_num_bits_m = numerator / denominator
			
 
				 		self.num_bits_m = int(math.ceil(real_num_bits_m))
			
 
				 
			
 
				-		self.num_words = int((self.num_bits_m + 31) / 32)
			
 
				-		self.array_ = array.array('L', [0]) * self.num_words
			
 
				+		self.array_ = array.array('L', [0]) * ((self.num_bits_m + 31) // 32)
			
 
				 
			
 
				 		# AKA num_offsetters
			
 
				 		# Verified against http://en.wikipedia.org/wiki/Bloom_filter#Probability_of_false_positives
			
@@ -124,7 +120,7 @@ class Bloom_filter:
 
				 #				)
			
 
				 #			sys.exit(1)
			
 
				 
			
 
				-		self.probe_offsetter = probe_offsetter
			
 
				+		self.probe_bitnoer = probe_bitnoer
			
 
				 
			
 
				 	def __repr__(self):
			
 
				 		return 'Bloom_filter(ideal_num_elements_n=%d, error_rate_p=%f, num_bits_m=%d)' % (
			
@@ -135,7 +131,9 @@ class Bloom_filter:
 
				 
			
 
				 	def add(self, key):
			
 
				 		'''Add an element to the filter'''
			
 
				-		for index, mask in self.probe_offsetter(self, key):
			
 
				+		for bitno in self.probe_bitnoer(self, key):
			
 
				+			index, bit_within_word = divmod(bitno, 32)
			
 
				+			mask = 1 << bit_within_word
			
 
				 			self.array_[index] |= mask
			
 
				 
			
 
				 	def __iadd__(self, key):
			
@@ -146,7 +144,7 @@ class Bloom_filter:
 
				 		'''Compare a sort of signature for two bloom filters.  Used in preparation for binary operations'''
			
 
				 		return (self.num_bits_m == bloom_filter.num_bits_m \
			
 
				 			and self.num_probes_k == bloom_filter.num_probes_k \
			
 
				-			and self.probe_offsetter == bloom_filter.probe_offsetter)
			
 
				+			and self.probe_bitnoer == bloom_filter.probe_bitnoer)
			
 
				 
			
 
				 	def union(self, bloom_filter):
			
 
				 		'''Compute the set union of two bloom filters'''
			
@@ -173,5 +171,12 @@ class Bloom_filter:
 
				 		return self
			
 
				 
			
 
				 	def __contains__(self, key):
			
 
				-		return all(self.array_[i] & mask for i, mask in self.probe_offsetter(self, key))
			
 
				+		for bitno in self.probe_bitnoer(self, key):
			
 
				+			wordno, bit_within_word = divmod(bitno, 32)
			
 
				+			mask = 1 << bit_within_word
			
 
				+			if not (self.array_[wordno] & mask):
			
 
				+				return False
			
 
				+		return True
			
 
				+				
			
 
				+		#return all(self.array_[bitno // 32] & (1 << (bitno % 32)) for bitno in self.probe_bitnoer(self, key))