14 years ago · 1900ff6240
--- a/test-bloom-filter
+++ b/test-bloom-filter
@@ -21,42 +21,44 @@ def my_range(maximum):
 
				 		yield value
			
 
				 		value += 1
			
 
				 
			
 
				-def primary_test(order, included, trials, error_rate):
			
 
				+def test(description, values, trials, error_rate):
			
 
				 	'''Some quick automatic tests for the bloom filter class'''
			
 
				 
			
 
				 	all_good = True
			
 
				 
			
 
				-	bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials, error_rate=error_rate)
			
 
				-	print(repr(bloom_filter))
			
 
				-	for step in order:
			
 
				-		if step == 1:
			
 
				-			for include in included:
			
 
				-				bloom_filter.add(include)
			
 
				-
			
 
				-			include_in_count = sum(include in bloom_filter for include in included)
			
 
				-			if include_in_count == len(included):
			
 
				-				# Good
			
 
				-				pass
			
 
				-			else:
			
 
				-				sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, len(included)))
			
 
				-				all_good = False
			
 
				-
			
 
				-		elif step == 2:
			
 
				-			false_positives = 0
			
 
				-			for trialno in my_range(trials):
			
 
				-				if trialno % 10000 == 0:
			
 
				-					sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
			
 
				-				dummy = trialno
			
 
				-				while True:
			
 
				-					candidate = ''.join(random.sample(string.ascii_letters, 5))
			
 
				-					# If we accidentally found a real include, try again
			
 
				-					if candidate in included:
			
 
				-						continue
			
 
				-					if candidate in bloom_filter:
			
 
				-						false_positives += 1
			
 
				-					break
			
 
				-		else:
			
 
				-			raise ValueError('step is not 1 or 2')
			
 
				+	bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials * 2, error_rate=error_rate)
			
 
				+	#print(repr(bloom_filter))
			
 
				+
			
 
				+	print '\n', description, bloom_filter.num_words, bloom_filter.num_probes
			
 
				+
			
 
				+	print 'adding'
			
 
				+	for include in values.generator():
			
 
				+		bloom_filter.add(include)
			
 
				+
			
 
				+	print 'testing for appropriate positives'''
			
 
				+	include_in_count = sum(include in bloom_filter for include in values.generator())
			
 
				+	if include_in_count == values.length():
			
 
				+		# Good
			
 
				+		pass
			
 
				+	else:
			
 
				+		sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, values.length()))
			
 
				+		all_good = False
			
 
				+
			
 
				+	print 'testing for false positives'''
			
 
				+	false_positives = 0
			
 
				+	for trialno in my_range(trials):
			
 
				+		if trialno % 10000 == 0:
			
 
				+			sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
			
 
				+		#dummy = trialno
			
 
				+		while True:
			
 
				+			candidate = ''.join(random.sample(CHARACTERS, 5))
			
 
				+			# If we accidentally found a member, try again
			
 
				+			if values.within(candidate):
			
 
				+				continue
			
 
				+			if candidate in bloom_filter:
			
 
				+				print 'We erroneously think %s is in the filter' % candidate
			
 
				+				false_positives += 1
			
 
				+			break
			
 
				 
			
 
				 	actual_error_rate = float(false_positives) / trials
			
 
				 
			
@@ -109,6 +111,58 @@ def random_string():
 
				 		list_.append(character)
			
 
				 	return ''.join(list_)
			
 
				 
			
 
				+class Random_content:
			
 
				+	'''Generated a bunch of random strings in sorted order'''
			
 
				+
			
 
				+	random_content = [ random_string() for dummy in range(1000) ]
			
 
				+
			
 
				+	def __init__(self):
			
 
				+		pass
			
 
				+
			
 
				+	@staticmethod
			
 
				+	def generator():
			
 
				+		'''Generate all values'''
			
 
				+		for item in Random_content.random_content:
			
 
				+			yield item
			
 
				+
			
 
				+	@staticmethod
			
 
				+	def within(value):
			
 
				+		'''Test for membership'''
			
 
				+		return value in Random_content.random_content
			
 
				+
			
 
				+	@staticmethod
			
 
				+	def length():
			
 
				+		'''How many members?'''
			
 
				+		return len(Random_content.random_content)
			
 
				+
			
 
				+class Evens:
			
 
				+	'''Generate a bunch of even numbers'''
			
 
				+
			
 
				+	def __init__(self, maximum):
			
 
				+		self.maximum = maximum
			
 
				+
			
 
				+	def generator(self):
			
 
				+		'''Generate all values'''
			
 
				+		for value in my_range(self.maximum):
			
 
				+			if value % 2 == 0:
			
 
				+				yield str(value)
			
 
				+
			
 
				+	def within(self, value):
			
 
				+		'''Test for membership'''
			
 
				+		try:
			
 
				+			int_value = int(value)
			
 
				+		except ValueError:
			
 
				+			return False
			
 
				+
			
 
				+		if int_value >= 0 and int_value < self.maximum and int_value % 2 == 0:
			
 
				+			return True
			
 
				+		else:
			
 
				+			return False
			
 
				+
			
 
				+	def length(self):
			
 
				+		'''How many members?'''
			
 
				+		return self.maximum // 2
			
 
				+
			
 
				 def and_test():
			
 
				 	'''Test the & operator'''
			
 
				 
			
@@ -139,7 +193,7 @@ def and_test():
 
				 		all_good = False
			
 
				 
			
 
				 	return all_good
			
 
				-	
			
 
				+
			
 
				 def or_test():
			
 
				 	'''Test the | operator'''
			
 
				 
			
@@ -183,11 +237,9 @@ def main():
 
				 
			
 
				 	all_good &= test('random', Random_content(), trials=10000, error_rate=0.1)
			
 
				 
			
 
				-	all_good &= primary_test([1, 2], states, trials=10000, error_rate=0.01)
			
 
				-
			
 
				-	all_good &= primary_test([1, 2], random_content, trials=10000, error_rate=0.1)
			
 
				-
			
 
				-	all_good &= primary_test([2, 1], [ 'a', 'b', 'c'], trials=100, error_rate=0.000001)
			
 
				+	#for elements in [ 1, 10, 100, 1000 ]:
			
 
				+	for elements in [ 1, 10, 100, 1000, 10000, 100000, 1000000 ]:
			
 
				+		all_good &= test('evens %d' % elements, Evens(elements), trials=elements, error_rate=1e-12)
			
 
				 
			
 
				 	all_good &= and_test()