Browse Source

Greatly expanded unit tests, uncovered a problem with error probability

dstromberg 14 years ago
parent
commit
1900ff6240
1 changed files with 90 additions and 38 deletions
  1. 90 38
      test-bloom-filter

+ 90 - 38
test-bloom-filter

@@ -21,42 +21,44 @@ def my_range(maximum):
 		yield value
 		value += 1
 
-def primary_test(order, included, trials, error_rate):
+def test(description, values, trials, error_rate):
 	'''Some quick automatic tests for the bloom filter class'''
 
 	all_good = True
 
-	bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials, error_rate=error_rate)
-	print(repr(bloom_filter))
-	for step in order:
-		if step == 1:
-			for include in included:
-				bloom_filter.add(include)
-
-			include_in_count = sum(include in bloom_filter for include in included)
-			if include_in_count == len(included):
-				# Good
-				pass
-			else:
-				sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, len(included)))
-				all_good = False
-
-		elif step == 2:
-			false_positives = 0
-			for trialno in my_range(trials):
-				if trialno % 10000 == 0:
-					sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
-				dummy = trialno
-				while True:
-					candidate = ''.join(random.sample(string.ascii_letters, 5))
-					# If we accidentally found a real include, try again
-					if candidate in included:
-						continue
-					if candidate in bloom_filter:
-						false_positives += 1
-					break
-		else:
-			raise ValueError('step is not 1 or 2')
+	bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials * 2, error_rate=error_rate)
+	#print(repr(bloom_filter))
+
+	print '\n', description, bloom_filter.num_words, bloom_filter.num_probes
+
+	print 'adding'
+	for include in values.generator():
+		bloom_filter.add(include)
+
+	print 'testing for appropriate positives'''
+	include_in_count = sum(include in bloom_filter for include in values.generator())
+	if include_in_count == values.length():
+		# Good
+		pass
+	else:
+		sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, values.length()))
+		all_good = False
+
+	print 'testing for false positives'''
+	false_positives = 0
+	for trialno in my_range(trials):
+		if trialno % 10000 == 0:
+			sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
+		#dummy = trialno
+		while True:
+			candidate = ''.join(random.sample(CHARACTERS, 5))
+			# If we accidentally found a member, try again
+			if values.within(candidate):
+				continue
+			if candidate in bloom_filter:
+				print 'We erroneously think %s is in the filter' % candidate
+				false_positives += 1
+			break
 
 	actual_error_rate = float(false_positives) / trials
 
@@ -109,6 +111,58 @@ def random_string():
 		list_.append(character)
 	return ''.join(list_)
 
+class Random_content:
+	'''Generated a bunch of random strings in sorted order'''
+
+	random_content = [ random_string() for dummy in range(1000) ]
+
+	def __init__(self):
+		pass
+
+	@staticmethod
+	def generator():
+		'''Generate all values'''
+		for item in Random_content.random_content:
+			yield item
+
+	@staticmethod
+	def within(value):
+		'''Test for membership'''
+		return value in Random_content.random_content
+
+	@staticmethod
+	def length():
+		'''How many members?'''
+		return len(Random_content.random_content)
+
+class Evens:
+	'''Generate a bunch of even numbers'''
+
+	def __init__(self, maximum):
+		self.maximum = maximum
+
+	def generator(self):
+		'''Generate all values'''
+		for value in my_range(self.maximum):
+			if value % 2 == 0:
+				yield str(value)
+
+	def within(self, value):
+		'''Test for membership'''
+		try:
+			int_value = int(value)
+		except ValueError:
+			return False
+
+		if int_value >= 0 and int_value < self.maximum and int_value % 2 == 0:
+			return True
+		else:
+			return False
+
+	def length(self):
+		'''How many members?'''
+		return self.maximum // 2
+
 def and_test():
 	'''Test the & operator'''
 
@@ -139,7 +193,7 @@ def and_test():
 		all_good = False
 
 	return all_good
-	
+
 def or_test():
 	'''Test the | operator'''
 
@@ -183,11 +237,9 @@ def main():
 
 	all_good &= test('random', Random_content(), trials=10000, error_rate=0.1)
 
-	all_good &= primary_test([1, 2], states, trials=10000, error_rate=0.01)
-
-	all_good &= primary_test([1, 2], random_content, trials=10000, error_rate=0.1)
-
-	all_good &= primary_test([2, 1], [ 'a', 'b', 'c'], trials=100, error_rate=0.000001)
+	#for elements in [ 1, 10, 100, 1000 ]:
+	for elements in [ 1, 10, 100, 1000, 10000, 100000, 1000000 ]:
+		all_good &= test('evens %d' % elements, Evens(elements), trials=elements, error_rate=1e-12)
 
 	all_good &= and_test()