Sfoglia il codice sorgente

A little more thorough testing

dstromberg 14 anni fa
parent
commit
5708ab4a6b
1 file modificato con 76 aggiunte e 20 eliminazioni
  1. 76 20
      test-bloom-filter

+ 76 - 20
test-bloom-filter

@@ -5,44 +5,100 @@
 
 '''Unit tests for bloom_filter_mod'''
 
+import sys
 import random
 import string
 
 import bloom_filter_mod
 
-def tests():
-	'''Some quick automatic tests for the bloom filter class'''
+def my_range(maximum):
+	'''A range function with consistent semantics on 2.x and 3.x'''
+	value = 0
+	while True:
+		if value >= maximum:
+			break
+		yield value
+		value += 1
 
-	states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
-		Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
-		Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
-		Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
-		NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
-		Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
-		Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()
+def test(included, trials, error_rate):
+	'''Some quick automatic tests for the bloom filter class'''
 
-	trials = 100000
+	all_good = True
 
-	bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials * 2, error_rate=0.01)
+	bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials * 2, error_rate=error_rate)
 	print(repr(bloom_filter))
-	for state in states:
-		bloom_filter.add(state)
+	for include in included:
+		bloom_filter.add(include)
 
-	states_in_count = sum(state in bloom_filter for state in states)
-	print('%d true positives out of %d trials' % (states_in_count, len(states)))
+	include_in_count = sum(include in bloom_filter for include in included)
+	if include_in_count == len(included):
+		# Good
+		pass
+	else:
+		sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, len(included)))
+		all_good = False
 
 	false_positives = 0
-	for trialno in range(trials):
+	for trialno in my_range(trials):
+		if trialno % 10000 == 0:
+			sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
 		dummy = trialno
 		while True:
 			candidate = ''.join(random.sample(string.ascii_letters, 5))
-			# If we accidentally found a real state, try again
-			if candidate in states:
+			# If we accidentally found a real include, try again
+			if candidate in included:
 				continue
 			if candidate in bloom_filter:
 				false_positives += 1
 			break
-	print('%d true negatives and %d false positives out of %d trials' % (trials - false_positives, false_positives, trials))
 
-tests()
+	#print('%d true negatives and %d false positives out of %d trials' % (trials - false_positives, false_positives, trials))
+	actual_error_rate = float(false_positives) / trials
+
+	if actual_error_rate > error_rate:
+		sys.stderr.write('%s: Too many false positives: actual: %s, expected: %s\n' % (
+			sys.argv[0],
+			actual_error_rate,
+			error_rate,
+			))
+		all_good = False
+
+	return all_good
+
+def random_string():
+	'''Generate a random, 10 character string - for testing purposes'''
+	list_ = []
+	for chrno in range(10):
+		dummy = chrno
+		character = string.ascii_letters[int(random.random() * 26)]
+		list_.append(character)
+	return ''.join(list_)
+
+def main():
+	'''Unit tests for Bloom_filter class'''
+
+	all_good = True
+
+	states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
+		Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
+		Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
+		Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
+		NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
+		Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
+		Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()
+
+	random_content = [ random_string() for dummy in range(1000) ]
+
+	all_good &= test(states, trials=100000, error_rate=0.01)
+
+	all_good &= test(random_content, trials=10000000, error_rate=0.001)
+
+	if all_good:
+		sys.stderr.write('%s: All tests passed\n' % sys.argv[0])
+		sys.exit(0)
+	else:
+		sys.stderr.write('%s: One or more tests failed\n' % sys.argv[0])
+		sys.exit(1)
+
+main()