|
@@ -21,42 +21,44 @@ def my_range(maximum):
|
|
|
yield value
|
|
|
value += 1
|
|
|
|
|
|
-def primary_test(order, included, trials, error_rate):
|
|
|
+def test(description, values, trials, error_rate):
|
|
|
'''Some quick automatic tests for the bloom filter class'''
|
|
|
|
|
|
all_good = True
|
|
|
|
|
|
- bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials, error_rate=error_rate)
|
|
|
- print(repr(bloom_filter))
|
|
|
- for step in order:
|
|
|
- if step == 1:
|
|
|
- for include in included:
|
|
|
- bloom_filter.add(include)
|
|
|
-
|
|
|
- include_in_count = sum(include in bloom_filter for include in included)
|
|
|
- if include_in_count == len(included):
|
|
|
- # Good
|
|
|
- pass
|
|
|
- else:
|
|
|
- sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, len(included)))
|
|
|
- all_good = False
|
|
|
-
|
|
|
- elif step == 2:
|
|
|
- false_positives = 0
|
|
|
- for trialno in my_range(trials):
|
|
|
- if trialno % 10000 == 0:
|
|
|
- sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
|
|
|
- dummy = trialno
|
|
|
- while True:
|
|
|
- candidate = ''.join(random.sample(string.ascii_letters, 5))
|
|
|
- # If we accidentally found a real include, try again
|
|
|
- if candidate in included:
|
|
|
- continue
|
|
|
- if candidate in bloom_filter:
|
|
|
- false_positives += 1
|
|
|
- break
|
|
|
- else:
|
|
|
- raise ValueError('step is not 1 or 2')
|
|
|
+ bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials * 2, error_rate=error_rate)
|
|
|
+ #print(repr(bloom_filter))
|
|
|
+
|
|
|
+ print '\n', description, bloom_filter.num_words, bloom_filter.num_probes
|
|
|
+
|
|
|
+ print 'adding'
|
|
|
+ for include in values.generator():
|
|
|
+ bloom_filter.add(include)
|
|
|
+
|
|
|
+ print 'testing for appropriate positives'''
|
|
|
+ include_in_count = sum(include in bloom_filter for include in values.generator())
|
|
|
+ if include_in_count == values.length():
|
|
|
+ # Good
|
|
|
+ pass
|
|
|
+ else:
|
|
|
+ sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, values.length()))
|
|
|
+ all_good = False
|
|
|
+
|
|
|
+ print 'testing for false positives'''
|
|
|
+ false_positives = 0
|
|
|
+ for trialno in my_range(trials):
|
|
|
+ if trialno % 10000 == 0:
|
|
|
+ sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
|
|
|
+ #dummy = trialno
|
|
|
+ while True:
|
|
|
+ candidate = ''.join(random.sample(CHARACTERS, 5))
|
|
|
+ # If we accidentally found a member, try again
|
|
|
+ if values.within(candidate):
|
|
|
+ continue
|
|
|
+ if candidate in bloom_filter:
|
|
|
+ print 'We erroneously think %s is in the filter' % candidate
|
|
|
+ false_positives += 1
|
|
|
+ break
|
|
|
|
|
|
actual_error_rate = float(false_positives) / trials
|
|
|
|
|
@@ -109,6 +111,58 @@ def random_string():
|
|
|
list_.append(character)
|
|
|
return ''.join(list_)
|
|
|
|
|
|
class Random_content:
    '''A fixed pool of 1000 random strings shared by every instance.

    The pool is built once, when the class body is executed, by calling
    random_string() 1000 times.  The strings are kept in the order they were
    generated (nothing here sorts them) and may contain duplicates;
    generator() and length() both count duplicates, so the two always agree.
    '''

    random_content = [random_string() for dummy in range(1000)]

    # Hashed copy of the pool, so within() does not have to scan the whole
    # list for every candidate it is asked about.
    _members = frozenset(random_content)

    def __init__(self):
        pass

    @staticmethod
    def generator():
        '''Yield every string in the pool, in generation order.'''
        for item in Random_content.random_content:
            yield item

    @staticmethod
    def within(value):
        '''Return True if value equals one of the strings in the pool.'''
        try:
            return value in Random_content._members
        except TypeError:
            # Unhashable values cannot be looked up in the frozenset; fall
            # back to the list scan so they are still answered, not raised.
            return value in Random_content.random_content

    @staticmethod
    def length():
        '''Return the number of strings in the pool, duplicates included.'''
        return len(Random_content.random_content)
|
|
|
+
|
|
|
class Evens:
    '''The even integers in [0, maximum), represented as decimal strings.'''

    def __init__(self, maximum):
        '''Remember maximum, the exclusive upper bound of the set.'''
        self.maximum = maximum

    def generator(self):
        '''Yield str(value) for each even value from my_range(self.maximum).'''
        # NOTE(review): assumes my_range(n) counts 0, 1, ..., n - 1, which is
        # what within() and length() below expect - confirm against my_range.
        for value in my_range(self.maximum):
            if value % 2 == 0:
                yield str(value)

    def within(self, value):
        '''Test for membership.

        value is converted with int(); anything int() rejects with a
        ValueError is reported as a non-member.
        '''
        try:
            int_value = int(value)
        except ValueError:
            return False

        return 0 <= int_value < self.maximum and int_value % 2 == 0

    def length(self):
        '''How many members?

        Counts the even integers in [0, maximum).  Zero is a member, so an
        odd maximum rounds up: maximum == 1 has the single member 0, which
        plain maximum // 2 would miss.  A non-positive maximum has none.
        '''
        return max(0, (self.maximum + 1) // 2)
|
|
|
+
|
|
|
def and_test():
|
|
|
'''Test the & operator'''
|
|
|
|
|
@@ -139,7 +193,7 @@ def and_test():
|
|
|
all_good = False
|
|
|
|
|
|
return all_good
|
|
|
-
|
|
|
+
|
|
|
def or_test():
|
|
|
'''Test the | operator'''
|
|
|
|
|
@@ -183,11 +237,9 @@ def main():
|
|
|
|
|
|
all_good &= test('random', Random_content(), trials=10000, error_rate=0.1)
|
|
|
|
|
|
- all_good &= primary_test([1, 2], states, trials=10000, error_rate=0.01)
|
|
|
-
|
|
|
- all_good &= primary_test([1, 2], random_content, trials=10000, error_rate=0.1)
|
|
|
-
|
|
|
- all_good &= primary_test([2, 1], [ 'a', 'b', 'c'], trials=100, error_rate=0.000001)
|
|
|
+ #for elements in [ 1, 10, 100, 1000 ]:
|
|
|
+ for elements in [ 1, 10, 100, 1000, 10000, 100000, 1000000 ]:
|
|
|
+ all_good &= test('evens %d' % elements, Evens(elements), trials=elements, error_rate=1e-12)
|
|
|
|
|
|
all_good &= and_test()
|
|
|
|