|
@@ -1,9 +1,5 @@
|
|
|
#!/usr/bin/python
|
|
|
|
|
|
-# pylint: disable=W0402,W0404
|
|
|
-# W0402: We want the deprecated string module, for a use that isn't deprecated
|
|
|
-# W0404: We need to import anydbm or dbm
|
|
|
-
|
|
|
'''Unit tests for bloom_filter_mod'''
|
|
|
|
|
|
#mport os
|
|
@@ -11,9 +7,9 @@ import sys
|
|
|
import math
|
|
|
import time
|
|
|
try:
|
|
|
- import anydbm
|
|
|
+ import anydbm
|
|
|
except ImportError:
|
|
|
- import dbm as anydbm
|
|
|
+ import dbm as anydbm
|
|
|
|
|
|
import random
|
|
|
|
|
@@ -22,309 +18,309 @@ import bloom_filter_mod
|
|
|
CHARACTERS = 'abcdefghijklmnopqrstuvwxyz1234567890'
|
|
|
|
|
|
def my_range(maximum):
|
|
|
- '''A range function with consistent semantics on 2.x and 3.x'''
|
|
|
- value = 0
|
|
|
- while True:
|
|
|
- if value >= maximum:
|
|
|
- break
|
|
|
- yield value
|
|
|
- value += 1
|
|
|
+ '''A range function with consistent semantics on 2.x and 3.x'''
|
|
|
+ value = 0
|
|
|
+ while True:
|
|
|
+ if value >= maximum:
|
|
|
+ break
|
|
|
+ yield value
|
|
|
+ value += 1
|
|
|
|
|
|
def test(description, values, trials, error_rate, probe_bitnoer=bloom_filter_mod.get_bitno_lin_comb, filename=None):
|
|
|
- # pylint: disable=R0913,R0914
|
|
|
- # R0913: We want a few arguments
|
|
|
- # R0914: We want some local variables too. This is just test code.
|
|
|
- '''Some quick automatic tests for the bloom filter class'''
|
|
|
-
|
|
|
- all_good = True
|
|
|
-
|
|
|
- divisor = 100000
|
|
|
-
|
|
|
- bloom_filter = bloom_filter_mod.Bloom_filter(
|
|
|
- ideal_num_elements_n=trials * 2,
|
|
|
- error_rate_p=error_rate,
|
|
|
- probe_bitnoer=probe_bitnoer,
|
|
|
- filename=filename,
|
|
|
- start_fresh=True,
|
|
|
- )
|
|
|
-
|
|
|
- sys.stdout.write('\ndescription: %s num_bits_m: %s num_probes_k: %s\n' %
|
|
|
- (description, bloom_filter.num_bits_m, bloom_filter.num_probes_k))
|
|
|
-
|
|
|
- print('starting to add values to an empty bloom filter')
|
|
|
- for valueno, value in enumerate(values.generator()):
|
|
|
- reverse_valueno = values.length() - valueno
|
|
|
- if reverse_valueno % divisor == 0:
|
|
|
- print('adding valueno %d' % reverse_valueno)
|
|
|
- bloom_filter.add(value)
|
|
|
-
|
|
|
- print('testing all known members')
|
|
|
- include_in_count = sum(include in bloom_filter for include in values.generator())
|
|
|
- if include_in_count == values.length():
|
|
|
- # Good
|
|
|
- pass
|
|
|
- else:
|
|
|
- sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, values.length()))
|
|
|
- all_good = False
|
|
|
-
|
|
|
- print('testing random non-members')
|
|
|
- false_positives = 0
|
|
|
- for trialno in my_range(trials):
|
|
|
- if trialno % divisor == 0:
|
|
|
- sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
|
|
|
- while True:
|
|
|
- candidate = ''.join(random.sample(CHARACTERS, 5))
|
|
|
- # If we accidentally found a member, try again
|
|
|
- if values.within(candidate):
|
|
|
- continue
|
|
|
- if candidate in bloom_filter:
|
|
|
- #print 'We erroneously think %s is in the filter' % candidate
|
|
|
- false_positives += 1
|
|
|
- break
|
|
|
-
|
|
|
- actual_error_rate = float(false_positives) / trials
|
|
|
-
|
|
|
- if actual_error_rate > error_rate:
|
|
|
- sys.stderr.write('%s: Too many false positives: actual: %s, expected: %s\n' % (
|
|
|
- sys.argv[0],
|
|
|
- actual_error_rate,
|
|
|
- error_rate,
|
|
|
- ))
|
|
|
- all_good = False
|
|
|
-
|
|
|
- return all_good
|
|
|
+ # pylint: disable=R0913,R0914
|
|
|
+ # R0913: We want a few arguments
|
|
|
+ # R0914: We want some local variables too. This is just test code.
|
|
|
+ '''Some quick automatic tests for the bloom filter class'''
|
|
|
+
|
|
|
+ all_good = True
|
|
|
+
|
|
|
+ divisor = 100000
|
|
|
+
|
|
|
+ bloom_filter = bloom_filter_mod.Bloom_filter(
|
|
|
+ ideal_num_elements_n=trials * 2,
|
|
|
+ error_rate_p=error_rate,
|
|
|
+ probe_bitnoer=probe_bitnoer,
|
|
|
+ filename=filename,
|
|
|
+ start_fresh=True,
|
|
|
+ )
|
|
|
+
|
|
|
+ sys.stdout.write('\ndescription: %s num_bits_m: %s num_probes_k: %s\n' %
|
|
|
+ (description, bloom_filter.num_bits_m, bloom_filter.num_probes_k))
|
|
|
+
|
|
|
+ print('starting to add values to an empty bloom filter')
|
|
|
+ for valueno, value in enumerate(values.generator()):
|
|
|
+ reverse_valueno = values.length() - valueno
|
|
|
+ if reverse_valueno % divisor == 0:
|
|
|
+ print('adding valueno %d' % reverse_valueno)
|
|
|
+ bloom_filter.add(value)
|
|
|
+
|
|
|
+ print('testing all known members')
|
|
|
+ include_in_count = sum(include in bloom_filter for include in values.generator())
|
|
|
+ if include_in_count == values.length():
|
|
|
+ # Good
|
|
|
+ pass
|
|
|
+ else:
|
|
|
+ sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, values.length()))
|
|
|
+ all_good = False
|
|
|
+
|
|
|
+ print('testing random non-members')
|
|
|
+ false_positives = 0
|
|
|
+ for trialno in my_range(trials):
|
|
|
+ if trialno % divisor == 0:
|
|
|
+ sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
|
|
|
+ while True:
|
|
|
+ candidate = ''.join(random.sample(CHARACTERS, 5))
|
|
|
+ # If we accidentally found a member, try again
|
|
|
+ if values.within(candidate):
|
|
|
+ continue
|
|
|
+ if candidate in bloom_filter:
|
|
|
+ #print 'We erroneously think %s is in the filter' % candidate
|
|
|
+ false_positives += 1
|
|
|
+ break
|
|
|
+
|
|
|
+ actual_error_rate = float(false_positives) / trials
|
|
|
+
|
|
|
+ if actual_error_rate > error_rate:
|
|
|
+ sys.stderr.write('%s: Too many false positives: actual: %s, expected: %s\n' % (
|
|
|
+ sys.argv[0],
|
|
|
+ actual_error_rate,
|
|
|
+ error_rate,
|
|
|
+ ))
|
|
|
+ all_good = False
|
|
|
+
|
|
|
+ return all_good
|
|
|
|
|
|
class States:
|
|
|
- '''Generate the USA's state names'''
|
|
|
-
|
|
|
- def __init__(self):
|
|
|
- pass
|
|
|
-
|
|
|
- states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
|
|
|
- Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
|
|
|
- Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
|
|
|
- Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
|
|
|
- NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
|
|
|
- Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
|
|
|
- Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()
|
|
|
-
|
|
|
- @staticmethod
|
|
|
- def generator():
|
|
|
- '''Generate the states'''
|
|
|
- for state in States.states:
|
|
|
- yield state
|
|
|
-
|
|
|
- @staticmethod
|
|
|
- def within(value):
|
|
|
- '''Is the vaoue in our list of states?'''
|
|
|
- return value in States.states
|
|
|
-
|
|
|
- @staticmethod
|
|
|
- def length():
|
|
|
- '''What is the length of our contained values?'''
|
|
|
- return len(States.states)
|
|
|
+ '''Generate the USA's state names'''
|
|
|
+
|
|
|
+ def __init__(self):
|
|
|
+ pass
|
|
|
+
|
|
|
+ states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
|
|
|
+ Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
|
|
|
+ Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
|
|
|
+ Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
|
|
|
+ NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
|
|
|
+ Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
|
|
|
+ Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def generator():
|
|
|
+ '''Generate the states'''
|
|
|
+ for state in States.states:
|
|
|
+ yield state
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def within(value):
|
|
|
+ '''Is the vaoue in our list of states?'''
|
|
|
+ return value in States.states
|
|
|
+
|
|
|
+ @staticmethod
|
|
|
+ def length():
|
|
|
+ '''What is the length of our contained values?'''
|
|
|
+ return len(States.states)
|
|
|
|
|
|
def random_string():
|
|
|
- '''Generate a random, 10 character string - for testing purposes'''
|
|
|
- list_ = []
|
|
|
- for chrno in range(10):
|
|
|
- dummy = chrno
|
|
|
- character = CHARACTERS[int(random.random() * len(CHARACTERS))]
|
|
|
- list_.append(character)
|
|
|
- return ''.join(list_)
|
|
|
+ '''Generate a random, 10 character string - for testing purposes'''
|
|
|
+ list_ = []
|
|
|
+ for chrno in range(10):
|
|
|
+ dummy = chrno
|
|
|
+ character = CHARACTERS[int(random.random() * len(CHARACTERS))]
|
|
|
+ list_.append(character)
|
|
|
+ return ''.join(list_)
|
|
|
|
|
|
class Random_content:
|
|
|
- '''Generated a bunch of random strings in sorted order'''
|
|
|
+ '''Generated a bunch of random strings in sorted order'''
|
|
|
|
|
|
- random_content = [ random_string() for dummy in range(1000) ]
|
|
|
+ random_content = [ random_string() for dummy in range(1000) ]
|
|
|
|
|
|
- def __init__(self):
|
|
|
- pass
|
|
|
+ def __init__(self):
|
|
|
+ pass
|
|
|
|
|
|
- @staticmethod
|
|
|
- def generator():
|
|
|
- '''Generate all values'''
|
|
|
- for item in Random_content.random_content:
|
|
|
- yield item
|
|
|
+ @staticmethod
|
|
|
+ def generator():
|
|
|
+ '''Generate all values'''
|
|
|
+ for item in Random_content.random_content:
|
|
|
+ yield item
|
|
|
|
|
|
- @staticmethod
|
|
|
- def within(value):
|
|
|
- '''Test for membership'''
|
|
|
- return value in Random_content.random_content
|
|
|
+ @staticmethod
|
|
|
+ def within(value):
|
|
|
+ '''Test for membership'''
|
|
|
+ return value in Random_content.random_content
|
|
|
|
|
|
- @staticmethod
|
|
|
- def length():
|
|
|
- '''How many members?'''
|
|
|
- return len(Random_content.random_content)
|
|
|
+ @staticmethod
|
|
|
+ def length():
|
|
|
+ '''How many members?'''
|
|
|
+ return len(Random_content.random_content)
|
|
|
|
|
|
class Evens:
|
|
|
- '''Generate a bunch of even numbers'''
|
|
|
+ '''Generate a bunch of even numbers'''
|
|
|
|
|
|
- def __init__(self, maximum):
|
|
|
- self.maximum = maximum
|
|
|
+ def __init__(self, maximum):
|
|
|
+ self.maximum = maximum
|
|
|
|
|
|
- def generator(self):
|
|
|
- '''Generate all values'''
|
|
|
- for value in my_range(self.maximum):
|
|
|
- if value % 2 == 0:
|
|
|
- yield str(value)
|
|
|
+ def generator(self):
|
|
|
+ '''Generate all values'''
|
|
|
+ for value in my_range(self.maximum):
|
|
|
+ if value % 2 == 0:
|
|
|
+ yield str(value)
|
|
|
|
|
|
- def within(self, value):
|
|
|
- '''Test for membership'''
|
|
|
- try:
|
|
|
- int_value = int(value)
|
|
|
- except ValueError:
|
|
|
- return False
|
|
|
+ def within(self, value):
|
|
|
+ '''Test for membership'''
|
|
|
+ try:
|
|
|
+ int_value = int(value)
|
|
|
+ except ValueError:
|
|
|
+ return False
|
|
|
|
|
|
- if int_value >= 0 and int_value < self.maximum and int_value % 2 == 0:
|
|
|
- return True
|
|
|
- else:
|
|
|
- return False
|
|
|
+ if int_value >= 0 and int_value < self.maximum and int_value % 2 == 0:
|
|
|
+ return True
|
|
|
+ else:
|
|
|
+ return False
|
|
|
|
|
|
- def length(self):
|
|
|
- '''How many members?'''
|
|
|
- return int(math.ceil(self.maximum / 2.0))
|
|
|
+ def length(self):
|
|
|
+ '''How many members?'''
|
|
|
+ return int(math.ceil(self.maximum / 2.0))
|
|
|
|
|
|
def and_test():
|
|
|
- '''Test the & operator'''
|
|
|
+ '''Test the & operator'''
|
|
|
|
|
|
- all_good = True
|
|
|
+ all_good = True
|
|
|
|
|
|
- abc = bloom_filter_mod.Bloom_filter(ideal_num_elements_n=100, error_rate_p=0.01)
|
|
|
- for character in [ 'a', 'b', 'c' ]:
|
|
|
- abc += character
|
|
|
+ abc = bloom_filter_mod.Bloom_filter(ideal_num_elements_n=100, error_rate_p=0.01)
|
|
|
+ for character in [ 'a', 'b', 'c' ]:
|
|
|
+ abc += character
|
|
|
|
|
|
- bcd = bloom_filter_mod.Bloom_filter(ideal_num_elements_n=100, error_rate_p=0.01)
|
|
|
- for character in [ 'b', 'c', 'd' ]:
|
|
|
- bcd += character
|
|
|
+ bcd = bloom_filter_mod.Bloom_filter(ideal_num_elements_n=100, error_rate_p=0.01)
|
|
|
+ for character in [ 'b', 'c', 'd' ]:
|
|
|
+ bcd += character
|
|
|
|
|
|
- abc_and_bcd = abc
|
|
|
- abc_and_bcd &= bcd
|
|
|
+ abc_and_bcd = abc
|
|
|
+ abc_and_bcd &= bcd
|
|
|
|
|
|
- if 'a' in abc_and_bcd:
|
|
|
- sys.stderr.write('a in abc_and_bcd, but should not be')
|
|
|
- all_good = False
|
|
|
- if not 'b' in abc_and_bcd:
|
|
|
- sys.stderr.write('b not in abc_and_bcd, but should be')
|
|
|
- all_good = False
|
|
|
- if not 'c' in abc_and_bcd:
|
|
|
- sys.stderr.write('c not in abc_and_bcd, but should be')
|
|
|
- all_good = False
|
|
|
- if 'd' in abc_and_bcd:
|
|
|
- sys.stderr.write('d in abc_and_bcd, but should not be')
|
|
|
- all_good = False
|
|
|
+ if 'a' in abc_and_bcd:
|
|
|
+ sys.stderr.write('a in abc_and_bcd, but should not be')
|
|
|
+ all_good = False
|
|
|
+ if not 'b' in abc_and_bcd:
|
|
|
+ sys.stderr.write('b not in abc_and_bcd, but should be')
|
|
|
+ all_good = False
|
|
|
+ if not 'c' in abc_and_bcd:
|
|
|
+ sys.stderr.write('c not in abc_and_bcd, but should be')
|
|
|
+ all_good = False
|
|
|
+ if 'd' in abc_and_bcd:
|
|
|
+ sys.stderr.write('d in abc_and_bcd, but should not be')
|
|
|
+ all_good = False
|
|
|
|
|
|
- return all_good
|
|
|
+ return all_good
|
|
|
|
|
|
def or_test():
|
|
|
- '''Test the | operator'''
|
|
|
-
|
|
|
- all_good = True
|
|
|
-
|
|
|
- abc = bloom_filter_mod.Bloom_filter(ideal_num_elements_n=100, error_rate_p=0.01)
|
|
|
- for character in [ 'a', 'b', 'c' ]:
|
|
|
- abc += character
|
|
|
-
|
|
|
- bcd = bloom_filter_mod.Bloom_filter(ideal_num_elements_n=100, error_rate_p=0.01)
|
|
|
- for character in [ 'b', 'c', 'd' ]:
|
|
|
- bcd += character
|
|
|
-
|
|
|
- abc_and_bcd = abc
|
|
|
- abc_and_bcd |= bcd
|
|
|
-
|
|
|
- if not 'a' in abc_and_bcd:
|
|
|
- sys.stderr.write('a not in abc_and_bcd, but should be')
|
|
|
- all_good = False
|
|
|
- if not 'b' in abc_and_bcd:
|
|
|
- sys.stderr.write('b not in abc_and_bcd, but should be')
|
|
|
- all_good = False
|
|
|
- if not 'c' in abc_and_bcd:
|
|
|
- sys.stderr.write('c not in abc_and_bcd, but should be')
|
|
|
- all_good = False
|
|
|
- if not 'd' in abc_and_bcd:
|
|
|
- sys.stderr.write('d not in abc_and_bcd, but should be')
|
|
|
- all_good = False
|
|
|
- if 'e' in abc_and_bcd:
|
|
|
- sys.stderr.write('e in abc_and_bcd, but should be')
|
|
|
- all_good = False
|
|
|
-
|
|
|
- return all_good
|
|
|
+ '''Test the | operator'''
|
|
|
+
|
|
|
+ all_good = True
|
|
|
+
|
|
|
+ abc = bloom_filter_mod.Bloom_filter(ideal_num_elements_n=100, error_rate_p=0.01)
|
|
|
+ for character in [ 'a', 'b', 'c' ]:
|
|
|
+ abc += character
|
|
|
+
|
|
|
+ bcd = bloom_filter_mod.Bloom_filter(ideal_num_elements_n=100, error_rate_p=0.01)
|
|
|
+ for character in [ 'b', 'c', 'd' ]:
|
|
|
+ bcd += character
|
|
|
+
|
|
|
+ abc_and_bcd = abc
|
|
|
+ abc_and_bcd |= bcd
|
|
|
+
|
|
|
+ if not 'a' in abc_and_bcd:
|
|
|
+ sys.stderr.write('a not in abc_and_bcd, but should be')
|
|
|
+ all_good = False
|
|
|
+ if not 'b' in abc_and_bcd:
|
|
|
+ sys.stderr.write('b not in abc_and_bcd, but should be')
|
|
|
+ all_good = False
|
|
|
+ if not 'c' in abc_and_bcd:
|
|
|
+ sys.stderr.write('c not in abc_and_bcd, but should be')
|
|
|
+ all_good = False
|
|
|
+ if not 'd' in abc_and_bcd:
|
|
|
+ sys.stderr.write('d not in abc_and_bcd, but should be')
|
|
|
+ all_good = False
|
|
|
+ if 'e' in abc_and_bcd:
|
|
|
+ sys.stderr.write('e in abc_and_bcd, but should be')
|
|
|
+ all_good = False
|
|
|
+
|
|
|
+ return all_good
|
|
|
|
|
|
def give_description(filename):
|
|
|
- '''Return a description of the filename type - could be array, file or hybrid'''
|
|
|
- if filename is None:
|
|
|
- return 'array'
|
|
|
- elif isinstance(filename, tuple):
|
|
|
- if filename[1] == -1:
|
|
|
- return 'mmap'
|
|
|
- else:
|
|
|
- return 'hybrid'
|
|
|
- else:
|
|
|
- return 'seek'
|
|
|
+ '''Return a description of the filename type - could be array, file or hybrid'''
|
|
|
+ if filename is None:
|
|
|
+ return 'array'
|
|
|
+ elif isinstance(filename, tuple):
|
|
|
+ if filename[1] == -1:
|
|
|
+ return 'mmap'
|
|
|
+ else:
|
|
|
+ return 'hybrid'
|
|
|
+ else:
|
|
|
+ return 'seek'
|
|
|
|
|
|
def main():
|
|
|
- '''Unit tests for Bloom_filter class'''
|
|
|
-
|
|
|
- if sys.argv[1:] == [ '--performance-test' ]:
|
|
|
- performance_test = True
|
|
|
- else:
|
|
|
- performance_test = False
|
|
|
-
|
|
|
- all_good = True
|
|
|
-
|
|
|
- all_good &= test('states', States(), trials=100000, error_rate=0.01)
|
|
|
-
|
|
|
- all_good &= test('random', Random_content(), trials=10000, error_rate=0.1)
|
|
|
- all_good &= test('random', Random_content(), trials=10000, error_rate=0.1, probe_bitnoer=bloom_filter_mod.get_bitno_seed_rnd)
|
|
|
-
|
|
|
- filename = 'bloom-filter-rm-me'
|
|
|
- all_good &= test('random', Random_content(), trials=10000, error_rate=0.1, filename=filename)
|
|
|
-
|
|
|
- all_good &= and_test()
|
|
|
-
|
|
|
- all_good &= or_test()
|
|
|
-
|
|
|
- if performance_test:
|
|
|
- sqrt_of_10 = math.sqrt(10)
|
|
|
- #for exponent in range(5): # this is a lot, but probably not unreasonable
|
|
|
- for exponent in range(19): # this is a lot, but probably not unreasonable
|
|
|
- elements = int(sqrt_of_10 ** exponent + 0.5)
|
|
|
- for filename in [ None, 'bloom-filter-rm-me', ('bloom-filter-rm-me', 768 * 2**20), ('bloom-filter-rm-me', -1) ]:
|
|
|
- description = give_description(filename)
|
|
|
- key = '%s %s' % (description, elements)
|
|
|
- database = anydbm.open('performance-numbers', 'c')
|
|
|
- if key in database:
|
|
|
- database.close()
|
|
|
- continue
|
|
|
- if elements >= 100000000 and description == 'seek':
|
|
|
- continue
|
|
|
- if elements >= 100000000 and description == 'mmap':
|
|
|
- continue
|
|
|
- if elements >= 1000000000 and description == 'array':
|
|
|
- continue
|
|
|
- time0 = time.time()
|
|
|
- all_good &= test(
|
|
|
- 'evens %s elements: %d' % (give_description(filename), elements),
|
|
|
- Evens(elements),
|
|
|
- trials=elements,
|
|
|
- error_rate=1e-2,
|
|
|
- filename=filename,
|
|
|
- )
|
|
|
- time1 = time.time()
|
|
|
- delta_t = time1 - time0
|
|
|
- #file_ = open('%s.txt' % description, 'a')
|
|
|
- #file_.write('%d %f\n' % (elements, delta_t))
|
|
|
- #file_.close()
|
|
|
- database = anydbm.open('performance-numbers', 'c')
|
|
|
- database[key] = '%f' % delta_t
|
|
|
- database.close()
|
|
|
-
|
|
|
- if all_good:
|
|
|
- sys.stderr.write('%s: All tests passed\n' % sys.argv[0])
|
|
|
- sys.exit(0)
|
|
|
- else:
|
|
|
- sys.stderr.write('%s: One or more tests failed\n' % sys.argv[0])
|
|
|
- sys.exit(1)
|
|
|
+ '''Unit tests for Bloom_filter class'''
|
|
|
+
|
|
|
+ if sys.argv[1:] == [ '--performance-test' ]:
|
|
|
+ performance_test = True
|
|
|
+ else:
|
|
|
+ performance_test = False
|
|
|
+
|
|
|
+ all_good = True
|
|
|
+
|
|
|
+ all_good &= test('states', States(), trials=100000, error_rate=0.01)
|
|
|
+
|
|
|
+ all_good &= test('random', Random_content(), trials=10000, error_rate=0.1)
|
|
|
+ all_good &= test('random', Random_content(), trials=10000, error_rate=0.1, probe_bitnoer=bloom_filter_mod.get_bitno_seed_rnd)
|
|
|
+
|
|
|
+ filename = 'bloom-filter-rm-me'
|
|
|
+ all_good &= test('random', Random_content(), trials=10000, error_rate=0.1, filename=filename)
|
|
|
+
|
|
|
+ all_good &= and_test()
|
|
|
+
|
|
|
+ all_good &= or_test()
|
|
|
+
|
|
|
+ if performance_test:
|
|
|
+ sqrt_of_10 = math.sqrt(10)
|
|
|
+ #for exponent in range(5): # this is a lot, but probably not unreasonable
|
|
|
+ for exponent in range(19): # this is a lot, but probably not unreasonable
|
|
|
+ elements = int(sqrt_of_10 ** exponent + 0.5)
|
|
|
+ for filename in [ None, 'bloom-filter-rm-me', ('bloom-filter-rm-me', 768 * 2**20), ('bloom-filter-rm-me', -1) ]:
|
|
|
+ description = give_description(filename)
|
|
|
+ key = '%s %s' % (description, elements)
|
|
|
+ database = anydbm.open('performance-numbers', 'c')
|
|
|
+ if key in database:
|
|
|
+ database.close()
|
|
|
+ continue
|
|
|
+ if elements >= 100000000 and description == 'seek':
|
|
|
+ continue
|
|
|
+ if elements >= 100000000 and description == 'mmap':
|
|
|
+ continue
|
|
|
+ if elements >= 1000000000 and description == 'array':
|
|
|
+ continue
|
|
|
+ time0 = time.time()
|
|
|
+ all_good &= test(
|
|
|
+ 'evens %s elements: %d' % (give_description(filename), elements),
|
|
|
+ Evens(elements),
|
|
|
+ trials=elements,
|
|
|
+ error_rate=1e-2,
|
|
|
+ filename=filename,
|
|
|
+ )
|
|
|
+ time1 = time.time()
|
|
|
+ delta_t = time1 - time0
|
|
|
+ #file_ = open('%s.txt' % description, 'a')
|
|
|
+ #file_.write('%d %f\n' % (elements, delta_t))
|
|
|
+ #file_.close()
|
|
|
+ database = anydbm.open('performance-numbers', 'c')
|
|
|
+ database[key] = '%f' % delta_t
|
|
|
+ database.close()
|
|
|
+
|
|
|
+ if all_good:
|
|
|
+ sys.stderr.write('%s: All tests passed\n' % sys.argv[0])
|
|
|
+ sys.exit(0)
|
|
|
+ else:
|
|
|
+ sys.stderr.write('%s: One or more tests failed\n' % sys.argv[0])
|
|
|
+ sys.exit(1)
|
|
|
|
|
|
main()
|
|
|
|