|
@@ -4,12 +4,13 @@
|
|
|
# pylint: disable=superfluous-parens
|
|
|
# superfluous-parens: Parentheses are good for clarity and portability
|
|
|
|
|
|
-'''Unit tests for bloom_filter_mod'''
|
|
|
+"""Unit tests for bloom_filter_mod"""
|
|
|
|
|
|
-#mport os
|
|
|
+# mport os
|
|
|
import sys
|
|
|
import math
|
|
|
import time
|
|
|
+
|
|
|
try:
|
|
|
import anydbm
|
|
|
except ImportError:
|
|
@@ -21,8 +22,9 @@ import drs_bloom_filter
|
|
|
|
|
|
CHARACTERS = 'abcdefghijklmnopqrstuvwxyz1234567890'
|
|
|
|
|
|
+
|
|
|
def my_range(maximum):
|
|
|
- '''A range function with consistent semantics on 2.x and 3.x'''
|
|
|
+ """A range function with consistent semantics on 2.x and 3.x"""
|
|
|
value = 0
|
|
|
while True:
|
|
|
if value >= maximum:
|
|
@@ -30,11 +32,12 @@ def my_range(maximum):
|
|
|
yield value
|
|
|
value += 1
|
|
|
|
|
|
-def test(description, values, trials, error_rate, probe_bitnoer=drs_bloom_filter.get_bitno_lin_comb, filename=None):
|
|
|
+
|
|
|
+def _test(description, values, trials, error_rate, probe_bitnoer=drs_bloom_filter.get_bitno_lin_comb, filename=None):
|
|
|
# pylint: disable=R0913,R0914
|
|
|
# R0913: We want a few arguments
|
|
|
# R0914: We want some local variables too. This is just test code.
|
|
|
- '''Some quick automatic tests for the bloom filter class'''
|
|
|
+ """Some quick automatic tests for the bloom filter class"""
|
|
|
|
|
|
all_good = True
|
|
|
|
|
@@ -46,7 +49,7 @@ def test(description, values, trials, error_rate, probe_bitnoer=drs_bloom_filter
|
|
|
probe_bitnoer=probe_bitnoer,
|
|
|
filename=filename,
|
|
|
start_fresh=True,
|
|
|
- )
|
|
|
+ )
|
|
|
|
|
|
message = '\ndescription: %s num_bits_m: %s num_probes_k: %s\n'
|
|
|
filled_out_message = message % (
|
|
@@ -77,14 +80,14 @@ def test(description, values, trials, error_rate, probe_bitnoer=drs_bloom_filter
|
|
|
false_positives = 0
|
|
|
for trialno in my_range(trials):
|
|
|
if trialno % divisor == 0:
|
|
|
- sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
|
|
|
+ sys.stderr.write('trialno countdown: %d\n' % (trials - trialno))
|
|
|
while True:
|
|
|
candidate = ''.join(random.sample(CHARACTERS, 5))
|
|
|
# If we accidentally found a member, try again
|
|
|
if values.within(candidate):
|
|
|
continue
|
|
|
if candidate in bloom_filter:
|
|
|
- #print 'We erroneously think %s is in the filter' % candidate
|
|
|
+ # print 'We erroneously think %s is in the filter' % candidate
|
|
|
false_positives += 1
|
|
|
break
|
|
|
|
|
@@ -95,43 +98,45 @@ def test(description, values, trials, error_rate, probe_bitnoer=drs_bloom_filter
|
|
|
sys.argv[0],
|
|
|
actual_error_rate,
|
|
|
error_rate,
|
|
|
- ))
|
|
|
+ ))
|
|
|
all_good = False
|
|
|
|
|
|
return all_good
|
|
|
|
|
|
+
|
|
|
class States(object):
|
|
|
- '''Generate the USA's state names'''
|
|
|
+ """Generate the USA's state names"""
|
|
|
|
|
|
def __init__(self):
|
|
|
pass
|
|
|
|
|
|
- states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
|
|
|
+ states = """Alabama Alaska Arizona Arkansas California Colorado Connecticut
|
|
|
Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
|
|
|
Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
|
|
|
Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
|
|
|
NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
|
|
|
Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
|
|
|
- Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()
|
|
|
+ Vermont Virginia Washington WestVirginia Wisconsin Wyoming""".split()
|
|
|
|
|
|
@staticmethod
|
|
|
def generator():
|
|
|
- '''Generate the states'''
|
|
|
+ """Generate the states"""
|
|
|
for state in States.states:
|
|
|
yield state
|
|
|
|
|
|
@staticmethod
|
|
|
def within(value):
|
|
|
- '''Is the value in our list of states?'''
|
|
|
+ """Is the value in our list of states?"""
|
|
|
return value in States.states
|
|
|
|
|
|
@staticmethod
|
|
|
def length():
|
|
|
- '''What is the length of our contained values?'''
|
|
|
+ """What is the length of our contained values?"""
|
|
|
return len(States.states)
|
|
|
|
|
|
+
|
|
|
def random_string():
|
|
|
- '''Generate a random, 10 character string - for testing purposes'''
|
|
|
+ """Generate a random, 10 character string - for testing purposes"""
|
|
|
list_ = []
|
|
|
for chrno in range(10):
|
|
|
dummy = chrno
|
|
@@ -139,8 +144,9 @@ def random_string():
|
|
|
list_.append(character)
|
|
|
return ''.join(list_)
|
|
|
|
|
|
+
|
|
|
class Random_content(object):
|
|
|
- '''Generated a bunch of random strings in sorted order'''
|
|
|
+ """Generated a bunch of random strings in sorted order"""
|
|
|
|
|
|
random_content = [random_string() for dummy in range(1000)]
|
|
|
|
|
@@ -149,34 +155,35 @@ class Random_content(object):
|
|
|
|
|
|
@staticmethod
|
|
|
def generator():
|
|
|
- '''Generate all values'''
|
|
|
+ """Generate all values"""
|
|
|
for item in Random_content.random_content:
|
|
|
yield item
|
|
|
|
|
|
@staticmethod
|
|
|
def within(value):
|
|
|
- '''Test for membership'''
|
|
|
+ """Test for membership"""
|
|
|
return value in Random_content.random_content
|
|
|
|
|
|
@staticmethod
|
|
|
def length():
|
|
|
- '''How many members?'''
|
|
|
+ """How many members?"""
|
|
|
return len(Random_content.random_content)
|
|
|
|
|
|
+
|
|
|
class Evens(object):
|
|
|
- '''Generate a bunch of even numbers'''
|
|
|
+ """Generate a bunch of even numbers"""
|
|
|
|
|
|
def __init__(self, maximum):
|
|
|
self.maximum = maximum
|
|
|
|
|
|
def generator(self):
|
|
|
- '''Generate all values'''
|
|
|
+ """Generate all values"""
|
|
|
for value in my_range(self.maximum):
|
|
|
if value % 2 == 0:
|
|
|
yield str(value)
|
|
|
|
|
|
def within(self, value):
|
|
|
- '''Test for membership'''
|
|
|
+ """Test for membership"""
|
|
|
try:
|
|
|
int_value = int(value)
|
|
|
except ValueError:
|
|
@@ -188,11 +195,12 @@ class Evens(object):
|
|
|
return False
|
|
|
|
|
|
def length(self):
|
|
|
- '''How many members?'''
|
|
|
+ """How many members?"""
|
|
|
return int(math.ceil(self.maximum / 2.0))
|
|
|
|
|
|
+
|
|
|
def and_test():
|
|
|
- '''Test the & operator'''
|
|
|
+ """Test the & operator"""
|
|
|
|
|
|
all_good = True
|
|
|
|
|
@@ -222,8 +230,9 @@ def and_test():
|
|
|
|
|
|
return all_good
|
|
|
|
|
|
+
|
|
|
def or_test():
|
|
|
- '''Test the | operator'''
|
|
|
+ """Test the | operator"""
|
|
|
|
|
|
all_good = True
|
|
|
|
|
@@ -256,8 +265,9 @@ def or_test():
|
|
|
|
|
|
return all_good
|
|
|
|
|
|
+
|
|
|
def give_description(filename):
|
|
|
- '''Return a description of the filename type - could be array, file or hybrid'''
|
|
|
+ """Return a description of the filename type - could be array, file or hybrid"""
|
|
|
if filename is None:
|
|
|
return 'array'
|
|
|
elif isinstance(filename, tuple):
|
|
@@ -268,8 +278,9 @@ def give_description(filename):
|
|
|
else:
|
|
|
return 'seek'
|
|
|
|
|
|
-def main():
|
|
|
- '''Unit tests for Bloom_filter class'''
|
|
|
+
|
|
|
+def test_drs_bloom_filter():
|
|
|
+ """Unit tests for Bloom_filter class"""
|
|
|
|
|
|
if sys.argv[1:] == ['--performance-test']:
|
|
|
performance_test = True
|
|
@@ -278,13 +289,14 @@ def main():
|
|
|
|
|
|
all_good = True
|
|
|
|
|
|
- all_good &= test('states', States(), trials=100000, error_rate=0.01)
|
|
|
+ all_good &= _test('states', States(), trials=100000, error_rate=0.01)
|
|
|
|
|
|
- all_good &= test('random', Random_content(), trials=10000, error_rate=0.1)
|
|
|
- all_good &= test('random', Random_content(), trials=10000, error_rate=0.1, probe_bitnoer=drs_bloom_filter.get_bitno_seed_rnd)
|
|
|
+ all_good &= _test('random', Random_content(), trials=10000, error_rate=0.1)
|
|
|
+ all_good &= _test('random', Random_content(), trials=10000, error_rate=0.1,
|
|
|
+ probe_bitnoer=drs_bloom_filter.get_bitno_seed_rnd)
|
|
|
|
|
|
filename = 'bloom-filter-rm-me'
|
|
|
- all_good &= test('random', Random_content(), trials=10000, error_rate=0.1, filename=filename)
|
|
|
+ all_good &= _test('random', Random_content(), trials=10000, error_rate=0.1, filename=filename)
|
|
|
|
|
|
all_good &= and_test()
|
|
|
|
|
@@ -292,10 +304,11 @@ def main():
|
|
|
|
|
|
if performance_test:
|
|
|
sqrt_of_10 = math.sqrt(10)
|
|
|
- #for exponent in range(5): # this is a lot, but probably not unreasonable
|
|
|
- for exponent in range(19): # this is a lot, but probably not unreasonable
|
|
|
+ # for exponent in range(5): # this is a lot, but probably not unreasonable
|
|
|
+ for exponent in range(19): # this is a lot, but probably not unreasonable
|
|
|
elements = int(sqrt_of_10 ** exponent + 0.5)
|
|
|
- for filename in [None, 'bloom-filter-rm-me', ('bloom-filter-rm-me', 768 * 2**20), ('bloom-filter-rm-me', -1)]:
|
|
|
+ for filename in [None, 'bloom-filter-rm-me', ('bloom-filter-rm-me', 768 * 2 ** 20),
|
|
|
+ ('bloom-filter-rm-me', -1)]:
|
|
|
description = give_description(filename)
|
|
|
key = '%s %s' % (description, elements)
|
|
|
database = anydbm.open('performance-numbers', 'c')
|
|
@@ -309,28 +322,29 @@ def main():
|
|
|
if elements >= 1000000000 and description == 'array':
|
|
|
continue
|
|
|
time0 = time.time()
|
|
|
- all_good &= test(
|
|
|
+ all_good &= _test(
|
|
|
'evens %s elements: %d' % (give_description(filename), elements),
|
|
|
Evens(elements),
|
|
|
trials=elements,
|
|
|
error_rate=1e-2,
|
|
|
filename=filename,
|
|
|
- )
|
|
|
+ )
|
|
|
time1 = time.time()
|
|
|
delta_t = time1 - time0
|
|
|
- #file_ = open('%s.txt' % description, 'a')
|
|
|
- #file_.write('%d %f\n' % (elements, delta_t))
|
|
|
- #file_.close()
|
|
|
+ # file_ = open('%s.txt' % description, 'a')
|
|
|
+ # file_.write('%d %f\n' % (elements, delta_t))
|
|
|
+ # file_.close()
|
|
|
database = anydbm.open('performance-numbers', 'c')
|
|
|
database[key] = '%f' % delta_t
|
|
|
database.close()
|
|
|
|
|
|
if all_good:
|
|
|
sys.stderr.write('%s: All tests passed\n' % sys.argv[0])
|
|
|
- sys.exit(0)
|
|
|
+ # sys.exit(0)
|
|
|
else:
|
|
|
sys.stderr.write('%s: One or more tests failed\n' % sys.argv[0])
|
|
|
sys.exit(1)
|
|
|
|
|
|
-main()
|
|
|
|
|
|
+if __name__ == '__main__':
|
|
|
+ test_drs_bloom_filter()
|