#!/usr/bin/python # pylint: disable=W0402 # W0402: We want the deprecated string module, for a use that isn't deprecated '''Unit tests for bloom_filter_mod''' import sys import random import string import bloom_filter_mod def my_range(maximum): '''A range function with consistent semantics on 2.x and 3.x''' value = 0 while True: if value >= maximum: break yield value value += 1 def primary_test(order, included, trials, error_rate): '''Some quick automatic tests for the bloom filter class''' all_good = True bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials, error_rate=error_rate) print(repr(bloom_filter)) for step in order: if step == 1: for include in included: bloom_filter.add(include) include_in_count = sum(include in bloom_filter for include in included) if include_in_count == len(included): # Good pass else: sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, len(included))) all_good = False elif step == 2: false_positives = 0 for trialno in my_range(trials): if trialno % 10000 == 0: sys.stderr.write('trialno countdown: %d\n' % (trials-trialno)) dummy = trialno while True: candidate = ''.join(random.sample(string.ascii_letters, 5)) # If we accidentally found a real include, try again if candidate in included: continue if candidate in bloom_filter: false_positives += 1 break else: raise ValueError('step is not 1 or 2') #print('%d true negatives and %d false positives out of %d trials' % (trials - false_positives, false_positives, trials)) actual_error_rate = float(false_positives) / trials if actual_error_rate > error_rate: sys.stderr.write('%s: Too many false positives: actual: %s, expected: %s\n' % ( sys.argv[0], actual_error_rate, error_rate, )) all_good = False return all_good def random_string(): '''Generate a random, 10 character string - for testing purposes''' list_ = [] for chrno in range(10): dummy = chrno character = string.ascii_letters[int(random.random() * 26)] list_.append(character) return ''.join(list_) def and_test(): '''Test the & operator''' all_good = True abc = bloom_filter_mod.Bloom_filter(ideal_num_elements=100, error_rate=0.01) for character in [ 'a', 'b', 'c' ]: abc += character bcd = bloom_filter_mod.Bloom_filter(ideal_num_elements=100, error_rate=0.01) for character in [ 'b', 'c', 'd' ]: bcd += character abc_and_bcd = abc abc_and_bcd &= bcd if 'a' in abc_and_bcd: sys.stderr.write('a in abc_and_bcd, but should not be') all_good = False if not 'b' in abc_and_bcd: sys.stderr.write('b not in abc_and_bcd, but should be') all_good = False if not 'c' in abc_and_bcd: sys.stderr.write('c not in abc_and_bcd, but should be') all_good = False if 'd' in abc_and_bcd: sys.stderr.write('d in abc_and_bcd, but should not be') all_good = False return all_good def or_test(): '''Test the | operator''' all_good = True abc = bloom_filter_mod.Bloom_filter(ideal_num_elements=100, error_rate=0.01) for character in [ 'a', 'b', 'c' ]: abc += character bcd = bloom_filter_mod.Bloom_filter(ideal_num_elements=100, error_rate=0.01) for character in [ 'b', 'c', 'd' ]: bcd += character abc_and_bcd = abc abc_and_bcd |= bcd if not 'a' in abc_and_bcd: sys.stderr.write('a not in abc_and_bcd, but should be') all_good = False if not 'b' in abc_and_bcd: sys.stderr.write('b not in abc_and_bcd, but should be') all_good = False if not 'c' in abc_and_bcd: sys.stderr.write('c not in abc_and_bcd, but should be') all_good = False if not 'd' in abc_and_bcd: sys.stderr.write('d not in abc_and_bcd, but should be') all_good = False if 'e' in abc_and_bcd: sys.stderr.write('e in abc_and_bcd, but should be') all_good = False return all_good def main(): '''Unit tests for Bloom_filter class''' all_good = True states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split() random_content = [ random_string() for dummy in range(1000) ] all_good &= primary_test([1, 2], states, trials=10000, error_rate=0.01) all_good &= primary_test([1, 2], random_content, trials=10000, error_rate=0.1) all_good &= primary_test([2, 1], [ 'a', 'b', 'c'], trials=100, error_rate=0.000001) all_good &= and_test() all_good &= or_test() if all_good: sys.stderr.write('%s: All tests passed\n' % sys.argv[0]) sys.exit(0) else: sys.stderr.write('%s: One or more tests failed\n' % sys.argv[0]) sys.exit(1) main()