test-bloom-filter 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204
  1. #!/usr/bin/python
  2. # pylint: disable=W0402
  3. # W0402: We want the deprecated string module, for a use that isn't deprecated
  4. '''Unit tests for bloom_filter_mod'''
  5. import sys
  6. import random
  7. import bloom_filter_mod
  8. CHARACTERS = 'abcdefghijklmnopqrstuvwxyz1234567890'
  9. def my_range(maximum):
  10. '''A range function with consistent semantics on 2.x and 3.x'''
  11. value = 0
  12. while True:
  13. if value >= maximum:
  14. break
  15. yield value
  16. value += 1
  17. def primary_test(order, included, trials, error_rate):
  18. '''Some quick automatic tests for the bloom filter class'''
  19. all_good = True
  20. bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials, error_rate=error_rate)
  21. print(repr(bloom_filter))
  22. for step in order:
  23. if step == 1:
  24. for include in included:
  25. bloom_filter.add(include)
  26. include_in_count = sum(include in bloom_filter for include in included)
  27. if include_in_count == len(included):
  28. # Good
  29. pass
  30. else:
  31. sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, len(included)))
  32. all_good = False
  33. elif step == 2:
  34. false_positives = 0
  35. for trialno in my_range(trials):
  36. if trialno % 10000 == 0:
  37. sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
  38. dummy = trialno
  39. while True:
  40. candidate = ''.join(random.sample(string.ascii_letters, 5))
  41. # If we accidentally found a real include, try again
  42. if candidate in included:
  43. continue
  44. if candidate in bloom_filter:
  45. false_positives += 1
  46. break
  47. else:
  48. raise ValueError('step is not 1 or 2')
  49. actual_error_rate = float(false_positives) / trials
  50. if actual_error_rate > error_rate:
  51. sys.stderr.write('%s: Too many false positives: actual: %s, expected: %s\n' % (
  52. sys.argv[0],
  53. actual_error_rate,
  54. error_rate,
  55. ))
  56. all_good = False
  57. return all_good
  58. class States:
  59. '''Generate the USA's state names'''
  60. def __init__(self):
  61. pass
  62. states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
  63. Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
  64. Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
  65. Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
  66. NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
  67. Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
  68. Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()
  69. @staticmethod
  70. def generator():
  71. '''Generate the states'''
  72. for state in States.states:
  73. yield state
  74. @staticmethod
  75. def within(value):
  76. '''Is the vaoue in our list of states?'''
  77. return value in States.states
  78. @staticmethod
  79. def length():
  80. '''What is the length of our contained values?'''
  81. return len(States.states)
  82. def random_string():
  83. '''Generate a random, 10 character string - for testing purposes'''
  84. list_ = []
  85. for chrno in range(10):
  86. dummy = chrno
  87. character = CHARACTERS[int(random.random() * len(CHARACTERS))]
  88. list_.append(character)
  89. return ''.join(list_)
  90. def and_test():
  91. '''Test the & operator'''
  92. all_good = True
  93. abc = bloom_filter_mod.Bloom_filter(ideal_num_elements=100, error_rate=0.01)
  94. for character in [ 'a', 'b', 'c' ]:
  95. abc += character
  96. bcd = bloom_filter_mod.Bloom_filter(ideal_num_elements=100, error_rate=0.01)
  97. for character in [ 'b', 'c', 'd' ]:
  98. bcd += character
  99. abc_and_bcd = abc
  100. abc_and_bcd &= bcd
  101. if 'a' in abc_and_bcd:
  102. sys.stderr.write('a in abc_and_bcd, but should not be')
  103. all_good = False
  104. if not 'b' in abc_and_bcd:
  105. sys.stderr.write('b not in abc_and_bcd, but should be')
  106. all_good = False
  107. if not 'c' in abc_and_bcd:
  108. sys.stderr.write('c not in abc_and_bcd, but should be')
  109. all_good = False
  110. if 'd' in abc_and_bcd:
  111. sys.stderr.write('d in abc_and_bcd, but should not be')
  112. all_good = False
  113. return all_good
  114. def or_test():
  115. '''Test the | operator'''
  116. all_good = True
  117. abc = bloom_filter_mod.Bloom_filter(ideal_num_elements=100, error_rate=0.01)
  118. for character in [ 'a', 'b', 'c' ]:
  119. abc += character
  120. bcd = bloom_filter_mod.Bloom_filter(ideal_num_elements=100, error_rate=0.01)
  121. for character in [ 'b', 'c', 'd' ]:
  122. bcd += character
  123. abc_and_bcd = abc
  124. abc_and_bcd |= bcd
  125. if not 'a' in abc_and_bcd:
  126. sys.stderr.write('a not in abc_and_bcd, but should be')
  127. all_good = False
  128. if not 'b' in abc_and_bcd:
  129. sys.stderr.write('b not in abc_and_bcd, but should be')
  130. all_good = False
  131. if not 'c' in abc_and_bcd:
  132. sys.stderr.write('c not in abc_and_bcd, but should be')
  133. all_good = False
  134. if not 'd' in abc_and_bcd:
  135. sys.stderr.write('d not in abc_and_bcd, but should be')
  136. all_good = False
  137. if 'e' in abc_and_bcd:
  138. sys.stderr.write('e in abc_and_bcd, but should be')
  139. all_good = False
  140. return all_good
  141. def main():
  142. '''Unit tests for Bloom_filter class'''
  143. all_good = True
  144. all_good &= test('states', States(), trials=100000, error_rate=0.01)
  145. all_good &= test('random', Random_content(), trials=10000, error_rate=0.1)
  146. all_good &= primary_test([1, 2], states, trials=10000, error_rate=0.01)
  147. all_good &= primary_test([1, 2], random_content, trials=10000, error_rate=0.1)
  148. all_good &= primary_test([2, 1], [ 'a', 'b', 'c'], trials=100, error_rate=0.000001)
  149. all_good &= and_test()
  150. all_good &= or_test()
  151. if all_good:
  152. sys.stderr.write('%s: All tests passed\n' % sys.argv[0])
  153. sys.exit(0)
  154. else:
  155. sys.stderr.write('%s: One or more tests failed\n' % sys.argv[0])
  156. sys.exit(1)
  157. main()