test-bloom-filter 3.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111
  1. #!/usr/bin/python
  2. # pylint: disable=W0402
  3. # W0402: We want the deprecated string module, for a use that isn't deprecated
  4. '''Unit tests for bloom_filter_mod'''
  5. import sys
  6. import random
  7. import string
  8. import bloom_filter_mod
  9. def my_range(maximum):
  10. '''A range function with consistent semantics on 2.x and 3.x'''
  11. value = 0
  12. while True:
  13. if value >= maximum:
  14. break
  15. yield value
  16. value += 1
  17. def test(order, included, trials, error_rate):
  18. '''Some quick automatic tests for the bloom filter class'''
  19. all_good = True
  20. bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials, error_rate=error_rate)
  21. print(repr(bloom_filter))
  22. for step in order:
  23. if step == 1:
  24. for include in included:
  25. bloom_filter.add(include)
  26. include_in_count = sum(include in bloom_filter for include in included)
  27. if include_in_count == len(included):
  28. # Good
  29. pass
  30. else:
  31. sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, len(included)))
  32. all_good = False
  33. elif step == 2:
  34. false_positives = 0
  35. for trialno in my_range(trials):
  36. if trialno % 10000 == 0:
  37. sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
  38. dummy = trialno
  39. while True:
  40. candidate = ''.join(random.sample(string.ascii_letters, 5))
  41. # If we accidentally found a real include, try again
  42. if candidate in included:
  43. continue
  44. if candidate in bloom_filter:
  45. false_positives += 1
  46. break
  47. else:
  48. raise ValueError('step is not 1 or 2')
  49. #print('%d true negatives and %d false positives out of %d trials' % (trials - false_positives, false_positives, trials))
  50. actual_error_rate = float(false_positives) / trials
  51. if actual_error_rate > error_rate:
  52. sys.stderr.write('%s: Too many false positives: actual: %s, expected: %s\n' % (
  53. sys.argv[0],
  54. actual_error_rate,
  55. error_rate,
  56. ))
  57. all_good = False
  58. return all_good
  59. def random_string():
  60. '''Generate a random, 10 character string - for testing purposes'''
  61. list_ = []
  62. for chrno in range(10):
  63. dummy = chrno
  64. character = string.ascii_letters[int(random.random() * 26)]
  65. list_.append(character)
  66. return ''.join(list_)
  67. def main():
  68. '''Unit tests for Bloom_filter class'''
  69. all_good = True
  70. states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
  71. Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
  72. Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
  73. Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
  74. NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
  75. Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
  76. Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()
  77. random_content = [ random_string() for dummy in range(1000) ]
  78. all_good &= test([1, 2], states, trials=100000, error_rate=0.01)
  79. all_good &= test([1, 2], random_content, trials=1000000, error_rate=0.1)
  80. all_good &= test([2, 1], [ 'a', 'b', 'c'], trials=100, error_rate=0.000001)
  81. if all_good:
  82. sys.stderr.write('%s: All tests passed\n' % sys.argv[0])
  83. sys.exit(0)
  84. else:
  85. sys.stderr.write('%s: One or more tests failed\n' % sys.argv[0])
  86. sys.exit(1)
  87. main()