test-bloom-filter 2.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104
  1. #!/usr/bin/python
  2. # pylint: disable=W0402
  3. # W0402: We want the deprecated string module, for a use that isn't deprecated
  4. '''Unit tests for bloom_filter_mod'''
  5. import sys
  6. import random
  7. import string
  8. import bloom_filter_mod
  9. def my_range(maximum):
  10. '''A range function with consistent semantics on 2.x and 3.x'''
  11. value = 0
  12. while True:
  13. if value >= maximum:
  14. break
  15. yield value
  16. value += 1
  17. def test(included, trials, error_rate):
  18. '''Some quick automatic tests for the bloom filter class'''
  19. all_good = True
  20. bloom_filter = bloom_filter_mod.Bloom_filter(ideal_num_elements=trials * 2, error_rate=error_rate)
  21. print(repr(bloom_filter))
  22. for include in included:
  23. bloom_filter.add(include)
  24. include_in_count = sum(include in bloom_filter for include in included)
  25. if include_in_count == len(included):
  26. # Good
  27. pass
  28. else:
  29. sys.stderr.write('Include count bad: %s, %d\n' % (include_in_count, len(included)))
  30. all_good = False
  31. false_positives = 0
  32. for trialno in my_range(trials):
  33. if trialno % 10000 == 0:
  34. sys.stderr.write('trialno countdown: %d\n' % (trials-trialno))
  35. dummy = trialno
  36. while True:
  37. candidate = ''.join(random.sample(string.ascii_letters, 5))
  38. # If we accidentally found a real include, try again
  39. if candidate in included:
  40. continue
  41. if candidate in bloom_filter:
  42. false_positives += 1
  43. break
  44. #print('%d true negatives and %d false positives out of %d trials' % (trials - false_positives, false_positives, trials))
  45. actual_error_rate = float(false_positives) / trials
  46. if actual_error_rate > error_rate:
  47. sys.stderr.write('%s: Too many false positives: actual: %s, expected: %s\n' % (
  48. sys.argv[0],
  49. actual_error_rate,
  50. error_rate,
  51. ))
  52. all_good = False
  53. return all_good
  54. def random_string():
  55. '''Generate a random, 10 character string - for testing purposes'''
  56. list_ = []
  57. for chrno in range(10):
  58. dummy = chrno
  59. character = string.ascii_letters[int(random.random() * 26)]
  60. list_.append(character)
  61. return ''.join(list_)
  62. def main():
  63. '''Unit tests for Bloom_filter class'''
  64. all_good = True
  65. states = '''Alabama Alaska Arizona Arkansas California Colorado Connecticut
  66. Delaware Florida Georgia Hawaii Idaho Illinois Indiana Iowa Kansas
  67. Kentucky Louisiana Maine Maryland Massachusetts Michigan Minnesota
  68. Mississippi Missouri Montana Nebraska Nevada NewHampshire NewJersey
  69. NewMexico NewYork NorthCarolina NorthDakota Ohio Oklahoma Oregon
  70. Pennsylvania RhodeIsland SouthCarolina SouthDakota Tennessee Texas Utah
  71. Vermont Virginia Washington WestVirginia Wisconsin Wyoming'''.split()
  72. random_content = [ random_string() for dummy in range(1000) ]
  73. all_good &= test(states, trials=100000, error_rate=0.01)
  74. all_good &= test(random_content, trials=10000000, error_rate=0.001)
  75. if all_good:
  76. sys.stderr.write('%s: All tests passed\n' % sys.argv[0])
  77. sys.exit(0)
  78. else:
  79. sys.stderr.write('%s: One or more tests failed\n' % sys.argv[0])
  80. sys.exit(1)
  81. main()