compare_llama_weights.py 1.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051
  1. # Copyright (c) Meta Platforms, Inc. and affiliates.
  2. # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
  3. import gc
  4. import glob
  5. import os
  6. import sys
  7. import torch
  8. import tqdm
  9. def main() -> None:
  10. """Compare two llama checkpoint directories"""
  11. one_files = sorted(glob.glob(os.path.join(sys.argv[1], "consolidated.*.pth")))
  12. two_files = sorted(glob.glob(os.path.join(sys.argv[2], "consolidated.*.pth")))
  13. assert len(one_files) == len(
  14. two_files
  15. ), "One directory has {} files while another has {} files.".format(
  16. len(one_files), len(two_files)
  17. )
  18. deltas = []
  19. for i in tqdm.trange(len(one_files), desc="Comparing shards"):
  20. one = torch.load(one_files[i])
  21. two = torch.load(two_files[i])
  22. assert len(one) == len(
  23. two
  24. ), "shard should have the same length: {} != {}".format(len(one), len(two))
  25. for _, (v, w) in enumerate(zip(one.items(), two.items())):
  26. assert v[0] == w[0], "{} != {}".format(v[0], w[0])
  27. assert v[1].shape == w[1].shape, "tensor {} shape {} != {}".format(
  28. v[0], v[1].shape, w[1].shape
  29. )
  30. delta = (v[1] - w[1]).abs().max().item()
  31. deltas.append((i, v[0], delta))
  32. del one
  33. del two
  34. gc.collect()
  35. deltas = sorted(deltas, key=lambda x: x[-1], reverse=True)
  36. print("Top 10 largest deltas:")
  37. for i, k, v in deltas[:10]:
  38. print(f" shard {i} {k}: {v}")
# Run the comparison only when executed as a script, not when imported.
if __name__ == "__main__":
    main()