# checkpoint_converter_fsdp_hf.py

# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

# from accelerate import init_empty_weights, load_checkpoint_and_dispatch

import fire
import torch
import os
import sys
import yaml
from transformers import LlamaTokenizer
from model_utils import load_llama_from_config

# Get the current file's directory
current_directory = os.path.dirname(os.path.abspath(__file__))

# Get the parent directory
parent_directory = os.path.dirname(current_directory)

# Append the parent directory to sys.path
sys.path.append(parent_directory)

from model_checkpointing import load_sharded_model_single_gpu

def main(
    fsdp_checkpoint_path="",  # Path to FSDP sharded model checkpoints
    consolidated_model_path="",  # Path to save the HF-converted model checkpoints
    HF_model_path_or_name="",  # Path or name of the HF model that includes config.json and tokenizer_config.json (e.g. meta-llama/Llama-2-7b-chat-hf)
):
    try:
        file_name = 'train_params.yaml'
        # Combine the directory and file name to create the full path
        train_params_path = os.path.join(fsdp_checkpoint_path, file_name)
        # Open the file
        with open(train_params_path, 'r') as file:
            # Load the YAML data
            data = yaml.safe_load(file)
            # Access the 'model_name' field
            HF_model_path_or_name = data.get('model_name')
            print(f"Model name: {HF_model_path_or_name}")
    except FileNotFoundError:
        print(f"The file {train_params_path} does not exist.")
        HF_model_path_or_name = input("Please enter the model name: ")
        print(f"Model name: {HF_model_path_or_name}")
    except Exception as e:
        print(f"An error occurred: {e}")
    # Load the HF model definition from config
    model_def = load_llama_from_config(HF_model_path_or_name)
    print("model is loaded from config")

    # Load the FSDP sharded checkpoints into the model
    model = load_sharded_model_single_gpu(model_def, fsdp_checkpoint_path)
    print("model is loaded from FSDP checkpoints")

    # Load the tokenizer from the model path and save it alongside the model
    tokenizer = LlamaTokenizer.from_pretrained(HF_model_path_or_name)
    tokenizer.save_pretrained(consolidated_model_path)

    # Save the FSDP sharded checkpoints in HF format
    model.save_pretrained(consolidated_model_path)
    print(f"HuggingFace model checkpoints have been saved in {consolidated_model_path}")


if __name__ == "__main__":
    fire.Fire(main)
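
# Example invocation (fire.Fire exposes the keyword arguments of main() as
# command-line flags; all paths below are placeholders):
#   python checkpoint_converter_fsdp_hf.py \
#       --fsdp_checkpoint_path PATH/to/fsdp_sharded_checkpoints \
#       --consolidated_model_path PATH/to/save/hf_model \
#       --HF_model_path_or_name meta-llama/Llama-2-7b-chat-hf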