
Remove micro_batch_size parameter and replace it with gradient_accumulation_steps

Matthias Reso, 1 year ago
parent
commit 9def4fbafd
2 changed files with 2 additions and 5 deletions
  1. configs/training.py (+1 −1)
  2. llama_finetuning.py (+1 −4)

configs/training.py (+1 −1)

@@ -11,6 +11,7 @@ class train_config:
     low_cpu_fsdp: bool=False
     run_validation: bool=True
     batch_size_training: int=4
+    gradient_accumulation_steps: int=1
     num_epochs: int=3
     num_workers_dataloader: int=1
     lr: float=1e-4
@@ -21,7 +22,6 @@ class train_config:
     mixed_precision: bool=True
     val_batch_size: int=1
     dataset = "samsum_dataset"
-    micro_batch_size: int=4
     peft_method: str = "lora" # None , llama_adapter, prefix
     use_peft: bool=False
     output_dir: str = "PATH/to/save/PEFT/model"
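
The config change makes gradient accumulation explicit: the effective per-device batch size is now batch_size_training * gradient_accumulation_steps, both set directly on train_config, instead of being derived from the removed micro_batch_size. A minimal sketch of the new fields in use (assuming train_config can be instantiated as defined in configs/training.py):

# Sketch only: illustrates the new gradient_accumulation_steps field.
# Assumes configs/training.py exposes the train_config class shown above.
from configs.training import train_config

cfg = train_config()
cfg.batch_size_training = 4          # micro-batch size per forward/backward pass
cfg.gradient_accumulation_steps = 8  # micro-batches accumulated per optimizer step

# The effective per-device batch size is now an explicit product of the two
# fields rather than being derived from the removed micro_batch_size.
effective_batch = cfg.batch_size_training * cfg.gradient_accumulation_steps  # 32
print(f"effective batch size per device: {effective_batch}")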

llama_finetuning.py (+1 −4)

@@ -65,9 +65,6 @@ def main(**kwargs):
         clear_gpu_cache(local_rank)
         setup_environ_flags(rank)
 
-    # Calculate gradient accumulation steps
-    gradient_accumulation_steps = train_config.batch_size_training // train_config.micro_batch_size
-
     # Load the pre-trained model and setup its configuration
     if train_config.enable_fsdp and train_config.low_cpu_fsdp:
         """
@@ -240,7 +237,7 @@ def main(**kwargs):
         tokenizer,
         optimizer,
         scheduler,
-        gradient_accumulation_steps,
+        train_config.gradient_accumulation_steps,
         train_config,
         fsdp_config if train_config.enable_fsdp else None,
         local_rank if train_config.enable_fsdp else None,
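
With the precomputed value gone, the training routine now receives train_config.gradient_accumulation_steps directly. A minimal, illustrative sketch of how such a value is typically consumed in a training loop (not the repo's actual train() implementation; model, train_dataloader, optimizer, and scheduler are placeholders):

def train_one_epoch(model, train_dataloader, optimizer, scheduler,
                    gradient_accumulation_steps, device="cuda"):
    # Illustrative gradient-accumulation loop, not llama_finetuning.py's train().
    model.train()
    optimizer.zero_grad()
    for step, batch in enumerate(train_dataloader):
        batch = {k: v.to(device) for k, v in batch.items()}
        loss = model(**batch).loss
        # Scale the loss so the accumulated gradient matches one large batch.
        (loss / gradient_accumulation_steps).backward()
        if (step + 1) % gradient_accumulation_steps == 0:
            optimizer.step()
            scheduler.step()
            optimizer.zero_grad()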