@@ -62,7 +62,7 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
     Returns: results dictionary containing average training and validation perplexity and loss
     """
     # Create a gradient scaler for fp16
-    if train_config.use_fp16 and train_config.enable_fsdp:
+    if train_config.use_fp16 and train_config.enable_fsdp:
         scaler = ShardedGradScaler()
     elif train_config.use_fp16 and not train_config.enable_fsdp:
         scaler = torch.cuda.amp.GradScaler()
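
For context, here is a minimal sketch of how either scaler is consumed by the training loop. The `train_config`, `model`, `optimizer`, and `train_dataloader` names are carried over from the hunk above; the `run_fp16_epoch` wrapper and the loop body are illustrative assumptions, not the repository's actual code:

```python
import torch
from torch.distributed.fsdp.sharded_grad_scaler import ShardedGradScaler

def run_fp16_epoch(model, train_dataloader, optimizer, train_config):
    # Hypothetical helper. Pick the scaler that matches the setup,
    # mirroring the hunk above: FSDP shards gradients across ranks,
    # so it needs the sharded variant of the gradient scaler.
    if train_config.use_fp16 and train_config.enable_fsdp:
        scaler = ShardedGradScaler()
    elif train_config.use_fp16 and not train_config.enable_fsdp:
        scaler = torch.cuda.amp.GradScaler()

    for batch in train_dataloader:
        optimizer.zero_grad()
        # Run the forward pass in fp16 via autocast.
        with torch.cuda.amp.autocast(dtype=torch.float16):
            loss = model(**batch).loss
        scaler.scale(loss).backward()  # scale the loss so fp16 grads don't underflow
        scaler.step(optimizer)         # unscales grads; skips the step on inf/NaN
        scaler.update()                # adapt the scale factor for the next step
```

The distinction matters because `ShardedGradScaler` coordinates its inf/NaN check across ranks, so every FSDP shard agrees on whether to skip the optimizer step, whereas the plain `torch.cuda.amp.GradScaler` only inspects local gradients.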