1 year ago · a7156dfb5d
--- a/utils/train_utils.py
+++ b/utils/train_utils.py
@@ -199,7 +199,7 @@ def evaluation(model,train_config, eval_dataloader, local_rank, tokenizer):
 
				                 if train_config.enable_fsdp:
			
 
				                     batch[key] = batch[key].to(local_rank)
			
 
				                 else:
			
 
				-                    batch[key] = batch[key].to('cuda')
			
 
				+                    batch[key] = batch[key].to('cuda:0')
			
 
				             # Ensure no gradients are computed for this scope to save memory
			
 
				             with torch.no_grad():
			
 
				                 # Forward pass and compute loss