Browse Source

Fix cuda id for using quantization (#40)

Hamid Shojanazeri 1 year ago
parent
commit
83fde7b94b
1 changed file with 3 additions and 2 deletions
  1. utils/train_utils.py (+3, -2)

utils/train_utils.py (+3, -2)

@@ -84,7 +84,8 @@ def train(model, train_dataloader,eval_dataloader, tokenizer, optimizer, lr_sche
                     if train_config.enable_fsdp:
                         batch[key] = batch[key].to(local_rank)
                     else:
-                        batch[key] = batch[key].to('cuda')       
+
+                        batch[key] = batch[key].to('cuda:0')              
                 loss = model(**batch).loss
                 loss = loss / gradient_accumulation_steps
                 total_loss += loss.detach().float()
@@ -198,7 +199,7 @@ def evaluation(model,train_config, eval_dataloader, local_rank, tokenizer):
                 if train_config.enable_fsdp:
                     batch[key] = batch[key].to(local_rank)
                 else:
-                    batch[key] = batch[key].to('cuda')
+                    batch[key] = batch[key].to('cuda:0')
             # Ensure no gradients are computed for this scope to save memory
             with torch.no_grad():
                 # Forward pass and compute loss
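
For context, here is the device-placement logic the commit settles on, pulled out as a minimal standalone sketch. The helper name and signature are illustrative, not part of the repo; the branch bodies follow the diff verbatim. The idea: under FSDP each rank moves tensors to its own GPU via local_rank, while in the single-process (e.g. quantized) path a bare 'cuda' can resolve to a different GPU than the one the model was loaded onto, so an explicit 'cuda:0' removes the ambiguity.

```python
import torch

def move_batch_to_device(batch, enable_fsdp: bool, local_rank: int):
    # Hypothetical helper mirroring the pattern in the diff above.
    for key in batch.keys():
        if enable_fsdp:
            # FSDP path: each rank places tensors on its own GPU.
            batch[key] = batch[key].to(local_rank)
        else:
            # Single-GPU path: pin to an explicit device index so the batch
            # lands on the same GPU as the (quantized) model.
            batch[key] = batch[key].to("cuda:0")
    return batch

# Hypothetical usage (requires a CUDA device):
# batch = {"input_ids": torch.ones(2, 8, dtype=torch.long)}
# batch = move_batch_to_device(batch, enable_fsdp=False, local_rank=0)
```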