
Add BT after peft module load

Hamid Shojanazeri 1 year ago
parent
commit
bf4e0a134b
2 changed files with 8 additions and 7 deletions
  1. inference/chat_completion.py (+3 -2)
  2. inference/inference.py (+5 -5)

inference/chat_completion.py (+3 -2)

@@ -58,6 +58,8 @@ def main(
     torch.cuda.manual_seed(seed)
     torch.manual_seed(seed)
     model = load_model(model_name, quantization)
+    if peft_model:
+        model = load_peft_model(model, peft_model)
     if use_fast_kernels:
         """
         Setting 'use_fast_kernels' will enable
@@ -70,8 +72,7 @@ def main(
             print("Module 'optimum' not found. Please install 'optimum' it before proceeding.")
 
         model = BetterTransformer.transform(model)
-    if peft_model:
-        model = load_peft_model(model, peft_model)
+   
     tokenizer = LlamaTokenizer.from_pretrained(model_name)
     tokenizer.add_special_tokens(
         {
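
The resulting order in chat_completion.py is sketched below as a minimal standalone excerpt: PEFT adapters are attached to the base model before BetterTransformer.transform, presumably because transform rewrites the module tree with fused-kernel implementations, so loading adapters afterwards would target modules that no longer exist in their original form. This is a sketch, not the full script: load_model and load_peft_model are assumed to be the repo's own inference helpers, and model_name, quantization, peft_model, and use_fast_kernels are the script's CLI arguments.

    from optimum.bettertransformer import BetterTransformer

    model = load_model(model_name, quantization)    # base Llama checkpoint
    if peft_model:
        model = load_peft_model(model, peft_model)  # attach PEFT/LoRA adapters first
    if use_fast_kernels:
        model = BetterTransformer.transform(model)  # then swap in fused kernels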

inference/inference.py (+5 -5)

@@ -52,6 +52,11 @@ def main(
     torch.manual_seed(seed)
     
     model = load_model(model_name, quantization)
+    if peft_model:
+        model = load_peft_model(model, peft_model)
+
+    model.eval()
+    
     if use_fast_kernels:
         """
         Setting 'use_fast_kernels' will enable
@@ -92,11 +97,6 @@ def main(
         print("Skipping the inferece as the prompt is not safe.")
         sys.exit(1)  # Exit the program with an error status
 
-    if peft_model:
-        model = load_peft_model(model, peft_model)
-
-    model.eval()
-
     batch = tokenizer(user_prompt, return_tensors="pt")
     batch = {k: v.to("cuda") for k, v in batch.items()}
     start = time.perf_counter()
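
The same reordering in inference.py, as a minimal sketch under the same assumptions (the repo's load_model/load_peft_model helpers, a CUDA device, and model_name, quantization, peft_model, and user_prompt coming from the script's CLI arguments): the adapter load and model.eval() now happen once, right after the base model load, instead of after the prompt-safety check just before generation.

    import time
    import torch
    from transformers import LlamaTokenizer

    model = load_model(model_name, quantization)
    if peft_model:
        model = load_peft_model(model, peft_model)  # adapters before any later transforms
    model.eval()                                    # inference mode: disables dropout

    tokenizer = LlamaTokenizer.from_pretrained(model_name)
    batch = tokenizer(user_prompt, return_tensors="pt")
    batch = {k: v.to("cuda") for k, v in batch.items()}

    start = time.perf_counter()
    with torch.no_grad():
        outputs = model.generate(**batch)           # default generation settings
    print(f"Inference took {time.perf_counter() - start:.2f} s")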