Browse Source

Merge branch 'facebookresearch:main' into promptInfo

sekyondaMeta 1 year ago
parent
commit
37d7151494
2 changed files with 3 additions and 7 deletions
  1. 2 2
      configs/training.py
  2. 1 5
      inference/inference.py

+ 2 - 2
configs/training.py

@@ -33,8 +33,8 @@ class train_config:
     dist_checkpoint_root_folder: str="PATH/to/save/FSDP/model" # will be used if using FSDP
     dist_checkpoint_folder: str="fine-tuned" # will be used if using FSDP
     save_optimizer: bool=False # will be used if using FSDP
-    use_fast_kernels: bool = False, # Enable using SDPA from PyTorch Accelerated Transformers, makes use of Flash Attention and xFormers memory-efficient kernels
+    use_fast_kernels: bool = False # Enable using SDPA from PyTorch Accelerated Transformers, makes use of Flash Attention and xFormers memory-efficient kernels
 
     
     
-    
+    

+ 1 - 5
inference/inference.py

@@ -99,11 +99,7 @@ def main(
         print("Skipping the inference as the prompt is not safe.")
         sys.exit(1)  # Exit the program with an error status
         
-    if peft_model:
-        model = load_peft_model(model, peft_model)
-
-    model.eval()
-    batch = tokenizer(user_prompt, padding='max_length', truncation=True,max_length=max_padding_length,return_tensors="pt")
+    batch = tokenizer(user_prompt, padding='max_length', truncation=True, max_length=max_padding_length, return_tensors="pt")
 
     batch = {k: v.to("cuda") for k, v in batch.items()}
     start = time.perf_counter()