|
@@ -1,8 +1,6 @@
|
|
|
# Copyright (c) Meta Platforms, Inc. and affiliates.
|
|
|
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
|
|
|
|
|
|
-# from accelerate import init_empty_weights, load_checkpoint_and_dispatch
|
|
|
-
|
|
|
import fire
|
|
|
import os
|
|
|
import sys
|
|
@@ -60,7 +58,6 @@ def main(
|
|
|
peft_model: str=None,
|
|
|
quantization: bool=False,
|
|
|
max_new_tokens =100, #The maximum numbers of tokens to generate
|
|
|
- prompt_file: str=None,
|
|
|
seed: int=42, #seed value for reproducibility
|
|
|
do_sample: bool=True, #Whether or not to use sampling ; use greedy decoding otherwise.
|
|
|
min_length: int=None, #The minimum length of the sequence to be generated, input prompt + min_new_tokens
|
|
@@ -100,11 +97,10 @@ def main(
|
|
|
based on the hardware being used. This would speed up inference when used for batched inputs.
|
|
|
"""
|
|
|
try:
|
|
|
- from optimum.bettertransformer import BetterTransformer
|
|
|
- model = BetterTransformer.transform(model)
|
|
|
+ model.to_bettertransformer()
|
|
|
except ImportError:
|
|
|
- print("Module 'optimum' not found. Please install 'optimum' it before proceeding.")
|
|
|
-
|
|
|
+    print("Please check that your Transformers version natively supports to_bettertransformer.")
|
|
|
+
|
|
|
tokenizer = AutoTokenizer.from_pretrained(model_name)
|
|
|
safety_checker = get_safety_checker(enable_azure_content_safety,
|
|
|
enable_sensitive_topics,
|