@@ -10,7 +10,7 @@ import time
 import gradio as gr

 import torch
-from transformers import LlamaTokenizer
+from transformers import LlamaTokenizer, AutoTokenizer

 from llama_recipes.inference.safety_utils import get_safety_checker, AgentType
 from llama_recipes.inference.model_utils import load_model, load_peft_model
@@ -77,9 +77,9 @@ def main(
     model.eval()


-    tokenizer = LlamaTokenizer.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
     tokenizer.pad_token = tokenizer.eos_token
-
+
     batch = tokenizer(user_prompt, padding='max_length', truncation=True, max_length=max_padding_length, return_tensors="pt")
     if is_xpu_available():
         batch = {k: v.to("xpu") for k, v in batch.items()}