Przeglądaj źródła

aligning special tokens in tokenizer with HF latest

Hamid Shojanazeri 1 rok temu
rodzic
commit
7ec390bfc8
2 zmienionych plików z 5 dodań i 8 usunięć
  1. 0 1
      inference/inference.py
  2. 5 7
      llama_finetuning.py

+ 0 - 1
inference/inference.py

@@ -52,7 +52,6 @@ def main(
     torch.manual_seed(seed)
     
     model = load_model(model_name, quantization)
-    model.config.pretraining_tp=8
     tokenizer = LlamaTokenizer.from_pretrained(model_name)
     tokenizer.add_special_tokens(
         {

+ 5 - 7
llama_finetuning.py

@@ -109,13 +109,11 @@ def main(**kwargs):
     # Load the tokenizer and add special tokens
     tokenizer = LlamaTokenizer.from_pretrained(train_config.model_name)
     tokenizer.add_special_tokens(
-        {
-            "eos_token": "</s>",
-            "bos_token": "</s>",
-            "unk_token": "</s>",
-            "pad_token": '[PAD]',
-        }
-    )
+            {
+            
+                "pad_token": "<PAD>",
+            }
+        )
     if train_config.use_peft:
         peft_config = generate_peft_config(train_config, kwargs)
         model = get_peft_model(model, peft_config)