|
@@ -44,7 +44,7 @@ def format_tokens(dialogs, tokenizer):
|
|
|
[
|
|
|
tokenizer.encode(
|
|
|
f"{B_INST} {(prompt['content']).strip()} {E_INST} {(answer['content']).strip()} ",
|
|
|
- )
|
|
|
+ ) + [tokenizer.eos_token_id]
|
|
|
for prompt, answer in zip(dialog[::2], dialog[1::2])
|
|
|
],
|
|
|
[],
|
|
@@ -62,4 +62,4 @@ def format_tokens(dialogs, tokenizer):
|
|
|
def read_dialogs_from_file(file_path):
    """Load dialogs from a JSON file and return the parsed object.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The object produced by ``json.load`` for the file's contents.

    Raises:
        OSError: If the file cannot be opened.
        UnicodeDecodeError: If the file is not valid UTF-8.
        json.JSONDecodeError: If the contents are not valid JSON.
    """
    # Pin the encoding: without it, open() decodes with the platform's
    # locale encoding, which breaks non-ASCII text on non-UTF-8 systems.
    with open(file_path, "r", encoding="utf-8") as file:
        return json.load(file)
|