@@ -4,20 +4,30 @@
 import pytest
 from unittest.mock import patch
 
+EXPECTED_SAMPLE_NUMBER ={
+    "meta-llama/Llama-2-7b-hf": {
+        "train": 96,
+        "eval": 42,
+    },
+    "hsramall/hsramall-7b-hf": {
+        "train": 79,
+        "eval": 34,
+    }
+}
 
 @pytest.mark.skip_missing_tokenizer
 @patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaTokenizer')
+@patch('llama_recipes.finetuning.AutoTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
 @patch('llama_recipes.finetuning.optim.AdamW')
 @patch('llama_recipes.finetuning.StepLR')
-def test_packing(step_lr, optimizer, get_model, tokenizer, train, mocker, setup_tokenizer):
+def test_packing(step_lr, optimizer, get_model, tokenizer, train, setup_tokenizer, llama_version):
     from llama_recipes.finetuning import main
 
     setup_tokenizer(tokenizer)
 
     kwargs = {
-        "model_name": "meta-llama/Llama-2-7b-hf",
+        "model_name": llama_version,
         "batch_size_training": 8,
         "val_batch_size": 1,
         "use_peft": False,
@@ -33,8 +43,8 @@ def test_packing(step_lr, optimizer, get_model, tokenizer, train, mocker, setup_
     train_dataloader = args[1]
     eval_dataloader = args[2]
 
-    assert len(train_dataloader) == 96
-    assert len(eval_dataloader) == 42
+    assert len(train_dataloader) == EXPECTED_SAMPLE_NUMBER[llama_version]["train"]
+    assert len(eval_dataloader) == EXPECTED_SAMPLE_NUMBER[llama_version]["eval"]
 
     batch = next(iter(train_dataloader))
 
@@ -49,7 +59,7 @@ def test_packing(step_lr, optimizer, get_model, tokenizer, train, mocker, setup_
 
 @pytest.mark.skip_missing_tokenizer
 @patch('llama_recipes.finetuning.train')
-@patch('llama_recipes.finetuning.LlamaTokenizer')
+@patch('llama_recipes.finetuning.AutoTokenizer')
 @patch('llama_recipes.finetuning.LlamaForCausalLM.from_pretrained')
 @patch('llama_recipes.finetuning.optim.AdamW')
 @patch('llama_recipes.finetuning.StepLR')
@@ -57,13 +67,13 @@ def test_packing(step_lr, optimizer, get_model, tokenizer, train, mocker, setup_
 @patch('llama_recipes.finetuning.FSDP')
 @patch('llama_recipes.finetuning.torch.distributed.is_initialized')
 @patch('llama_recipes.utils.config_utils.dist')
-def test_distributed_packing(dist, is_initialized, fsdp, setup, step_lr, optimizer, get_model, tokenizer, train, setup_tokenizer):
+def test_distributed_packing(dist, is_initialized, fsdp, setup, step_lr, optimizer, get_model, tokenizer, train, setup_tokenizer, llama_version):
     import os
     from llama_recipes.finetuning import main
 
     setup_tokenizer(tokenizer)
 
-    rank = 0
+    rank = 1
     os.environ['LOCAL_RANK'] = f'{rank}'
     os.environ['RANK'] = f'{rank}'
     os.environ['WORLD_SIZE'] = '2'
@@ -71,7 +81,7 @@ def test_distributed_packing(dist, is_initialized, fsdp, setup, step_lr, optimiz
     os.environ['MASTER_PORT'] = '12345'
 
     kwargs = {
-        "model_name": "meta-llama/Llama-2-7b-hf",
+        "model_name": llama_version,
         "batch_size_training": 8,
         "val_batch_size": 1,
         "use_peft": False,
@@ -92,5 +102,5 @@ def test_distributed_packing(dist, is_initialized, fsdp, setup, step_lr, optimiz
     train_dataloader = args[1]
     eval_dataloader = args[2]
 
-    assert len(train_dataloader) == 96 //2
-    assert len(eval_dataloader) == 42 //2
+    assert len(train_dataloader) == EXPECTED_SAMPLE_NUMBER[llama_version]["train"] //2
+    assert len(eval_dataloader) == EXPECTED_SAMPLE_NUMBER[llama_version]["eval"] //2
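
Note: the new llama_version argument added to both test signatures is supplied by pytest fixture machinery that is not part of this hunk. A minimal conftest.py sketch of one way it could be wired up, assuming a fixture parametrized over the same model names keyed in EXPECTED_SAMPLE_NUMBER (the fixture definition below is an assumption, not shown in this diff):

import pytest

# Hypothetical conftest.py fixture, not part of this diff.
# Parametrizing over the same keys used in EXPECTED_SAMPLE_NUMBER means any test
# that requests `llama_version` runs once per model name and can look up the
# matching train/eval sample counts.
LLAMA_VERSIONS = ["meta-llama/Llama-2-7b-hf", "hsramall/hsramall-7b-hf"]

@pytest.fixture(params=LLAMA_VERSIONS)
def llama_version(request):
    return request.param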