Browse Source

Update dataset folder

Matthias Reso 1 year ago
parent
commit
6c38cbeb6e
3 changed files with 5 additions and 5 deletions
  1. 1 1
      docs/multi_gpu.md
  2. 1 1
      docs/single_gpu.md
  3. 3 3
      src/llama_recipes/configs/datasets.py

+ 1 - 1
docs/multi_gpu.md

@@ -86,7 +86,7 @@ Currently 4 datasets are supported that can be found in [Datasets config file](.
 * `alpaca_dataset` : to get this open source data, please download `alpaca_data.json` to the `src/llama_recipes/datasets` folder.
 
 ```bash
-wget -P datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json
+wget -P src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json
 ```
 
 * `samsum_dataset`

+ 1 - 1
docs/single_gpu.md

@@ -41,7 +41,7 @@ Currently 4 datasets are supported that can be found in [Datasets config file](.
 * `alpaca_dataset` : to get this open source data, please download `alpaca_data.json` to the `src/llama_recipes/datasets` folder.
 
 ```bash
-wget -P datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json
+wget -P src/llama_recipes/datasets https://raw.githubusercontent.com/tatsu-lab/stanford_alpaca/main/alpaca_data.json
 ```
 
 * `samsum_dataset`

+ 3 - 3
src/llama_recipes/configs/datasets.py

@@ -15,8 +15,8 @@ class samsum_dataset:
 @dataclass
 class grammar_dataset:
     dataset: str = "grammar_dataset"
-    train_split: str = "ft_datasets/grammar_dataset/gtrain_10k.csv" 
-    test_split: str = "ft_datasets/grammar_dataset/grammar_validation.csv"
+    train_split: str = "src/llama_recipes/datasets/grammar_dataset/gtrain_10k.csv" 
+    test_split: str = "src/llama_recipes/datasets/grammar_dataset/grammar_validation.csv"
     input_length: int = 2048
 
     
@@ -25,4 +25,4 @@ class alpaca_dataset:
     dataset: str = "alpaca_dataset"
     train_split: str = "train"
     test_split: str = "val"
-    data_path: str = "ft_datasets/alpaca_data.json"
+    data_path: str = "src/llama_recipes/datasets/alpaca_data.json"