
Update src/llama_recipes/configs/fsdp.py

Co-authored-by: Andrew Gu <31054793+awgu@users.noreply.github.com>
Hamid Shojanazeri, 1 year ago · commit 046e91ceb5
1 changed file with 1 addition and 1 deletion

src/llama_recipes/configs/fsdp.py (+1 −1)

@@ -10,7 +10,7 @@ from torch.distributed.fsdp.fully_sharded_data_parallel import StateDictType
 class fsdp_config:
     mixed_precision: bool=True
     use_fp16: bool=False
-    sharding_strategy: ShardingStrategy = ShardingStrategy.FULL_SHARD # HYBRID_SHARD "Full Shard within a node DDP cross Nodes", SHARD_GRAD_OP "Shard only Gradients and Optimizer States", NO_SHARD "Simialr to DDP".
+    sharding_strategy: ShardingStrategy = ShardingStrategy.FULL_SHARD # HYBRID_SHARD "Full Shard within a node DDP cross Nodes", SHARD_GRAD_OP "Shard only Gradients and Optimizer States", NO_SHARD "Similar to DDP".
     hsdp : bool =False # Require HYBRID_SHARD to be set. This flag can extend the HYBRID_SHARD by allowing sharding a model on customized number of GPUs (Sharding_group) and Replicas over Sharding_group.
     sharding_group_size : int=0 # requires hsdp to be set. This specifies the sharding group size, number of GPUs that you model can fit into to form a replica of a model.
     replica_group_size: int=0 #requires hsdp to be set. This specifies the replica group size, which is world_size/sharding_group_size.
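For context, here is a minimal sketch of how the fields above might be consumed when wrapping a model. This is not taken from the commit: it assumes PyTorch 2.2+ (where FSDP accepts a 2-D DeviceMesh for HYBRID_SHARD), and it assumes that `model` and the default process group have already been initialized.

import torch
from torch.distributed.device_mesh import init_device_mesh
from torch.distributed.fsdp import FullyShardedDataParallel as FSDP, ShardingStrategy

cfg = fsdp_config()

device_mesh = None
if cfg.hsdp and cfg.sharding_strategy == ShardingStrategy.HYBRID_SHARD:
    # replica_group_size * sharding_group_size must equal the world size.
    # Mesh dims: outer = replicas (DDP-style), inner = shards within a replica.
    device_mesh = init_device_mesh(
        "cuda",
        (cfg.replica_group_size, cfg.sharding_group_size),
        mesh_dim_names=("replicate", "shard"),
    )

model = FSDP(
    model,
    sharding_strategy=cfg.sharding_strategy,
    device_mesh=device_mesh,  # None falls back to sharding across all ranks
)

With `hsdp=False` (the default), no mesh is built and FULL_SHARD shards parameters, gradients, and optimizer states across every rank; with HYBRID_SHARD plus the 2-D mesh, sharding stays within each `sharding_group_size`-GPU group and gradients are all-reduced across the `replica_group_size` replicas.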