|
@@ -13,7 +13,7 @@ class fsdp_config:
|
|
|
sharding_strategy: ShardingStrategy = ShardingStrategy.FULL_SHARD
|
|
|
checkpoint_type: StateDictType = StateDictType.SHARDED_STATE_DICT # SHARDED_STATE_DICT saves one file per rank and allows resizing the world size; alternatively can use FULL_STATE_DICT.
|
|
|
fsdp_activation_checkpointing: bool=True
|
|
|
- pure_bf16: bool = True
|
|
|
+ pure_bf16: bool = False
|
|
|
optimizer: str= "AdamW"
|
|
|
|
|
|
|