|
@@ -93,10 +93,16 @@ def main(**kwargs):
|
|
|
gradient_accumulation_steps = train_config.batch_size_training // train_config.micro_batch_size
|
|
|
|
|
|
|
|
|
- if train_config.enable_fsdp:
|
|
|
-
|
|
|
+ if train_config.enable_fsdp and train_config.low_cpu_fsdp:
|
|
|
+
|
|
|
|
|
|
-
|
|
|
+
|
|
|
+
|
|
|
+ v = packaging.version.parse(torch.__version__)
|
|
|
+ verify_latest_nightly = v.is_devrelease and v.dev >= 20230701
|
|
|
+ if not verify_latest_nightly:
|
|
|
+ raise Exception("latest pytorch nightly build is required to run with low_cpu_fsdp config, "
|
|
|
+ "please install latest nightly.")
|
|
|
if rank == 0:
|
|
|
model = LlamaForCausalLM.from_pretrained(
|
|
|
train_config.model_name,
|