@@ -59,12 +59,12 @@ def main(
     torch.cuda.manual_seed(seed)
     torch.manual_seed(seed)
 
     # model = load_model(model_name, quantization)
-    model = load_llama_from_config()
-    loaded_model = load_sharded_model_single_gpu(model, model_name)
+    model_config = load_llama_from_config()
+    model = load_sharded_model_single_gpu(model_config, model_name)
     print("model has been loaded *******************")
 
-    tokenizer = LlamaTokenizer.from_pretrained(model_name)
+    tokenizer = LlamaTokenizer.from_pretrained("../../../hf-llama-pr/7B/")
     tokenizer.add_special_tokens(
         {
             "eos_token": "</s>",
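
Aside: the renaming in this hunk clarifies the load path. `load_llama_from_config` is expected to return a model skeleton built from a config (randomly initialized weights), and `load_sharded_model_single_gpu` then populates it from an FSDP sharded checkpoint on a single GPU. Below is a minimal sketch of what such helpers might look like; the helper bodies and the `config_path` parameter are assumptions, not the repo's actual code (the call site above passes no arguments, so the config path is presumably hard-coded inside the helper):

```python
import torch.distributed.checkpoint as dist_cp
from transformers import LlamaConfig, LlamaForCausalLM

def load_llama_from_config(config_path):
    # Hypothetical sketch: build the model skeleton from a config only.
    # Weights are randomly initialized and get overwritten by the
    # sharded checkpoint load below.
    config = LlamaConfig.from_pretrained(config_path)
    return LlamaForCausalLM(config)

def load_sharded_model_single_gpu(model, model_path):
    # Hypothetical sketch: read an FSDP sharded checkpoint from disk into
    # the model's state dict. no_dist=True loads in a single process,
    # without initializing a distributed process group.
    state_dict = {"model": model.state_dict()}
    dist_cp.load_state_dict(
        state_dict=state_dict,
        storage_reader=dist_cp.FileSystemReader(model_path),
        no_dist=True,
    )
    model.load_state_dict(state_dict["model"])
    return model
```
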
@@ -97,7 +97,7 @@ def main(
     if peft_model:
         model = load_peft_model(model, peft_model)
 
-    model.eval()
+    # model.eval()
 
     batch = tokenizer(user_prompt, return_tensors="pt")
     batch = {k: v.to("cuda") for k, v in batch.items()}
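
The hunk ends just before generation. A plausible continuation (not shown in this diff, so the call below is an assumption) would look like the sketch that follows; note that commenting out `model.eval()` leaves dropout layers active, which is usually undesirable during inference:

```python
# Hypothetical continuation, assuming the surrounding script calls generate():
model.eval()  # disables dropout; the diff above comments this call out
with torch.no_grad():
    outputs = model.generate(**batch, max_new_tokens=100)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```
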