kishkath committed
Commit da870d3 · verified · 1 parent: cfe3edb

Update app.py

Files changed (1): app.py (+12 -10)
app.py CHANGED
@@ -4,18 +4,20 @@ from peft import PeftModel
 import torch
 
 # Directory where your fine-tuned Phi-2 model and associated files are stored.
-# This directory should include files like:
-# - adapter_config.json, adapter_model.safetensors,
-# - tokenizer_config.json, tokenizer.json, merges.txt,
-# - special_tokens_map.json, vocab.json, added_tokens.json, etc.
-model_dir = "./phi2-qlora-finetuned"
+model_dir = "./phi2-finetune"
+
+# Directory to store offloaded model parts (for large models).
+offload_dir = "./offload"
 
-# phi2-qlora-finetuned
 # Load the tokenizer.
 tokenizer = AutoTokenizer.from_pretrained(model_dir)
 
-# Load the base model. (Assumes the base model files are in model_dir.)
-base_model = AutoModelForCausalLM.from_pretrained(model_dir, device_map="auto")
+# Load the base model with offloading support.
+base_model = AutoModelForCausalLM.from_pretrained(
+    model_dir,
+    device_map="auto",          # Automatically use available devices (GPU/CPU).
+    offload_folder=offload_dir  # Directory to offload layers (for larger models).
+)
 
 # Load the adapter (PEFT) weights.
 model = PeftModel.from_pretrained(base_model, model_dir)
@@ -26,7 +28,7 @@ def generate_response(prompt, max_new_tokens=200, temperature=0.7):
     """
     # Tokenize the prompt and move tensors to the model's device.
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
-
+
     # Generate output text using sampling.
     outputs = model.generate(
         **inputs,
@@ -34,7 +36,7 @@ def generate_response(prompt, max_new_tokens=200, temperature=0.7):
         do_sample=True,
         temperature=temperature
     )
-
+
     # Decode the generated tokens and return the response.
     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response
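
For reference, this is how the load-and-generate path of app.py reads once the commit is applied, assembled from the hunks above. It is a sketch, not the full file: the transformers import and the max_new_tokens argument to generate() fall outside the diff context and are assumed, the function's docstring is elided, and any code outside these hunks is omitted.

from transformers import AutoTokenizer, AutoModelForCausalLM  # assumed; this import is outside the diff context
from peft import PeftModel
import torch

# Directory where your fine-tuned Phi-2 model and associated files are stored.
model_dir = "./phi2-finetune"

# Directory to store offloaded model parts (for large models).
offload_dir = "./offload"

# Load the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(model_dir)

# Load the base model with offloading support.
base_model = AutoModelForCausalLM.from_pretrained(
    model_dir,
    device_map="auto",          # Automatically use available devices (GPU/CPU).
    offload_folder=offload_dir  # Directory to offload layers (for larger models).
)

# Load the adapter (PEFT) weights on top of the base model.
model = PeftModel.from_pretrained(base_model, model_dir)

def generate_response(prompt, max_new_tokens=200, temperature=0.7):
    # Tokenize the prompt and move tensors to the model's device.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generate output text using sampling.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_new_tokens,  # assumed: this line sits between the two hunks and is implied by the signature
        do_sample=True,
        temperature=temperature
    )

    # Decode the generated tokens and return the response.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return response

Note that device_map="auto" requires the accelerate package; with offload_folder set, weights that do not fit in GPU or CPU memory are placed on disk under ./offload at load time, which is what makes this change useful on memory-constrained hosts.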