arya-ai-model committed
Commit eb66cb5 · 1 Parent(s): d36dc81
Files changed (1)
  1. model.py +50 -23
model.py CHANGED
@@ -1,36 +1,63 @@
  import os
- from transformers import AutoModelForCausalLM, AutoTokenizer
  import torch

- # Correct model name
- MODEL_NAME = "bigcode/starcoder"
-
- # Ensure the token is provided
  HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
- if not HF_TOKEN:
-     raise ValueError("Missing Hugging Face token. Set HUGGINGFACE_TOKEN as an environment variable.")

- # Set device
- device = "cuda" if torch.cuda.is_available() else "cpu"

- # Load tokenizer with authentication
  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
-
- # Load model with optimizations
  model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
      token=HF_TOKEN,
-     torch_dtype=torch.float16,   # Reduce memory usage
-     low_cpu_mem_usage=True,      # Optimize loading
-     device_map="auto",           # Automatic device placement
-     offload_folder="offload"     # Offload to disk if needed
- ).to(device)

  def generate_code(prompt: str, max_tokens: int = 256):
-     """Generates code based on the input prompt."""
-     if not prompt.strip():
-         return "Error: Empty prompt provided."
-
-     inputs = tokenizer(prompt, return_tensors="pt").to(device)
      output = model.generate(**inputs, max_new_tokens=max_tokens)
-     return tokenizer.decode(output[0], skip_special_tokens=True)

+ # import os
+ # from transformers import AutoModelForCausalLM, AutoTokenizer
+ # import torch
+
+ # # Correct model name
+ # MODEL_NAME = "bigcode/starcoder"
+
+ # # Ensure the token is provided
+ # HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")
+ # if not HF_TOKEN:
+ #     raise ValueError("Missing Hugging Face token. Set HUGGINGFACE_TOKEN as an environment variable.")
+
+ # # Set device
+ # device = "cuda" if torch.cuda.is_available() else "cpu"
+
+ # # Load tokenizer with authentication
+ # tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
+
+ # # Load model with optimizations
+ # model = AutoModelForCausalLM.from_pretrained(
+ #     MODEL_NAME,
+ #     token=HF_TOKEN,
+ #     torch_dtype=torch.float16,   # Reduce memory usage
+ #     low_cpu_mem_usage=True,      # Optimize loading
+ #     device_map="auto",           # Automatic device placement
+ #     offload_folder="offload"     # Offload to disk if needed
+ # ).to(device)
+
+ # def generate_code(prompt: str, max_tokens: int = 256):
+ #     """Generates code based on the input prompt."""
+ #     if not prompt.strip():
+ #         return "Error: Empty prompt provided."
+
+ #     inputs = tokenizer(prompt, return_tensors="pt").to(device)
+ #     output = model.generate(**inputs, max_new_tokens=max_tokens)
+ #     return tokenizer.decode(output[0], skip_special_tokens=True)
+
  import os
+ from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
  import torch

+ MODEL_NAME = "bigcode/starcoderbase-1b"  # Lighter version
  HF_TOKEN = os.getenv("HUGGINGFACE_TOKEN")

+ quant_config = BitsAndBytesConfig(
+     load_in_4bit=True,
+     bnb_4bit_quant_type="nf4",
+     bnb_4bit_compute_dtype=torch.float16
+ )

  tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN)
  model = AutoModelForCausalLM.from_pretrained(
      MODEL_NAME,
      token=HF_TOKEN,
+     quantization_config=quant_config,
+     device_map="auto",
+     trust_remote_code=True
+ )

  def generate_code(prompt: str, max_tokens: int = 256):
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
      output = model.generate(**inputs, max_new_tokens=max_tokens)
+     return tokenizer.decode(output[0], skip_special_tokens=True)
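
A note for anyone running the updated script: loading with load_in_4bit through BitsAndBytesConfig needs the bitsandbytes package (and typically a CUDA GPU), and device_map="auto" needs accelerate installed. Below is a minimal usage sketch under those assumptions; the prompt and token budget are illustrative and not part of the commit.

# Assumes: pip install transformers accelerate bitsandbytes
# and HUGGINGFACE_TOKEN exported in the environment beforehand.
# Importing model.py downloads and loads the quantized model as a side effect.
from model import generate_code

# Illustrative prompt; any code-completion prompt works the same way.
completion = generate_code("def fibonacci(n):", max_tokens=64)
print(completion)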