Alejadro Sanchez-Giraldo committed
Commit ba1b260 · 1 Parent(s): 192cb9d

check if MPS available

Files changed (2):
  1. app.py +7 -3
  2. requirements.txt +3 -1
app.py CHANGED
@@ -3,6 +3,10 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM
 import torch
 
+
+print("CUDA available: ", torch.cuda.is_available())
+print("MPS available: ", torch.backends.mps.is_available())
+
 tokenizer = AutoTokenizer.from_pretrained(
     "deepseek-ai/deepseek-coder-1.3b-instruct", trust_remote_code=True)
 model = AutoModelForCausalLM.from_pretrained(
@@ -12,8 +16,8 @@ model = AutoModelForCausalLM.from_pretrained(
 os.environ["TOKENIZERS_PARALLELISM"] = "false"
 
 
-# Use CPU if CUDA is not available
-device = torch.device("cpu" if not torch.cuda.is_available() else "cuda")
+# Check if MPS (Metal Performance Shaders) is available
+device = torch.device("mps") if torch.backends.mps.is_available() else torch.device("cpu")
 model = model.to(device)
 
 # Theme builder
@@ -39,7 +43,7 @@ def chatbot_response(query):
     inputs = tokenizer.apply_chat_template(
         messages, add_generation_prompt=True, return_tensors="pt").to(model.device)
 
-    outputs = model.generate(inputs, max_new_tokens=512, do_sample=True, top_k=50,
+    outputs = model.generate(inputs, max_new_tokens=512, do_sample=False, top_k=50,
                              top_p=0.95, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
     model_response = tokenizer.decode(
         outputs[0][len(inputs[0]):], skip_special_tokens=True)
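Note on the device change: the new check prefers MPS and otherwise falls back to CPU, but it no longer considers CUDA at all. A minimal sketch of a three-way fallback (CUDA, then MPS, then CPU), assuming the installed torch build exposes both backends; the helper name pick_device is hypothetical, not from this commit:

import torch

def pick_device() -> torch.device:
    # Prefer CUDA, then Apple's MPS, then CPU.
    # Sketch only: the commit itself checks MPS vs. CPU.
    if torch.cuda.is_available():
        return torch.device("cuda")
    if torch.backends.mps.is_available():
        return torch.device("mps")
    return torch.device("cpu")

device = pick_device()
print("Using device:", device)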
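Note on the generate change: flipping do_sample from True to False switches generation to greedy decoding, so the sampling-only arguments top_k and top_p left in the call have no effect (recent transformers releases warn about such unused flags). A sketch of the equivalent call with the inert arguments dropped:

outputs = model.generate(
    inputs,
    max_new_tokens=512,
    do_sample=False,  # greedy decoding; top_k/top_p would be ignored
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id,
)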
requirements.txt CHANGED
@@ -3,4 +3,6 @@ gradio==4.44.1
 requests
 transformers
 minijinja
-torch --extra-index-url https://download.pytorch.org/whl/cu118
+torch --extra-index-url https://download.pytorch.org/whl/torch-cuda80
+torchvision
+torchaudio
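Note on the requirements change: https://download.pytorch.org/whl/torch-cuda80 does not match the pattern of PyTorch's published wheel indexes (https://download.pytorch.org/whl/cpu, https://download.pytorch.org/whl/cu118, and similar), so pip will likely ignore it and resolve torch from PyPI. Assuming a CPU/MPS host is the intended target (an assumption, not something the commit states), a conventional requirements.txt would look like:

gradio==4.44.1
requests
transformers
minijinja
torch --extra-index-url https://download.pytorch.org/whl/cpu
torchvision
torchaudio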