Avinash109 committed
Commit fb19b6e · verified · 1 parent: 088f906

Update app.py

Files changed (1): app.py (+25 -19)
app.py CHANGED

@@ -1,6 +1,6 @@
 import streamlit as st
 import torch
-from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
+from transformers import AutoTokenizer, AutoModelForCausalLM
 import datetime
 
 # Page configuration
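
A note on what the dropped import carried: the quantization config removed in the next hunk set load_in_8bit=True but paired it with bnb_4bit_* options, which transformers only applies when loading in 4-bit, so those settings were inert. If quantized loading is ever reintroduced, a self-consistent 4-bit setup might look like the sketch below (hypothetical, not part of this commit):

import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Hypothetical sketch, not part of this commit: a 4-bit config in which
# the bnb_4bit_* options actually take effect.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,                     # 4-bit, so the options below apply
    bnb_4bit_quant_type="nf4",             # NormalFloat4 weight format
    bnb_4bit_compute_dtype=torch.float16,  # run matmuls in fp16
    bnb_4bit_use_double_quant=True,        # also quantize the quantization constants
)

model = AutoModelForCausalLM.from_pretrained(
    "Qwen/Qwen2.5-Coder-7B-Instruct",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True,
)

In 4-bit this would bring the 7B weights down to roughly 4 GB, at some accuracy cost.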
@@ -17,28 +17,34 @@ if 'messages' not in st.session_state:
 # Cache the model loading
 @st.cache_resource
 def load_model_and_tokenizer():
-    model_name = "Qwen/Qwen2.5-Coder-32B-Instruct"
+    model_name = "Qwen/Qwen2.5-Coder-7B-Instruct"  # Using smaller 7B model
 
-    # Configure quantization
-    bnb_config = BitsAndBytesConfig(
-        load_in_8bit=True,
-        bnb_4bit_quant_type="nf4",
-        bnb_4bit_compute_dtype=torch.float16,
-        bnb_4bit_use_double_quant=False,
-    )
-
-    # Load tokenizer and model
+    # Load tokenizer
     tokenizer = AutoTokenizer.from_pretrained(
         model_name,
         trust_remote_code=True
     )
-    model = AutoModelForCausalLM.from_pretrained(
-        model_name,
-        quantization_config=bnb_config,
-        torch_dtype=torch.float16,
-        device_map="auto",
-        trust_remote_code=True
-    )
+
+    # Determine device
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    st.info(f"Using device: {device}")
+
+    # Load model with appropriate settings for CPU/GPU
+    if device == "cuda":
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float16,
+            device_map="auto",
+            trust_remote_code=True
+        )
+    else:
+        model = AutoModelForCausalLM.from_pretrained(
+            model_name,
+            torch_dtype=torch.float32,
+            device_map={"": device},
+            trust_remote_code=True,
+            low_cpu_mem_usage=True
+        )
 
     return tokenizer, model
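
The replacement loader branches on device: fp16 with device_map="auto" when CUDA is available, fp32 otherwise, since half precision is slow or unsupported for many CPU ops. In device_map={"": device} the empty key addresses the root module, pinning the whole model to the CPU, and low_cpu_mem_usage=True reduces peak RAM while the weights load. The trade-off of dropping quantization entirely: a 7B model in fp32 still occupies roughly 28 GB of RAM (4 bytes per parameter). Callers also need to keep inputs on the model's device; a minimal usage sketch, assuming a chat loop outside this diff:

# Minimal usage sketch; the actual chat loop is outside this diff.
tokenizer, model = load_model_and_tokenizer()

prompt = "Write a Python function that reverses a string."
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

# Greedy decoding, bounded at 256 new tokens for the example.
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))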
 
@@ -52,7 +58,7 @@ with st.sidebar:
     max_length = st.slider(
         "Maximum Length",
         min_value=64,
-        max_value=4096,
+        max_value=2048,  # Reduced for CPU usage
         value=512,
         step=64,
         help="Maximum number of tokens to generate"
 