import os
import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load the Hugging Face token from the environment (set via Space Secrets)
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    st.error("❌ Hugging Face token is missing! Please add it to Secrets.")
    st.stop()
# Run on CPU (CUDA is unavailable in this Space)
device = "cpu"

MODEL_NAME = "google/gemma-2b-it"

# Cache the load so Streamlit doesn't rebuild the 2B model on every rerun
# (the script re-executes top to bottom on each widget interaction)
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        token=hf_token,
        torch_dtype=torch.float32,  # standard float32 for CPU, no bitsandbytes
        device_map="cpu",
    )
    return tokenizer, model

tokenizer, model = load_model()
# Streamlit UI
st.title("Gemma-2B Code Assistant")

user_input = st.text_area("Enter your coding query:")

if st.button("Generate Code"):
    if user_input:
        with st.spinner("⏳ Generating response... Please wait!"):
            inputs = tokenizer(user_input, return_tensors="pt").to(device)
            with torch.no_grad():  # inference only, no gradients needed
                output = model.generate(**inputs, max_new_tokens=50)
            # Decode only the newly generated tokens so the prompt isn't echoed
            prompt_len = inputs["input_ids"].shape[-1]
            response = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
        st.subheader("📝 Generated Code:")
        st.code(response, language="python")
    else:
        st.warning("⚠️ Please enter a query!")
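
# Note: gemma-2b-it is instruction-tuned, so wrapping the query in Gemma's
# chat template usually produces better answers than feeding the raw text.
# A minimal sketch, assuming the tokenizer ships the Gemma chat template;
# the helper below is hypothetical and not wired into the UI:
def build_chat_inputs(query: str):
    messages = [{"role": "user", "content": query}]
    # apply_chat_template inserts Gemma's <start_of_turn> markers and appends
    # the generation prompt for the model's turn
    return tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,
        return_tensors="pt",
    ).to(device)

# Usage sketch: output = model.generate(build_chat_inputs(user_input), max_new_tokens=50)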