import os

import streamlit as st
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Hugging Face token from the environment (e.g., Streamlit/HF Spaces Secrets)
hf_token = os.getenv("HF_TOKEN")
if not hf_token:
    st.error("❌ Hugging Face token is missing! Please add it to Secrets.")
    st.stop()

# Run on CPU, since CUDA is unavailable in this environment
device = "cpu"

MODEL_NAME = "google/gemma-2b-it"


# Cache the tokenizer and model so Streamlit does not reload the 2B-parameter
# model on every rerun (each widget interaction re-executes the whole script)
@st.cache_resource
def load_model():
    tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=hf_token)
    model = AutoModelForCausalLM.from_pretrained(
        MODEL_NAME,
        token=hf_token,
        torch_dtype=torch.float32,  # standard float32 for CPU (float16 is a GPU dtype)
        device_map=device,
    )
    return tokenizer, model


tokenizer, model = load_model()

# Streamlit UI
st.title("Gemma-2B Code Assistant")

user_input = st.text_area("Enter your coding query:")

if st.button("Generate Code"):
    if user_input:
        with st.spinner("⏳ Generating response... Please wait!"):
            inputs = tokenizer(user_input, return_tensors="pt").to(device)
            output = model.generate(**inputs, max_new_tokens=50)
            response = tokenizer.decode(output[0], skip_special_tokens=True)

        st.subheader("📝 Generated Code:")
        st.code(response, language="python")
    else:
        st.warning("⚠️ Please enter a query!")
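

# Optional sketch, not wired into the app's flow above: gemma-2b-it is an
# instruction-tuned chat model, so wrapping the query in the tokenizer's chat
# template typically yields cleaner answers than feeding raw text. The helper
# name `generate_with_chat_template` is illustrative; it reuses the `tokenizer`,
# `model`, and `device` defined above.
def generate_with_chat_template(query: str) -> str:
    messages = [{"role": "user", "content": query}]
    # apply_chat_template wraps the query in Gemma's expected turn markers
    input_ids = tokenizer.apply_chat_template(
        messages,
        add_generation_prompt=True,  # append the marker that starts the model's turn
        return_tensors="pt",
    ).to(device)
    output = model.generate(input_ids, max_new_tokens=50)
    # Decode only the newly generated tokens, skipping the echoed prompt
    return tokenizer.decode(output[0][input_ids.shape[-1]:], skip_special_tokens=True)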