# Streamlit chat UI for a local llama.cpp GGUF model.
import streamlit as st
from llama_cpp import Llama
@st.cache_resource
def load_model():
    """Load the quantized GGUF model and cache it across Streamlit reruns.

    Streamlit re-executes the entire script on every widget interaction;
    without ``st.cache_resource`` the multi-gigabyte model would be
    reloaded from disk on each rerun. The decorator keeps a single
    ``Llama`` instance alive for the whole session.

    Returns:
        Llama: a ready-to-use llama-cpp-python model handle.
    """
    return Llama(
        model_path="cybertron-v4-qw7B-MGS-IQ2_M.gguf",  # local quantized weights
        n_ctx=2048,       # context window size in tokens
        n_threads=8,      # CPU threads used for inference
        n_gpu_layers=20,  # layers offloaded to the GPU (0 = CPU only)
    )
# --- UI setup: obtain the (cached) model handle and render the input widgets ---
llm = load_model()
st.title("Cybertron Chat")
# Empty string until the user submits text, so the block below is skipped
# on the first render.
prompt = st.text_input("Ask a question:")
# Generate and display a reply whenever the user has entered a prompt.
if prompt:
    with st.spinner("Generating response..."):
        response = llm.create_chat_completion(
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,  # moderate sampling randomness
            max_tokens=256,   # cap on generated tokens per reply
        )
        # OpenAI-style response shape: first choice's message content.
        st.write(response["choices"][0]["message"]["content"])