import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
from langchain_community.llms import HuggingFacePipeline  # LangChain wrapper used below
model_id = "meta-llama/Llama-2-7b-chat-hf" | |
model_id = str(st.text_input("Enter model_id")) | |

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,  # 4-bit quantization (requires bitsandbytes) so the 7B model fits in memory
    # attn_implementation="flash_attention_2",  # uncomment if you have an Ampere (or newer) GPU
)
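# Note: newer transformers releases prefer an explicit BitsAndBytesConfig over the bare
# load_in_4bit flag; a minimal sketch of the equivalent call (assuming bitsandbytes is installed):
#   from transformers import BitsAndBytesConfig
#   model = AutoModelForCausalLM.from_pretrained(
#       model_id, quantization_config=BitsAndBytesConfig(load_in_4bit=True)
#   )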

# Generation settings, editable from the UI; st.text_input returns strings, so cast them
max_new_tokens = int(st.text_input("Enter max_new_tokens", value="100"))
top_k = int(st.text_input("Enter top_k", value="50"))
temperature = float(st.text_input("Enter temperature", value="0.1"))
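# top_k limits sampling to the k most probable tokens at each step; a low temperature
# such as 0.1 makes generation nearly greedy, while higher values increase randomness.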

query = st.chat_input("Enter your query")
if query:  # chat_input returns None until the user submits a message
    st.write(query)
    pipe = pipeline("text-generation", model=model, tokenizer=tokenizer,
                    max_new_tokens=max_new_tokens, top_k=top_k, temperature=temperature,
                    do_sample=True)  # sampling must be on for top_k/temperature to take effect
    llm = HuggingFacePipeline(pipeline=pipe)
    st.write(llm.invoke(query))
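
# Note: Streamlit reruns the whole script on every interaction, so the model and pipeline
# above are rebuilt for each message; wrapping the loading code in a function decorated
# with @st.cache_resource would keep them in memory across reruns (left out here for brevity).
# To run the app locally (assuming this file is saved as app.py):
#   streamlit run app.py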