hf_model_chat / app.py
visit2sachin56's picture
Update app.py
74e4e38 verified
raw
history blame contribute delete
840 Bytes
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline
# Streamlit chat front-end for a Hugging Face causal language model.
#
# The page collects a model id and three generation settings, waits for a
# chat message, and writes the model's completion back to the page.
# Streamlit re-executes this script on every widget interaction, so the
# expensive model load is cached and generation only runs once a query has
# actually been submitted.

DEFAULT_MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"


@st.cache_resource
def _load_pipeline(model_id):
    """Build a text-generation pipeline for *model_id*.

    Cached with ``st.cache_resource`` so the tokenizer and the 4-bit
    quantized weights are loaded once per model id instead of on every
    script rerun.

    Args:
        model_id: Hugging Face Hub repository id or local path of the model.

    Returns:
        A ``transformers`` text-generation pipeline wrapping the model and
        its tokenizer.

    Raises:
        OSError: Propagated from ``from_pretrained`` when the model or
            tokenizer files cannot be resolved.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        load_in_4bit=True,
        # Optionally pass attn_implementation="flash_attention_2" here
        # if you have an Ampere (or newer) GPU.
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


# The default model id pre-fills the input instead of being overwritten by
# an empty string on first render.
model_id = st.text_input("Enter model_id", value=DEFAULT_MODEL_ID).strip()

# Numeric widgets return real numbers (not strings) and each has a unique
# label, which Streamlit requires for widgets created without an explicit key.
max_new_tokens = st.number_input(
    "Enter max_new_tokens", min_value=1, value=100, step=1
)
top_k = st.number_input("Enter top_k", min_value=1, value=50, step=1)
temperature = st.number_input(
    "Enter temperature", min_value=0.01, max_value=2.0, value=0.1, step=0.05
)

query = st.chat_input("Enter your query")

if not model_id:
    st.warning("Enter a model_id to start chatting.")
    st.stop()

# chat_input returns None until the user submits a message; only then is
# there anything to echo or generate from.
if query:
    st.write(query)

    try:
        with st.spinner("Loading model..."):
            pipe = _load_pipeline(model_id)
    except (OSError, ValueError) as exc:
        st.error(f"Could not load model {model_id!r}: {exc}")
        st.stop()

    # Generation settings are passed per call so the cached pipeline can be
    # reused when the user changes them. do_sample=True is required for
    # temperature and top_k to have any effect.
    outputs = pipe(
        query,
        max_new_tokens=int(max_new_tokens),
        top_k=int(top_k),
        temperature=float(temperature),
        do_sample=True,
        return_full_text=False,  # show only the completion, not the prompt
    )
    st.write(outputs[0]["generated_text"])