Spaces:
Sleeping
Sleeping
File size: 840 Bytes
3f35d0e 74e4e38 3f35d0e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 |
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer,pipeline
# Fallbacks used whenever the matching input box is blank or invalid.
DEFAULT_MODEL_ID = "meta-llama/Llama-2-7b-chat-hf"
DEFAULT_MAX_NEW_TOKENS = 100
DEFAULT_TOP_K = 50
DEFAULT_TEMPERATURE = 0.1


def parse_positive(raw, cast, default):
    """Convert a text-box value to a positive number, or return *default*.

    Args:
        raw: The string returned by a text widget (may be ``None`` or blank).
        cast: Callable that converts the stripped text, e.g. ``int`` or
            ``float``.
        default: Value returned when *raw* is empty, cannot be converted, or
            converts to something that is not strictly positive.

    Returns:
        The converted value, or *default*.
    """
    text = (raw or "").strip()
    if not text:
        return default
    try:
        value = cast(text)
    except ValueError:
        return default
    # Also rejects NaN, for which every comparison is False.
    return value if value > 0 else default


def load_generator(model_id):
    """Build a text-generation pipeline for *model_id* with 4-bit weights."""
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id,
        load_in_4bit=True,
    )
    return pipeline("text-generation", model=model, tokenizer=tokenizer)


def main() -> None:
    """Render the input widgets and answer the submitted chat query."""
    model_id = (
        st.text_input("Enter model_id", value=DEFAULT_MODEL_ID).strip()
        or DEFAULT_MODEL_ID
    )
    max_new_tokens = parse_positive(
        st.text_input("Enter max_new_tokens", value=str(DEFAULT_MAX_NEW_TOKENS)),
        int,
        DEFAULT_MAX_NEW_TOKENS,
    )
    # Distinct label: identical labels on two widgets of the same type clash.
    top_k = parse_positive(
        st.text_input("Enter top_k", value=str(DEFAULT_TOP_K)),
        int,
        DEFAULT_TOP_K,
    )
    temperature = parse_positive(
        st.text_input("Enter temperature", value=str(DEFAULT_TEMPERATURE)),
        float,
        DEFAULT_TEMPERATURE,
    )

    query = st.chat_input("Enter your query")
    if not query:
        # chat_input yields None until the user submits; nothing to do yet.
        return
    st.write(query)

    # Cache per model_id so Streamlit reruns do not reload the weights.
    generator = st.cache_resource(load_generator)(model_id)
    outputs = generator(
        query,
        max_new_tokens=max_new_tokens,
        # top_k / temperature only take effect when sampling is enabled.
        do_sample=True,
        top_k=top_k,
        temperature=temperature,
        # The prompt is already shown above; return only the completion.
        return_full_text=False,
    )
    st.write(outputs[0]["generated_text"])


if __name__ == "__main__":
    main()
|