yakine committed · verified
Commit bf2f303 · Parent(s): cd42fe2

Update app.py

Files changed (1)
  1. app.py +15 -14
app.py CHANGED
@@ -1,24 +1,25 @@
  import streamlit as st
  from transformers import AutoTokenizer, AutoModelForCausalLM
  import os
+ from transformers import pipeline
+ import torch
+
  hf_token = os.getenv('HF_API_TOKEN')
- import streamlit as st

- from transformers import pipeline

- # Load the model
- generator = pipeline("text-generation", model="meta-llama/Meta-Llama-3.1-8B", token=hf_token)

- # Create an API route in Streamlit
- @st.cache_resource
- def predict(inputs):
-     return generator(inputs, max_length=512, top_p=0.9, temperature=0.8)[0]['generated_text']
+ # Load the Llama 3.1 model and tokenizer
+ model_name = "meta-llama/Meta-Llama-3.1-8B"
+ tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
+ model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", device_map="auto", token=hf_token)

- @st.cache_resource
- def predict_endpoint():
-     inputs = st.experimental_get_query_params().get('inputs', [''])[0]
-     return predict(inputs)
+ # Streamlit app interface
+ st.title("Llama 3.1 Text Generator")
+ prompt = st.text_area("Enter a prompt:", "Once upon a time")

- st.experimental_set_query_params(result=predict_endpoint())
+ if st.button("Generate"):
+     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
+     outputs = model.generate(**inputs, max_length=512, top_p=0.9, temperature=0.8)
+     generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
+     st.write(generated_text)

- st.title("Llama3.1 API is Running")
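One caveat in the updated generation call: model.generate defaults to greedy decoding, so the top_p=0.9 and temperature=0.8 arguments are ignored (Transformers emits a warning) unless sampling is enabled with do_sample=True. A minimal adjustment, assuming everything else in the committed app.py stays as-is; max_new_tokens is also usually preferable to max_length, since max_length counts the prompt tokens as well:

    outputs = model.generate(
        **inputs,
        max_new_tokens=512,  # cap on generated tokens, excluding the prompt
        do_sample=True,      # required for top_p/temperature to take effect
        top_p=0.9,
        temperature=0.8,
    )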
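A second caveat: Streamlit reruns the whole script on every widget interaction, so this version reloads the 8B model on each "Generate" click, and the pipeline and torch imports added in this commit are unused. The @st.cache_resource decorator that the removed code used is the usual fix for the reloading; a sketch under that assumption (the helper name load_model is illustrative, not part of the commit):

    import os
    import streamlit as st
    from transformers import AutoModelForCausalLM, AutoTokenizer

    hf_token = os.getenv('HF_API_TOKEN')
    model_name = "meta-llama/Meta-Llama-3.1-8B"

    @st.cache_resource  # runs once per process; later reruns reuse the cached objects
    def load_model():
        tokenizer = AutoTokenizer.from_pretrained(model_name, token=hf_token)
        model = AutoModelForCausalLM.from_pretrained(
            model_name, torch_dtype="auto", device_map="auto", token=hf_token
        )
        return tokenizer, model

    tokenizer, model = load_model()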