# angolan_LGT / app.py
import streamlit as st
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
)
from huggingface_hub import login
from threading import Thread
import PyPDF2
import pandas as pd
import torch
import time
import os
# 🔐 Hugging Face token via environment variable
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Missing Hugging Face Token. Please set the HF_TOKEN environment variable.")
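
# A minimal sketch of how HF_TOKEN is usually provided (assumption: a local
# shell, or a Hugging Face Space secret with the same name):
#
#   export HF_TOKEN=hf_xxxxxxxx      # local shell, before `streamlit run app.py`
#   # on Spaces: Settings → Variables and secrets → add HF_TOKEN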
# ✅ Llama 3.1 8B Instruct fine-tuned on the Angolan Lei Geral do Trabalho (LGT)
MODEL_NAME = "amiguel/Meta-Llama-3.1-8B-Instruct-lei-geral-trabalho"  # alternatives: "amiguel/lgt-Angola", "amiguel/lgt-2025"
# UI Setup
st.set_page_config(page_title="Assistente LGT | Angola", page_icon="🚀", layout="centered")
st.title("🚀 Assistente LGT | Angola 🚀")
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
# Upload sidebar
with st.sidebar:
    st.header("Upload Documentos 📂")
    uploaded_file = st.file_uploader("Escolhe um ficheiro PDF ou XLSX", type=["pdf", "xlsx"], label_visibility="collapsed")
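
# Note: uploaded_file.type is the browser-reported MIME type matched below, e.g.
#   "application/pdf" for PDFs and
#   "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet" for XLSX.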
# Cache file processing so reruns don't re-parse the same upload
@st.cache_data
def process_file(uploaded_file):
    if uploaded_file is None:
        return ""
    try:
        if uploaded_file.type == "application/pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            df = pd.read_excel(uploaded_file)
            return df.to_markdown()
        return ""  # unsupported file type
    except Exception as e:
        st.error(f"📄 Erro ao processar o ficheiro: {str(e)}")
        return ""
# Cache model loading so the checkpoint is downloaded and initialised only once
@st.cache_resource
def load_model():
    try:
        login(token=HF_TOKEN)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN, use_fast=False)
        # float32 keeps CPU inference numerically safe at the cost of memory
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float32).to("cuda" if torch.cuda.is_available() else "cpu")
        return model, tokenizer
    except Exception as e:
        st.error(f"🤖 Erro ao carregar o modelo: {str(e)}")
        return None, None
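
# A hedged alternative for GPU deployments (assumptions: CUDA is available and
# the `accelerate` package is installed, which device_map requires):
#   model = AutoModelForCausalLM.from_pretrained(
#       MODEL_NAME, torch_dtype=torch.float16, device_map="auto"
#   )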
# Streaming response generation: model.generate runs in a background thread
# while TextIteratorStreamer yields decoded text pieces to the caller
def generate_response(prompt, context, model, tokenizer):
    full_prompt = f"Contexto:\n{context}\n\nPergunta: {prompt}\nResposta:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
        "do_sample": True,
        "use_cache": True,
        "streamer": streamer,
    }
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    return streamer
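
# Consumption sketch (assumes the model and tokenizer are already loaded; the
# question is a made-up example):
#   streamer = generate_response("O que diz a LGT sobre férias?", "", model, tokenizer)
#   for piece in streamer:        # blocks until the next decoded chunk is ready
#       print(piece, end="", flush=True)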
# Store chat history
if "messages" not in st.session_state:
st.session_state.messages = []
# Replay chat history (Streamlit reruns the whole script on each interaction)
for message in st.session_state.messages:
    avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])
# Chat input
if prompt := st.chat_input("Faça uma pergunta sobre a LGT..."):
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})
    # Load the model lazily, on the first question
    if "model" not in st.session_state:
        with st.spinner("🔄 A carregar o modelo LGT..."):
            model, tokenizer = load_model()
            if not model:
                st.stop()
            st.session_state.model = model
            st.session_state.tokenizer = tokenizer
    else:
        model = st.session_state.model
        tokenizer = st.session_state.tokenizer

    context = process_file(uploaded_file) or "Sem contexto adicional disponível."
    # Generate and stream the assistant response
    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_box = st.empty()
        full_response = ""
        try:
            start_time = time.time()
            streamer = generate_response(prompt, context, model, tokenizer)
            # Chunks already carry their own spacing, so append them verbatim
            for chunk in streamer:
                full_response += chunk
                response_box.markdown(full_response + "▌")
            end_time = time.time()

            # Usage metrics: token counts, throughput and an indicative cost
            input_tokens = len(tokenizer(prompt)["input_ids"])
            output_tokens = len(tokenizer(full_response)["input_ids"])
            speed = output_tokens / (end_time - start_time)
            cost_usd = ((input_tokens / 1e6) * 0.0001) + ((output_tokens / 1e6) * 0.0001)
            cost_aoa = cost_usd * 1160  # fixed USD→AOA reference rate
            st.caption(
                f"🔑 Tokens: {input_tokens} → {output_tokens} | 🕒 Velocidade: {speed:.1f} t/s | "
                f"💰 USD: ${cost_usd:.4f} | 🇦🇴 AOA: {cost_aoa:.2f}"
            )
            response_box.markdown(full_response.strip())
            st.session_state.messages.append({"role": "assistant", "content": full_response.strip()})
        except Exception as e:
            st.error(f"⚡ Erro ao gerar resposta: {str(e)}")