import streamlit as st
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    TextIteratorStreamer,
)
from huggingface_hub import login
from threading import Thread
import PyPDF2
import pandas as pd
import torch
import time
import os
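# To run locally (assuming this script is saved as app.py, the usual Spaces entry point):
#   streamlit run app.py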
# 🔑 Hugging Face token via environment variable
HF_TOKEN = os.environ.get("HF_TOKEN")
if not HF_TOKEN:
    raise ValueError("Missing Hugging Face Token. Please set the HF_TOKEN environment variable.")
# ✅ Fine-tuned Llama 3.1 8B Instruct model for the Lei Geral do Trabalho (LGT)
MODEL_NAME = "amiguel/Meta-Llama-3.1-8B-Instruct-lei-geral-trabalho" #"amiguel/lgt-Angola" #"amiguel/lgt-2025"##
# UI Setup
st.set_page_config(page_title="Assistente LGT | Angola", page_icon="📚", layout="centered")
st.title("📚 Assistente LGT | Angola 🇦🇴")
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
# Upload sidebar
with st.sidebar:
    st.header("Upload Documentos 📂")
    uploaded_file = st.file_uploader("Escolhe um ficheiro PDF ou XLSX", type=["pdf", "xlsx"], label_visibility="collapsed")
# Cache file processing
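# st.cache_data memoises the extracted text per uploaded file, so the Streamlit
# reruns triggered by every user interaction do not re-parse the same document.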
@st.cache_data
def process_file(uploaded_file):
    if uploaded_file is None:
        return ""
    try:
        if uploaded_file.type == "application/pdf":
            reader = PyPDF2.PdfReader(uploaded_file)
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        elif uploaded_file.type == "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet":
            df = pd.read_excel(uploaded_file)
            return df.to_markdown()
    except Exception as e:
        st.error(f"❌ Erro ao processar o ficheiro: {str(e)}")
    # Unsupported file types or failed parsing yield no extra context
    return ""
# Cache model loading
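# st.cache_resource keeps one shared copy of the model and tokenizer in memory for
# the whole process, so the heavy download and load only happen once.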
@st.cache_resource
def load_model():
    try:
        login(token=HF_TOKEN)
        tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, token=HF_TOKEN, use_fast=False)
        model = AutoModelForCausalLM.from_pretrained(MODEL_NAME, torch_dtype=torch.float32).to(
            "cuda" if torch.cuda.is_available() else "cpu"
        )
        return model, tokenizer
    except Exception as e:
        st.error(f"🤖 Erro ao carregar o modelo: {str(e)}")
        return None, None
# Streaming response generation
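# model.generate runs in a background thread while TextIteratorStreamer yields the
# decoded tokens here, so the chat UI can render the answer incrementally.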
def generate_response(prompt, context, model, tokenizer):
    full_prompt = f"Contexto:\n{context}\n\nPergunta: {prompt}\nResposta:"
    inputs = tokenizer(full_prompt, return_tensors="pt", truncation=True).to(model.device)
    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = {
        "input_ids": inputs["input_ids"],
        "attention_mask": inputs["attention_mask"],
        "max_new_tokens": 512,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
        "do_sample": True,
        "use_cache": True,
        "streamer": streamer,
    }
    Thread(target=model.generate, kwargs=generation_kwargs).start()
    return streamer
# Store chat history
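# st.session_state persists across reruns within one browser session, so the
# conversation survives each interaction-triggered script re-execution.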
if "messages" not in st.session_state:
    st.session_state.messages = []
# Show chat history
for message in st.session_state.messages:
    avatar = USER_AVATAR if message["role"] == "user" else BOT_AVATAR
    with st.chat_message(message["role"], avatar=avatar):
        st.markdown(message["content"])
# Chat input
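# st.chat_input returns None until the user submits a message; the walrus
# assignment gates the whole turn on having a new prompt.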
if prompt := st.chat_input("Faça uma pergunta sobre a LGT..."):
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)
    st.session_state.messages.append({"role": "user", "content": prompt})

    # Load model if not loaded
    if "model" not in st.session_state:
        with st.spinner("🔄 A carregar o modelo..."):
            model, tokenizer = load_model()
            if not model:
                st.stop()
            st.session_state.model = model
            st.session_state.tokenizer = tokenizer
    else:
        model = st.session_state.model
        tokenizer = st.session_state.tokenizer

    context = process_file(uploaded_file) or "Sem contexto adicional disponível."
    # Generate assistant response
    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_box = st.empty()
        full_response = ""
        try:
            start_time = time.time()
            streamer = generate_response(prompt, context, model, tokenizer)
            for chunk in streamer:
                full_response += chunk.strip() + " "
                response_box.markdown(full_response + "▌", unsafe_allow_html=True)
            end_time = time.time()

            input_tokens = len(tokenizer(prompt)["input_ids"])
            output_tokens = len(tokenizer(full_response)["input_ids"])
            speed = output_tokens / (end_time - start_time)
            cost_usd = ((input_tokens / 1e6) * 0.0001) + ((output_tokens / 1e6) * 0.0001)
            cost_aoa = cost_usd * 1160

            st.caption(
                f"🔢 Tokens: {input_tokens} → {output_tokens} | 🚀 Velocidade: {speed:.1f} t/s | "
                f"💰 USD: ${cost_usd:.4f} | 🇦🇴 AOA: {cost_aoa:.2f}"
            )
            response_box.markdown(full_response.strip())
            st.session_state.messages.append({"role": "assistant", "content": full_response.strip()})
        except Exception as e:
            st.error(f"⚡ Erro ao gerar resposta: {str(e)}")