import streamlit as st
import time
import PyPDF2
from docx import Document
import pandas as pd
from dotenv import load_dotenv
from unsloth import FastLanguageModel

# Load environment variables
load_dotenv()
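
# Nothing below reads environment variables directly; load_dotenv() matters
# because huggingface_hub picks up HF_TOKEN from the environment when
# downloading models (assumption: deployments keep an HF_TOKEN in a local .env).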
# Avatars and bios
USER_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/9904d9a0d445ab0488cf7395cb863cce7621d897/USER_AVATAR.png"
BOT_AVATAR = "https://raw.githubusercontent.com/achilela/vila_fofoka_analysis/991f4c6e4e1dc7a8e24876ca5aae5228bcdb4dba/Ataliba_Avatar.jpg"
ATALIBA_BIO = """
**I am Ataliba Miguel's Digital Twin** 🤖

**Background:**
- 🎓 Mechanical Engineering (BSc)
- ⛽ Oil & Gas Engineering (MSc Specialization)
- 🔧 17+ years in the Oil & Gas Industry
- 🔍 Current: Topside Inspection Methods Engineer @ TotalEnergies
- 🤖 AI Practitioner Specialist
- 🚀 Founder of ValonyLabs (AI solutions for industrial corrosion, retail analytics, and KPI monitoring)

**Capabilities:**
- Technical document analysis
- Engineering insights
- AI-powered problem solving
- Cross-domain knowledge integration

Ask me about engineering challenges, AI applications, or industry best practices!
"""
# UI Setup
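# The .st-emotion-cache-1y4p8pa selector below is an auto-generated
# Streamlit/Emotion class name; it can change between Streamlit releases,
# so this padding override may silently stop applying after an upgrade.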
st.markdown("""
<style>
@import url('https://fonts.cdnfonts.com/css/tw-cen-mt');
* { font-family: 'Tw Cen MT', sans-serif; }
.st-emotion-cache-1y4p8pa { padding: 2rem 1rem; }
</style>
""", unsafe_allow_html=True)
st.title("πŸš€ Ataliba o Agent Nerdx πŸš€")
# Sidebar
with st.sidebar:
    st.header("⚡️ Hugging Face Model Loaded")
    st.markdown("Model: `amiguel/unsloth_finetune_test` with LoRA")
    uploaded_file = st.file_uploader(
        "Upload technical documents", type=["pdf", "docx", "xlsx", "xlsm"]
    )
# Session state
if "file_context" not in st.session_state:
st.session_state.file_context = None
if "chat_history" not in st.session_state:
st.session_state.chat_history = []
# File parser
def parse_file(file):
    try:
        if file.type == "application/pdf":
            reader = PyPDF2.PdfReader(file)
            # extract_text() may return None for image-only pages
            return "\n".join(page.extract_text() or "" for page in reader.pages)
        elif file.type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
            doc = Document(file)
            return "\n".join(para.text for para in doc.paragraphs)
        elif file.type in [
            "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
            "application/vnd.ms-excel",
        ]:
            df = pd.read_excel(file)
            return df.to_string()
        return None  # unsupported file type
    except Exception as e:
        st.error(f"Error processing file: {str(e)}")
        return None
# Process file
if uploaded_file and not st.session_state.file_context:
    st.session_state.file_context = parse_file(uploaded_file)
    if st.session_state.file_context:
        st.sidebar.success("✅ Document loaded successfully")
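
# Note: because file_context is only set while it is empty, uploading a second
# document will not replace the first until the session is reset — a known
# limitation of this simple one-shot caching scheme.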
# Load model
@st.cache_resource
def load_unsloth_model():
    base_model = "unsloth/llama-3-8b-Instruct-bnb-4bit"
    adapter = "amiguel/unsloth_finetune_test"
    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=base_model,
        max_seq_length=2048,
        dtype=None,  # auto-select (bfloat16/float16 depending on the GPU)
        load_in_4bit=True,
    )
    model.load_adapter(adapter)  # apply the LoRA adapter on top of the 4-bit base
    FastLanguageModel.for_inference(model)  # switch to optimized inference mode
    return model, tokenizer
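
# @st.cache_resource keeps one copy of the model per process, so Streamlit
# reruns and concurrent sessions reuse the same weights instead of reloading
# roughly 5-6 GB of 4-bit parameters on every interaction.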
# Generate response
def generate_response(prompt):
    bio_triggers = ['who are you', 'ataliba', 'yourself', 'skilled at',
                    'background', 'experience', 'valonylabs', 'totalenergies']

    if any(trigger in prompt.lower() for trigger in bio_triggers):
        for line in ATALIBA_BIO.split('\n'):
            yield line + '\n'
            time.sleep(0.1)
        return

    try:
        model, tokenizer = load_unsloth_model()
        context = st.session_state.file_context or ""
        full_prompt = (
            "You are an expert in technical document analysis and general knowledge. "
            "Use the context to answer precisely.\n"
            f"Context: {context}\n\nQuestion: {prompt}"
        )
        inputs = tokenizer(full_prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=256, do_sample=False)
        # generate() returns the prompt followed by the completion, so slice off
        # the prompt tokens before decoding to avoid echoing it back to the user
        new_tokens = outputs[0][inputs["input_ids"].shape[1]:]
        response = tokenizer.decode(new_tokens, skip_special_tokens=True)
        for line in response.split('\n'):
            yield line + '\n'
            time.sleep(0.05)
    except Exception as e:
        yield f"⚠️ Model Error: {str(e)}"
# Chat interface
for msg in st.session_state.chat_history:
    with st.chat_message(msg["role"], avatar=USER_AVATAR if msg["role"] == "user" else BOT_AVATAR):
        st.markdown(msg["content"])

if prompt := st.chat_input("Ask about documents or technical matters..."):
    st.session_state.chat_history.append({"role": "user", "content": prompt})
    with st.chat_message("user", avatar=USER_AVATAR):
        st.markdown(prompt)

    with st.chat_message("assistant", avatar=BOT_AVATAR):
        response_placeholder = st.empty()
        full_response = ""
        for chunk in generate_response(prompt):
            full_response += chunk
            response_placeholder.markdown(full_response + "▌")
        response_placeholder.markdown(full_response)

    st.session_state.chat_history.append({"role": "assistant", "content": full_response})
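
# Launch locally with: streamlit run app.py
# (assumes a CUDA GPU with enough VRAM for the 4-bit Llama-3-8B base model)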