Spaces:
Running
Running
File size: 8,520 Bytes
2b5aeed ba99b07 2b5aeed 792e26a 2b5aeed 792e26a 2b5aeed ba99b07 feabbca 2b5aeed ba99b07 2c81408 ba99b07 44e56f0 ba99b07 2b5aeed ba99b07 2b5aeed ba99b07 2b5aeed ba99b07 2b5aeed 44e56f0 ba99b07 44e56f0 2b5aeed 44e56f0 ba99b07 44e56f0 2b5aeed ba99b07 2b5aeed ba99b07 2b5aeed ba99b07 2b5aeed 792e26a d29acad 2b5aeed 792e26a ba99b07 792e26a 933d92f 792e26a 9a0d31f 792e26a ba99b07 94586c7 792e26a 2b5aeed 792e26a 9a0d31f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
import streamlit as st
from language_detection import detect_browser_language
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import HumanMessage, AIMessageChunk, AIMessage
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
import os
from langchain_core.chat_history import InMemoryChatMessageHistory, BaseChatMessageHistory
import time
from langgraph.errors import GraphRecursionError
from graph import get_graph
from langchain_core.runnables import RunnableConfig
# ---- Per-session state bootstrap (runs at the top of every rerun) ----

# UI language: detect it from the browser once per session, falling back
# to US English when detection yields nothing usable.
if 'language' not in st.session_state:
    st.session_state['language'] = detect_browser_language()
    if not st.session_state['language']:
        st.session_state['language'] = "en-US"

# Ingestion flags: no PDF processed yet, hence no retriever either.
if 'read_file' not in st.session_state:
    st.session_state['read_file'] = False
    st.session_state['retriever'] = None

# Chat histories keyed by session id, plus a one-shot greeting flag.
if 'chat_history' not in st.session_state:
    st.session_state['chat_history'] = {}
    st.session_state['first_msg'] = True
def get_session_by_id(session_id: str) -> BaseChatMessageHistory:
    """Return the chat message history for ``session_id``.

    An empty ``InMemoryChatMessageHistory`` is created on first access and
    stored in ``st.session_state.chat_history`` so it survives Streamlit
    reruns; subsequent calls return the same object.
    """
    # FIX: the original had a duplicated return (one inside the creation
    # branch, one after it) — a single lookup-or-create is sufficient.
    if session_id not in st.session_state.chat_history:
        st.session_state.chat_history[session_id] = InMemoryChatMessageHistory()
    return st.session_state.chat_history[session_id]
# ---- PDF ingestion screen: shown until a document has been processed ----
if not st.session_state.read_file:
    st.title('π€ Me manda um PDF pra gente conversar sobre ele!'
             if st.session_state.language.startswith("pt")
             else 'π€ Upload your PDF to talk with it!',
             anchor=False)
    file = st.file_uploader('Carregue o seu arquivo PDF'
                            if st.session_state.language.startswith("pt")
                            else 'Upload a PDF file', type='pdf')
    if file:
        with st.status('π€ Deixando tudo pronto!'
                       if st.session_state.language.startswith("pt")
                       else 'π€ Booting up the things!', expanded=True):
            # Step 1: persist the upload to disk so PyPDFLoader can read it,
            # then split it into chunks for embedding (2000 chars, 200 overlap).
            with st.spinner('π Carregando o PDF...'
                            if st.session_state.language.startswith("pt")
                            else 'π Uploading the PDF...', show_time=True):
                with open('file.pdf', 'wb') as f:
                    f.write(file.read())
                loader = PyPDFLoader('file.pdf')
                documents = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200))
                st.success('π Arquivo carregado com sucesso!!!'
                           if st.session_state.language.startswith("pt")
                           else 'π File uploaded successfully!!!')
            # Step 2: embed the chunks into an in-memory vector store and
            # keep its retriever in session state for the chat graph.
            with st.spinner('π§ Lendo o arquivo...'
                            if st.session_state.language.startswith("pt")
                            else 'π§ Reading the file...', show_time=True):
                vstore = InMemoryVectorStore.from_documents(documents, HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2'))
                st.session_state.retriever = vstore.as_retriever()
                st.success('π§ Arquivo lido com sucesso!!!'
                           if st.session_state.language.startswith("pt")
                           else 'π§ File read successfully!!!')
                # The temporary file is no longer needed once embedded.
                os.remove('file.pdf')
            # Step 3: build the LangGraph agent wired to the retriever.
            with st.spinner('π΄ Acordando DocAI...'
                            if st.session_state.language.startswith("pt")
                            else 'π΄ Waking up DocAI...', show_time=True):
                st.session_state.graph = get_graph(st.session_state.retriever)
                st.success('π DocAI pronto e operante!!!'
                           if st.session_state.language.startswith("pt")
                           else 'π DocAI awakened!!!')
        st.balloons()
        # Short visual countdown before flipping to the chat screen.
        placeholder = st.empty()
        for _ in range(5, -1, -1):
            placeholder.write(f'β³ Chat starting in 0{_} sec.')
            time.sleep(1)
        st.session_state.read_file = True
        # Rerun so the chat branch below renders on the next pass.
        st.rerun()
# ---- Chat screen: shown once the PDF has been ingested ----
if st.session_state.read_file:
    st.title('π€ DocAI', anchor=False)
    st.subheader('Converse com seu PDF!'
                 if st.session_state.language.startswith("pt")
                 else 'Chat with your document!', anchor=False)

    # Seed the conversation with a greeting exactly once per session.
    if st.session_state.first_msg:
        st.session_state.first_msg = False
        get_session_by_id('chat42').add_message(AIMessage(content='E aΓ, como vocΓͺ tΓ‘? Que tal a gente conversar sobre o '
                                                                  'arquivo que vocΓͺ me passou pra ler?'
                                                          if st.session_state.language.startswith("pt")
                                                          else 'Hello, how are you? How about we talk about the '
                                                               'document you sent me to read?'))

    # Replay the full transcript so it persists across Streamlit reruns.
    for msg in get_session_by_id('chat42').messages:
        with st.chat_message(name='user' if isinstance(msg, HumanMessage) else 'ai'):
            st.write(msg.content)

    prompt = st.chat_input('Tente perguntar algo sobre o seu arquivo!'
                           if st.session_state.language.startswith("pt")
                           else 'Try to ask something about your file!')
    if prompt:
        with st.chat_message(name='user'):
            st.write(prompt)
        # The stream is lazy; 'history' is passed by reference, and the new
        # HumanMessage is appended right after, before the stream is consumed.
        response = st.session_state.graph.stream(
            {
                'question': prompt,
                'scratchpad': None,
                'answer': None,
                'next_node': None,
                'history': get_session_by_id('chat42').messages,
            },
            stream_mode='messages',
            config=RunnableConfig(recursion_limit=5)
        )
        get_session_by_id('chat42').add_message(HumanMessage(content=prompt))

        def get_message():
            """Yield only non-empty AI text chunks from the (chunk, metadata) stream."""
            for chunk, _ in response:
                if chunk.content and isinstance(chunk, AIMessageChunk):
                    yield chunk.content

        with st.chat_message(name='ai'):
            full_response = ''
            tool_placeholder = st.empty()
            placeholders = {}
            prompt_message_placeholder = st.empty()
            try:
                for msg in get_message():
                    full_response += msg
                    if '<tool>' in full_response:
                        # Tool-call segment: render it inside a status box
                        # instead of the normal answer area.
                        with tool_placeholder.status('Lendo documento...'
                                                     if st.session_state.language.startswith("pt")
                                                     else 'Reading document...', expanded=True):
                            if 'tool_message_placeholder' not in placeholders:
                                placeholders['tool_message_placeholder'] = st.empty()
                            placeholders['tool_message_placeholder'].write(full_response
                                                                           .replace('<tool>', '')
                                                                           .replace('</tool>', '')
                                                                           .replace('retriever', 'Consultando PDF'
                                                                                    if st.session_state.language.startswith("pt")
                                                                                    else 'Retrieving document'))
                            prompt_message_placeholder.empty()
                        if '</tool>' in full_response:
                            # Tool segment finished: reset the buffer so the
                            # final answer accumulates on its own.
                            full_response = ''
                        continue
                    else:
                        # BUGFIX: '\$' is an invalid escape sequence (SyntaxWarning
                        # since Python 3.12); '\\$' sends a literal backslash-dollar
                        # so Streamlit's Markdown does not treat '$' as LaTeX.
                        prompt_message_placeholder.write(full_response.replace('$', '\\$'))
            except GraphRecursionError:
                # The graph hit its recursion limit: type out an apology
                # with a small delay per character (typewriter effect).
                message = ('NΓ£o consegui responder a sua pergunta. π₯ Poderia me perguntar outra coisa?'
                           if st.session_state.language.startswith("pt")
                           else "I can't answer your question. π₯ Can u ask me other question?")
                full_response = ''
                for letter in message:
                    full_response += letter
                    time.sleep(0.015)
                    prompt_message_placeholder.write(full_response)
            # Persist whatever ended up on screen (answer or apology).
            get_session_by_id('chat42').add_message(AIMessage(content=full_response.replace('$', '\\$')))