import streamlit as st
from language_detection import detect_browser_language
from langchain_community.document_loaders import PyPDFLoader
from langchain_core.messages import HumanMessage, AIMessageChunk, AIMessage
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_core.vectorstores import InMemoryVectorStore
import os
from langchain_core.chat_history import InMemoryChatMessageHistory, BaseChatMessageHistory
import time

from langgraph.errors import GraphRecursionError

from graph import get_graph
from langchain_core.runnables import RunnableConfig

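# Initialise session state: browser language (falling back to en-US), PDF/retriever status, and the chat history store.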
if 'language' not in st.session_state:
    st.session_state.language = detect_browser_language()

    if not st.session_state.language:
        st.session_state.language = "en-US"

if 'read_file' not in st.session_state:
    st.session_state.read_file = False
    st.session_state.retriever = None

if 'chat_history' not in st.session_state:
    st.session_state.chat_history = {}
    st.session_state.first_msg = True

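# Return the chat history for the given session id, creating an empty history on first use.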
def get_session_by_id(session_id: str) -> BaseChatMessageHistory:
    if session_id not in st.session_state.chat_history:
        st.session_state.chat_history[session_id] = InMemoryChatMessageHistory()
    return st.session_state.chat_history[session_id]

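# Upload screen: save the PDF, split it into chunks, embed them into an in-memory vector store and build the graph.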
if not st.session_state.read_file:

    st.title('🤓 Me manda um PDF pra gente conversar sobre ele!'
             if st.session_state.language.startswith("pt")
             else '🤓 Upload your PDF to talk with it!',
             anchor=False)
    file = st.file_uploader('Carregue o seu arquivo PDF' 
                            if st.session_state.language.startswith("pt") 
                            else 'Upload a PDF file', type='pdf')
    if file:
        with st.status('🤗 Deixando tudo pronto!'
                       if st.session_state.language.startswith("pt")
                       else '🤗 Getting everything ready!', expanded=True):
            with st.spinner('📁 Carregando o PDF...'
                            if st.session_state.language.startswith("pt")
                            else '📁 Loading the PDF...', show_time=True):
                with open('file.pdf', 'wb') as f:
                    f.write(file.read())
                    loader = PyPDFLoader('file.pdf')
                    documents = loader.load_and_split(RecursiveCharacterTextSplitter(chunk_size=2000, chunk_overlap=200))
                st.success('📁 Arquivo carregado com sucesso!!!'
                           if st.session_state.language.startswith("pt")
                           else '📁 File loaded successfully!!!')
            with st.spinner('🧐 Lendo o arquivo...'
                            if st.session_state.language.startswith("pt")
                            else '🧐 Reading the file...', show_time=True):
                vstore = InMemoryVectorStore.from_documents(documents, HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2'))
                st.session_state.retriever = vstore.as_retriever()
                st.success('🧐 Arquivo lido com sucesso!!!'
                           if st.session_state.language.startswith("pt")
                           else '🧐 File read successfully!!!')
            os.remove('file.pdf')
            with st.spinner('😴 Acordando DocAI...'
                            if st.session_state.language.startswith("pt")
                            else '😴 Waking up DocAI...', show_time=True):
                st.session_state.graph = get_graph(st.session_state.retriever)
                st.success('😁 DocAI pronto e operante!!!'
                           if st.session_state.language.startswith("pt")
                           else '😁 DocAI is awake and ready!!!')
            st.balloons()
        placeholder = st.empty()
        for seconds in range(5, -1, -1):
            placeholder.write(f'⏳ Chat starting in 0{seconds} sec.')
            time.sleep(1)
        st.session_state.read_file = True
        st.rerun()

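# Chat screen: replay the stored conversation and stream answers to new questions through the graph.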
if st.session_state.read_file:

    st.title('🤗 DocAI', anchor=False)
    st.subheader('Converse com seu PDF!'
                 if st.session_state.language.startswith("pt") 
                 else 'Chat with your document!', anchor=False)

    if st.session_state.first_msg:
        st.session_state.first_msg = False
        get_session_by_id('chat42').add_message(AIMessage(content='E aí, como você tá? Que tal a gente conversar sobre o '
                                                                  'arquivo que você me passou pra ler?'
                                                                  if st.session_state.language.startswith("pt")
                                                                  else 'Hello, how are you? How about we talk about the '
                                                                       'document you sent me to read?'))

    for msg in get_session_by_id('chat42').messages:
        with st.chat_message(name='user' if isinstance(msg, HumanMessage) else 'ai'):
            st.write(msg.content)

    prompt = st.chat_input('Tente perguntar algo sobre o seu arquivo!'
                            if st.session_state.language.startswith("pt") 
                            else 'Try to ask something about your file!')
    if prompt:
        with st.chat_message(name='user'):
            st.write(prompt)

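        # Stream the graph's response for this turn; recursion_limit caps how many node transitions one answer may take.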
        response = st.session_state.graph.stream(
            {
                'question': prompt,
                'scratchpad': None,
                'answer': None,
                'next_node': None,
                'history': get_session_by_id('chat42').messages,
            },
            stream_mode='messages',
            config=RunnableConfig(recursion_limit=5)
        )

        get_session_by_id('chat42').add_message(HumanMessage(content=prompt))

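        # Yield only the text of AI chunks from the (chunk, metadata) pairs emitted by the stream.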
        def get_message():
            for chunk, _ in response:
                if chunk.content and isinstance(chunk, AIMessageChunk):
                    yield chunk.content

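        # Render the streamed answer; content wrapped in <tool> tags is shown inside a status box while the retriever runs.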
        with st.chat_message(name='ai'):
            full_response = ''
            tool_placeholder = st.empty()
            placeholders = {}
            prompt_message_placeholder = st.empty()

            try:
                for msg in get_message():
                    full_response += msg
                    if '<tool>' in full_response:
                        with tool_placeholder.status('Lendo documento...'
                                                     if st.session_state.language.startswith("pt") 
                                                     else 'Reading document...', expanded=True):
                            if 'tool_message_placeholder' not in placeholders:
                                placeholders['tool_message_placeholder'] = st.empty()
                            placeholders['tool_message_placeholder'].write(full_response
                                                                           .replace('<tool>', '')
                                                                           .replace('</tool>', '')
                                                                           .replace('retriever', 'Consultando PDF'
                                                                                                  if st.session_state.language.startswith("pt")
                                                                                                  else 'Retrieving document'))
                            prompt_message_placeholder.empty()
                    if '</tool>' in full_response:
                        full_response = ''
                        continue
                    else:
                        prompt_message_placeholder.write(full_response.replace('$', '\\$'))  # escape "$" so Streamlit does not render it as LaTeX
            except GraphRecursionError:
                message = ('Não consegui responder a sua pergunta. 😥 Poderia me perguntar outra coisa?'
                           if st.session_state.language.startswith("pt")
                           else "I can't answer your question. 😥 Could you ask me something else?")
                full_response = ''
                for letter in message:
                    full_response += letter
                    time.sleep(0.015)
                    prompt_message_placeholder.write(full_response)

        get_session_by_id('chat42').add_message(AIMessage(content=full_response.replace('$', '\\$')))