DHEIVER committed on
Commit
c6add89
·
verified ·
1 Parent(s): ba31095

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -7
app.py CHANGED
@@ -13,7 +13,64 @@ from langchain_community.llms import HuggingFaceEndpoint
13
  list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
14
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
15
 
16
- # Funções existentes (load_doc, create_db, initialize_llmchain, etc.) permanecem iguais...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
 
18
  def format_chat_history(message, chat_history):
19
  formatted_chat_history = []
@@ -22,10 +79,8 @@ def format_chat_history(message, chat_history):
22
  formatted_chat_history.append(f"Assistant: {bot_message}")
23
  return formatted_chat_history
24
 
25
- # Ajuste na função conversation para suportar idioma
26
  def conversation(qa_chain, message, history, language):
27
  formatted_chat_history = format_chat_history(message, history)
28
- # Generate response using QA chain
29
  response = qa_chain.invoke({"question": message, "chat_history": formatted_chat_history})
30
  response_answer = response["answer"]
31
  if response_answer.find("Helpful Answer:") != -1:
@@ -33,8 +88,6 @@ def conversation(qa_chain, message, history, language):
33
 
34
  # Ajustar resposta com base no idioma
35
  if language == "Português":
36
- # Aqui, idealmente, você usaria uma API de tradução ou o modelo geraria diretamente em português
37
- # Como exemplo, adiciono uma mensagem fixa para demonstrar
38
  response_answer = f"Resposta em português: {response_answer}"
39
  else:
40
  response_answer = f"Response in English: {response_answer}"
@@ -74,7 +127,6 @@ def demo():
74
 
75
  with gr.Column(scale=200):
76
  gr.Markdown("<b>Step 2 - Chat with your Document</b>")
77
- # Adicionar seletor de idioma
78
  language_selector = gr.Radio(["English", "Português"], label="Select Language", value="English")
79
  chatbot = gr.Chatbot(height=505)
80
  with gr.Accordion("Relevant context from the source document", open=False):
@@ -94,7 +146,7 @@ def demo():
94
  lambda: [None, "", 0, "", 0, "", 0], inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False
95
  )
96
 
97
- # Chatbot events com o idioma
98
  msg.submit(conversation, inputs=[qa_chain, msg, chatbot, language_selector], outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
99
  submit_btn.click(conversation, inputs=[qa_chain, msg, chatbot, language_selector], outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
100
  clear_btn.click(lambda: [None, "", 0, "", 0, "", 0], inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
 
13
  list_llm = ["meta-llama/Meta-Llama-3-8B-Instruct", "mistralai/Mistral-7B-Instruct-v0.2"]
14
  list_llm_simple = [os.path.basename(llm) for llm in list_llm]
15
 
16
+ # Load and split PDF document
17
def load_doc(list_file_path):
    """Load the given PDF files and split their content into chunks.

    Args:
        list_file_path: Iterable of file paths; each one is handed to
            ``PyPDFLoader``.

    Returns:
        Whatever ``RecursiveCharacterTextSplitter.split_documents`` yields for
        the combined output of all loaders, with ``chunk_size=1024`` and
        ``chunk_overlap=64``.
    """
    # Construct every loader before loading any of them.
    pdf_loaders = [PyPDFLoader(path) for path in list_file_path]
    # Flatten the per-file results into a single list, preserving file order.
    loaded_pages = [page for pdf_loader in pdf_loaders for page in pdf_loader.load()]
    splitter = RecursiveCharacterTextSplitter(chunk_size=1024, chunk_overlap=64)
    return splitter.split_documents(loaded_pages)
25
+
26
+ # Create vector database
27
def create_db(splits):
    """Build a FAISS vector store from document chunks.

    Args:
        splits: Documents to index; passed straight to
            ``FAISS.from_documents``.

    Returns:
        The vector store produced by ``FAISS.from_documents`` using a
        default-constructed ``HuggingFaceEmbeddings`` instance.
    """
    embedding_model = HuggingFaceEmbeddings()
    return FAISS.from_documents(splits, embedding_model)
31
+
32
+ # Initialize database
33
def initialize_database(list_file_obj, progress=gr.Progress()):
    """Create the vector database from a list of uploaded file objects.

    Args:
        list_file_obj: Iterable of file objects. ``None`` entries are skipped;
            the ``.name`` attribute of every other entry is used as its path.
        progress: Not referenced in the body (presumably the Gradio progress
            tracker injected by the UI -- confirm against the caller).

    Returns:
        A ``(vector_db, status_message)`` tuple, where ``status_message`` is
        the fixed string ``"Database created!"``.
    """
    uploaded_paths = [uploaded.name for uploaded in list_file_obj if uploaded is not None]
    # Load + split the documents, then index the resulting chunks.
    database = create_db(load_doc(uploaded_paths))
    return database, "Database created!"
38
+
39
+ # Initialize langchain LLM chain
40
def initialize_llmchain(llm_model, temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
    """Build a conversational retrieval chain backed by a Hugging Face endpoint.

    Args:
        llm_model: Model repository id, forwarded as ``repo_id``.
        temperature: Forwarded to ``HuggingFaceEndpoint`` as ``temperature``.
        max_tokens: Forwarded to ``HuggingFaceEndpoint`` as ``max_new_tokens``.
        top_k: Forwarded to ``HuggingFaceEndpoint`` as ``top_k``.
        vector_db: Vector store; its ``as_retriever()`` result feeds the chain.
        progress: Not referenced in the body (presumably the Gradio progress
            tracker injected by the UI -- confirm against the caller).

    Returns:
        The chain created by ``ConversationalRetrievalChain.from_llm`` with
        ``chain_type="stuff"``, conversation memory stored under
        ``"chat_history"``, and source documents included in the output.
    """
    # Every model uses the same endpoint settings, so no per-model branching
    # is needed; the API token comes from the module-level ``api_token``.
    llm = HuggingFaceEndpoint(
        repo_id=llm_model,
        huggingfacehub_api_token=api_token,
        temperature=temperature,
        max_new_tokens=max_tokens,
        top_k=top_k,
    )
    # ``output_key`` tells the memory which chain output to record, since the
    # chain also returns source documents alongside the answer.
    memory = ConversationBufferMemory(
        memory_key="chat_history",
        output_key="answer",
        return_messages=True,
    )
    retriever = vector_db.as_retriever()
    qa_chain = ConversationalRetrievalChain.from_llm(
        llm,
        retriever=retriever,
        chain_type="stuff",
        memory=memory,
        return_source_documents=True,
        verbose=False,
    )
    return qa_chain
68
+
69
+ # Initialize LLM
70
def initialize_LLM(llm_option, llm_temperature, max_tokens, top_k, vector_db, progress=gr.Progress()):
    """Resolve the selected model and build its QA chain.

    Args:
        llm_option: Index into the module-level ``list_llm``.
        llm_temperature: Passed through to ``initialize_llmchain``.
        max_tokens: Passed through to ``initialize_llmchain``.
        top_k: Passed through to ``initialize_llmchain``.
        vector_db: Passed through to ``initialize_llmchain``.
        progress: Passed through to ``initialize_llmchain``.

    Returns:
        A ``(qa_chain, status_message)`` tuple, where ``status_message`` is
        the fixed string ``"QA chain initialized. Chatbot is ready!"``.
    """
    selected_model = list_llm[llm_option]
    chain = initialize_llmchain(
        selected_model, llm_temperature, max_tokens, top_k, vector_db, progress
    )
    status = "QA chain initialized. Chatbot is ready!"
    return chain, status
74
 
75
  def format_chat_history(message, chat_history):
76
  formatted_chat_history = []
 
79
  formatted_chat_history.append(f"Assistant: {bot_message}")
80
  return formatted_chat_history
81
 
 
82
  def conversation(qa_chain, message, history, language):
83
  formatted_chat_history = format_chat_history(message, history)
 
84
  response = qa_chain.invoke({"question": message, "chat_history": formatted_chat_history})
85
  response_answer = response["answer"]
86
  if response_answer.find("Helpful Answer:") != -1:
 
88
 
89
  # Ajustar resposta com base no idioma
90
  if language == "Português":
 
 
91
  response_answer = f"Resposta em português: {response_answer}"
92
  else:
93
  response_answer = f"Response in English: {response_answer}"
 
127
 
128
  with gr.Column(scale=200):
129
  gr.Markdown("<b>Step 2 - Chat with your Document</b>")
 
130
  language_selector = gr.Radio(["English", "Português"], label="Select Language", value="English")
131
  chatbot = gr.Chatbot(height=505)
132
  with gr.Accordion("Relevant context from the source document", open=False):
 
146
  lambda: [None, "", 0, "", 0, "", 0], inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False
147
  )
148
 
149
+ # Chatbot events
150
  msg.submit(conversation, inputs=[qa_chain, msg, chatbot, language_selector], outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
151
  submit_btn.click(conversation, inputs=[qa_chain, msg, chatbot, language_selector], outputs=[qa_chain, msg, chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)
152
  clear_btn.click(lambda: [None, "", 0, "", 0, "", 0], inputs=None, outputs=[chatbot, doc_source1, source1_page, doc_source2, source2_page, doc_source3, source3_page], queue=False)