reab5555 commited on
Commit
47f9950
·
verified ·
1 Parent(s): c1ae643

Delete app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -159
app.py DELETED
@@ -1,159 +0,0 @@
1
import os
import xml.etree.ElementTree as ET
import zipfile

import faiss
import gradio as gr
import numpy as np
from PyPDF2 import PdfReader
from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_anthropic import ChatAnthropic
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_huggingface import HuggingFaceEmbeddings
13
-
14
# SECURITY: never hard-code API keys in source — the previous revision committed
# a live Anthropic key, which is a leak and must be rotated. Read it from the
# environment instead (ChatAnthropic also falls back to ANTHROPIC_API_KEY itself).
API_KEY = os.environ.get("ANTHROPIC_API_KEY")
llm = ChatAnthropic(model="claude-3-5-sonnet-20240620", temperature=0.5, max_tokens=8192, anthropic_api_key=API_KEY)

# Sentence-level embedding model used both for indexing and querying.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2")

# Conversation memory shared by every retrieval chain built in chat().
memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)

# Global FAISS store; populated by process_files() or load_existing_index().
vector_store = None
22
-
23
-
24
def process_file(file_path):
    """Extract text from one file (.txt, .docx, or .pdf) as a Document list.

    Returns a single-element list of Document on success, or None when the
    extension is unsupported or extraction fails (errors are printed, not raised).
    """
    _, ext = os.path.splitext(file_path)
    ext = ext.lower()
    try:
        if ext == '.txt':
            with open(file_path, 'r', encoding='utf-8') as fh:
                text = fh.read()
        elif ext == '.docx':
            # BUGFIX: a .docx is a zip archive — decoding its raw bytes as UTF-8
            # (the previous approach) yields binary garbage. Pull the real text
            # out of word/document.xml with the standard library.
            text = _extract_docx_text(file_path)
        elif ext == '.pdf':
            with open(file_path, 'rb') as fh:
                pdf_reader = PdfReader(fh)
                # Extract each page exactly once (old code called extract_text()
                # twice per page: once for the filter, once for the join).
                page_texts = (page.extract_text() for page in pdf_reader.pages)
                text = '\n'.join(t for t in page_texts if t)
        else:
            print(f"Unsupported file type: {ext}")
            return None

        return [Document(page_content=text, metadata={"source": file_path})]
    except Exception as e:
        print(f"Error processing file {file_path}: {str(e)}")
        return None


def _extract_docx_text(file_path):
    """Extract paragraph text from a .docx using only zipfile + ElementTree."""
    ns = '{http://schemas.openxmlformats.org/wordprocessingml/2006/main}'
    with zipfile.ZipFile(file_path) as zf:
        root = ET.fromstring(zf.read('word/document.xml'))
    paragraphs = []
    for para in root.iter(f'{ns}p'):
        runs = [node.text for node in para.iter(f'{ns}t') if node.text]
        paragraphs.append(''.join(runs))
    return '\n'.join(paragraphs)
46
-
47
-
48
def process_files(file_list, progress=gr.Progress()):
    """Parse the uploaded files, embed them, and save a FAISS index locally.

    Stores the resulting index in the module-global `vector_store` and under
    the "faiss_index" directory; returns a status string for the UI.
    """
    global vector_store

    total = len(file_list)
    documents = []
    for idx, uploaded in enumerate(file_list, start=1):
        progress(idx / total, f"Processing file {idx} of {total}")
        # Only the three supported extensions are handed to process_file.
        if uploaded.name.lower().endswith(('.txt', '.docx', '.pdf')):
            parsed = process_file(uploaded.name)
            if parsed:
                documents.extend(parsed)

    if not documents:
        return "No documents were successfully processed. Please check your files and try again."

    progress(0.5, "Splitting text")
    splitter = RecursiveCharacterTextSplitter(chunk_size=10000, chunk_overlap=200)
    chunks = splitter.split_documents(documents)

    progress(0.7, "Creating embeddings")
    vector_store = FAISS.from_documents(chunks, embeddings)

    progress(0.9, "Saving vector store")
    vector_store.save_local("faiss_index")

    progress(1.0, "Completed")
    return f"Embedding process completed and database created. Processed {len(documents)} files. You can now start chatting!"
75
-
76
-
77
def load_existing_index(folder_path):
    """Load a previously saved FAISS index from folder_path into `vector_store`.

    Returns a human-readable status string; never raises (errors are reported
    back as the return value for display in the UI).
    """
    global vector_store
    try:
        required = [os.path.join(folder_path, name) for name in ("index.faiss", "index.pkl")]
        # Both files must be present for FAISS.load_local to succeed.
        if not all(os.path.exists(path) for path in required):
            return f"Error: FAISS index files not found in {folder_path}. Please ensure both 'index.faiss' and 'index.pkl' are present."

        vector_store = FAISS.load_local(folder_path, embeddings, allow_dangerous_deserialization=True)
        return f"Successfully loaded existing index from {folder_path}."
    except Exception as e:
        return f"Error loading index: {str(e)}"
90
-
91
-
92
def chat(message, history):
    """Answer `message` via retrieval over the loaded vector store.

    Returns a prompt string asking the user to load documents when no
    vector store has been created yet.
    """
    global vector_store
    if vector_store is None:
        return "Please load documents or an existing index first."

    # A fresh chain per call; conversation state lives in the shared `memory`.
    chain = ConversationalRetrievalChain.from_llm(
        llm,
        vector_store.as_retriever(),
        memory=memory,
    )
    response = chain.invoke({"question": message, "chat_history": history})
    return response['answer']
105
-
106
-
107
def reset_chat():
    """Wipe the shared conversation memory so the next chat starts fresh."""
    global memory
    memory.clear()
    # An empty list resets the gr.Chatbot component's displayed history.
    return []
111
-
112
-
113
# Gradio UI: file-processing / index-loading controls on top, chat widgets below.
with gr.Blocks() as demo:
    gr.Markdown("# Document-based Chatbot")

    with gr.Row():
        with gr.Column():
            file_input = gr.File(label="Select Files", file_count="multiple", file_types=[".pdf", ".docx", ".txt"])
            process_button = gr.Button("Process Files")
        with gr.Column():
            # NOTE(review): hard-coded Windows path as the default — only valid on
            # the original author's machine; consider a relative "faiss_index".
            index_folder = gr.Textbox(label="Existing Index Folder Path",
                                      value="C:\\Works\\Data\\projects\\Python\\QA_Chatbot\\faiss_index")
            load_index_button = gr.Button("Load Existing Index")

    output = gr.Textbox(label="Processing Output")

    chatbot = gr.Chatbot()
    msg = gr.Textbox()
    send = gr.Button("Send")
    clear = gr.Button("Clear")


    def process_selected_files(files):
        # Guard against an empty selection before delegating to process_files.
        if files:
            return process_files(files)
        else:
            return "No files selected. Please select files and try again."


    def load_selected_index(folder_path):
        # Thin wrapper so the button callback matches the textbox -> output wiring.
        return load_existing_index(folder_path)


    process_button.click(process_selected_files, file_input, output)
    load_index_button.click(load_selected_index, index_folder, output)


    def respond(message, chat_history):
        # Ask the chain, append the (user, bot) turn, and clear the input box.
        bot_message = chat(message, chat_history)
        chat_history.append((message, bot_message))
        return "", chat_history


    # Enter key and Send button share the same handler; Clear resets memory + UI.
    msg.submit(respond, [msg, chatbot], [msg, chatbot])
    send.click(respond, [msg, chatbot], [msg, chatbot])
    clear.click(reset_chat, None, chatbot)

# Launch the Gradio server only when executed as a script.
if __name__ == "__main__":
    demo.launch()