awacke1 commited on
Commit
5fbafa3
Β·
1 Parent(s): 8f697d7

Create backup.py

Browse files
Files changed (1) hide show
  1. backup.py +574 -0
backup.py ADDED
@@ -0,0 +1,574 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import openai
3
+ import os
4
+ import base64
5
+ import glob
6
+ import json
7
+ import mistune
8
+ import pytz
9
+ import math
10
+ import requests
11
+ import time
12
+ import re
13
+ import textract
14
+ import zipfile # New import for zipping files
15
+ from datetime import datetime
16
+ from openai import ChatCompletion
17
+ from xml.etree import ElementTree as ET
18
+ from bs4 import BeautifulSoup
19
+ from collections import deque
20
+ from audio_recorder_streamlit import audio_recorder
21
+ from dotenv import load_dotenv
22
+ from PyPDF2 import PdfReader
23
+ from langchain.text_splitter import CharacterTextSplitter
24
+ from langchain.embeddings import OpenAIEmbeddings
25
+ from langchain.vectorstores import FAISS
26
+ from langchain.chat_models import ChatOpenAI
27
+ from langchain.memory import ConversationBufferMemory
28
+ from langchain.chains import ConversationalRetrievalChain
29
+ from templates import css, bot_template, user_template
30
+ import streamlit.components.v1 as components # Import Streamlit Components for HTML5
31
+
32
# page config and sidebar declares up front allow all other functions to see global class variables
st.set_page_config(page_title="GPT Streamlit Document Reasoner", layout="wide")
# Global save toggle: read by create_file() and every call site below.
should_save = st.sidebar.checkbox("💾 Save", value=True)

# Whisper Paper - how open STT suddenly got so good:
# st link button with emoji anyone?
url="https://arxiv.org/pdf/2212.04356.pdf"
import random
def link_button_with_emoji(url):
    """Render a markdown link to the Whisper paper prefixed with a random medical emoji."""
    emojis = ["💉", "🏥", "🌡️", "🩺", "🌡️", "🔬", "💊", "🧪", "👨‍⚕️", "👩‍⚕️"]
    random_emoji = random.choice(emojis)
    st.markdown(f"[{random_emoji} Whisper Paper - Robust Speech Recognition via Large-Scale Weak Supervision]({url})")
url = "https://arxiv.org/pdf/2212.04356.pdf"
link_button_with_emoji(url)
46
+
47
+
48
+
49
def generate_filename_old(prompt, file_type):
    """Build a timestamped filename from a prompt, keeping only alphanumerics."""
    central = pytz.timezone('US/Central')
    stamp = datetime.now(central).strftime("%m%d_%H%M")  # MMDD_HHMM, US/Central
    # Strip every non-alphanumeric character and cap the name length.
    cleaned = "".join(ch for ch in prompt if ch.isalnum())[:90]
    return f"{stamp}_{cleaned}.{file_type}"
54
+
55
def generate_filename(prompt, file_type):
    """Build a timestamped, filesystem-safe filename from a prompt string.

    Spaces and newlines become underscores; everything else non-alphanumeric
    is dropped, and the stem is capped at 90 characters.
    """
    central = pytz.timezone('US/Central')
    stamp = datetime.now(central).strftime("%m%d_%H%M")
    underscored = prompt.replace(" ", "_").replace("\n", "_")
    cleaned = "".join(ch for ch in underscored if ch.isalnum() or ch == "_")[:90]
    return f"{stamp}_{cleaned}.{file_type}"
61
+
62
def transcribe_audio(file_path, model):
    """Send an audio file to OpenAI's transcription endpoint and chat on the result.

    On HTTP 200: chats with the model about the transcript, saves the
    prompt/response pair via create_file(), and returns the transcript text.
    Returns None when the API call fails.
    """
    key = os.getenv('OPENAI_API_KEY')
    headers = {
        "Authorization": f"Bearer {key}",
    }
    with open(file_path, 'rb') as f:
        data = {'file': f}
        # BUG FIX: original passed the literal "Read file {file_path}" (missing
        # the f-string prefix) plus the path as a second st.write argument.
        st.write(f"Read file {file_path}")
        OPENAI_API_URL = "https://api.openai.com/v1/audio/transcriptions"
        response = requests.post(OPENAI_API_URL, headers=headers, files=data, data={'model': model})
    if response.status_code == 200:
        st.write(response.json())
        transcript = response.json().get('text')
        chatResponse = chat_with_model(transcript, '')  # *************************************
        filename = generate_filename(transcript, 'txt')
        create_file(filename, transcript, chatResponse, should_save)
        return transcript
    else:
        st.write(response.json())
        st.error("Error in API call.")
        return None
88
+
89
def save_and_play_audio(audio_recorder):
    """Capture audio via the recorder widget, persist it as a .wav, play it back.

    Returns the saved filename, or None when nothing was recorded.
    """
    recorded = audio_recorder()
    if not recorded:
        return None
    filename = generate_filename("Recording", "wav")
    with open(filename, 'wb') as out:
        out.write(recorded)
    st.audio(recorded, format="audio/wav")
    return filename
98
+
99
def create_file(filename, prompt, response, should_save=True):
    """Persist a prompt/response pair as companion files.

    For .txt/.htm/.md targets, writes <base>-Prompt.txt and <base>-Response.md,
    plus <base>-Code.py when the response contains a fenced ```python block.
    No-op when should_save is False.
    """
    if not should_save:
        return

    # Base name without extension drives all companion filenames.
    base_filename, ext = os.path.splitext(filename)
    if ext not in ('.txt', '.htm', '.md'):
        return

    with open(f"{base_filename}-Prompt.txt", 'w') as out:
        out.write(prompt)
    with open(f"{base_filename}-Response.md", 'w') as out:
        out.write(response)

    # Extract and save any fenced Python code embedded in the response.
    code_blocks = re.findall(r"```python([\s\S]*?)```", response)
    if code_blocks:
        with open(f"{base_filename}-Code.py", 'w') as out:
            out.write(code_blocks[0].strip())
126
+
127
+
128
def create_file_old(filename, prompt, response, should_save=True):
    """Legacy single-file writer: join prompt and response per extension.

    .txt joins with a newline, .htm with a space, .md with a blank line.
    Other extensions are ignored. No-op when should_save is False.
    """
    if not should_save:
        return
    separators = {".txt": "\n", ".htm": " ", ".md": "\n\n"}
    for suffix, sep in separators.items():
        if filename.endswith(suffix):
            with open(filename, 'w') as out:
                out.write(f"{prompt}{sep}{response}")
            break
140
+
141
def truncate_document(document, length):
    """Return the first `length` characters of the document."""
    return document[0:length]
143
def divide_document(document, max_length):
    """Chop the document into consecutive pieces of at most max_length chars."""
    pieces = []
    for start in range(0, len(document), max_length):
        pieces.append(document[start:start + max_length])
    return pieces
145
+
146
def get_table_download_link(file_path):
    """Build an HTML anchor that downloads file_path as a base64 data URI.

    Returns the raw file_path (instead of a link) when the file cannot be
    read as text, matching the original best-effort fallback.
    """
    try:
        with open(file_path, 'r') as file:
            data = file.read()
    # Narrowed from a bare except: only I/O and text-decoding failures are
    # expected here; anything else should surface.
    except (OSError, UnicodeDecodeError):
        st.write('')
        return file_path
    b64 = base64.b64encode(data.encode()).decode()
    file_name = os.path.basename(file_path)
    ext = os.path.splitext(file_name)[1]  # get the file extension
    mime_types = {
        '.txt': 'text/plain',
        '.py': 'text/plain',
        '.xlsx': 'text/plain',
        '.csv': 'text/plain',
        '.htm': 'text/html',
        '.md': 'text/markdown',
    }
    # general binary data type for anything unrecognized
    mime_type = mime_types.get(ext, 'application/octet-stream')
    href = f'<a href="data:{mime_type};base64,{b64}" target="_blank" download="{file_name}">{file_name}</a>'
    return href
172
+
173
def CompressXML(xml_text):
    """Parse xml_text and drop every element whose tag contains 'Comment'.

    BUG FIX: stdlib ElementTree elements have no `.parent` attribute (that is
    an lxml feature), so the original raised AttributeError whenever a match
    was found. A parent map is built instead so matches can be detached.
    """
    root = ET.fromstring(xml_text)
    # Map each element to its parent so matched nodes can be removed.
    parent_of = {child: parent for parent in root.iter() for child in parent}
    for elem in list(root.iter()):
        if isinstance(elem.tag, str) and 'Comment' in elem.tag:
            parent = parent_of.get(elem)
            if parent is not None:
                parent.remove(elem)
    return ET.tostring(root, encoding='unicode', method="xml")
179
+
180
def read_file_content(file, max_length):
    """Return the text content of an uploaded file, dispatching on MIME type.

    JSON is stringified, HTML is stripped to text, XML is comment-compressed,
    markdown is rendered, plain text is decoded; anything else yields "".
    (max_length is accepted for interface compatibility but not applied here.)
    """
    mime = file.type
    if mime == "application/json":
        return str(json.load(file))
    if mime in ("text/html", "text/htm"):
        soup = BeautifulSoup(file, "html.parser")
        return soup.text
    if mime in ("application/xml", "text/xml"):
        parsed = ET.parse(file)
        return CompressXML(ET.tostring(parsed.getroot(), encoding='unicode'))
    if mime in ("text/markdown", "text/md"):
        renderer = mistune.create_markdown()
        return renderer(file.read().decode())
    if mime == "text/plain":
        return file.getvalue().decode()
    return ""
200
+
201
def readitaloud(result):
    """Embed an HTML5 page with a browser SpeechSynthesis 'Read Aloud' button,
    pre-filling its textarea with `result`."""
    documentHTML5='''
<!DOCTYPE html>
<html>
<head>
<title>Read It Aloud</title>
<script type="text/javascript">
function readAloud() {
const text = document.getElementById("textArea").value;
const speech = new SpeechSynthesisUtterance(text);
window.speechSynthesis.speak(speech);
}
</script>
</head>
<body>
<h1>🔊 Read It Aloud</h1>
<textarea id="textArea" rows="10" cols="80">
'''
    # Splice the model output between the opening and closing textarea markup.
    documentHTML5 = documentHTML5 + result
    documentHTML5 = documentHTML5 + '''
</textarea>
<br>
<button onclick="readAloud()">🔊 Read Aloud</button>
</body>
</html>
'''

    # Rendered via Streamlit Components so the inline JS actually runs.
    components.html(documentHTML5, width=1280, height=1024)
    #return result
230
+
231
def chat_with_model(prompt, document_section, model_choice='gpt-3.5-turbo'):
    """Stream a chat completion to the UI, then read the full reply aloud.

    prompt: user message. document_section: optional context appended as an
    assistant turn. model_choice: OpenAI chat model name.
    Returns the concatenated reply text.
    """
    model = model_choice
    conversation = [{'role': 'system', 'content': 'You are a helpful assistant.'}]
    conversation.append({'role': 'user', 'content': prompt})
    if len(document_section) > 0:
        conversation.append({'role': 'assistant', 'content': document_section})

    start_time = time.time()
    report = []
    res_box = st.empty()
    collected_chunks = []
    collected_messages = []

    key = os.getenv('OPENAI_API_KEY')
    openai.api_key = key
    for chunk in openai.ChatCompletion.create(
            # BUG FIX: was hard-coded 'gpt-3.5-turbo', silently ignoring model_choice.
            model=model,
            messages=conversation,
            temperature=0.5,
            stream=True):
        collected_chunks.append(chunk)  # save the event response
        chunk_message = chunk['choices'][0]['delta']  # extract the message
        collected_messages.append(chunk_message)  # save the message

        content = chunk["choices"][0].get("delta", {}).get("content")
        # BUG FIX: the original appended None deltas to `report`, which made
        # every later ''.join(report) raise (swallowed by a bare except) and
        # froze the live display after the first role/termination chunk.
        if content:
            report.append(content)
            result = "".join(report).strip()
            res_box.markdown(f'*{result}*')

    full_reply_content = ''.join([m.get('content', '') for m in collected_messages])
    st.write("Elapsed time:")
    st.write(time.time() - start_time)
    readitaloud(full_reply_content)
    return full_reply_content
273
+
274
def chat_with_file_contents(prompt, file_content, model_choice='gpt-3.5-turbo'):
    """Single-shot (non-streaming) chat completion over optional file context."""
    messages = [
        {'role': 'system', 'content': 'You are a helpful assistant.'},
        {'role': 'user', 'content': prompt},
    ]
    if len(file_content) > 0:
        messages.append({'role': 'assistant', 'content': file_content})
    result = openai.ChatCompletion.create(model=model_choice, messages=messages)
    return result['choices'][0]['message']['content']
281
+
282
def extract_mime_type(file):
    """Return the MIME type of `file`.

    Strings are expected to contain a repr-like "type='...'" fragment;
    uploaded-file objects expose it as a `.type` attribute.
    Raises ValueError (string without a type) or TypeError (unsupported input).
    """
    if isinstance(file, str):
        match = re.search(r"type='(.*?)'", file)
        if match:
            return match.group(1)
        raise ValueError(f"Unable to extract MIME type from {file}")
    # BUG FIX: the original referenced `streamlit.UploadedFile`, a NameError
    # since the module is imported as `st`. Duck-type instead: any uploaded
    # file from st.file_uploader exposes `.type`.
    if hasattr(file, "type"):
        return file.type
    raise TypeError("Input should be a string or a streamlit.UploadedFile object")
296
+
297
+ from io import BytesIO
298
+ import re
299
+
300
def extract_file_extension(file):
    """Return everything after the FIRST dot in the uploaded file's name.

    (e.g. "report.final.pdf" -> "final.pdf", matching the original lazy regex.)
    Raises ValueError when the name contains no dot.
    """
    file_name = file.name
    _, dot, extension = file_name.partition('.')
    if dot:
        return extension
    raise ValueError(f"Unable to extract file extension from {file_name}")
309
+
310
def pdf2txt(docs):
    """Concatenate the text of uploaded documents (text-like formats + PDF).

    Per-file failures are reported to the UI and skipped rather than raised.
    """
    text = ""
    for file in docs:
        file_extension = extract_file_extension(file)
        # print the file extension
        st.write(f"File type extension: {file_extension}")
        try:
            lowered = file_extension.lower()
            if lowered in ['py', 'txt', 'html', 'htm', 'xml', 'json']:
                text += file.getvalue().decode('utf-8')
            elif lowered == 'pdf':
                from PyPDF2 import PdfReader
                reader = PdfReader(BytesIO(file.getvalue()))
                for page in reader.pages:  # new PyPDF2 syntax
                    text += page.extract_text()
        except Exception as e:
            st.write(f"Error processing file {file.name}: {e}")
    return text
330
+
331
def pdf2txt_old(pdf_docs):
    """Legacy extractor: log each file's MIME type, then pull text from each PDF."""
    st.write(pdf_docs)
    for doc in pdf_docs:
        mime_type = extract_mime_type(doc)
        st.write(f"MIME type of file: {mime_type}")

    text = ""
    for doc in pdf_docs:
        for page in PdfReader(doc).pages:
            text += page.extract_text()
    return text
343
+
344
def txt2chunks(text):
    """Split text into 1000-char chunks (200-char overlap) for embedding."""
    splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    return splitter.split_text(text)
347
+
348
def vector_store(text_chunks):
    """Embed the chunks with OpenAI embeddings and index them in FAISS."""
    api_key = os.getenv('OPENAI_API_KEY')
    embedder = OpenAIEmbeddings(openai_api_key=api_key)
    return FAISS.from_texts(texts=text_chunks, embedding=embedder)
352
+
353
def get_chain(vectorstore):
    """Wire a chat LLM and buffer memory into a conversational retrieval chain."""
    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
    return ConversationalRetrievalChain.from_llm(
        llm=ChatOpenAI(),
        retriever=vectorstore.as_retriever(),
        memory=memory,
    )
357
+
358
def process_user_input(user_question):
    """Run the question through the conversation chain, render the chat
    history with the user/bot templates, and save each answer to disk."""
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']
    for i, message in enumerate(st.session_state.chat_history):
        # Even turns are the user, odd turns are the bot.
        template = user_template if i % 2 == 0 else bot_template
        st.write(template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
        # Save file output from PDF query results
        filename = generate_filename(user_question, 'txt')
        create_file(filename, user_question, message.content, should_save)
        #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
371
+
372
def divide_prompt(prompt, max_length):
    """Split a prompt into word-boundary chunks whose budget is max_length.

    Greedy: words accumulate (plus one char per joining space) until the
    next word would not fit, then a new chunk starts.
    """
    chunks = []
    current = []
    used = 0
    for token in prompt.split():
        if used + len(token) <= max_length:
            current.append(token)
            used += len(token) + 1  # +1 accounts for the joining space
        else:
            chunks.append(' '.join(current))
            current = [token]
            used = len(token)
    chunks.append(' '.join(current))  # flush the final chunk
    return chunks
387
+
388
def create_zip_of_files(files):
    """Bundle the given file paths into one archive named all_files.zip.

    Returns the archive's filename.
    """
    zip_name = "all_files.zip"
    with zipfile.ZipFile(zip_name, 'w') as archive:
        for path in files:
            archive.write(path)
    return zip_name
397
+
398
+
399
def get_zip_download_link(zip_file):
    """Return an HTML anchor that downloads zip_file as a base64 data URI."""
    with open(zip_file, 'rb') as fh:
        encoded = base64.b64encode(fh.read()).decode()
    return f'<a href="data:application/zip;base64,{encoded}" download="{zip_file}">Download All</a>'
408
+
409
+
410
def main():
    """Streamlit entry point: sidebar output/model controls, audio
    transcription, document-section chat, free-form prompt chat, and a
    saved-file history sidebar with download/delete actions."""
    #openai.api_key = os.getenv('OPENAI_API_KEY')

    # File type for output, model choice
    menu = ["txt", "htm", "xlsx", "csv", "md", "py"]
    choice = st.sidebar.selectbox("Output File Type:", menu)
    model_choice = st.sidebar.radio("Select Model:", ('gpt-3.5-turbo', 'gpt-3.5-turbo-0301'))

    # Audio, transcribe, GPT:
    filename = save_and_play_audio(audio_recorder)
    if filename is not None:
        try:
            transcription = transcribe_audio(filename, "whisper-1")
        except:
            # Best effort: transcription failures are silently swallowed.
            st.write(' ')
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
        filename = None

    # prompt interfaces
    user_prompt = st.text_area("Enter prompts, instructions & questions:", '', height=100)

    # file section interface for prompts against large documents as context
    collength, colupload = st.columns([2,3])  # adjust the ratio as needed
    with collength:
        max_length = st.slider("File section length for large files", min_value=1000, max_value=128000, value=12000, step=1000)
    with colupload:
        uploaded_file = st.file_uploader("Add a file for context:", type=["pdf", "xml", "json", "xlsx", "csv", "html", "htm", "md", "txt"])

    # Document section chat
    document_sections = deque()
    document_responses = {}  # section index -> model response
    if uploaded_file is not None:
        file_content = read_file_content(uploaded_file, max_length)
        document_sections.extend(divide_document(file_content, max_length))
    if len(document_sections) > 0:
        if st.button("👁️ View Upload"):
            st.markdown("**Sections of the uploaded file:**")
            for i, section in enumerate(list(document_sections)):
                st.markdown(f"**Section {i+1}**\n{section}")
        st.markdown("**Chat with the model:**")
        for i, section in enumerate(list(document_sections)):
            if i in document_responses:
                # Already answered: just re-render the cached response.
                st.markdown(f"**Section {i+1}**\n{document_responses[i]}")
            else:
                if st.button(f"Chat about Section {i+1}"):
                    st.write('Reasoning with your inputs...')
                    response = chat_with_model(user_prompt, section, model_choice) # *************************************
                    st.write('Response:')
                    st.write(response)
                    document_responses[i] = response
                    filename = generate_filename(f"{user_prompt}_section_{i+1}", choice)
                    create_file(filename, user_prompt, response, should_save)
                    st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    if st.button('💬 Chat'):
        st.write('Reasoning with your inputs...')
        #response = chat_with_model(user_prompt, ''.join(list(document_sections,)), model_choice) # *************************************

        # Divide the user_prompt into smaller sections
        user_prompt_sections = divide_prompt(user_prompt, max_length)
        full_response = ''
        for prompt_section in user_prompt_sections:
            # Process each section with the model
            response = chat_with_model(prompt_section, ''.join(list(document_sections)), model_choice)
            full_response += response + '\n'  # Combine the responses

        #st.write('Response:')
        #st.write(full_response)

        response = full_response
        st.write('Response:')
        st.write(response)

        filename = generate_filename(user_prompt, choice)
        create_file(filename, user_prompt, response, should_save)
        st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)

    all_files = glob.glob("*.*")
    all_files = [file for file in all_files if len(os.path.splitext(file)[0]) >= 20]  # exclude files with short names
    all_files.sort(key=lambda x: (os.path.splitext(x)[1], x), reverse=True)  # sort by file type and file name in descending order

    # Sidebar buttons Download All and Delete All
    colDownloadAll, colDeleteAll = st.sidebar.columns([3,3])
    with colDownloadAll:
        if st.button("⬇️ Download All"):
            zip_file = create_zip_of_files(all_files)
            st.markdown(get_zip_download_link(zip_file), unsafe_allow_html=True)
    with colDeleteAll:
        if st.button("🗑 Delete All"):
            for file in all_files:
                os.remove(file)
            st.experimental_rerun()

    # Sidebar of Files Saving History and surfacing files as context of prompts and responses
    file_contents=''
    next_action=''
    for file in all_files:
        col1, col2, col3, col4, col5 = st.sidebar.columns([1,6,1,1,1])  # adjust the ratio as needed
        with col1:
            if st.button("🌐", key="md_"+file):  # md emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action='md'
        with col2:
            st.markdown(get_table_download_link(file), unsafe_allow_html=True)
        with col3:
            if st.button("📂", key="open_"+file):  # open emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action='open'
        with col4:
            if st.button("🔍", key="read_"+file):  # search emoji button
                with open(file, 'r') as f:
                    file_contents = f.read()
                next_action='search'
        with col5:
            if st.button("🗑", key="delete_"+file):
                os.remove(file)
                st.experimental_rerun()

    # Act on whichever file button was clicked in the loop above.
    if len(file_contents) > 0:
        if next_action=='open':
            file_content_area = st.text_area("File Contents:", file_contents, height=500)
        if next_action=='md':
            st.markdown(file_contents)
        if next_action=='search':
            file_content_area = st.text_area("File Contents:", file_contents, height=500)
            st.write('Reasoning with your inputs...')
            response = chat_with_model(user_prompt, file_contents, model_choice)
            filename = generate_filename(file_contents, choice)
            create_file(filename, user_prompt, response, should_save)

            st.experimental_rerun()
            #st.sidebar.markdown(get_table_download_link(filename), unsafe_allow_html=True)
549
+
550
if __name__ == "__main__":
    main()

# --- Module-level Streamlit flow (runs on every script execution, after main) ---
load_dotenv()  # pull OPENAI_API_KEY etc. from a local .env
st.write(css, unsafe_allow_html=True)  # inject chat-bubble CSS from templates

st.header("Chat with documents :books:")
user_question = st.text_input("Ask a question about your documents:")
if user_question:
    process_user_input(user_question)

with st.sidebar:
    st.subheader("Your documents")
    docs = st.file_uploader("import documents", accept_multiple_files=True)
    with st.spinner("Processing"):
        raw = pdf2txt(docs)
        if len(raw) > 0:
            length = str(len(raw))
            text_chunks = txt2chunks(raw)
            vectorstore = vector_store(text_chunks)
            # Conversation chain is kept in session state for process_user_input.
            st.session_state.conversation = get_chain(vectorstore)
            st.markdown('# AI Search Index of Length:' + length + ' Created.') # add timing
            filename = generate_filename(raw, 'txt')
            create_file(filename, raw, '', should_save)
            #create_file(filename, raw, '')