awacke1 committed on
Commit
6f149e0
·
verified ·
1 Parent(s): b4f63d2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -74
app.py CHANGED
@@ -27,7 +27,6 @@ import io
27
  import requests
28
  import numpy as np
29
  from urllib.parse import quote
30
- import PyPDF2
31
 
32
  # =============================================================================
33
  # ───────────── EXTERNAL HELP LINKS ─────────────
@@ -117,16 +116,8 @@ def preprocess_text(text):
117
  return text.strip()
118
 
119
  def sanitize_json_text(text):
120
- text = text.encode().decode('unicode_escape')
121
- text = json.dumps(text)[1:-1]
122
- return text
123
-
124
- def extract_pdf_text(pdf_file):
125
- pdf_reader = PyPDF2.PdfReader(pdf_file)
126
- text = ""
127
- for page in pdf_reader.pages:
128
- text += page.extract_text() or ""
129
- return text
130
 
131
  # =============================================================================
132
  # ───────────── COSMOS DB FUNCTIONS ─────────────
@@ -168,13 +159,10 @@ def delete_record(container, record):
168
  container.delete_item(item=doc_id, partition_key=partition_key_value)
169
  return True, f"Record {doc_id} deleted. πŸ—‘οΈ"
170
  except exceptions.CosmosResourceNotFoundError:
171
- st.write(f"Record {doc_id} not found in Cosmos DB - treating as success.")
172
  return True, f"Record {doc_id} not found (already deleted). πŸ—‘οΈ"
173
  except exceptions.CosmosHttpResponseError as e:
174
- st.error(f"Cosmos HTTP error deleting {doc_id}: {str(e)}")
175
  return False, f"HTTP error deleting {doc_id}: {str(e)} 🚨"
176
  except Exception as e:
177
- st.error(f"Unexpected error deleting {doc_id}: {str(e)}")
178
  return False, f"Unexpected error deleting {doc_id}: {str(e)} 😱"
179
 
180
  def save_to_cosmos_db(container, query, response1, response2):
@@ -376,38 +364,19 @@ def edit_all_documents(container, search_keyword=None):
376
  formatted_ts = dt.strftime("%I:%M %p %m/%d/%Y")
377
  header = f"{doc.get('name', 'Unnamed')} - {formatted_ts}"
378
  with st.expander(header):
379
- col_json, col_pdf = st.columns([2, 1])
380
- with col_json:
381
- doc_key = f"editor_{doc['id']}"
382
- initial_value = st.session_state.saved_docs.get(doc['id'], json.dumps(doc, indent=2))
383
- edited_content = st.text_area("Edit JSON", value=initial_value, height=300, key=doc_key)
384
- with col_pdf:
385
- if 'pdf_data' in doc:
386
- st.markdown("### πŸ“œ PDF Preview")
387
- pdf_bytes = base64.b64decode(doc['pdf_data'])
388
- st.download_button(
389
- label="⬇️ Download PDF",
390
- data=pdf_bytes,
391
- file_name=f"{doc.get('name', 'document')}.pdf",
392
- mime="application/pdf"
393
- )
394
-
395
  col_save, col_delete = st.columns(2)
396
  with col_save:
397
  if st.button("πŸ’Ύ Save", key=f"save_{doc['id']}"):
398
  try:
399
- cleaned_content = edited_content.strip()
400
- if not (cleaned_content.startswith('{') and cleaned_content.endswith('}')):
401
- sanitized_text = sanitize_json_text(cleaned_content)
402
- updated_doc = doc.copy()
403
- updated_doc['content'] = sanitized_text
404
- else:
405
- updated_doc = json.loads(cleaned_content)
406
- updated_doc['id'] = doc['id']
407
- updated_doc['pk'] = doc.get('pk', doc['id'])
408
- for field in ['_ts', '_rid', '_self', '_etag', '_attachments']:
409
- updated_doc.pop(field, None)
410
-
411
  success, message = update_record(container, updated_doc)
412
  if success:
413
  st.success(f"Saved {doc['id']}")
@@ -447,27 +416,6 @@ def new_item_default(container):
447
  else:
448
  st.error(f"Error creating new item: {message}")
449
 
450
- def new_item_from_pdf(container, pdf_file):
451
- new_id = generate_unique_id()
452
- pdf_bytes = pdf_file.read()
453
- pdf_text = extract_pdf_text(io.BytesIO(pdf_bytes))
454
- pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
455
- default_doc = {
456
- "id": new_id,
457
- "pk": new_id,
458
- "name": pdf_file.name,
459
- "content": pdf_text[:1000],
460
- "timestamp": datetime.now().isoformat(),
461
- "type": "pdf_document",
462
- "pdf_data": pdf_base64
463
- }
464
- success, message = insert_record(container, default_doc)
465
- if success:
466
- st.success(f"PDF document '{pdf_file.name}' created! ✨")
467
- st.rerun()
468
- else:
469
- st.error(f"Error creating PDF item: {message}")
470
-
471
  def add_field_to_doc():
472
  key = st.session_state.new_field_key
473
  value = st.session_state.new_field_value
@@ -531,25 +479,20 @@ def vector_keyword_search(keyword, doc):
531
  return False
532
 
533
  def search_documents_ui(container):
534
- st.sidebar.subheader("πŸ” Vector Search")
535
  with st.sidebar.form("search_form"):
536
- keyword = st.text_input("Search Documents", key="search_keyword")
537
  col1, col2 = st.columns(2)
538
  with col1:
539
  search_submitted = st.form_submit_button("πŸ” Search")
540
  with col2:
541
  clear_submitted = st.form_submit_button("πŸ—‘οΈ Clear")
542
  if search_submitted and keyword:
543
- st.session_state.active_search = keyword
544
  st.rerun()
545
  if clear_submitted:
546
  if 'active_search' in st.session_state:
547
  del st.session_state.active_search
548
  st.rerun()
549
-
550
- uploaded_file = st.sidebar.file_uploader("Upload Document (PDF)", type=["pdf"])
551
- if uploaded_file and container:
552
- new_item_from_pdf(container, uploaded_file)
553
 
554
  def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
555
  try:
@@ -668,10 +611,6 @@ def main():
668
  # Sidebar: Hierarchical Navigation
669
  st.sidebar.title("πŸ™ Navigator")
670
 
671
- # Vector Search Section (Moved to Top)
672
- if st.session_state.current_container:
673
- search_documents_ui(st.session_state.current_container)
674
-
675
  # Databases Section
676
  st.sidebar.subheader("πŸ—ƒοΈ Databases")
677
  if "client" not in st.session_state:
@@ -728,6 +667,7 @@ def main():
728
  new_ai_record(st.session_state.current_container)
729
  if st.sidebar.button("πŸ”— New Links Record"):
730
  new_links_record(st.session_state.current_container)
 
731
 
732
  # Central Area: Editable Documents with Search Filter
733
  if st.session_state.current_container:
 
27
  import requests
28
  import numpy as np
29
  from urllib.parse import quote
 
30
 
31
  # =============================================================================
32
  # ───────────── EXTERNAL HELP LINKS ─────────────
 
116
  return text.strip()
117
 
118
  def sanitize_json_text(text):
119
+ text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', text)
120
+ return text.strip()
 
 
 
 
 
 
 
 
121
 
122
  # =============================================================================
123
  # ───────────── COSMOS DB FUNCTIONS ─────────────
 
159
  container.delete_item(item=doc_id, partition_key=partition_key_value)
160
  return True, f"Record {doc_id} deleted. πŸ—‘οΈ"
161
  except exceptions.CosmosResourceNotFoundError:
 
162
  return True, f"Record {doc_id} not found (already deleted). πŸ—‘οΈ"
163
  except exceptions.CosmosHttpResponseError as e:
 
164
  return False, f"HTTP error deleting {doc_id}: {str(e)} 🚨"
165
  except Exception as e:
 
166
  return False, f"Unexpected error deleting {doc_id}: {str(e)} 😱"
167
 
168
  def save_to_cosmos_db(container, query, response1, response2):
 
364
  formatted_ts = dt.strftime("%I:%M %p %m/%d/%Y")
365
  header = f"{doc.get('name', 'Unnamed')} - {formatted_ts}"
366
  with st.expander(header):
367
+ doc_key = f"editor_{doc['id']}"
368
+ initial_value = st.session_state.saved_docs.get(doc['id'], json.dumps(doc, indent=2))
369
+ edited_content = st.text_area("Edit JSON", value=initial_value, height=300, key=doc_key)
 
 
 
 
 
 
 
 
 
 
 
 
 
370
  col_save, col_delete = st.columns(2)
371
  with col_save:
372
  if st.button("πŸ’Ύ Save", key=f"save_{doc['id']}"):
373
  try:
374
+ cleaned_content = sanitize_json_text(edited_content)
375
+ updated_doc = json.loads(cleaned_content)
376
+ updated_doc['id'] = doc['id']
377
+ updated_doc['pk'] = doc.get('pk', doc['id'])
378
+ for field in ['_ts', '_rid', '_self', '_etag', '_attachments']:
379
+ updated_doc.pop(field, None)
 
 
 
 
 
 
380
  success, message = update_record(container, updated_doc)
381
  if success:
382
  st.success(f"Saved {doc['id']}")
 
416
  else:
417
  st.error(f"Error creating new item: {message}")
418
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
419
  def add_field_to_doc():
420
  key = st.session_state.new_field_key
421
  value = st.session_state.new_field_value
 
479
  return False
480
 
481
  def search_documents_ui(container):
 
482
  with st.sidebar.form("search_form"):
483
+ keyword = st.text_input("Search Keyword", key="search_keyword")
484
  col1, col2 = st.columns(2)
485
  with col1:
486
  search_submitted = st.form_submit_button("πŸ” Search")
487
  with col2:
488
  clear_submitted = st.form_submit_button("πŸ—‘οΈ Clear")
489
  if search_submitted and keyword:
490
+ st.session_state.active_search = keyword # Use a separate key
491
  st.rerun()
492
  if clear_submitted:
493
  if 'active_search' in st.session_state:
494
  del st.session_state.active_search
495
  st.rerun()
 
 
 
 
496
 
497
  def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
498
  try:
 
611
  # Sidebar: Hierarchical Navigation
612
  st.sidebar.title("πŸ™ Navigator")
613
 
 
 
 
 
614
  # Databases Section
615
  st.sidebar.subheader("πŸ—ƒοΈ Databases")
616
  if "client" not in st.session_state:
 
667
  new_ai_record(st.session_state.current_container)
668
  if st.sidebar.button("πŸ”— New Links Record"):
669
  new_links_record(st.session_state.current_container)
670
+ search_documents_ui(st.session_state.current_container)
671
 
672
  # Central Area: Editable Documents with Search Filter
673
  if st.session_state.current_container: