Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -27,7 +27,6 @@ import io
|
|
27 |
import requests
|
28 |
import numpy as np
|
29 |
from urllib.parse import quote
|
30 |
-
import PyPDF2
|
31 |
|
32 |
# =============================================================================
|
33 |
# βββββββββββββ EXTERNAL HELP LINKS βββββββββββββ
|
@@ -117,16 +116,8 @@ def preprocess_text(text):
|
|
117 |
return text.strip()
|
118 |
|
119 |
def sanitize_json_text(text):
|
120 |
-
text =
|
121 |
-
text
|
122 |
-
return text
|
123 |
-
|
124 |
-
def extract_pdf_text(pdf_file):
|
125 |
-
pdf_reader = PyPDF2.PdfReader(pdf_file)
|
126 |
-
text = ""
|
127 |
-
for page in pdf_reader.pages:
|
128 |
-
text += page.extract_text() or ""
|
129 |
-
return text
|
130 |
|
131 |
# =============================================================================
|
132 |
# βββββββββββββ COSMOS DB FUNCTIONS βββββββββββββ
|
@@ -168,13 +159,10 @@ def delete_record(container, record):
|
|
168 |
container.delete_item(item=doc_id, partition_key=partition_key_value)
|
169 |
return True, f"Record {doc_id} deleted. 🗑️"
|
170 |
except exceptions.CosmosResourceNotFoundError:
|
171 |
-
st.write(f"Record {doc_id} not found in Cosmos DB - treating as success.")
|
172 |
return True, f"Record {doc_id} not found (already deleted). 🗑️"
|
173 |
except exceptions.CosmosHttpResponseError as e:
|
174 |
-
st.error(f"Cosmos HTTP error deleting {doc_id}: {str(e)}")
|
175 |
return False, f"HTTP error deleting {doc_id}: {str(e)} 🚨"
|
176 |
except Exception as e:
|
177 |
-
st.error(f"Unexpected error deleting {doc_id}: {str(e)}")
|
178 |
return False, f"Unexpected error deleting {doc_id}: {str(e)} 😱"
|
179 |
|
180 |
def save_to_cosmos_db(container, query, response1, response2):
|
@@ -376,38 +364,19 @@ def edit_all_documents(container, search_keyword=None):
|
|
376 |
formatted_ts = dt.strftime("%I:%M %p %m/%d/%Y")
|
377 |
header = f"{doc.get('name', 'Unnamed')} - {formatted_ts}"
|
378 |
with st.expander(header):
|
379 |
-
|
380 |
-
|
381 |
-
|
382 |
-
initial_value = st.session_state.saved_docs.get(doc['id'], json.dumps(doc, indent=2))
|
383 |
-
edited_content = st.text_area("Edit JSON", value=initial_value, height=300, key=doc_key)
|
384 |
-
with col_pdf:
|
385 |
-
if 'pdf_data' in doc:
|
386 |
-
st.markdown("### π PDF Preview")
|
387 |
-
pdf_bytes = base64.b64decode(doc['pdf_data'])
|
388 |
-
st.download_button(
|
389 |
-
label="⬇️ Download PDF",
|
390 |
-
data=pdf_bytes,
|
391 |
-
file_name=f"{doc.get('name', 'document')}.pdf",
|
392 |
-
mime="application/pdf"
|
393 |
-
)
|
394 |
-
|
395 |
col_save, col_delete = st.columns(2)
|
396 |
with col_save:
|
397 |
if st.button("💾 Save", key=f"save_{doc['id']}"):
|
398 |
try:
|
399 |
-
cleaned_content = edited_content
|
400 |
-
|
401 |
-
|
402 |
-
|
403 |
-
|
404 |
-
|
405 |
-
updated_doc = json.loads(cleaned_content)
|
406 |
-
updated_doc['id'] = doc['id']
|
407 |
-
updated_doc['pk'] = doc.get('pk', doc['id'])
|
408 |
-
for field in ['_ts', '_rid', '_self', '_etag', '_attachments']:
|
409 |
-
updated_doc.pop(field, None)
|
410 |
-
|
411 |
success, message = update_record(container, updated_doc)
|
412 |
if success:
|
413 |
st.success(f"Saved {doc['id']}")
|
@@ -447,27 +416,6 @@ def new_item_default(container):
|
|
447 |
else:
|
448 |
st.error(f"Error creating new item: {message}")
|
449 |
|
450 |
-
def new_item_from_pdf(container, pdf_file):
|
451 |
-
new_id = generate_unique_id()
|
452 |
-
pdf_bytes = pdf_file.read()
|
453 |
-
pdf_text = extract_pdf_text(io.BytesIO(pdf_bytes))
|
454 |
-
pdf_base64 = base64.b64encode(pdf_bytes).decode('utf-8')
|
455 |
-
default_doc = {
|
456 |
-
"id": new_id,
|
457 |
-
"pk": new_id,
|
458 |
-
"name": pdf_file.name,
|
459 |
-
"content": pdf_text[:1000],
|
460 |
-
"timestamp": datetime.now().isoformat(),
|
461 |
-
"type": "pdf_document",
|
462 |
-
"pdf_data": pdf_base64
|
463 |
-
}
|
464 |
-
success, message = insert_record(container, default_doc)
|
465 |
-
if success:
|
466 |
-
st.success(f"PDF document '{pdf_file.name}' created! ✨")
|
467 |
-
st.rerun()
|
468 |
-
else:
|
469 |
-
st.error(f"Error creating PDF item: {message}")
|
470 |
-
|
471 |
def add_field_to_doc():
|
472 |
key = st.session_state.new_field_key
|
473 |
value = st.session_state.new_field_value
|
@@ -531,25 +479,20 @@ def vector_keyword_search(keyword, doc):
|
|
531 |
return False
|
532 |
|
533 |
def search_documents_ui(container):
|
534 |
-
st.sidebar.subheader("π Vector Search")
|
535 |
with st.sidebar.form("search_form"):
|
536 |
-
keyword = st.text_input("Search
|
537 |
col1, col2 = st.columns(2)
|
538 |
with col1:
|
539 |
search_submitted = st.form_submit_button("π Search")
|
540 |
with col2:
|
541 |
clear_submitted = st.form_submit_button("🗑️ Clear")
|
542 |
if search_submitted and keyword:
|
543 |
-
st.session_state.active_search = keyword
|
544 |
st.rerun()
|
545 |
if clear_submitted:
|
546 |
if 'active_search' in st.session_state:
|
547 |
del st.session_state.active_search
|
548 |
st.rerun()
|
549 |
-
|
550 |
-
uploaded_file = st.sidebar.file_uploader("Upload Document (PDF)", type=["pdf"])
|
551 |
-
if uploaded_file and container:
|
552 |
-
new_item_from_pdf(container, uploaded_file)
|
553 |
|
554 |
def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
|
555 |
try:
|
@@ -668,10 +611,6 @@ def main():
|
|
668 |
# Sidebar: Hierarchical Navigation
|
669 |
st.sidebar.title("π Navigator")
|
670 |
|
671 |
-
# Vector Search Section (Moved to Top)
|
672 |
-
if st.session_state.current_container:
|
673 |
-
search_documents_ui(st.session_state.current_container)
|
674 |
-
|
675 |
# Databases Section
|
676 |
st.sidebar.subheader("ποΈ Databases")
|
677 |
if "client" not in st.session_state:
|
@@ -728,6 +667,7 @@ def main():
|
|
728 |
new_ai_record(st.session_state.current_container)
|
729 |
if st.sidebar.button("π New Links Record"):
|
730 |
new_links_record(st.session_state.current_container)
|
|
|
731 |
|
732 |
# Central Area: Editable Documents with Search Filter
|
733 |
if st.session_state.current_container:
|
|
|
27 |
import requests
|
28 |
import numpy as np
|
29 |
from urllib.parse import quote
|
|
|
30 |
|
31 |
# =============================================================================
|
32 |
# βββββββββββββ EXTERNAL HELP LINKS βββββββββββββ
|
|
|
116 |
return text.strip()
|
117 |
|
118 |
def sanitize_json_text(text):
|
119 |
+
text = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', text)
|
120 |
+
return text.strip()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
121 |
|
122 |
# =============================================================================
|
123 |
# βββββββββββββ COSMOS DB FUNCTIONS βββββββββββββ
|
|
|
159 |
container.delete_item(item=doc_id, partition_key=partition_key_value)
|
160 |
return True, f"Record {doc_id} deleted. 🗑️"
|
161 |
except exceptions.CosmosResourceNotFoundError:
|
|
|
162 |
return True, f"Record {doc_id} not found (already deleted). 🗑️"
|
163 |
except exceptions.CosmosHttpResponseError as e:
|
|
|
164 |
return False, f"HTTP error deleting {doc_id}: {str(e)} 🚨"
|
165 |
except Exception as e:
|
|
|
166 |
return False, f"Unexpected error deleting {doc_id}: {str(e)} 😱"
|
167 |
|
168 |
def save_to_cosmos_db(container, query, response1, response2):
|
|
|
364 |
formatted_ts = dt.strftime("%I:%M %p %m/%d/%Y")
|
365 |
header = f"{doc.get('name', 'Unnamed')} - {formatted_ts}"
|
366 |
with st.expander(header):
|
367 |
+
doc_key = f"editor_{doc['id']}"
|
368 |
+
initial_value = st.session_state.saved_docs.get(doc['id'], json.dumps(doc, indent=2))
|
369 |
+
edited_content = st.text_area("Edit JSON", value=initial_value, height=300, key=doc_key)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
370 |
col_save, col_delete = st.columns(2)
|
371 |
with col_save:
|
372 |
if st.button("💾 Save", key=f"save_{doc['id']}"):
|
373 |
try:
|
374 |
+
cleaned_content = sanitize_json_text(edited_content)
|
375 |
+
updated_doc = json.loads(cleaned_content)
|
376 |
+
updated_doc['id'] = doc['id']
|
377 |
+
updated_doc['pk'] = doc.get('pk', doc['id'])
|
378 |
+
for field in ['_ts', '_rid', '_self', '_etag', '_attachments']:
|
379 |
+
updated_doc.pop(field, None)
|
|
|
|
|
|
|
|
|
|
|
|
|
380 |
success, message = update_record(container, updated_doc)
|
381 |
if success:
|
382 |
st.success(f"Saved {doc['id']}")
|
|
|
416 |
else:
|
417 |
st.error(f"Error creating new item: {message}")
|
418 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
419 |
def add_field_to_doc():
|
420 |
key = st.session_state.new_field_key
|
421 |
value = st.session_state.new_field_value
|
|
|
479 |
return False
|
480 |
|
481 |
def search_documents_ui(container):
|
|
|
482 |
with st.sidebar.form("search_form"):
|
483 |
+
keyword = st.text_input("Search Keyword", key="search_keyword")
|
484 |
col1, col2 = st.columns(2)
|
485 |
with col1:
|
486 |
search_submitted = st.form_submit_button("π Search")
|
487 |
with col2:
|
488 |
clear_submitted = st.form_submit_button("🗑️ Clear")
|
489 |
if search_submitted and keyword:
|
490 |
+
st.session_state.active_search = keyword # Use a separate key
|
491 |
st.rerun()
|
492 |
if clear_submitted:
|
493 |
if 'active_search' in st.session_state:
|
494 |
del st.session_state.active_search
|
495 |
st.rerun()
|
|
|
|
|
|
|
|
|
496 |
|
497 |
def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
|
498 |
try:
|
|
|
611 |
# Sidebar: Hierarchical Navigation
|
612 |
st.sidebar.title("π Navigator")
|
613 |
|
|
|
|
|
|
|
|
|
614 |
# Databases Section
|
615 |
st.sidebar.subheader("ποΈ Databases")
|
616 |
if "client" not in st.session_state:
|
|
|
667 |
new_ai_record(st.session_state.current_container)
|
668 |
if st.sidebar.button("π New Links Record"):
|
669 |
new_links_record(st.session_state.current_container)
|
670 |
+
search_documents_ui(st.session_state.current_container)
|
671 |
|
672 |
# Central Area: Editable Documents with Search Filter
|
673 |
if st.session_state.current_container:
|