Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
|
2 |
import anthropic
|
3 |
import base64
|
4 |
import glob
|
@@ -15,7 +15,6 @@ import time
|
|
15 |
import traceback
|
16 |
import uuid
|
17 |
import zipfile
|
18 |
-
|
19 |
from PIL import Image
|
20 |
from azure.cosmos import CosmosClient, exceptions
|
21 |
from datetime import datetime
|
@@ -24,13 +23,13 @@ from github import Github
|
|
24 |
from gradio_client import Client
|
25 |
from urllib.parse import quote
|
26 |
|
|
|
27 |
# π App Configuration - Because every app needs a good costume!
|
28 |
-
Site_Name = 'π
|
29 |
-
title = "π
|
30 |
helpURL = 'https://huggingface.co/awacke1'
|
31 |
-
bugURL = 'https://huggingface.co/spaces/awacke1'
|
32 |
icons = 'πππ«'
|
33 |
-
|
34 |
st.set_page_config(
|
35 |
page_title=title,
|
36 |
page_icon=icons,
|
@@ -43,6 +42,7 @@ st.set_page_config(
|
|
43 |
}
|
44 |
)
|
45 |
|
|
|
46 |
# π Cosmos DB configuration - Where data goes to party!
|
47 |
ENDPOINT = "https://acae-afd.documents.azure.com:443/"
|
48 |
DATABASE_NAME = os.environ.get("COSMOS_DATABASE_NAME")
|
@@ -60,6 +60,8 @@ client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
|
|
60 |
if "chat_history" not in st.session_state:
|
61 |
st.session_state.chat_history = []
|
62 |
|
|
|
|
|
63 |
# π οΈ Helper Functions - The unsung heroes of our code
|
64 |
|
65 |
# π Get a file download link - Making file sharing as easy as stealing candy from a baby
|
@@ -207,6 +209,8 @@ def save_to_cosmos_db(container, query, response1, response2):
|
|
207 |
except Exception as e:
|
208 |
st.error(f"An unexpected error occurred: {str(e)}")
|
209 |
|
|
|
|
|
210 |
# π GitHub functions - Where code goes to socialize
|
211 |
|
212 |
# π₯ Download GitHub repo - Cloning repos like it's going out of style
|
@@ -228,24 +232,19 @@ def create_repo(g, repo_name):
|
|
228 |
def push_to_github(local_path, repo, github_token):
    """Stage, commit and push the working tree at ``local_path`` to ``repo``.

    Args:
        local_path: Filesystem path of an existing local git repository.
        repo: Object exposing ``full_name``, used to build the GitHub URL.
        github_token: Token embedded in the HTTPS remote URL for authentication.

    The ``origin`` remote is created if missing, otherwise re-pointed at the
    token-bearing URL. When the repository has no branch heads yet, a ``main``
    branch is checked out; otherwise the currently active branch is pushed.
    """
    authenticated_url = f"https://{github_token}@github.com/{repo.full_name}.git"
    working_repo = Repo(local_path)

    # Make sure 'origin' exists and targets the authenticated URL.
    remote_names = {remote.name for remote in working_repo.remotes}
    if 'origin' not in remote_names:
        origin = working_repo.create_remote('origin', authenticated_url)
    else:
        origin = working_repo.remote('origin')
        origin.set_url(authenticated_url)

    # A repository without any heads has no branch to push yet: start 'main'.
    if working_repo.heads:
        current_branch = working_repo.active_branch.name
    else:
        working_repo.git.checkout('-b', 'main')
        current_branch = 'main'

    # Stage everything, then commit only when something actually changed.
    working_repo.git.add(A=True)
    if working_repo.is_dirty():
        working_repo.git.commit('-m', 'Initial commit')

    origin.push(refspec=f'{current_branch}:{current_branch}')
|
250 |
|
251 |
|
@@ -254,14 +253,11 @@ def save_or_clone_to_cosmos_db(container, document=None, clone_id=None):
|
|
254 |
timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
|
255 |
random_component = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=8))
|
256 |
return f"{timestamp}-{random_component}-{str(uuid.uuid4())}"
|
257 |
-
|
258 |
max_retries = 10
|
259 |
base_delay = 0.1
|
260 |
-
|
261 |
for attempt in range(max_retries):
|
262 |
try:
|
263 |
new_id = generate_complex_unique_id()
|
264 |
-
|
265 |
if clone_id:
|
266 |
try:
|
267 |
existing_doc = container.read_item(item=clone_id, partition_key=clone_id)
|
@@ -276,15 +272,12 @@ def save_or_clone_to_cosmos_db(container, document=None, clone_id=None):
|
|
276 |
return False, f"Document with ID {clone_id} not found for cloning."
|
277 |
else:
|
278 |
if document is None:
|
279 |
-
return False, "No document provided for saving"
|
280 |
-
|
281 |
document['id'] = new_id
|
282 |
document['created_at'] = datetime.utcnow().isoformat()
|
283 |
new_doc = document
|
284 |
-
|
285 |
response = container.create_item(body=new_doc)
|
286 |
return True, f"{'Cloned' if clone_id else 'New'} document saved successfully with ID: {response['id']}"
|
287 |
-
|
288 |
except exceptions.CosmosHttpResponseError as e:
|
289 |
if e.status_code == 409:
|
290 |
delay = base_delay * (2 ** attempt) + random.uniform(0, 0.1)
|
@@ -293,7 +286,6 @@ def save_or_clone_to_cosmos_db(container, document=None, clone_id=None):
|
|
293 |
return False, f"Error saving to Cosmos DB: {str(e)}"
|
294 |
except Exception as e:
|
295 |
return False, f"An unexpected error occurred: {str(e)}"
|
296 |
-
|
297 |
return False, "Failed to save document after maximum retries."
|
298 |
|
299 |
|
@@ -304,45 +296,37 @@ def archive_current_container(database_name, container_name, client):
|
|
304 |
if os.path.exists(base_dir):
|
305 |
shutil.rmtree(base_dir)
|
306 |
os.makedirs(base_dir)
|
307 |
-
|
308 |
db_client = client.get_database_client(database_name)
|
309 |
container_client = db_client.get_container_client(container_name)
|
310 |
items = list(container_client.read_all_items())
|
311 |
-
|
312 |
container_dir = os.path.join(base_dir, container_name)
|
313 |
os.makedirs(container_dir)
|
314 |
-
|
315 |
for item in items:
|
316 |
item_id = item.get('id', f"unknown_{datetime.now().strftime('%Y%m%d%H%M%S')}")
|
317 |
with open(os.path.join(container_dir, f"{item_id}.json"), 'w') as f:
|
318 |
json.dump(item, f, indent=2)
|
319 |
-
|
320 |
archive_name = f"{container_name}_archive_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
321 |
shutil.make_archive(archive_name, 'zip', base_dir)
|
322 |
-
|
323 |
return get_download_link(f"{archive_name}.zip")
|
324 |
except Exception as e:
|
325 |
return f"An error occurred while archiving data: {str(e)} π’"
|
326 |
|
|
|
327 |
# π Search glossary - Finding needles in digital haystacks
|
328 |
def search_glossary(query):
|
329 |
st.markdown(f"### π SearchGlossary for: {query}")
|
330 |
-
|
331 |
# Dropdown for model selection
|
332 |
model_options = ['mistralai/Mixtral-8x7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None']
|
333 |
model_choice = st.selectbox('π§ Select LLM Model', options=model_options, index=1)
|
334 |
-
|
335 |
# Dropdown for database selection
|
336 |
database_options = ['Semantic Search', 'Arxiv Search - Latest - (EXPERIMENTAL)']
|
337 |
database_choice = st.selectbox('π Select Database', options=database_options, index=0)
|
338 |
-
|
339 |
# π΅οΈββοΈ Searching the glossary for: query
|
340 |
all_results = ""
|
341 |
-
st.markdown(f"- {query
|
342 |
-
|
343 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM
|
344 |
client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
|
345 |
-
|
346 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM - api_name: /ask_llm
|
347 |
result = client.predict(
|
348 |
prompt=query,
|
@@ -352,7 +336,6 @@ def search_glossary(query):
|
|
352 |
)
|
353 |
st.markdown(result)
|
354 |
st.code(result, language="python", line_numbers=True)
|
355 |
-
|
356 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM - api_name: /ask_llm
|
357 |
result2 = client.predict(
|
358 |
prompt=query,
|
@@ -362,7 +345,6 @@ def search_glossary(query):
|
|
362 |
)
|
363 |
st.markdown(result2)
|
364 |
st.code(result2, language="python", line_numbers=True)
|
365 |
-
|
366 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM - api_name: /ask_llm
|
367 |
result3 = client.predict(
|
368 |
prompt=query,
|
@@ -372,7 +354,6 @@ def search_glossary(query):
|
|
372 |
)
|
373 |
st.markdown(result3)
|
374 |
st.code(result3, language="python", line_numbers=True)
|
375 |
-
|
376 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM - api_name: /update_with_rag_md
|
377 |
response2 = client.predict(
|
378 |
message=query, # str in 'parameter_13' Textbox component
|
@@ -381,19 +362,13 @@ def search_glossary(query):
|
|
381 |
llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
|
382 |
api_name="/update_with_rag_md"
|
383 |
)
|
384 |
-
|
385 |
st.markdown(response2[0])
|
386 |
st.code(response2[0], language="python", line_numbers=True, wrap_lines=True)
|
387 |
-
|
388 |
st.markdown(response2[1])
|
389 |
st.code(response2[1], language="python", line_numbers=True, wrap_lines=True)
|
390 |
-
|
391 |
return result, result2, result3, response2
|
392 |
|
393 |
|
394 |
-
|
395 |
-
|
396 |
-
|
397 |
# π Generate a safe filename from the first few lines of content
|
398 |
def generate_filename_from_content(content, file_type="md"):
|
399 |
# Extract the first few lines or sentences
|
@@ -404,6 +379,7 @@ def generate_filename_from_content(content, file_type="md"):
|
|
404 |
safe_name = safe_name[:50].strip() # Adjust length limit
|
405 |
return f"{safe_name}.{file_type}"
|
406 |
|
|
|
407 |
# πΎ Create and save a file
|
408 |
def create_file_from_content(content, should_save=True):
|
409 |
if not should_save:
|
@@ -413,12 +389,12 @@ def create_file_from_content(content, should_save=True):
|
|
413 |
file.write(content)
|
414 |
return filename
|
415 |
|
|
|
416 |
# π Display list of saved .md files in the sidebar
|
417 |
def display_saved_files_in_sidebar():
|
418 |
all_files = glob.glob("*.md")
|
419 |
all_files.sort(reverse=True)
|
420 |
all_files = [file for file in all_files if not file.lower().startswith('readme')] # Exclude README.md
|
421 |
-
|
422 |
st.sidebar.markdown("## π Saved Markdown Files")
|
423 |
for file in all_files:
|
424 |
col1, col2, col3 = st.sidebar.columns([6, 2, 1])
|
@@ -435,7 +411,6 @@ def display_saved_files_in_sidebar():
|
|
435 |
os.remove(file)
|
436 |
st.rerun()
|
437 |
|
438 |
-
|
439 |
def clone_record(container, clone_id):
|
440 |
try:
|
441 |
existing_doc = container.read_item(item=clone_id, partition_key=clone_id)
|
@@ -448,17 +423,14 @@ def clone_record(container, clone_id):
|
|
448 |
new_doc['_etag'] = None
|
449 |
new_doc['_attachments'] = None
|
450 |
new_doc['_ts'] = None # Reset timestamp to be updated by Cosmos DB automatically
|
451 |
-
|
452 |
# Insert the cloned document
|
453 |
response = container.create_item(body=new_doc)
|
454 |
st.success(f"Cloned document saved successfully with ID: {new_doc['id']} π")
|
455 |
-
|
456 |
# Refresh the documents in session state
|
457 |
st.session_state.documents = list(container.query_items(
|
458 |
query="SELECT * FROM c ORDER BY c._ts DESC",
|
459 |
enable_cross_partition_query=True
|
460 |
))
|
461 |
-
|
462 |
except exceptions.CosmosResourceNotFoundError:
|
463 |
st.error(f"Document with ID {clone_id} not found for cloning.")
|
464 |
except exceptions.CosmosHttpResponseError as e:
|
@@ -467,8 +439,6 @@ def clone_record(container, clone_id):
|
|
467 |
st.error(f"An unexpected error occurred: {str(e)} π±")
|
468 |
|
469 |
|
470 |
-
|
471 |
-
|
472 |
def create_new_blank_record(container):
|
473 |
try:
|
474 |
# Get the structure of the latest document (to preserve schema)
|
@@ -477,52 +447,45 @@ def create_new_blank_record(container):
|
|
477 |
new_doc_structure = latest_doc[0].copy()
|
478 |
else:
|
479 |
new_doc_structure = {}
|
480 |
-
|
481 |
new_doc = {key: "" for key in new_doc_structure.keys()} # Set all fields to blank
|
482 |
new_doc['id'] = generate_unique_id() # Generate new unique ID
|
483 |
new_doc['createdAt'] = datetime.utcnow().isoformat() # Set creation time
|
484 |
-
|
485 |
# Insert the new blank document
|
486 |
response = container.create_item(body=new_doc)
|
487 |
st.success(f"New blank document saved successfully with ID: {new_doc['id']} π")
|
488 |
-
|
489 |
# Refresh the documents in session state
|
490 |
st.session_state.documents = list(container.query_items(
|
491 |
query="SELECT * FROM c ORDER BY c._ts DESC",
|
492 |
enable_cross_partition_query=True
|
493 |
))
|
494 |
-
|
495 |
except exceptions.CosmosHttpResponseError as e:
|
496 |
st.error(f"HTTP error occurred: {str(e)} π¨")
|
497 |
except Exception as e:
|
498 |
st.error(f"An unexpected error occurred: {str(e)} π±")
|
499 |
|
|
|
500 |
# Function to preprocess the pasted content
|
501 |
def preprocess_text(text):
    """Escape pasted text so it can be embedded inside a JSON string literal.

    Steps, in order:
      1. Existing backslashes are doubled. This must happen first so that the
         escape sequences inserted by the later steps are not doubled again.
      2. CRLF, CR and LF are each replaced by the two-character sequence ``\\n``.
      3. Double quotes are backslash-escaped.
      4. Tabs become single spaces and non-ASCII characters are removed.
      5. Leading/trailing whitespace is stripped.

    Args:
        text: The raw pasted string.

    Returns:
        The escaped, ASCII-only string.
    """
    # Escape pre-existing backslashes before adding any escapes of our own;
    # otherwise input such as ``C:\dir`` or a trailing ``\`` yields an invalid
    # or altered JSON escape sequence.
    text = text.replace('\\', '\\\\')

    # Replace CRLF and other newline variations with the JSON newline escape sequence
    text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')

    # Escape double quotes inside the text
    text = text.replace('"', '\\"')

    # Replace tabs with spaces and drop non-ASCII characters
    text = text.replace('\t', ' ')
    text = re.sub(r'[^\x00-\x7F]+', '', text)

    # Normalize spaces (strip leading/trailing whitespace)
    return text.strip()
|
518 |
|
|
|
519 |
|
520 |
# π Main function - "All the world's a stage, and all the code merely players" -Shakespeare, probably
|
521 |
def main():
|
522 |
st.title("πGitπCosmosπ« - Azure Cosmos DB and Github Agent")
|
523 |
|
524 |
-
|
525 |
-
|
526 |
# π² Session state vars - "Life is like a session state, you never know what you're gonna get"
|
527 |
if 'logged_in' not in st.session_state:
|
528 |
st.session_state.logged_in = False
|
@@ -541,8 +504,6 @@ def main():
|
|
541 |
if 'cloned_doc' not in st.session_state:
|
542 |
st.session_state.cloned_doc = None
|
543 |
|
544 |
-
|
545 |
-
|
546 |
# π Query processing - "To search or not to search, that is the query"
|
547 |
try:
|
548 |
query_params = st.query_params
|
@@ -577,13 +538,10 @@ def main():
|
|
577 |
st.error(f"HTTP error occurred: {str(e)} π¨")
|
578 |
except Exception as e:
|
579 |
st.error(f"An unexpected error occurred: {str(e)} π±")
|
580 |
-
|
581 |
st.stop()
|
582 |
except Exception as e:
|
583 |
st.markdown(' ')
|
584 |
|
585 |
-
|
586 |
-
|
587 |
# π Auth check - "With great keys come great connectivity"
|
588 |
if Key:
|
589 |
st.session_state.primary_key = Key
|
@@ -592,22 +550,17 @@ def main():
|
|
592 |
st.error("Cosmos DB Key is not set in environment variables. πβ")
|
593 |
return
|
594 |
|
595 |
-
|
596 |
-
|
597 |
if st.session_state.logged_in:
|
598 |
# π DB initialization - "In the beginning, there was connection string..."
|
599 |
try:
|
600 |
if st.session_state.client is None:
|
601 |
st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
|
602 |
-
|
603 |
# π Navigation setup - "Navigation is not about where you are, but where you're going"
|
604 |
st.sidebar.title("πGitπCosmosπ«ποΈNavigator")
|
605 |
-
|
606 |
databases = get_databases(st.session_state.client)
|
607 |
selected_db = st.sidebar.selectbox("ποΈ Select Database", databases)
|
608 |
st.markdown(CosmosDBUrl)
|
609 |
|
610 |
-
|
611 |
# π State management - "Change is the only constant in state management"
|
612 |
if selected_db != st.session_state.selected_database:
|
613 |
st.session_state.selected_database = selected_db
|
@@ -615,8 +568,6 @@ def main():
|
|
615 |
st.session_state.selected_document_id = None
|
616 |
st.session_state.current_index = 0
|
617 |
st.rerun()
|
618 |
-
|
619 |
-
|
620 |
|
621 |
if st.session_state.selected_database:
|
622 |
database = st.session_state.client.get_database_client(st.session_state.selected_database)
|
@@ -632,7 +583,6 @@ def main():
|
|
632 |
|
633 |
if st.session_state.selected_container:
|
634 |
container = database.get_container_client(st.session_state.selected_container)
|
635 |
-
|
636 |
# π¦ Export functionality - "Pack it, zip it, ship it"
|
637 |
if st.sidebar.button("π¦ Export Container Data"):
|
638 |
download_link = archive_current_container(st.session_state.selected_database,
|
@@ -643,17 +593,13 @@ def main():
|
|
643 |
else:
|
644 |
st.error(download_link)
|
645 |
|
646 |
-
|
647 |
-
|
648 |
# π Document handling - "Document, document, on the wall, who's the most recent of them all?"
|
649 |
documents = get_documents(container)
|
650 |
total_docs = len(documents)
|
651 |
-
|
652 |
# Add a slider to let the user choose how many documents to display
|
653 |
num_docs_to_display = st.slider(
|
654 |
-
"Select number of documents to display", 5, 500,
|
655 |
)
|
656 |
-
|
657 |
# Adjust the document display logic based on the slider value
|
658 |
if total_docs > num_docs_to_display:
|
659 |
documents_to_display = documents[:num_docs_to_display]
|
@@ -662,12 +608,11 @@ def main():
|
|
662 |
documents_to_display = documents
|
663 |
st.sidebar.info(f"Showing all {len(documents_to_display)} documents.")
|
664 |
|
665 |
-
|
666 |
-
|
667 |
if documents_to_display:
|
668 |
# π¨ View options - "Different strokes for different folks"
|
669 |
view_options = ['Show as Markdown', 'Show as Code Editor', 'Show as Edit, Save, Run AI', 'Clone Document', 'New Record']
|
670 |
selected_view = st.sidebar.selectbox("Select Viewer/Editor", view_options, index=2)
|
|
|
671 |
|
672 |
if selected_view == 'Show as Markdown':
|
673 |
Label = '# π Markdown view - Mark it down, mark it up'
|
@@ -697,8 +642,6 @@ def main():
|
|
697 |
content = json.dumps(doc, indent=2)
|
698 |
st.markdown(f"```json\n{content}\n```")
|
699 |
|
700 |
-
|
701 |
-
|
702 |
# β¬
οΈβ‘οΈ Navigation - "Left and right, day and night"
|
703 |
col_prev, col_next = st.columns([1, 1])
|
704 |
with col_prev:
|
@@ -711,6 +654,8 @@ def main():
|
|
711 |
if st.session_state.current_index < total_docs - 1:
|
712 |
st.session_state.current_index += 1
|
713 |
st.rerun()
|
|
|
|
|
714 |
|
715 |
elif selected_view == 'Show as Code Editor':
|
716 |
Label = '# π» Code editor view'
|
@@ -762,10 +707,6 @@ def main():
|
|
762 |
except Exception as e:
|
763 |
st.error(f"Error deleting document: {str(e)}")
|
764 |
|
765 |
-
|
766 |
-
|
767 |
-
|
768 |
-
|
769 |
|
770 |
elif selected_view == 'Show as Edit, Save, Run AI':
|
771 |
Label = '# βοΈ Edit and save view - Edit with wisdom, save with precision'
|
@@ -775,8 +716,6 @@ def main():
|
|
775 |
num_cols = len(documents_to_display)
|
776 |
cols = st.columns(num_cols)
|
777 |
|
778 |
-
|
779 |
-
|
780 |
for idx, (col, doc) in enumerate(zip(cols, documents_to_display)):
|
781 |
with col:
|
782 |
|
@@ -817,9 +756,7 @@ def main():
|
|
817 |
with col_ai:
|
818 |
if st.button("π€ Run AI", key=f'run_with_ai_button_{idx}'):
|
819 |
search_glossary(json.dumps(editable_doc, indent=2))
|
820 |
-
|
821 |
-
|
822 |
-
|
823 |
|
824 |
elif selected_view == 'Clone Document':
|
825 |
st.markdown("#### Clone a document:")
|
@@ -913,8 +850,6 @@ def main():
|
|
913 |
st.info("No documents to display. π§")
|
914 |
|
915 |
|
916 |
-
|
917 |
-
|
918 |
Label = '# π GitHub integration - Git happens'
|
919 |
st.subheader("π GitHub Operations")
|
920 |
github_token = os.environ.get("GITHUB")
|
@@ -923,8 +858,6 @@ def main():
|
|
923 |
new_repo_name = st.text_input("New Repository Name (for cloning)",
|
924 |
value=f"AIExample-Clone-{datetime.now().strftime('%Y%m%d_%H%M%S')}")
|
925 |
|
926 |
-
|
927 |
-
|
928 |
col1, col2 = st.columns(2)
|
929 |
with col1:
|
930 |
if st.button("π₯ Clone Repository"):
|
@@ -948,8 +881,6 @@ def main():
|
|
948 |
else:
|
949 |
st.error("Please ensure GitHub token is set in environment variables and source repository URL is provided. πβ")
|
950 |
|
951 |
-
|
952 |
-
|
953 |
with col2:
|
954 |
if st.button("π€ Push to New Repository"):
|
955 |
if github_token and source_repo:
|
@@ -970,7 +901,6 @@ def main():
|
|
970 |
else:
|
971 |
st.error("Please ensure GitHub token is set in environment variables and source repository URL is provided. πβ")
|
972 |
|
973 |
-
|
974 |
|
975 |
st.subheader("π¬ Chat with Claude")
|
976 |
user_input = st.text_area("Message π¨:", height=100)
|
@@ -988,12 +918,9 @@ def main():
|
|
988 |
)
|
989 |
st.write("Claude's reply π§ :")
|
990 |
st.write(response.content[0].text)
|
991 |
-
|
992 |
filename = generate_filename(user_input, "md")
|
993 |
create_file(filename, user_input, response.content[0].text)
|
994 |
-
|
995 |
st.session_state.chat_history.append({"user": user_input, "claude": response.content[0].text})
|
996 |
-
|
997 |
# Save to Cosmos DB
|
998 |
save_to_cosmos_db(container, user_input, response.content[0].text, "")
|
999 |
|
@@ -1005,8 +932,7 @@ def main():
|
|
1005 |
st.text_area("You said π¬:", chat["user"], height=100, disabled=True)
|
1006 |
st.text_area("Claude replied π€:", chat["claude"], height=200, disabled=True)
|
1007 |
st.markdown("---")
|
1008 |
-
|
1009 |
-
|
1010 |
|
1011 |
# π File editor - "Edit with care, save with flair"
|
1012 |
if hasattr(st.session_state, 'current_file'):
|
@@ -1023,8 +949,6 @@ def main():
|
|
1023 |
file.write(new_content)
|
1024 |
st.success("File updated successfully! π")
|
1025 |
|
1026 |
-
|
1027 |
-
|
1028 |
# π File management - "Manage many, maintain order"
|
1029 |
st.sidebar.title("π File Management")
|
1030 |
|
@@ -1057,14 +981,10 @@ def main():
|
|
1057 |
os.remove(file)
|
1058 |
st.rerun()
|
1059 |
|
1060 |
-
|
1061 |
-
|
1062 |
except exceptions.CosmosHttpResponseError as e:
|
1063 |
st.error(f"Failed to connect to Cosmos DB. HTTP error: {str(e)} π¨")
|
1064 |
except Exception as e:
|
1065 |
st.error(f"An unexpected error occurred: {str(e)} π±")
|
1066 |
-
|
1067 |
-
|
1068 |
|
1069 |
if st.session_state.logged_in and st.sidebar.button("πͺ Logout"):
|
1070 |
Label = '# πͺ Logout - All good things must come to an end'
|
|
|
1 |
+
# Import libraries and references:
|
2 |
import anthropic
|
3 |
import base64
|
4 |
import glob
|
|
|
15 |
import traceback
|
16 |
import uuid
|
17 |
import zipfile
|
|
|
18 |
from PIL import Image
|
19 |
from azure.cosmos import CosmosClient, exceptions
|
20 |
from datetime import datetime
|
|
|
23 |
from gradio_client import Client
|
24 |
from urllib.parse import quote
|
25 |
|
26 |
+
|
27 |
# π App Configuration - Because every app needs a good costume!
|
28 |
+
Site_Name = 'πGitCosmosπ - AI Azure Cosmos DB and Github Agent'
|
29 |
+
title = "πGitCosmosπ - AI Azure Cosmos DB and Github Agent"
|
30 |
helpURL = 'https://huggingface.co/awacke1'
|
31 |
+
bugURL = 'https://huggingface.co/spaces/awacke1/AzureCosmosDBUI/'
|
32 |
icons = 'πππ«'
|
|
|
33 |
st.set_page_config(
|
34 |
page_title=title,
|
35 |
page_icon=icons,
|
|
|
42 |
}
|
43 |
)
|
44 |
|
45 |
+
|
46 |
# π Cosmos DB configuration - Where data goes to party!
|
47 |
ENDPOINT = "https://acae-afd.documents.azure.com:443/"
|
48 |
DATABASE_NAME = os.environ.get("COSMOS_DATABASE_NAME")
|
|
|
60 |
if "chat_history" not in st.session_state:
|
61 |
st.session_state.chat_history = []
|
62 |
|
63 |
+
|
64 |
+
|
65 |
# π οΈ Helper Functions - The unsung heroes of our code
|
66 |
|
67 |
# π Get a file download link - Making file sharing as easy as stealing candy from a baby
|
|
|
209 |
except Exception as e:
|
210 |
st.error(f"An unexpected error occurred: {str(e)}")
|
211 |
|
212 |
+
|
213 |
+
|
214 |
# π GitHub functions - Where code goes to socialize
|
215 |
|
216 |
# π₯ Download GitHub repo - Cloning repos like it's going out of style
|
|
|
232 |
def push_to_github(local_path, repo, github_token):
    """Stage, commit and push the working tree at ``local_path`` to ``repo``.

    Args:
        local_path: Filesystem path of an existing local git repository.
        repo: Object exposing ``full_name``, used to build the GitHub URL.
        github_token: Token embedded in the HTTPS remote URL for authentication.

    The ``origin`` remote is created if missing, otherwise re-pointed at the
    token-bearing URL. When the repository has no branch heads yet, a ``main``
    branch is checked out; otherwise the currently active branch is pushed.
    """
    authenticated_url = f"https://{github_token}@github.com/{repo.full_name}.git"
    working_repo = Repo(local_path)

    # Make sure 'origin' exists and targets the authenticated URL.
    remote_names = {remote.name for remote in working_repo.remotes}
    if 'origin' not in remote_names:
        origin = working_repo.create_remote('origin', authenticated_url)
    else:
        origin = working_repo.remote('origin')
        origin.set_url(authenticated_url)

    # A repository without any heads has no branch to push yet: start 'main'.
    if working_repo.heads:
        current_branch = working_repo.active_branch.name
    else:
        working_repo.git.checkout('-b', 'main')
        current_branch = 'main'

    # Stage everything, then commit only when something actually changed.
    working_repo.git.add(A=True)
    if working_repo.is_dirty():
        working_repo.git.commit('-m', 'Initial commit')

    origin.push(refspec=f'{current_branch}:{current_branch}')
|
249 |
|
250 |
|
|
|
253 |
timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
|
254 |
random_component = ''.join(random.choices('abcdefghijklmnopqrstuvwxyz0123456789', k=8))
|
255 |
return f"{timestamp}-{random_component}-{str(uuid.uuid4())}"
|
|
|
256 |
max_retries = 10
|
257 |
base_delay = 0.1
|
|
|
258 |
for attempt in range(max_retries):
|
259 |
try:
|
260 |
new_id = generate_complex_unique_id()
|
|
|
261 |
if clone_id:
|
262 |
try:
|
263 |
existing_doc = container.read_item(item=clone_id, partition_key=clone_id)
|
|
|
272 |
return False, f"Document with ID {clone_id} not found for cloning."
|
273 |
else:
|
274 |
if document is None:
|
275 |
+
return False, "No document provided for saving"
|
|
|
276 |
document['id'] = new_id
|
277 |
document['created_at'] = datetime.utcnow().isoformat()
|
278 |
new_doc = document
|
|
|
279 |
response = container.create_item(body=new_doc)
|
280 |
return True, f"{'Cloned' if clone_id else 'New'} document saved successfully with ID: {response['id']}"
|
|
|
281 |
except exceptions.CosmosHttpResponseError as e:
|
282 |
if e.status_code == 409:
|
283 |
delay = base_delay * (2 ** attempt) + random.uniform(0, 0.1)
|
|
|
286 |
return False, f"Error saving to Cosmos DB: {str(e)}"
|
287 |
except Exception as e:
|
288 |
return False, f"An unexpected error occurred: {str(e)}"
|
|
|
289 |
return False, "Failed to save document after maximum retries."
|
290 |
|
291 |
|
|
|
296 |
if os.path.exists(base_dir):
|
297 |
shutil.rmtree(base_dir)
|
298 |
os.makedirs(base_dir)
|
|
|
299 |
db_client = client.get_database_client(database_name)
|
300 |
container_client = db_client.get_container_client(container_name)
|
301 |
items = list(container_client.read_all_items())
|
|
|
302 |
container_dir = os.path.join(base_dir, container_name)
|
303 |
os.makedirs(container_dir)
|
|
|
304 |
for item in items:
|
305 |
item_id = item.get('id', f"unknown_{datetime.now().strftime('%Y%m%d%H%M%S')}")
|
306 |
with open(os.path.join(container_dir, f"{item_id}.json"), 'w') as f:
|
307 |
json.dump(item, f, indent=2)
|
|
|
308 |
archive_name = f"{container_name}_archive_{datetime.now().strftime('%Y%m%d%H%M%S')}"
|
309 |
shutil.make_archive(archive_name, 'zip', base_dir)
|
|
|
310 |
return get_download_link(f"{archive_name}.zip")
|
311 |
except Exception as e:
|
312 |
return f"An error occurred while archiving data: {str(e)} π’"
|
313 |
|
314 |
+
|
315 |
# π Search glossary - Finding needles in digital haystacks
|
316 |
def search_glossary(query):
|
317 |
st.markdown(f"### π SearchGlossary for: {query}")
|
|
|
318 |
# Dropdown for model selection
|
319 |
model_options = ['mistralai/Mixtral-8x7B-Instruct-v0.1', 'mistralai/Mistral-7B-Instruct-v0.2', 'google/gemma-7b-it', 'None']
|
320 |
model_choice = st.selectbox('π§ Select LLM Model', options=model_options, index=1)
|
|
|
321 |
# Dropdown for database selection
|
322 |
database_options = ['Semantic Search', 'Arxiv Search - Latest - (EXPERIMENTAL)']
|
323 |
database_choice = st.selectbox('π Select Database', options=database_options, index=0)
|
|
|
324 |
# π΅οΈββοΈ Searching the glossary for: query
|
325 |
all_results = ""
|
326 |
+
st.markdown(f"- {query
|
327 |
+
|
328 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM
|
329 |
client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
|
|
|
330 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM - api_name: /ask_llm
|
331 |
result = client.predict(
|
332 |
prompt=query,
|
|
|
336 |
)
|
337 |
st.markdown(result)
|
338 |
st.code(result, language="python", line_numbers=True)
|
|
|
339 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM - api_name: /ask_llm
|
340 |
result2 = client.predict(
|
341 |
prompt=query,
|
|
|
345 |
)
|
346 |
st.markdown(result2)
|
347 |
st.code(result2, language="python", line_numbers=True)
|
|
|
348 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM - api_name: /ask_llm
|
349 |
result3 = client.predict(
|
350 |
prompt=query,
|
|
|
354 |
)
|
355 |
st.markdown(result3)
|
356 |
st.code(result3, language="python", line_numbers=True)
|
|
|
357 |
# π ArXiv RAG researcher expert ~-<>-~ Paper Summary & Ask LLM - api_name: /update_with_rag_md
|
358 |
response2 = client.predict(
|
359 |
message=query, # str in 'parameter_13' Textbox component
|
|
|
362 |
llm_model_picked="mistralai/Mistral-7B-Instruct-v0.2",
|
363 |
api_name="/update_with_rag_md"
|
364 |
)
|
|
|
365 |
st.markdown(response2[0])
|
366 |
st.code(response2[0], language="python", line_numbers=True, wrap_lines=True)
|
|
|
367 |
st.markdown(response2[1])
|
368 |
st.code(response2[1], language="python", line_numbers=True, wrap_lines=True)
|
|
|
369 |
return result, result2, result3, response2
|
370 |
|
371 |
|
|
|
|
|
|
|
372 |
# π Generate a safe filename from the first few lines of content
|
373 |
def generate_filename_from_content(content, file_type="md"):
|
374 |
# Extract the first few lines or sentences
|
|
|
379 |
safe_name = safe_name[:50].strip() # Adjust length limit
|
380 |
return f"{safe_name}.{file_type}"
|
381 |
|
382 |
+
|
383 |
# πΎ Create and save a file
|
384 |
def create_file_from_content(content, should_save=True):
|
385 |
if not should_save:
|
|
|
389 |
file.write(content)
|
390 |
return filename
|
391 |
|
392 |
+
|
393 |
# π Display list of saved .md files in the sidebar
|
394 |
def display_saved_files_in_sidebar():
|
395 |
all_files = glob.glob("*.md")
|
396 |
all_files.sort(reverse=True)
|
397 |
all_files = [file for file in all_files if not file.lower().startswith('readme')] # Exclude README.md
|
|
|
398 |
st.sidebar.markdown("## π Saved Markdown Files")
|
399 |
for file in all_files:
|
400 |
col1, col2, col3 = st.sidebar.columns([6, 2, 1])
|
|
|
411 |
os.remove(file)
|
412 |
st.rerun()
|
413 |
|
|
|
414 |
def clone_record(container, clone_id):
|
415 |
try:
|
416 |
existing_doc = container.read_item(item=clone_id, partition_key=clone_id)
|
|
|
423 |
new_doc['_etag'] = None
|
424 |
new_doc['_attachments'] = None
|
425 |
new_doc['_ts'] = None # Reset timestamp to be updated by Cosmos DB automatically
|
|
|
426 |
# Insert the cloned document
|
427 |
response = container.create_item(body=new_doc)
|
428 |
st.success(f"Cloned document saved successfully with ID: {new_doc['id']} π")
|
|
|
429 |
# Refresh the documents in session state
|
430 |
st.session_state.documents = list(container.query_items(
|
431 |
query="SELECT * FROM c ORDER BY c._ts DESC",
|
432 |
enable_cross_partition_query=True
|
433 |
))
|
|
|
434 |
except exceptions.CosmosResourceNotFoundError:
|
435 |
st.error(f"Document with ID {clone_id} not found for cloning.")
|
436 |
except exceptions.CosmosHttpResponseError as e:
|
|
|
439 |
st.error(f"An unexpected error occurred: {str(e)} π±")
|
440 |
|
441 |
|
|
|
|
|
442 |
def create_new_blank_record(container):
|
443 |
try:
|
444 |
# Get the structure of the latest document (to preserve schema)
|
|
|
447 |
new_doc_structure = latest_doc[0].copy()
|
448 |
else:
|
449 |
new_doc_structure = {}
|
|
|
450 |
new_doc = {key: "" for key in new_doc_structure.keys()} # Set all fields to blank
|
451 |
new_doc['id'] = generate_unique_id() # Generate new unique ID
|
452 |
new_doc['createdAt'] = datetime.utcnow().isoformat() # Set creation time
|
|
|
453 |
# Insert the new blank document
|
454 |
response = container.create_item(body=new_doc)
|
455 |
st.success(f"New blank document saved successfully with ID: {new_doc['id']} π")
|
|
|
456 |
# Refresh the documents in session state
|
457 |
st.session_state.documents = list(container.query_items(
|
458 |
query="SELECT * FROM c ORDER BY c._ts DESC",
|
459 |
enable_cross_partition_query=True
|
460 |
))
|
|
|
461 |
except exceptions.CosmosHttpResponseError as e:
|
462 |
st.error(f"HTTP error occurred: {str(e)} π¨")
|
463 |
except Exception as e:
|
464 |
st.error(f"An unexpected error occurred: {str(e)} π±")
|
465 |
|
466 |
+
|
467 |
# Function to preprocess the pasted content
|
468 |
def preprocess_text(text):
    """Escape pasted text so it can be embedded inside a JSON string literal.

    Steps, in order:
      1. Existing backslashes are doubled. This must happen first so that the
         escape sequences inserted by the later steps are not doubled again.
      2. CRLF, CR and LF are each replaced by the two-character sequence ``\\n``.
      3. Double quotes are backslash-escaped.
      4. Tabs become single spaces and non-ASCII characters are removed.
      5. Leading/trailing whitespace is stripped.

    Args:
        text: The raw pasted string.

    Returns:
        The escaped, ASCII-only string.
    """
    # Escape pre-existing backslashes before adding any escapes of our own;
    # otherwise input such as ``C:\dir`` or a trailing ``\`` yields an invalid
    # or altered JSON escape sequence.
    text = text.replace('\\', '\\\\')

    # Replace CRLF and other newline variations with the JSON newline escape sequence
    text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')

    # Escape double quotes inside the text
    text = text.replace('"', '\\"')

    # Replace tabs with spaces and drop non-ASCII characters
    text = text.replace('\t', ' ')
    text = re.sub(r'[^\x00-\x7F]+', '', text)

    # Normalize spaces (strip leading/trailing whitespace)
    return text.strip()
|
482 |
|
483 |
+
|
484 |
|
485 |
# π Main function - "All the world's a stage, and all the code merely players" -Shakespeare, probably
|
486 |
def main():
|
487 |
st.title("πGitπCosmosπ« - Azure Cosmos DB and Github Agent")
|
488 |
|
|
|
|
|
489 |
# π² Session state vars - "Life is like a session state, you never know what you're gonna get"
|
490 |
if 'logged_in' not in st.session_state:
|
491 |
st.session_state.logged_in = False
|
|
|
504 |
if 'cloned_doc' not in st.session_state:
|
505 |
st.session_state.cloned_doc = None
|
506 |
|
|
|
|
|
507 |
# π Query processing - "To search or not to search, that is the query"
|
508 |
try:
|
509 |
query_params = st.query_params
|
|
|
538 |
st.error(f"HTTP error occurred: {str(e)} π¨")
|
539 |
except Exception as e:
|
540 |
st.error(f"An unexpected error occurred: {str(e)} π±")
|
|
|
541 |
st.stop()
|
542 |
except Exception as e:
|
543 |
st.markdown(' ')
|
544 |
|
|
|
|
|
545 |
# π Auth check - "With great keys come great connectivity"
|
546 |
if Key:
|
547 |
st.session_state.primary_key = Key
|
|
|
550 |
st.error("Cosmos DB Key is not set in environment variables. πβ")
|
551 |
return
|
552 |
|
|
|
|
|
553 |
if st.session_state.logged_in:
|
554 |
# π DB initialization - "In the beginning, there was connection string..."
|
555 |
try:
|
556 |
if st.session_state.client is None:
|
557 |
st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
|
|
|
558 |
# π Navigation setup - "Navigation is not about where you are, but where you're going"
|
559 |
st.sidebar.title("πGitπCosmosπ«ποΈNavigator")
|
|
|
560 |
databases = get_databases(st.session_state.client)
|
561 |
selected_db = st.sidebar.selectbox("ποΈ Select Database", databases)
|
562 |
st.markdown(CosmosDBUrl)
|
563 |
|
|
|
564 |
# π State management - "Change is the only constant in state management"
|
565 |
if selected_db != st.session_state.selected_database:
|
566 |
st.session_state.selected_database = selected_db
|
|
|
568 |
st.session_state.selected_document_id = None
|
569 |
st.session_state.current_index = 0
|
570 |
st.rerun()
|
|
|
|
|
571 |
|
572 |
if st.session_state.selected_database:
|
573 |
database = st.session_state.client.get_database_client(st.session_state.selected_database)
|
|
|
583 |
|
584 |
if st.session_state.selected_container:
|
585 |
container = database.get_container_client(st.session_state.selected_container)
|
|
|
586 |
# π¦ Export functionality - "Pack it, zip it, ship it"
|
587 |
if st.sidebar.button("π¦ Export Container Data"):
|
588 |
download_link = archive_current_container(st.session_state.selected_database,
|
|
|
593 |
else:
|
594 |
st.error(download_link)
|
595 |
|
|
|
|
|
596 |
# π Document handling - "Document, document, on the wall, who's the most recent of them all?"
|
597 |
documents = get_documents(container)
|
598 |
total_docs = len(documents)
|
|
|
599 |
# Add a slider to let the user choose how many documents to display
|
600 |
num_docs_to_display = st.slider(
|
601 |
+
"Select number of documents to display", 5, 500, 10
|
602 |
)
|
|
|
603 |
# Adjust the document display logic based on the slider value
|
604 |
if total_docs > num_docs_to_display:
|
605 |
documents_to_display = documents[:num_docs_to_display]
|
|
|
608 |
documents_to_display = documents
|
609 |
st.sidebar.info(f"Showing all {len(documents_to_display)} documents.")
|
610 |
|
|
|
|
|
611 |
if documents_to_display:
|
612 |
# π¨ View options - "Different strokes for different folks"
|
613 |
view_options = ['Show as Markdown', 'Show as Code Editor', 'Show as Edit, Save, Run AI', 'Clone Document', 'New Record']
|
614 |
selected_view = st.sidebar.selectbox("Select Viewer/Editor", view_options, index=2)
|
615 |
+
|
616 |
|
617 |
if selected_view == 'Show as Markdown':
|
618 |
Label = '# π Markdown view - Mark it down, mark it up'
|
|
|
642 |
content = json.dumps(doc, indent=2)
|
643 |
st.markdown(f"```json\n{content}\n```")
|
644 |
|
|
|
|
|
645 |
# β¬
οΈβ‘οΈ Navigation - "Left and right, day and night"
|
646 |
col_prev, col_next = st.columns([1, 1])
|
647 |
with col_prev:
|
|
|
654 |
if st.session_state.current_index < total_docs - 1:
|
655 |
st.session_state.current_index += 1
|
656 |
st.rerun()
|
657 |
+
|
658 |
+
|
659 |
|
660 |
elif selected_view == 'Show as Code Editor':
|
661 |
Label = '# π» Code editor view'
|
|
|
707 |
except Exception as e:
|
708 |
st.error(f"Error deleting document: {str(e)}")
|
709 |
|
|
|
|
|
|
|
|
|
710 |
|
711 |
elif selected_view == 'Show as Edit, Save, Run AI':
|
712 |
Label = '# βοΈ Edit and save view - Edit with wisdom, save with precision'
|
|
|
716 |
num_cols = len(documents_to_display)
|
717 |
cols = st.columns(num_cols)
|
718 |
|
|
|
|
|
719 |
for idx, (col, doc) in enumerate(zip(cols, documents_to_display)):
|
720 |
with col:
|
721 |
|
|
|
756 |
with col_ai:
|
757 |
if st.button("π€ Run AI", key=f'run_with_ai_button_{idx}'):
|
758 |
search_glossary(json.dumps(editable_doc, indent=2))
|
759 |
+
|
|
|
|
|
760 |
|
761 |
elif selected_view == 'Clone Document':
|
762 |
st.markdown("#### Clone a document:")
|
|
|
850 |
st.info("No documents to display. π§")
|
851 |
|
852 |
|
|
|
|
|
853 |
Label = '# π GitHub integration - Git happens'
|
854 |
st.subheader("π GitHub Operations")
|
855 |
github_token = os.environ.get("GITHUB")
|
|
|
858 |
new_repo_name = st.text_input("New Repository Name (for cloning)",
|
859 |
value=f"AIExample-Clone-{datetime.now().strftime('%Y%m%d_%H%M%S')}")
|
860 |
|
|
|
|
|
861 |
col1, col2 = st.columns(2)
|
862 |
with col1:
|
863 |
if st.button("π₯ Clone Repository"):
|
|
|
881 |
else:
|
882 |
st.error("Please ensure GitHub token is set in environment variables and source repository URL is provided. πβ")
|
883 |
|
|
|
|
|
884 |
with col2:
|
885 |
if st.button("π€ Push to New Repository"):
|
886 |
if github_token and source_repo:
|
|
|
901 |
else:
|
902 |
st.error("Please ensure GitHub token is set in environment variables and source repository URL is provided. πβ")
|
903 |
|
|
|
904 |
|
905 |
st.subheader("π¬ Chat with Claude")
|
906 |
user_input = st.text_area("Message π¨:", height=100)
|
|
|
918 |
)
|
919 |
st.write("Claude's reply π§ :")
|
920 |
st.write(response.content[0].text)
|
|
|
921 |
filename = generate_filename(user_input, "md")
|
922 |
create_file(filename, user_input, response.content[0].text)
|
|
|
923 |
st.session_state.chat_history.append({"user": user_input, "claude": response.content[0].text})
|
|
|
924 |
# Save to Cosmos DB
|
925 |
save_to_cosmos_db(container, user_input, response.content[0].text, "")
|
926 |
|
|
|
932 |
st.text_area("You said π¬:", chat["user"], height=100, disabled=True)
|
933 |
st.text_area("Claude replied π€:", chat["claude"], height=200, disabled=True)
|
934 |
st.markdown("---")
|
935 |
+
|
|
|
936 |
|
937 |
# π File editor - "Edit with care, save with flair"
|
938 |
if hasattr(st.session_state, 'current_file'):
|
|
|
949 |
file.write(new_content)
|
950 |
st.success("File updated successfully! π")
|
951 |
|
|
|
|
|
952 |
# π File management - "Manage many, maintain order"
|
953 |
st.sidebar.title("π File Management")
|
954 |
|
|
|
981 |
os.remove(file)
|
982 |
st.rerun()
|
983 |
|
|
|
|
|
984 |
except exceptions.CosmosHttpResponseError as e:
|
985 |
st.error(f"Failed to connect to Cosmos DB. HTTP error: {str(e)} π¨")
|
986 |
except Exception as e:
|
987 |
st.error(f"An unexpected error occurred: {str(e)} π±")
|
|
|
|
|
988 |
|
989 |
if st.session_state.logged_in and st.sidebar.button("πͺ Logout"):
|
990 |
Label = '# πͺ Logout - All good things must come to an end'
|