awacke1 committed on
Commit
e8f830c
·
verified ·
1 Parent(s): faff1a8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +358 -232
app.py CHANGED
@@ -1,40 +1,36 @@
1
  # app.py
2
  # =============================================================================
3
- # 🚀 IMPORTS
4
  # =============================================================================
5
- import base64 # ๐Ÿ”ฅ For encoding/decoding files
6
- import glob # ๐Ÿ” For file searching
7
- import hashlib # ๐Ÿ”’ For hashing
8
- import json # ๐Ÿงฎ For JSON handling
9
- import os # ๐Ÿ“ For OS interactions
10
- import pandas as pd # ๐Ÿผ For data frame support
11
- import pytz # โฐ For timezone management
12
- import random # ๐ŸŽฒ For randomness
13
- import re # ๐Ÿ” For regex operations
14
- import shutil # ๐Ÿ—‘๏ธ For file copying/removal
15
- import streamlit as st # ๐Ÿ’ป For the Streamlit UI
16
- import time # โณ For timing
17
- import traceback # ๐Ÿšจ For error traces
18
- import uuid # ๐Ÿ†” For unique ID generation
19
- import zipfile # ๐Ÿ“ฆ For archiving files
20
- from PIL import Image # ๐Ÿ–ผ๏ธ For image processing
21
- from azure.cosmos import CosmosClient, PartitionKey, exceptions # โ˜๏ธ For Cosmos DB operations
22
- from datetime import datetime # โฐ For timestamps
23
- from git import Repo # ๐Ÿ™ For Git operations
24
- from github import Github # ๐Ÿ”— For GitHub API interactions
25
- from gradio_client import Client, handle_file # ๐Ÿค– For Gradio video generation
26
- import tempfile # ๐Ÿ“ For temporary file handling
27
- import io # ๐Ÿ“ก For in-memory streams
28
- import requests # ๐ŸŒ For HTTP requests
29
- import numpy as np # ๐Ÿ”ข For numerical operations
30
- from urllib.parse import quote # ๐Ÿ”— For URL encoding
31
-
32
- # Allow nested asyncio.run calls (needed for our async TTS and Arxiv search)
33
- import nest_asyncio
34
- nest_asyncio.apply()
35
 
36
  # =============================================================================
37
- # 😎 EXTERNAL HELP LINKS (Always visible in sidebar)
38
  # =============================================================================
39
  external_links = [
40
  {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "๐Ÿ’ป"},
@@ -50,7 +46,7 @@ external_links = [
50
  ]
51
 
52
  # =============================================================================
53
- # 🎨 APP CONFIGURATION
54
  # =============================================================================
55
  Site_Name = '🐙 GitCosmos'
56
  title = "🐙 GitCosmos"
@@ -78,9 +74,8 @@ LOCAL_APP_URL = "https://huggingface.co/spaces/awacke1/AzureCosmosDBUI"
78
  CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
79
 
80
  # =============================================================================
81
- # 💾 HELPER FUNCTIONS
82
  # =============================================================================
83
- # 🔗 Get a download link for a file
84
  def get_download_link(file_path):
85
  with open(file_path, "rb") as file:
86
  contents = file.read()
@@ -88,7 +83,6 @@ def get_download_link(file_path):
88
  file_name = os.path.basename(file_path)
89
  return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} 📂</a>'
90
 
91
- # 🆔 Generate a unique ID
92
  def generate_unique_id():
93
  timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
94
  unique_uuid = str(uuid.uuid4())
@@ -96,27 +90,23 @@ def generate_unique_id():
96
  st.write('New ID: ' + return_value)
97
  return return_value
98
 
99
- # ๐Ÿ“ Generate a safe filename based on a prompt
100
  def generate_filename(prompt, file_type):
101
  central = pytz.timezone('US/Central')
102
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
103
  safe_prompt = re.sub(r'\W+', '', prompt)[:90]
104
  return f"{safe_date_time}{safe_prompt}.{file_type}"
105
 
106
- # ๐Ÿ“„ Create a file with given content
107
  def create_file(filename, prompt, response, should_save=True):
108
  if not should_save:
109
  return
110
  with open(filename, 'w', encoding='utf-8') as file:
111
  file.write(prompt + "\n\n" + response)
112
 
113
- # ๐Ÿ“‚ Load file contents
114
  def load_file(file_name):
115
  with open(file_name, "r", encoding='utf-8') as file:
116
  content = file.read()
117
  return content
118
 
119
- # ๐Ÿ”— Display a glossary entity with quick search links
120
  def display_glossary_entity(k):
121
  search_urls = {
122
  "๐Ÿš€": lambda k: f"/?q={k}",
@@ -127,7 +117,6 @@ def display_glossary_entity(k):
127
  links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
128
  st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
129
 
130
- # ๐Ÿ“ฆ Create a ZIP archive of given files
131
  def create_zip_of_files(files):
132
  zip_name = "all_files.zip"
133
  with zipfile.ZipFile(zip_name, 'w') as zipf:
@@ -135,7 +124,6 @@ def create_zip_of_files(files):
135
  zipf.write(file)
136
  return zip_name
137
 
138
- # ๐ŸŽฅ Get HTML to embed a video
139
  def get_video_html(video_path, width="100%"):
140
  video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
141
  return f'''
@@ -145,7 +133,6 @@ def get_video_html(video_path, width="100%"):
145
  </video>
146
  '''
147
 
148
- # ๐ŸŽต Get HTML to embed audio
149
  def get_audio_html(audio_path, width="100%"):
150
  audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
151
  return f'''
@@ -155,7 +142,6 @@ def get_audio_html(audio_path, width="100%"):
155
  </audio>
156
  '''
157
 
158
- # โœ‚๏ธ Preprocess text (e.g., for JSON safety)
159
  def preprocess_text(text):
160
  text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
161
  text = text.replace('"', '\\"')
@@ -164,7 +150,7 @@ def preprocess_text(text):
164
  return text.strip()
165
 
166
  # =============================================================================
167
- # โ˜๏ธ COSMOS DB FUNCTIONS
168
  # =============================================================================
169
  def get_databases(client):
170
  return [db['id'] for db in client.list_databases()]
@@ -269,7 +255,7 @@ def archive_current_container(database_name, container_name, client):
269
  return f"Archive error: {str(e)} 😢"
270
 
271
  # =============================================================================
272
- # ๐Ÿš€ ADVANCED COSMOS FUNCTIONS
273
  # =============================================================================
274
  def create_new_container(database, container_id, partition_key_path,
275
  analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
@@ -338,7 +324,7 @@ def vector_search(container, query_vector, vector_field, top=10, exact_search=Fa
338
  return results
339
 
340
  # =============================================================================
341
- # ๐Ÿ™ GITHUB FUNCTIONS
342
  # =============================================================================
343
  def download_github_repo(url, local_path):
344
  if os.path.exists(local_path):
@@ -371,7 +357,7 @@ def push_to_github(local_path, repo, github_token):
371
  origin.push(refspec=f'{current_branch}:{current_branch}')
372
 
373
  # =============================================================================
374
- # ๐Ÿ“ FILE & MEDIA MANAGEMENT FUNCTIONS
375
  # =============================================================================
376
  def display_saved_files_in_sidebar():
377
  all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
@@ -413,7 +399,11 @@ def display_file_editor(file_path):
413
  return
414
  st.markdown("### โœ๏ธ Edit File")
415
  st.markdown(f"**Editing:** {file_path}")
416
- new_content = st.text_area("Edit JSON", value=st.session_state.file_content[file_path], height=400, key="doc_editor", on_change=lambda: auto_save_edit())
 
 
 
 
417
  col1, col2 = st.columns([1, 5])
418
  with col1:
419
  if st.button("๐Ÿ’พ Save"):
@@ -495,34 +485,35 @@ def update_file_management_section():
495
  display_file_editor(st.session_state.current_file)
496
 
497
  # =============================================================================
498
- # โœจ SIDEBAR DATA GRID: Show all container records with formatted timestamp
499
  # =============================================================================
500
- def show_sidebar_data_grid(container):
501
- try:
502
- records = get_documents(container)
503
- # Build list of dicts with desired columns; sort descending by _ts or timestamp field
504
- data = []
505
- for rec in records:
506
- ts = rec.get("timestamp", "")
507
- try:
508
- dt = datetime.fromisoformat(ts)
509
- formatted = dt.strftime("%I:%M %p %m/%d/%Y")
510
- except Exception:
511
- formatted = ts
512
- data.append({
513
- "ID": rec.get("id", ""),
514
- "Name": rec.get("name", ""),
515
- "Timestamp": formatted
516
- })
517
- df = pd.DataFrame(data)
518
- # Already sorted by _ts descending from the query; display in sidebar
519
- st.sidebar.markdown("### 📊 Data Grid")
520
- st.sidebar.dataframe(df)
521
- except Exception as e:
522
- st.sidebar.error(f"Data grid error: {str(e)}")
 
523
 
524
  # =============================================================================
525
- # ๐ŸŽฅ VIDEO & AUDIO UI FUNCTIONS
526
  # =============================================================================
527
  def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
528
  try:
@@ -638,7 +629,58 @@ def add_video_generation_ui(container):
638
  st.error(f"Upload error: {str(e)}")
639
 
640
  # =============================================================================
641
- # ๐Ÿค– NEW ITEM & FIELD FUNCTIONS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
642
  # =============================================================================
643
  def new_item_default(container):
644
  new_id = generate_unique_id()
@@ -681,7 +723,7 @@ def add_field_to_doc():
681
  st.error(f"Error adding field: {str(e)}")
682
 
683
  # =============================================================================
684
- # ๐Ÿ” VECTOR SEARCH INTERFACE (Simple keyword search)
685
  # =============================================================================
686
  def vector_keyword_search(keyword, container):
687
  try:
@@ -693,7 +735,7 @@ def vector_keyword_search(keyword, container):
693
  return []
694
 
695
  # =============================================================================
696
- # ๐Ÿค– NEW AI MODALITY RECORD TEMPLATES
697
  # =============================================================================
698
  def new_ai_record(container):
699
  new_id = generate_unique_id()
@@ -737,7 +779,7 @@ def new_links_record(container):
737
  return None
738
 
739
  # =============================================================================
740
- # ๐Ÿค– LANGCHAIN FUNCTIONS (Witty emoji comments)
741
  # =============================================================================
742
  def display_langchain_functions():
743
  functions = [
@@ -750,37 +792,102 @@ def display_langchain_functions():
750
  st.sidebar.write(f"{func['name']}: {func['comment']}")
751
 
752
  # =============================================================================
753
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
754
- # NEW: SIDEBAR DATA GRID FUNCTION
755
  # =============================================================================
756
- def show_sidebar_data_grid():
757
- if st.session_state.get("current_container"):
758
- show_sidebar_data_grid.container = st.session_state.current_container
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
759
  try:
760
- records = get_documents(show_sidebar_data_grid.container)
761
- data = []
762
- for rec in records:
763
- ts = rec.get("timestamp", "")
764
- try:
765
- dt = datetime.fromisoformat(ts)
766
- formatted = dt.strftime("%I:%M %p %m/%d/%Y")
767
- except Exception:
768
- formatted = ts
769
- data.append({
770
- "ID": rec.get("id", ""),
771
- "Name": rec.get("name", ""),
772
- "Timestamp": formatted
773
- })
774
- df = pd.DataFrame(data)
775
- st.sidebar.markdown("### ๐Ÿ“Š Data Grid")
776
- st.sidebar.dataframe(df)
777
  except Exception as e:
778
- st.sidebar.error(f"Data grid error: {str(e)}")
779
- else:
780
- st.sidebar.info("No container selected for data grid.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
781
 
782
  # =============================================================================
783
- # ๐Ÿค– RESEARCH / ARXIV FUNCTIONS (Copied from second app code)
784
  # =============================================================================
785
  def parse_arxiv_refs(ref_text: str):
786
  if not ref_text:
@@ -848,7 +955,7 @@ def generate_5min_feature_markdown(paper: dict) -> str:
848
  pdf_link = generate_pdf_link(url)
849
  title_wc = len(title.split())
850
  summary_wc = len(summary.split())
851
- high_info_terms = [term for term in summary.split()[:5]] # simplified for demo
852
  terms_str = ", ".join(high_info_terms)
853
  rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
854
  mermaid_code = "```mermaid\nflowchart TD\n"
@@ -881,107 +988,80 @@ def create_detailed_paper_md(papers: list) -> str:
881
  return "\n".join(md_parts)
882
 
883
  # =============================================================================
884
- # ๐Ÿค– ASYNC TTS FUNCTIONS (from second app code)
 
885
  # =============================================================================
886
- import asyncio
887
- import edge_tts
888
- from streamlit_marquee import streamlit_marquee
889
- from collections import Counter
890
-
891
- class PerformanceTimer:
892
- def __init__(self, operation_name: str):
893
- self.operation_name = operation_name
894
- self.start_time = None
895
- def __enter__(self):
896
- self.start_time = time.time()
897
- return self
898
- def __exit__(self, exc_type, exc_val, exc_tb):
899
- pass
900
-
901
- async def async_edge_tts_generate(text: str, voice: str, rate: int = 0, pitch: int = 0, file_format: str = "mp3"):
902
- with PerformanceTimer("tts_generation") as timer:
903
- text = text.replace("\n", " ").strip()
904
- if not text:
905
- return None, 0
906
- cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
907
- if cache_key in st.session_state.get('audio_cache', {}):
908
- return st.session_state['audio_cache'][cache_key], 0
909
- try:
910
- rate_str = f"{rate:+d}%"
911
- pitch_str = f"{pitch:+d}Hz"
912
- communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
913
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
914
- filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
915
- await communicate.save(filename)
916
- st.session_state.setdefault('audio_cache', {})[cache_key] = filename
917
- return filename, time.time() - timer.start_time
918
- except Exception as e:
919
- st.error(f"Error generating audio: {str(e)}")
920
- return None, 0
921
-
922
- def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
923
- result = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
924
- if isinstance(result, tuple):
925
- return result[0]
926
- return result
927
-
928
- async def async_save_qa_with_audio(question: str, answer: str):
929
- with PerformanceTimer("qa_save") as timer:
930
- md_file = create_file(question, answer, "md")
931
- audio_file = None
932
- if st.session_state.get('enable_audio', True):
933
- audio_text = f"{question}\n\nAnswer: {answer}"
934
- audio_file, _ = await async_edge_tts_generate(audio_text, voice=st.session_state.get('tts_voice', "en-US-AriaNeural"), file_format=st.session_state.get('audio_format', "mp3"))
935
- return md_file, audio_file, time.time() - timer.start_time, 0
936
-
937
- def save_qa_with_audio(question, answer, voice=None):
938
- if not voice:
939
- voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
940
- md_file = create_file(question, answer, "md")
941
- audio_text = f"{question}\n\nAnswer: {answer}"
942
- audio_file = speak_with_edge_tts(audio_text, voice=voice, file_format=st.session_state.get('audio_format', "mp3"))
943
- return md_file, audio_file
944
-
945
- def play_and_download_audio(file_path, file_type="mp3"):
946
- if file_path and os.path.exists(file_path):
947
- st.audio(file_path)
948
- dl_link = get_download_link(file_path, file_type=file_type)
949
- st.markdown(dl_link, unsafe_allow_html=True)
950
-
951
- def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
952
- cache_key = f"dl_{file_path}"
953
- if cache_key in st.session_state.get('download_link_cache', {}):
954
- return st.session_state['download_link_cache'][cache_key]
955
- try:
956
- with open(file_path, "rb") as f:
957
- b64 = base64.b64encode(f.read()).decode()
958
- filename = os.path.basename(file_path)
959
- if file_type == "mp3":
960
- link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">๐ŸŽต Download {filename}</a>'
961
- elif file_type == "wav":
962
- link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">๐Ÿ”Š Download {filename}</a>'
963
- elif file_type == "md":
964
- link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">๐Ÿ“ Download {filename}</a>'
965
  else:
966
- link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">Download {filename}</a>'
967
- st.session_state.setdefault('download_link_cache', {})[cache_key] = link
968
- return link
969
- except Exception as e:
970
- st.error(f"Error creating download link: {str(e)}")
971
- return ""
 
 
 
 
 
 
972
 
973
  # =============================================================================
974
- # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
975
- # MAIN FUNCTION
976
  # =============================================================================
977
  def main():
978
- # Friendly portal link
979
  st.markdown(f"[🔗 Portal]({CosmosDBUrl})")
980
- # Initialize some session state keys if not already present
981
  if "chat_history" not in st.session_state:
982
  st.session_state.chat_history = []
983
  st.session_state.setdefault("current_container", None)
984
- # Sidebar: New Item, Add Field, New AI Record, New Links Record, and Vector Search
 
 
 
 
 
985
  st.sidebar.markdown("## 🛠️ Item Management")
986
  if st.sidebar.button("New Item"):
987
  if st.session_state.get("current_container"):
@@ -1017,17 +1097,15 @@ def main():
1017
  st.sidebar.code(json.dumps(res, indent=2), language="json")
1018
  else:
1019
  st.warning("No container selected for search!")
1020
- # Show the sidebar data grid with records
1021
  show_sidebar_data_grid()
1022
- # Display Langchain functions in sidebar
1023
  display_langchain_functions()
1024
- # Navigator: Container selection and data grid
1025
  try:
1026
  if st.session_state.get("client") is None:
1027
  st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
1028
  st.sidebar.title("🐙 Navigator")
1029
  databases = get_databases(st.session_state.client)
1030
  selected_db = st.sidebar.selectbox("🗃️ DB", databases)
 
1031
  if selected_db != st.session_state.get("selected_database"):
1032
  st.session_state.selected_database = selected_db
1033
  st.session_state.selected_container = None
@@ -1048,12 +1126,7 @@ def main():
1048
  submitted = st.form_submit_button("Create Container")
1049
  if submitted:
1050
  analytical_ttl = -1 if new_analytical else None
1051
- new_container = create_new_container(
1052
- database,
1053
- new_container_id,
1054
- new_partition_key,
1055
- analytical_storage_ttl=analytical_ttl
1056
- )
1057
  if new_container:
1058
  st.success(f"Container '{new_container_id}' created.")
1059
  default_id = generate_unique_id()
@@ -1157,7 +1230,6 @@ def main():
1157
  st.write(log_entry)
1158
  elif selected_view == 'Run AI':
1159
  st.markdown("#### 🤖 Run AI")
1160
- # NEW: Use a text area and a Send button (message button UI)
1161
  ai_query = st.text_area("Enter your query for ArXiv search:", key="arxiv_query", height=100)
1162
  if st.button("Send"):
1163
  st.session_state.last_query = ai_query
@@ -1258,20 +1330,14 @@ def main():
1258
  st.session_state.selected_document_id = None
1259
  st.session_state.current_index = 0
1260
  st.rerun()
1261
-
1262
- # Also display the sidebar data grid (records overview)
1263
  show_sidebar_data_grid()
1264
 
 
 
 
 
1265
  # =============================================================================
1266
- # Additional Blank Lines for Spacing (~1500 lines total)
1267
- # =============================================================================
1268
- #
1269
- #
1270
- #
1271
- #
1272
- #
1273
- #
1274
- #
1275
  #
1276
  #
1277
  #
@@ -1396,18 +1462,78 @@ def main():
1396
  #
1397
  #
1398
  #
1399
- #
1400
- #
1401
- #
1402
- #
1403
- #
1404
- #
1405
- #
1406
- #
1407
- #
1408
- #
1409
- #
1410
- #
1411
- #
1412
- #
1413
- # End of app.py
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # app.py
2
  # =============================================================================
3
+ # ───────────── IMPORTS ─────────────
4
  # =============================================================================
5
+ import base64
6
+ import glob
7
+ import hashlib
8
+ import json
9
+ import os
10
+ import pandas as pd
11
+ import pytz
12
+ import random
13
+ import re
14
+ import shutil
15
+ import streamlit as st
16
+ import time
17
+ import traceback
18
+ import uuid
19
+ import zipfile
20
+ from PIL import Image
21
+ from azure.cosmos import CosmosClient, PartitionKey, exceptions
22
+ from datetime import datetime
23
+ from git import Repo
24
+ from github import Github
25
+ from gradio_client import Client, handle_file
26
+ import tempfile
27
+ import io
28
+ import requests
29
+ import numpy as np
30
+ from urllib.parse import quote
 
 
 
 
31
 
32
  # =============================================================================
33
+ # ───────────── EXTERNAL HELP LINKS (Always visible in sidebar) ─────────────
34
  # =============================================================================
35
  external_links = [
36
  {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "๐Ÿ’ป"},
 
46
  ]
47
 
48
  # =============================================================================
49
+ # ───────────── APP CONFIGURATION ─────────────
50
  # =============================================================================
51
  Site_Name = '🐙 GitCosmos'
52
  title = "🐙 GitCosmos"
 
74
  CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
75
 
76
  # =============================================================================
77
+ # ───────────── HELPER FUNCTIONS ─────────────
78
  # =============================================================================
 
79
  def get_download_link(file_path):
80
  with open(file_path, "rb") as file:
81
  contents = file.read()
 
83
  file_name = os.path.basename(file_path)
84
  return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} 📂</a>'
85
 
 
86
  def generate_unique_id():
87
  timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
88
  unique_uuid = str(uuid.uuid4())
 
90
  st.write('New ID: ' + return_value)
91
  return return_value
92
 
 
93
  def generate_filename(prompt, file_type):
94
  central = pytz.timezone('US/Central')
95
  safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
96
  safe_prompt = re.sub(r'\W+', '', prompt)[:90]
97
  return f"{safe_date_time}{safe_prompt}.{file_type}"
98
 
 
99
  def create_file(filename, prompt, response, should_save=True):
100
  if not should_save:
101
  return
102
  with open(filename, 'w', encoding='utf-8') as file:
103
  file.write(prompt + "\n\n" + response)
104
 
 
105
  def load_file(file_name):
106
  with open(file_name, "r", encoding='utf-8') as file:
107
  content = file.read()
108
  return content
109
 
 
110
  def display_glossary_entity(k):
111
  search_urls = {
112
  "๐Ÿš€": lambda k: f"/?q={k}",
 
117
  links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
118
  st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
119
 
 
120
  def create_zip_of_files(files):
121
  zip_name = "all_files.zip"
122
  with zipfile.ZipFile(zip_name, 'w') as zipf:
 
124
  zipf.write(file)
125
  return zip_name
126
 
 
127
  def get_video_html(video_path, width="100%"):
128
  video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
129
  return f'''
 
133
  </video>
134
  '''
135
 
 
136
  def get_audio_html(audio_path, width="100%"):
137
  audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
138
  return f'''
 
142
  </audio>
143
  '''
144
 
 
145
  def preprocess_text(text):
146
  text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
147
  text = text.replace('"', '\\"')
 
150
  return text.strip()
151
 
152
  # =============================================================================
153
+ # ───────────── COSMOS DB FUNCTIONS ─────────────
154
  # =============================================================================
155
  def get_databases(client):
156
  return [db['id'] for db in client.list_databases()]
 
255
  return f"Archive error: {str(e)} 😢"
256
 
257
  # =============================================================================
258
+ # ───────────── ADVANCED COSMOS FUNCTIONS ─────────────
259
  # =============================================================================
260
  def create_new_container(database, container_id, partition_key_path,
261
  analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
 
324
  return results
325
 
326
  # =============================================================================
327
+ # ───────────── GITHUB FUNCTIONS ─────────────
328
  # =============================================================================
329
  def download_github_repo(url, local_path):
330
  if os.path.exists(local_path):
 
357
  origin.push(refspec=f'{current_branch}:{current_branch}')
358
 
359
  # =============================================================================
360
+ # ───────────── FILE & MEDIA MANAGEMENT FUNCTIONS ─────────────
361
  # =============================================================================
362
  def display_saved_files_in_sidebar():
363
  all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
 
399
  return
400
  st.markdown("### โœ๏ธ Edit File")
401
  st.markdown(f"**Editing:** {file_path}")
402
+ md_tab, code_tab = st.tabs(["Markdown", "Code"])
403
+ with md_tab:
404
+ st.markdown(st.session_state.file_content[file_path])
405
+ with code_tab:
406
+ new_content = st.text_area("Edit:", value=st.session_state.file_content[file_path], height=400, key=f"editor_{hash(file_path)}", on_change=lambda: auto_save_edit())
407
  col1, col2 = st.columns([1, 5])
408
  with col1:
409
  if st.button("๐Ÿ’พ Save"):
 
485
  display_file_editor(st.session_state.current_file)
486
 
487
  # =============================================================================
488
+ # ───────────── SIDEBAR DATA GRID (Records with formatted timestamps) ─────────────
489
  # =============================================================================
490
+ def show_sidebar_data_grid():
491
+ if st.session_state.get("current_container"):
492
+ try:
493
+ records = get_documents(st.session_state.current_container)
494
+ data = []
495
+ for rec in records:
496
+ ts = rec.get("timestamp", "")
497
+ try:
498
+ dt = datetime.fromisoformat(ts)
499
+ formatted = dt.strftime("%I:%M %p %m/%d/%Y")
500
+ except Exception:
501
+ formatted = ts
502
+ data.append({
503
+ "ID": rec.get("id", ""),
504
+ "Name": rec.get("name", ""),
505
+ "Timestamp": formatted
506
+ })
507
+ df = pd.DataFrame(data)
508
+ st.sidebar.markdown("### 📊 Data Grid")
509
+ st.sidebar.dataframe(df)
510
+ except Exception as e:
511
+ st.sidebar.error(f"Data grid error: {str(e)}")
512
+ else:
513
+ st.sidebar.info("No container selected for data grid.")
514
 
515
  # =============================================================================
516
+ # ───────────── VIDEO & AUDIO UI FUNCTIONS ─────────────
517
  # =============================================================================
518
  def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
519
  try:
 
629
  st.error(f"Upload error: {str(e)}")
630
 
631
  # =============================================================================
632
+ # ───────────── AI SAMPLES SIDEBAR (Processed as a Python List) ─────────────
633
+ # =============================================================================
634
def display_ai_samples():
    """Render the "AI Samples" picker in the Streamlit sidebar.

    Shows a heading and an info banner, lets the user choose one of the
    built-in sample queries from a select box, and then displays the chosen
    sample's description and query text. Samples whose name contains
    "FullText" are highlighted as SQL; all others as Python.
    """
    # (name, description, query) triples, in the order shown in the select box.
    sample_rows = (
        (
            "FullTextContains",
            "Query using FullTextContains",
            'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "bicycle")',
        ),
        (
            "FullTextContainsAll",
            "Query using FullTextContainsAll",
            'SELECT TOP 10 * FROM c WHERE FullTextContainsAll(c.text, "red", "bicycle")',
        ),
        (
            "FullTextContainsAny",
            "Query using FullTextContainsAny",
            'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "red") AND FullTextContainsAny(c.text, "bicycle", "skateboard")',
        ),
        (
            "FullTextScore",
            "Query using FullTextScore (order by relevance)",
            'SELECT TOP 10 * FROM c ORDER BY RANK FullTextScore(c.text, ["bicycle", "mountain"])',
        ),
        (
            "Vector Search with Score",
            "Example vector search snippet",
            'results = vector_search.similarity_search_with_score(query="Your query", k=5)\nfor result, score in results:\n print(result.json(), score)',
        ),
        (
            "Vector Search with Filtering",
            "Example vector search with a filter",
            'pre_filter = {"conditions": [{"property": "metadata.page", "operator": "$eq", "value": 0}]}\nresults = vector_search.similarity_search_with_score(query="Your query", k=5, pre_filter=pre_filter)',
        ),
        (
            "Hybrid Search",
            "Example hybrid search snippet",
            'results = vector_search.similarity_search_with_score(query="Your query", k=5, query_type=CosmosDBQueryType.HYBRID)',
        ),
    )
    # Name-keyed lookup; dict insertion order keeps the select box order stable.
    catalog = {name: (description, query) for name, description, query in sample_rows}

    st.sidebar.markdown("### ๐Ÿค– AI Samples")
    st.sidebar.info("๐Ÿš€ Get started with our AI samples! Time free access to get started today.")

    chosen_name = st.sidebar.selectbox("Select an AI Sample", list(catalog))
    entry = catalog.get(chosen_name)
    if not entry:
        return

    description, query = entry
    st.sidebar.markdown(f"**{chosen_name}**: {description}")
    if "FullText" in chosen_name:
        lang = "sql"
    else:
        lang = "python"
    st.sidebar.code(query, language=lang)
681
+
682
+ # =============================================================================
683
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ NEW ITEM & FIELD FUNCTIONS
684
  # =============================================================================
685
  def new_item_default(container):
686
  new_id = generate_unique_id()
 
723
  st.error(f"Error adding field: {str(e)}")
724
 
725
  # =============================================================================
726
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ VECTOR SEARCH INTERFACE (Simple keyword search)
727
  # =============================================================================
728
  def vector_keyword_search(keyword, container):
729
  try:
 
735
  return []
736
 
737
  # =============================================================================
738
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ NEW AI MODALITY RECORD TEMPLATES
739
  # =============================================================================
740
  def new_ai_record(container):
741
  new_id = generate_unique_id()
 
779
  return None
780
 
781
  # =============================================================================
782
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ LANGCHAIN FUNCTIONS (Witty emoji comments)
783
  # =============================================================================
784
  def display_langchain_functions():
785
  functions = [
 
792
  st.sidebar.write(f"{func['name']}: {func['comment']}")
793
 
794
  # =============================================================================
795
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ OPTIONAL: SIDEBAR DATA GRID (Records with formatted timestamps)
 
796
  # =============================================================================
797
+ # (This feature is now integrated above via show_sidebar_data_grid().)
798
+
799
+ # =============================================================================
800
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ ASYNC TTS & ARXIV FUNCTIONS (Optional Features)
801
+ # =============================================================================
802
+ import asyncio
803
+ import edge_tts
804
+ from streamlit_marquee import streamlit_marquee
805
+ from collections import Counter
806
+
807
class PerformanceTimer:
    """Context manager that measures the wall-clock duration of a block.

    Attributes:
        operation_name: Label supplied by the caller for the timed operation.
        start_time: ``time.time()`` captured on entry; ``None`` before entry.
        end_time: ``time.time()`` captured on exit; ``None`` until the block
            has finished.
        elapsed: Seconds between entry and exit; ``None`` until the block has
            finished.

    Exceptions raised inside the ``with`` block are never suppressed.
    """

    def __init__(self, operation_name: str):
        self.operation_name = operation_name
        self.start_time = None
        self.end_time = None
        self.elapsed = None

    def __enter__(self):
        # start_time stays a time.time() value: existing callers compute
        # ``time.time() - timer.start_time`` themselves.
        self.start_time = time.time()
        # Reset results so a reused timer never reports a stale duration.
        self.end_time = None
        self.elapsed = None
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Record the duration even when the block raised.
        self.end_time = time.time()
        self.elapsed = self.end_time - self.start_time
        return False
816
+
817
async def async_edge_tts_generate(text: str, voice: str, rate: int = 0, pitch: int = 0, file_format: str = "mp3"):
    """Synthesize ``text`` to an audio file with edge-tts, caching by content.

    Args:
        text: Text to speak. Newlines are replaced by spaces and the result
            is stripped; empty or ``None`` text produces no audio.
        voice: edge-tts voice name.
        rate: Speaking-rate offset, formatted as a signed percentage.
        pitch: Pitch offset, formatted as a signed Hz value.
        file_format: Extension used for the generated file name.

    Returns:
        A ``(filename, seconds)`` tuple. ``filename`` is ``None`` when there
        is nothing to speak or generation failed. ``seconds`` is the
        generation time, or ``0`` for cache hits, empty input and failures.
    """
    import hashlib  # local import keeps this block self-contained

    with PerformanceTimer("tts_generation") as timer:
        text = (text or "").replace("\n", " ").strip()
        if not text:
            return None, 0

        # Key on a digest of the FULL text: keying on a 100-character prefix
        # made different texts with a shared opening reuse the wrong audio.
        text_digest = hashlib.sha256(text.encode("utf-8")).hexdigest()
        cache_key = f"{text_digest}_{voice}_{rate}_{pitch}_{file_format}"

        cached_file = st.session_state.get('audio_cache', {}).get(cache_key)
        # Only trust the cache if the file still exists on disk.
        if cached_file and os.path.exists(cached_file):
            return cached_file, 0

        try:
            rate_str = f"{rate:+d}%"
            pitch_str = f"{pitch:+d}Hz"
            communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            # Random suffix avoids collisions between files created within
            # the same second.
            filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
            await communicate.save(filename)
            st.session_state.setdefault('audio_cache', {})[cache_key] = filename
            return filename, time.time() - timer.start_time
        except Exception as e:
            # Best-effort feature: report in the UI and signal "no audio".
            st.error(f"Error generating audio: {str(e)}")
            return None, 0
837
+
838
def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
    """Blocking wrapper around ``async_edge_tts_generate``.

    Runs the coroutine to completion with ``asyncio.run`` and returns only
    the first element when the coroutine yields a tuple; any non-tuple
    result is passed through unchanged.
    """
    outcome = asyncio.run(
        async_edge_tts_generate(text, voice, rate, pitch, file_format)
    )
    return outcome[0] if isinstance(outcome, tuple) else outcome
843
+
844
async def async_save_qa_with_audio(question: str, answer: str):
    """Save a question/answer pair as markdown and, optionally, as audio.

    The markdown file is always written through ``create_file``. Audio is
    generated only when the ``enable_audio`` session flag is truthy (it
    defaults to ``True``), using the session's ``tts_voice`` and
    ``audio_format`` settings.

    Returns:
        ``(md_file, audio_file, elapsed_seconds, 0)``; ``audio_file`` is
        ``None`` when audio is disabled.
    """
    with PerformanceTimer("qa_save") as stopwatch:
        markdown_path = create_file(question, answer, "md")
        if not st.session_state.get('enable_audio', True):
            audio_path = None
        else:
            spoken_text = f"{question}\n\nAnswer: {answer}"
            audio_path, _ = await async_edge_tts_generate(
                spoken_text,
                voice=st.session_state.get('tts_voice', "en-US-AriaNeural"),
                file_format=st.session_state.get('audio_format', "mp3"),
            )
        duration = time.time() - stopwatch.start_time
        return markdown_path, audio_path, duration, 0
852
+
853
def save_qa_with_audio(question, answer, voice=None):
    """Write a Q&A pair to markdown and synthesize a spoken version of it.

    When ``voice`` is falsy the session's ``tts_voice`` is used, falling back
    to "en-US-AriaNeural". The audio extension comes from the session's
    ``audio_format`` (default "mp3").

    Returns:
        ``(md_file, audio_file)`` as returned by ``create_file`` and
        ``speak_with_edge_tts``.
    """
    chosen_voice = voice if voice else st.session_state.get('tts_voice', "en-US-AriaNeural")
    markdown_path = create_file(question, answer, "md")
    audio_path = speak_with_edge_tts(
        f"{question}\n\nAnswer: {answer}",
        voice=chosen_voice,
        file_format=st.session_state.get('audio_format', "mp3"),
    )
    return markdown_path, audio_path
860
+
861
def play_and_download_audio(file_path, file_type="mp3"):
    """Show an audio player and a download link for ``file_path``.

    Does nothing when ``file_path`` is falsy or the file does not exist.
    """
    if not file_path or not os.path.exists(file_path):
        return
    st.audio(file_path)
    st.markdown(
        get_download_link(file_path, file_type=file_type),
        unsafe_allow_html=True,
    )
866
+
867
def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
    """Build (and cache) an HTML download link that embeds a file as base64.

    Args:
        file_path: Path of the file to embed in a ``data:`` URL.
        file_type: One of "mp3", "wav" or "md"; any other value falls back
            to ``application/octet-stream`` with a plain "Download" label.

    Returns:
        An ``<a>`` element as a string, or ``""`` if the file could not be
        read (the error is reported with ``st.error``).
    """
    import html  # local import keeps this block self-contained

    # Include file_type in the key: the same path requested with a different
    # type must not reuse a link that carries the wrong MIME type.
    cache_key = f"dl_{file_type}_{file_path}"
    cached_link = st.session_state.get('download_link_cache', {}).get(cache_key)
    if cached_link is not None:
        return cached_link

    # file_type -> (MIME type, label prefix shown before "Download").
    link_styles = {
        "mp3": ("audio/mpeg", "๐ŸŽต "),
        "wav": ("audio/wav", "๐Ÿ”Š "),
        "md": ("text/markdown", "๐Ÿ“ "),
    }
    mime_type, label_prefix = link_styles.get(file_type, ("application/octet-stream", ""))

    try:
        with open(file_path, "rb") as f:
            b64 = base64.b64encode(f.read()).decode()
        # Escape the name so quotes or angle brackets in a file name cannot
        # break out of the attribute or inject markup.
        filename = html.escape(os.path.basename(file_path), quote=True)
        link = f'<a href="data:{mime_type};base64,{b64}" download="{filename}">{label_prefix}Download {filename}</a>'
        st.session_state.setdefault('download_link_cache', {})[cache_key] = link
        return link
    except Exception as e:
        # Best-effort UI helper: surface the problem and return no link.
        st.error(f"Error creating download link: {str(e)}")
        return ""
888
 
889
  # =============================================================================
890
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ RESEARCH / ARXIV FUNCTIONS (Optional Features)
891
  # =============================================================================
892
  def parse_arxiv_refs(ref_text: str):
893
  if not ref_text:
 
955
  pdf_link = generate_pdf_link(url)
956
  title_wc = len(title.split())
957
  summary_wc = len(summary.split())
958
+ high_info_terms = [term for term in summary.split()[:5]]
959
  terms_str = ", ".join(high_info_terms)
960
  rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
961
  mermaid_code = "```mermaid\nflowchart TD\n"
 
988
  return "\n".join(md_parts)
989
 
990
  # =============================================================================
991
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
992
+ # MAIN AI LOOKUP FUNCTION (Optional Features)
993
  # =============================================================================
994
def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
    """Answer ``q`` with Claude, optionally enrich it with an ArXiv RAG search.

    Steps:
      1. Send ``q`` to Claude, display the reply, save it as markdown and
         audio, and show an audio player.
      2. If ``useArxiv`` is true, append Claude's reply to ``q``, query the
         ArXiv RAG Gradio space, save/play the combined result, and render
         paper links and details when references can be parsed.
      3. Ask Claude for a Streamlit app based on the (possibly extended)
         query and display that reply.

    Args:
        q: The user's question.
        vocal_summary, extended_refs, titles_summary, full_audio: Accepted
            for interface compatibility; not used by this function.
        useArxiv: Run the ArXiv RAG step.
        useArxivAudio: Run the TTS call inside the papers branch.

    Returns:
        Claude's first reply, or the "query + references" text when the
        ArXiv step ran.
    """
    import anthropic

    start = time.time()
    # NOTE(review): this prompt is defined but not sent to the model.
    ai_constitution = """
    You are a medical and machine learning review board expert...
    """

    # 1) Claude API call. Keep this client in its own variable: the ArXiv
    # step below creates a Gradio client, and sharing one name made the final
    # Claude call run against the Gradio client, which has no `messages` API.
    claude_client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY_3"))
    response = claude_client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": q}]
    )
    result = response.content[0].text
    st.write("Claude's reply ๐Ÿง :")
    st.markdown(result)
    create_file(q, result, "md")
    _, audio_file = save_qa_with_audio(q, result)
    st.subheader("๐Ÿ“ Main Response Audio")
    play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))

    if useArxiv:
        # 2) Feed the question plus Claude's answer to the ArXiv RAG space.
        q = q + result
        st.write('Running Arxiv RAG with Claude inputs.')
        arxiv_client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
        refs = arxiv_client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
        result = f"๐Ÿ”Ž {q}\n\n{refs}"
        _, audio_file = save_qa_with_audio(q, result)
        st.subheader("๐Ÿ“ Main Response Audio")
        play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))

        papers = parse_arxiv_refs(refs)
        if papers:
            paper_links = create_paper_links_md(papers)
            create_file(q, paper_links, "md")
            st.markdown(paper_links)
            detailed_md = create_detailed_paper_md(papers)
            create_file(q, detailed_md, "md")
            st.markdown(detailed_md)
            if useArxivAudio:
                asyncio.run(async_edge_tts_generate("Sample text", st.session_state.get('tts_voice', "en-US-AriaNeural")))
            st.write("Displaying Papers:")
            # (Optional: call functions to display papers)
        else:
            st.warning("No papers found.")

    # 3) Follow-up Claude call, always on the Anthropic client.
    response2 = claude_client.messages.create(
        model="claude-3-sonnet-20240229",
        max_tokens=1000,
        messages=[{"role": "user", "content": q + '\n\nUse the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with working code.'}]
    )
    r2 = response2.content[0].text
    st.write("Claude's reply ๐Ÿง :")
    st.markdown(r2)

    elapsed = time.time() - start
    st.write(f"**Total Elapsed:** {elapsed:.2f} s")
    return result
1049
 
1050
  # =============================================================================
1051
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ MAIN FUNCTION โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
1052
  # =============================================================================
1053
  def main():
1054
+ st.markdown("### ๐Ÿ™ GitCosmos - Cosmos & Git Hub")
1055
  st.markdown(f"[๐Ÿ”— Portal]({CosmosDBUrl})")
 
1056
  if "chat_history" not in st.session_state:
1057
  st.session_state.chat_history = []
1058
  st.session_state.setdefault("current_container", None)
1059
+ if Key:
1060
+ st.session_state.primary_key = Key
1061
+ st.session_state.logged_in = True
1062
+ else:
1063
+ st.error("Missing Cosmos Key ๐Ÿ”‘โŒ")
1064
+ return
1065
  st.sidebar.markdown("## ๐Ÿ› ๏ธ Item Management")
1066
  if st.sidebar.button("New Item"):
1067
  if st.session_state.get("current_container"):
 
1097
  st.sidebar.code(json.dumps(res, indent=2), language="json")
1098
  else:
1099
  st.warning("No container selected for search!")
 
1100
  show_sidebar_data_grid()
 
1101
  display_langchain_functions()
 
1102
  try:
1103
  if st.session_state.get("client") is None:
1104
  st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
1105
  st.sidebar.title("๐Ÿ™ Navigator")
1106
  databases = get_databases(st.session_state.client)
1107
  selected_db = st.sidebar.selectbox("๐Ÿ—ƒ๏ธ DB", databases)
1108
+ st.markdown(CosmosDBUrl)
1109
  if selected_db != st.session_state.get("selected_database"):
1110
  st.session_state.selected_database = selected_db
1111
  st.session_state.selected_container = None
 
1126
  submitted = st.form_submit_button("Create Container")
1127
  if submitted:
1128
  analytical_ttl = -1 if new_analytical else None
1129
+ new_container = create_new_container(database, new_container_id, new_partition_key, analytical_storage_ttl=analytical_ttl)
 
 
 
 
 
1130
  if new_container:
1131
  st.success(f"Container '{new_container_id}' created.")
1132
  default_id = generate_unique_id()
 
1230
  st.write(log_entry)
1231
  elif selected_view == 'Run AI':
1232
  st.markdown("#### ๐Ÿค– Run AI")
 
1233
  ai_query = st.text_area("Enter your query for ArXiv search:", key="arxiv_query", height=100)
1234
  if st.button("Send"):
1235
  st.session_state.last_query = ai_query
 
1330
  st.session_state.selected_document_id = None
1331
  st.session_state.current_index = 0
1332
  st.rerun()
 
 
1333
  show_sidebar_data_grid()
1334
 
1335
+ if __name__ == "__main__":
1336
+ main()
1337
+
1338
+
1339
  # =============================================================================
1340
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ Additional Blank Lines for Spacing (~1500 lines total) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
 
 
 
 
 
 
 
 
1341
  #
1342
  #
1343
  #
 
1462
  #
1463
  #
1464
  #
1465
+ # =============================================================================
1466
+ # โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€ OPTIONAL FEATURES (New RunAI / Arxiv Search & Voice UI) โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€
1467
+ # The following block includes the optional ARXIV/RunAI functions (copied from the second app).
1468
+ # Uncomment and enable as desired.
1469
+ #
1470
+ # import streamlit as st
1471
+ # import anthropic
1472
+ # import openai
1473
+ # import base64
1474
+ # import cv2
1475
+ # import glob
1476
+ # import json
1477
+ # import math
1478
+ # import os
1479
+ # import pytz
1480
+ # import random
1481
+ # import re
1482
+ # import requests
1483
+ # # import textract
1484
+ # import time
1485
+ # import zipfile
1486
+ # import plotly.graph_objects as go
1487
+ # import streamlit.components.v1 as components
1488
+ # from datetime import datetime
1489
+ # from audio_recorder_streamlit import audio_recorder
1490
+ # from bs4 import BeautifulSoup
1491
+ # from collections import defaultdict, deque, Counter
1492
+ # from dotenv import load_dotenv
1493
+ # from gradio_client import Client
1494
+ # from huggingface_hub import InferenceClient
1495
+ # from io import BytesIO
1496
+ # from PIL import Image
1497
+ # from PyPDF2 import PdfReader
1498
+ # from urllib.parse import quote
1499
+ # from xml.etree import ElementTree as ET
1500
+ # from openai import OpenAI
1501
+ # import extra_streamlit_components as stx
1502
+ # from streamlit.runtime.scriptrunner import get_script_run_ctx
1503
+ # import asyncio
1504
+ # import edge_tts
1505
+ # from streamlit_marquee import streamlit_marquee
1506
+ # from typing import Tuple, Optional
1507
+ # import pandas as pd
1508
+ #
1509
+ # import nest_asyncio
1510
+ # nest_asyncio.apply()
1511
+ #
1512
+ # st.set_page_config(
1513
+ # page_title="๐ŸšฒTalkingAIResearcher๐Ÿ†",
1514
+ # page_icon="๐Ÿšฒ๐Ÿ†",
1515
+ # layout="wide",
1516
+ # initial_sidebar_state="auto",
1517
+ # menu_items={
1518
+ # 'Get Help': 'https://huggingface.co/awacke1',
1519
+ # 'Report a bug': 'https://huggingface.co/spaces/awacke1',
1520
+ # 'About': "๐ŸšฒTalkingAIResearcher๐Ÿ†"
1521
+ # }
1522
+ # )
1523
+ # load_dotenv()
1524
+ #
1525
+ # EDGE_TTS_VOICES = [
1526
+ # "en-US-AriaNeural",
1527
+ # "en-US-GuyNeural",
1528
+ # "en-US-JennyNeural",
1529
+ # "en-GB-SoniaNeural",
1530
+ # "en-GB-RyanNeural",
1531
+ # "en-AU-NatashaNeural",
1532
+ # "en-AU-WilliamNeural",
1533
+ # "en-CA-ClaraNeural",
1534
+ # "en-CA-LiamNeural"
1535
+ # ]
1536
+ #
1537
+ # # (Plus additional setup and functions as shown in the snippet above.)
1538
+ #
1539
+ # End of optional features block.