awacke1 committed on
Commit 1353357 · verified · 1 Parent(s): e0cd939

Create app18.py

Files changed (1)
  1. app18.py +1337 -0
app18.py ADDED
@@ -0,0 +1,1337 @@
1
+ # app.py
2
+ # =============================================================================
3
+ # ───────────── IMPORTS ─────────────
4
+ # =============================================================================
5
+ import base64
6
+ import glob
7
+ import hashlib
8
+ import json
9
+ import os
10
+ import pandas as pd
11
+ import pytz
12
+ import random
13
+ import re
14
+ import shutil
15
+ import streamlit as st
16
+ import time
17
+ import traceback
18
+ import uuid
19
+ import zipfile
20
+ from PIL import Image
21
+ from azure.cosmos import CosmosClient, PartitionKey, exceptions
22
+ from datetime import datetime
23
+ from git import Repo
24
+ from github import Github
25
+ from gradio_client import Client, handle_file
26
+ import tempfile
27
+ import io
28
+ import requests
29
+ import numpy as np
30
+ from urllib.parse import quote
31
+
32
+ # =============================================================================
33
+ # ───────────── EXTERNAL HELP LINKS (Always visible in sidebar) ─────────────
34
+ # =============================================================================
35
+ external_links = [
36
+ {"title": "CosmosDB GenAI Full Text Search", "url": "https://learn.microsoft.com/en-us/azure/cosmos-db/gen-ai/full-text-search", "emoji": "💻"},
37
+ {"title": "CosmosDB SQL API Client Library", "url": "https://learn.microsoft.com/en-us/python/api/overview/azure/cosmos-readme?view=azure-python", "emoji": "💻"},
38
+ {"title": "CosmosDB Index and Query Vectors", "url": "https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/how-to-python-vector-index-query", "emoji": "💻"},
39
+ {"title": "CosmosDB NoSQL Materialized Views", "url": "https://learn.microsoft.com/en-us/azure/cosmos-db/nosql/materialized-views", "emoji": "💻"},
40
+ {"title": "LangChain Vector Store Guide", "url": "https://python.langchain.com/docs/integrations/vectorstores/azure_cosmos_db_no_sql/", "emoji": "💻"},
41
+ {"title": "Vector Database Prompt Engineering RAG for Python", "url": "https://learn.microsoft.com/en-us/azure/cosmos-db/vector-database?source=recommendations", "emoji": "💻"},
42
+ {"title": "MergeKit Official GitHub", "url": "https://github.com/arcee-ai/MergeKit", "emoji": "💻"},
43
+ {"title": "MergeKit Sample Usage", "url": "https://github.com/arcee-ai/MergeKit#examples", "emoji": "📚"},
44
+ {"title": "DistillKit Official GitHub", "url": "https://github.com/arcee-ai/DistillKit", "emoji": "💻"},
45
+ {"title": "DistillKit Sample Usage", "url": "https://github.com/arcee-ai/DistillKit#usage", "emoji": "📚"},
46
+ {"title": "arcee.ai Official Website", "url": "https://arcee.ai", "emoji": "🌐"},
47
+ ]
48
+
49
+ # =============================================================================
50
+ # ───────────── APP CONFIGURATION ─────────────
51
+ # =============================================================================
52
+ Site_Name = '🐙 GitCosmos'
53
+ title = "🐙 GitCosmos"
54
+ helpURL = 'https://huggingface.co/awacke1'
55
+ bugURL = 'https://huggingface.co/spaces/awacke1/AzureCosmosDBUI/'
56
+ icons = '🐙🌌💫'
57
+ st.set_page_config(
58
+ page_title=title,
59
+ page_icon=icons,
60
+ layout="wide",
61
+ initial_sidebar_state="auto",
62
+ menu_items={
63
+ 'Get Help': helpURL,
64
+ 'Report a bug': bugURL,
65
+ 'About': title
66
+ }
67
+ )
68
+
69
+ # Cosmos DB & App URLs
70
+ ENDPOINT = "https://acae-afd.documents.azure.com:443/"
71
+ DATABASE_NAME = os.environ.get("COSMOS_DATABASE_NAME")
72
+ CONTAINER_NAME = os.environ.get("COSMOS_CONTAINER_NAME")
73
+ Key = os.environ.get("Key")
74
+ LOCAL_APP_URL = "https://huggingface.co/spaces/awacke1/AzureCosmosDBUI"
75
+ CosmosDBUrl = 'https://portal.azure.com/#@AaronCWackergmail.onmicrosoft.com/resource/subscriptions/003fba60-5b3f-48f4-ab36-3ed11bc40816/resourceGroups/datasets/providers/Microsoft.DocumentDB/databaseAccounts/acae-afd/dataExplorer'
76
+
77
+ # =============================================================================
78
+ # ───────────── HELPER FUNCTIONS ─────────────
79
+ # =============================================================================
80
+ def get_download_link(file_path):
81
+ with open(file_path, "rb") as file:
82
+ contents = file.read()
83
+ b64 = base64.b64encode(contents).decode()
84
+ file_name = os.path.basename(file_path)
85
+ return f'<a href="data:file/txt;base64,{b64}" download="{file_name}">Download {file_name} 📂</a>'
86
+
87
+ def generate_unique_id():
88
+ timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
89
+ unique_uuid = str(uuid.uuid4())
90
+ return_value = f"{timestamp}-{unique_uuid}"
91
+ st.write('New ID: ' + return_value)
92
+ return return_value
93
+
94
+ def generate_filename(prompt, file_type):
95
+ central = pytz.timezone('US/Central')
96
+ safe_date_time = datetime.now(central).strftime("%m%d_%H%M")
97
+ safe_prompt = re.sub(r'\W+', '', prompt)[:90]
98
+ return f"{safe_date_time}{safe_prompt}.{file_type}"
99
+
100
+ def create_file(filename, prompt, response, should_save=True):
101
+ if not should_save:
102
+ return
103
+ with open(filename, 'w', encoding='utf-8') as file:
104
+ file.write(prompt + "\n\n" + response)
105
+
106
+ def load_file(file_name):
107
+ with open(file_name, "r", encoding='utf-8') as file:
108
+ content = file.read()
109
+ return content
110
+
111
+ def display_glossary_entity(k):
112
+ search_urls = {
113
+ "🚀": lambda k: f"/?q={k}",
114
+ "📖": lambda k: f"https://en.wikipedia.org/wiki/{quote(k)}",
115
+ "🔍": lambda k: f"https://www.google.com/search?q={quote(k)}",
116
+ "🎥": lambda k: f"https://www.youtube.com/results?search_query={quote(k)}",
117
+ }
118
+ links_md = ' '.join([f"<a href='{url(k)}' target='_blank'>{emoji}</a>" for emoji, url in search_urls.items()])
119
+ st.markdown(f"{k} {links_md}", unsafe_allow_html=True)
120
+
121
+ def create_zip_of_files(files):
122
+ zip_name = "all_files.zip"
123
+ with zipfile.ZipFile(zip_name, 'w') as zipf:
124
+ for file in files:
125
+ zipf.write(file)
126
+ return zip_name
127
+
128
+ def get_video_html(video_path, width="100%"):
129
+ video_url = f"data:video/mp4;base64,{base64.b64encode(open(video_path, 'rb').read()).decode()}"
130
+ return f'''
131
+ <video width="{width}" controls autoplay loop>
132
+ <source src="{video_url}" type="video/mp4">
133
+ Your browser does not support video.
134
+ </video>
135
+ '''
136
+
137
+ def get_audio_html(audio_path, width="100%"):
138
+ audio_url = f"data:audio/mpeg;base64,{base64.b64encode(open(audio_path, 'rb').read()).decode()}"
139
+ return f'''
140
+ <audio controls style="width:{width}">
141
+ <source src="{audio_url}" type="audio/mpeg">
142
+ Your browser does not support audio.
143
+ </audio>
144
+ '''
145
+
146
+ def preprocess_text(text):
147
+ text = text.replace('\r\n', '\\n').replace('\r', '\\n').replace('\n', '\\n')
148
+ text = text.replace('"', '\\"')
149
+ text = re.sub(r'[\t]', ' ', text)
150
+ text = re.sub(r'[^\x00-\x7F]+', '', text)
151
+ return text.strip()
152
+
153
+ # =============================================================================
154
+ # ───────────── COSMOS DB FUNCTIONS ─────────────
155
+ # =============================================================================
156
+ def get_databases(client):
157
+ return [db['id'] for db in client.list_databases()]
158
+
159
+ def get_containers(database):
160
+ return [container['id'] for container in database.list_containers()]
161
+
162
+ def get_documents(container, limit=None):
163
+ query = "SELECT * FROM c ORDER BY c._ts DESC"
164
+ items = list(container.query_items(query=query, enable_cross_partition_query=True, max_item_count=limit))
165
+ return items
166
+
167
+ def insert_record(container, record):
168
+ try:
169
+ container.create_item(body=record)
170
+ return True, "Inserted! 🎉"
171
+ except exceptions.CosmosHttpResponseError as e:
172
+ return False, f"HTTP error: {str(e)} 🚨"
173
+ except Exception as e:
174
+ return False, f"Error: {str(e)} 😱"
175
+
176
+ def update_record(container, updated_record):
177
+ try:
178
+ container.upsert_item(body=updated_record)
179
+ return True, f"Updated {updated_record['id']} 🛠️"
180
+ except exceptions.CosmosHttpResponseError as e:
181
+ return False, f"HTTP error: {str(e)} 🚨"
182
+ except Exception as e:
183
+ return False, f"Error: {traceback.format_exc()} 😱"
184
+
185
+ def delete_record(container, record):
186
+ try:
187
+ if "id" not in record:
188
+ return False, "Record must contain an 'id' field. 🛑"
189
+ doc_id = record["id"]
190
+ if "delete_log" not in st.session_state:
191
+ st.session_state.delete_log = []
192
+ st.session_state.delete_log.append(f"Attempting to delete document: {json.dumps(record, indent=2)}")
193
+ partition_key_value = record.get("pk", doc_id)
194
+ st.session_state.delete_log.append(f"Using ID and Partition Key: {partition_key_value}")
195
+ container.delete_item(item=doc_id, partition_key=partition_key_value)
196
+ success_msg = f"Record {doc_id} successfully deleted from Cosmos DB. 🗑️"
197
+ st.session_state.delete_log.append(success_msg)
198
+ return True, success_msg
199
+ except exceptions.CosmosResourceNotFoundError:
200
+ success_msg = f"Record {doc_id} not found in Cosmos DB (already deleted or never existed). 🗑️"
201
+ st.session_state.delete_log.append(success_msg)
202
+ return True, success_msg
203
+ except exceptions.CosmosHttpResponseError as e:
204
+ error_msg = f"HTTP error deleting {doc_id}: {str(e)}. 🚨"
205
+ st.session_state.delete_log.append(error_msg)
206
+ return False, error_msg
207
+ except Exception as e:
208
+ error_msg = f"Unexpected error deleting {doc_id}: {str(traceback.format_exc())}. 😱"
209
+ st.session_state.delete_log.append(error_msg)
210
+ return False, error_msg
211
+
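+ # Illustrative, uncalled example: the CRUD helpers above assume documents whose
+ # partition key path is /pk and whose "pk" mirrors "id" (the convention used
+ # throughout this app). Field values below are placeholders.
+ def _example_record_round_trip(container):
+     rec_id = generate_unique_id()
+     record = {"id": rec_id, "pk": rec_id, "name": "demo", "timestamp": datetime.utcnow().isoformat()}
+     ok, msg = insert_record(container, record)
+     if ok:
+         delete_record(container, record)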
212
+ def save_to_cosmos_db(container, query, response1, response2):
213
+ try:
214
+ if container:
215
+ timestamp = datetime.utcnow().strftime('%Y%m%d%H%M%S%f')
216
+ unique_uuid = str(uuid.uuid4())
217
+ new_id = f"{timestamp}-{unique_uuid}"
218
+ record = {
219
+ "id": new_id,
220
+ "pk": new_id,
221
+ "name": new_id,
222
+ "query": query,
223
+ "response1": response1,
224
+ "response2": response2,
225
+ "timestamp": datetime.utcnow().isoformat(),
226
+ "type": "ai_response",
227
+ "version": "1.0"
228
+ }
229
+ container.create_item(body=record)
230
+ st.success(f"Saved: {record['id']}")
231
+ st.session_state.documents = get_documents(container)
232
+ else:
233
+ st.error("Cosmos container not initialized.")
234
+ except Exception as e:
235
+ st.error(f"Save error: {str(e)}")
236
+
237
+ def archive_current_container(database_name, container_name, client):
238
+ try:
239
+ base_dir = "./cosmos_archive_current_container"
240
+ if os.path.exists(base_dir):
241
+ shutil.rmtree(base_dir)
242
+ os.makedirs(base_dir)
243
+ db_client = client.get_database_client(database_name)
244
+ container_client = db_client.get_container_client(container_name)
245
+ items = list(container_client.read_all_items())
246
+ container_dir = os.path.join(base_dir, container_name)
247
+ os.makedirs(container_dir)
248
+ for item in items:
249
+ item_id = item.get('id', f"unknown_{datetime.now().strftime('%Y%m%d%H%M%S')}")
250
+ with open(os.path.join(container_dir, f"{item_id}.json"), 'w') as f:
251
+ json.dump(item, f, indent=2)
252
+ archive_name = f"{container_name}_archive_{datetime.now().strftime('%Y%m%d%H%M%S')}"
253
+ shutil.make_archive(archive_name, 'zip', base_dir)
254
+ return get_download_link(f"{archive_name}.zip")
255
+ except Exception as e:
256
+ return f"Archive error: {str(e)} 😢"
257
+
258
+ # =============================================================================
259
+ # ───────────── ADVANCED COSMOS FUNCTIONS ─────────────
260
+ # =============================================================================
261
+ def create_new_container(database, container_id, partition_key_path,
262
+ analytical_storage_ttl=None, indexing_policy=None, vector_embedding_policy=None):
263
+ try:
264
+ if analytical_storage_ttl is not None:
265
+ container = database.create_container(
266
+ id=container_id,
267
+ partition_key=PartitionKey(path=partition_key_path),
268
+ analytical_storage_ttl=analytical_storage_ttl,
269
+ indexing_policy=indexing_policy,
270
+ vector_embedding_policy=vector_embedding_policy
271
+ )
272
+ else:
273
+ container = database.create_container(
274
+ id=container_id,
275
+ partition_key=PartitionKey(path=partition_key_path),
276
+ indexing_policy=indexing_policy,
277
+ vector_embedding_policy=vector_embedding_policy
278
+ )
279
+ except exceptions.CosmosHttpResponseError as e:
280
+ if analytical_storage_ttl is not None and "analyticalStorageTtl" in str(e):
281
+ try:
282
+ container = database.create_container(
283
+ id=container_id,
284
+ partition_key=PartitionKey(path=partition_key_path),
285
+ indexing_policy=indexing_policy,
286
+ vector_embedding_policy=vector_embedding_policy
287
+ )
288
+ except Exception as e2:
289
+ st.error(f"Error creating container without analytical_storage_ttl: {str(e2)}")
290
+ return None
291
+ elif isinstance(e, exceptions.CosmosResourceExistsError):
292
+ container = database.get_container_client(container_id)
293
+ else:
294
+ st.error(f"Error creating container: {str(e)}")
295
+ return None
296
+ return container
297
+
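+ # Illustrative, uncalled example: create_new_container accepts optional indexing and
+ # vector embedding policies. The dictionaries below follow the documented Cosmos DB
+ # shapes; the "/embedding" path, 1536 dimensions, and container name are placeholder
+ # assumptions, not values used elsewhere in this app.
+ def _example_vector_container(database):
+     vector_embedding_policy = {
+         "vectorEmbeddings": [
+             {"path": "/embedding", "dataType": "float32", "distanceFunction": "cosine", "dimensions": 1536}
+         ]
+     }
+     indexing_policy = {
+         "indexingMode": "consistent",
+         "includedPaths": [{"path": "/*"}],
+         "excludedPaths": [{"path": "/embedding/*"}],
+         "vectorIndexes": [{"path": "/embedding", "type": "quantizedFlat"}]
+     }
+     return create_new_container(database, "vector-demo", "/pk",
+                                 indexing_policy=indexing_policy,
+                                 vector_embedding_policy=vector_embedding_policy)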
298
+ def advanced_insert_item(container, item):
299
+ try:
300
+ container.upsert_item(item)
301
+ return True, f"Item {item.get('id', '')} inserted. ➕"
302
+ except Exception as e:
303
+ return False, str(e)
304
+
305
+ def advanced_update_item(container, item):
306
+ try:
307
+ container.upsert_item(item)
308
+ return True, f"Item {item.get('id', '')} updated. ✏️"
309
+ except Exception as e:
310
+ return False, str(e)
311
+
312
+ def advanced_delete_item(container, item_id, partition_key_value):
313
+ try:
314
+ container.delete_item(item=item_id, partition_key=partition_key_value)
315
+ return True, f"Item {item_id} deleted. 🗑️"
316
+ except Exception as e:
317
+ return False, str(e)
318
+
319
+ def vector_search(container, query_vector, vector_field, top=10, exact_search=False):
320
+ query_vector_str = json.dumps(query_vector)
321
+ query = f"""SELECT TOP {top} c.id, VectorDistance(c.{vector_field}, {query_vector_str}, {str(exact_search).lower()},
322
+ {{'dataType':'float32','distanceFunction':'cosine'}}) AS SimilarityScore
323
+ FROM c ORDER BY SimilarityScore"""
324
+ results = list(container.query_items(query=query, enable_cross_partition_query=True))
325
+ return results
326
+
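+ # Illustrative, uncalled example: exercising vector_search assumes documents carry an
+ # "embedding" field and that a query embedding was computed elsewhere. The three-element
+ # vector is a placeholder; real embeddings match the container's declared dimensions.
+ def _example_vector_search(container):
+     query_embedding = [0.12, -0.03, 0.58]  # placeholder values only
+     hits = vector_search(container, query_embedding, "embedding", top=5)
+     for hit in hits:
+         st.write(hit["id"], hit["SimilarityScore"])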
327
+ # =============================================================================
328
+ # ───────────── GITHUB FUNCTIONS ─────────────
329
+ # =============================================================================
330
+ def download_github_repo(url, local_path):
331
+ if os.path.exists(local_path):
332
+ shutil.rmtree(local_path)
333
+ Repo.clone_from(url, local_path)
334
+
335
+ def create_zip_file(source_dir, output_filename):
336
+ shutil.make_archive(output_filename, 'zip', source_dir)
337
+
338
+ def create_repo(g, repo_name):
339
+ user = g.get_user()
340
+ return user.create_repo(repo_name)
341
+
342
+ def push_to_github(local_path, repo, github_token):
343
+ repo_url = f"https://{github_token}@github.com/{repo.full_name}.git"
344
+ local_repo = Repo(local_path)
345
+ if 'origin' in [remote.name for remote in local_repo.remotes]:
346
+ origin = local_repo.remote('origin')
347
+ origin.set_url(repo_url)
348
+ else:
349
+ origin = local_repo.create_remote('origin', repo_url)
350
+ if not local_repo.heads:
351
+ local_repo.git.checkout('-b', 'main')
352
+ current_branch = 'main'
353
+ else:
354
+ current_branch = local_repo.active_branch.name
355
+ local_repo.git.add(A=True)
356
+ if local_repo.is_dirty():
357
+ local_repo.git.commit('-m', 'Initial commit')
358
+ origin.push(refspec=f'{current_branch}:{current_branch}')
359
+
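+ # Illustrative, uncalled example: intended flow for the GitHub helpers above, assuming
+ # a personal access token is available (the GITHUB_TOKEN variable name, source URL,
+ # and repository name are placeholders).
+ def _example_clone_and_push():
+     token = os.environ.get("GITHUB_TOKEN", "")
+     g = Github(token)
+     download_github_repo("https://github.com/user/source-repo", "./cloned_repo")
+     new_repo = create_repo(g, "cloned-repo-copy")
+     push_to_github("./cloned_repo", new_repo, token)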
360
+ # =============================================================================
361
+ # ───────────── FILE & MEDIA MANAGEMENT FUNCTIONS ─────────────
362
+ # =============================================================================
363
+ def display_saved_files_in_sidebar():
364
+ all_files = sorted([f for f in glob.glob("*.md") if not f.lower().startswith('readme')], reverse=True)
365
+ st.sidebar.markdown("## 📁 Files")
366
+ for file in all_files:
367
+ col1, col2, col3 = st.sidebar.columns([6, 2, 1])
368
+ with col1:
369
+ st.markdown(f"📄 {file}")
370
+ with col2:
371
+ st.sidebar.download_button(
372
+ label="⬇️",
373
+ data=open(file, 'rb').read(),
374
+ file_name=file
375
+ )
376
+ with col3:
377
+ if st.sidebar.button("🗑", key=f"delete_{file}"):
378
+ os.remove(file)
379
+ st.rerun()
380
+
381
+ def display_file_viewer(file_path):
382
+ content = load_file(file_path)
383
+ if content:
384
+ st.markdown("### 📄 File Viewer")
385
+ st.markdown(f"**{file_path}**")
386
+ file_stats = os.stat(file_path)
387
+ st.markdown(f"**Mod:** {datetime.fromtimestamp(file_stats.st_mtime).strftime('%Y-%m-%d %H:%M:%S')} | **Size:** {file_stats.st_size} bytes")
388
+ st.markdown("---")
389
+ st.markdown(content)
390
+ st.download_button("⬇️", data=content, file_name=os.path.basename(file_path), mime="text/markdown")
391
+
392
+ def display_file_editor(file_path):
393
+ if 'file_content' not in st.session_state:
394
+ st.session_state.file_content = {}
395
+ if file_path not in st.session_state.file_content:
396
+ content = load_file(file_path)
397
+ if content is not None:
398
+ st.session_state.file_content[file_path] = content
399
+ else:
400
+ return
401
+ st.markdown("### ✏️ Edit File")
402
+ st.markdown(f"**Editing:** {file_path}")
403
+ md_tab, code_tab = st.tabs(["Markdown", "Code"])
404
+ with md_tab:
405
+ st.markdown(st.session_state.file_content[file_path])
406
+ with code_tab:
407
+ new_content = st.text_area("Edit:", value=st.session_state.file_content[file_path], height=400, key=f"editor_{hash(file_path)}", on_change=lambda: auto_save_edit())
408
+ col1, col2 = st.columns([1, 5])
409
+ with col1:
410
+ if st.button("💾 Save"):
411
+ if save_file_content(file_path, new_content):
412
+ st.session_state.file_content[file_path] = new_content
413
+ st.success("Saved! 🎉")
414
+ time.sleep(1)
415
+ st.rerun()
416
+ with col2:
417
+ st.download_button("⬇️", data=new_content, file_name=os.path.basename(file_path), mime="text/markdown")
418
+
419
+ def save_file_content(file_path, content):
420
+ try:
421
+ with open(file_path, 'w', encoding='utf-8') as file:
422
+ file.write(content)
423
+ return True
424
+ except Exception as e:
425
+ st.error(f"Save error: {str(e)}")
426
+ return False
427
+
428
+ def update_file_management_section():
429
+ if 'file_view_mode' not in st.session_state:
430
+ st.session_state.file_view_mode = None
431
+ if 'current_file' not in st.session_state:
432
+ st.session_state.current_file = None
433
+ if 'file_content' not in st.session_state:
434
+ st.session_state.file_content = {}
435
+ all_files = sorted(glob.glob("*.md"), reverse=True)
436
+ st.sidebar.title("📁 Files")
437
+ if st.sidebar.button("🗑 Delete All"):
438
+ for file in all_files:
439
+ os.remove(file)
440
+ st.session_state.file_content = {}
441
+ st.session_state.current_file = None
442
+ st.session_state.file_view_mode = None
443
+ st.rerun()
444
+ if st.sidebar.button("⬇️ Download All"):
445
+ zip_file = create_zip_of_files(all_files)
446
+ st.sidebar.markdown(get_download_link(zip_file), unsafe_allow_html=True)
447
+ for file in all_files:
448
+ col1, col2, col3, col4 = st.sidebar.columns([1, 3, 1, 1])
449
+ with col1:
450
+ if st.button("🌐", key=f"view_{file}"):
451
+ st.session_state.current_file = file
452
+ st.session_state.file_view_mode = 'view'
453
+ if file not in st.session_state.file_content:
454
+ content = load_file(file)
455
+ if content is not None:
456
+ st.session_state.file_content[file] = content
457
+ st.rerun()
458
+ with col2:
459
+ st.markdown(get_download_link(file), unsafe_allow_html=True)
460
+ with col3:
461
+ if st.button("📂", key=f"edit_{file}"):
462
+ st.session_state.current_file = file
463
+ st.session_state.file_view_mode = 'edit'
464
+ if file not in st.session_state.file_content:
465
+ content = load_file(file)
466
+ if content is not None:
467
+ st.session_state.file_content[file] = content
468
+ st.rerun()
469
+ with col4:
470
+ if st.button("🗑", key=f"delete_{file}"):
471
+ os.remove(file)
472
+ if file in st.session_state.file_content:
473
+ del st.session_state.file_content[file]
474
+ if st.session_state.current_file == file:
475
+ st.session_state.current_file = None
476
+ st.session_state.file_view_mode = None
477
+ st.rerun()
478
+ st.sidebar.markdown("---")
479
+ st.sidebar.title("External Help Links")
480
+ for link in external_links:
481
+ st.sidebar.markdown(f"{link['emoji']} [{link['title']}]({link['url']})", unsafe_allow_html=True)
482
+ if st.session_state.current_file:
483
+ if st.session_state.file_view_mode == 'view':
484
+ display_file_viewer(st.session_state.current_file)
485
+ elif st.session_state.file_view_mode == 'edit':
486
+ display_file_editor(st.session_state.current_file)
487
+
488
+ # =============================================================================
489
+ # ───────────── SIDEBAR DATA GRID (Records with formatted timestamps) ─────────────
490
+ # =============================================================================
491
+ def show_sidebar_data_grid():
492
+ if st.session_state.get("current_container"):
493
+ try:
494
+ records = get_documents(st.session_state.current_container)
495
+ data = []
496
+ for rec in records:
497
+ ts = rec.get("timestamp", "")
498
+ try:
499
+ dt = datetime.fromisoformat(ts)
500
+ formatted = dt.strftime("%I:%M %p %m/%d/%Y")
501
+ except Exception:
502
+ formatted = ts
503
+ data.append({
504
+ "ID": rec.get("id", ""),
505
+ "Name": rec.get("name", ""),
506
+ "Timestamp": formatted
507
+ })
508
+ df = pd.DataFrame(data)
509
+ st.sidebar.markdown("### 📊 Data Grid")
510
+ st.sidebar.dataframe(df)
511
+ except Exception as e:
512
+ st.sidebar.error(f"Data grid error: {str(e)}")
513
+ else:
514
+ st.sidebar.info("No container selected for data grid.")
515
+
516
+ # =============================================================================
517
+ # ───────────── VIDEO & AUDIO UI FUNCTIONS ─────────────
518
+ # =============================================================================
519
+ def validate_and_preprocess_image(file_data, target_size=(576, 1024)):
520
+ try:
521
+ st.write("Preprocessing image...")
522
+ if isinstance(file_data, bytes):
523
+ img = Image.open(io.BytesIO(file_data))
524
+ elif hasattr(file_data, 'read'):
525
+ if hasattr(file_data, 'seek'):
526
+ file_data.seek(0)
527
+ img = Image.open(file_data)
528
+ elif isinstance(file_data, Image.Image):
529
+ img = file_data
530
+ else:
531
+ raise ValueError(f"Unsupported input: {type(file_data)}")
532
+ if img.mode != 'RGB':
533
+ img = img.convert('RGB')
534
+ aspect_ratio = img.size[0] / img.size[1]
535
+ if aspect_ratio > target_size[0] / target_size[1]:
536
+ new_width = target_size[0]
537
+ new_height = int(new_width / aspect_ratio)
538
+ else:
539
+ new_height = target_size[1]
540
+ new_width = int(new_height * aspect_ratio)
541
+ new_width = (new_width // 2) * 2
542
+ new_height = (new_height // 2) * 2
543
+ resized_img = img.resize((new_width, new_height), Image.Resampling.LANCZOS)
544
+ final_img = Image.new('RGB', target_size, (255, 255, 255))
545
+ paste_x = (target_size[0] - new_width) // 2
546
+ paste_y = (target_size[1] - new_height) // 2
547
+ final_img.paste(resized_img, (paste_x, paste_y))
548
+ return final_img
549
+ except Exception as e:
550
+ st.error(f"Image error: {str(e)}")
551
+ return None
552
+
553
+ def add_video_generation_ui(container):
554
+ st.markdown("### 🎥 Video Gen")
555
+ col1, col2 = st.columns([2, 1])
556
+ with col1:
557
+ uploaded_file = st.file_uploader("Upload Image 🖼️", type=['png', 'jpg', 'jpeg'])
558
+ with col2:
559
+ st.markdown("#### Params")
560
+ motion = st.slider("🌊 Motion", 1, 255, 127)
561
+ fps = st.slider("🎬 FPS", 1, 30, 6)
562
+ with st.expander("Advanced"):
563
+ use_custom = st.checkbox("Custom Seed")
564
+ seed = st.number_input("Seed", value=int(time.time() * 1000)) if use_custom else None
565
+ if uploaded_file is not None:
566
+ try:
567
+ file_data = uploaded_file.read()
568
+ preview1, preview2 = st.columns(2)
569
+ with preview1:
570
+ st.write("Original")
571
+ st.image(Image.open(io.BytesIO(file_data)), use_column_width=True)
572
+ with preview2:
573
+ proc_img = validate_and_preprocess_image(io.BytesIO(file_data))
574
+ if proc_img:
575
+ st.write("Processed")
576
+ st.image(proc_img, use_column_width=True)
577
+ else:
578
+ st.error("Preprocess failed")
579
+ return
580
+ if st.button("🎥 Generate"):
581
+ with st.spinner("Generating video..."):
582
+ with tempfile.NamedTemporaryFile(suffix='.png', delete=False) as temp_file:
583
+ proc_img.save(temp_file.name, format='PNG')
584
+ try:
585
+ client = Client("awacke1/stable-video-diffusion", hf_token=os.environ.get("HUGGINGFACE_TOKEN"))
586
+ result = client.predict(
587
+ image=temp_file.name,
588
+ seed=seed if seed is not None else int(time.time() * 1000),
589
+ randomize_seed=seed is None,
590
+ motion_bucket_id=motion,
591
+ fps_id=fps,
592
+ api_name="/video"
593
+ )
594
+ if result and isinstance(result, tuple) and len(result) >= 1:
595
+ video_path = result[0].get('video') if isinstance(result[0], dict) else None
596
+ if video_path and os.path.exists(video_path):
597
+ video_filename = f"generated_video_{datetime.now().strftime('%Y%m%d_%H%M%S')}.mp4"
598
+ shutil.copy(video_path, video_filename)
599
+ st.success(f"Video generated! 🎉")
600
+ st.video(video_filename)
601
+ if container:
602
+ video_id = generate_unique_id()
+ video_record = {
+ "id": video_id,
+ "pk": video_id,
605
+ "type": "generated_video",
606
+ "filename": video_filename,
607
+ "seed": seed if seed is not None else "random",
608
+ "motion": motion,
609
+ "fps": fps,
610
+ "timestamp": datetime.now().isoformat()
611
+ }
612
+ success, message = insert_record(container, video_record)
613
+ if success:
614
+ st.success("DB record saved!")
615
+ else:
616
+ st.error(f"DB error: {message}")
617
+ else:
618
+ st.error("Invalid result format")
619
+ else:
620
+ st.error("No result returned")
621
+ except Exception as e:
622
+ st.error(f"Video gen error: {str(e)}")
623
+ finally:
624
+ try:
625
+ os.unlink(temp_file.name)
626
+ st.write("Temp file removed")
627
+ except Exception as e:
628
+ st.warning(f"Cleanup error: {str(e)}")
629
+ except Exception as e:
630
+ st.error(f"Upload error: {str(e)}")
631
+
632
+ # =============================================================================
633
+ # ───────────── AI SAMPLES SIDEBAR (Processed as a Python List) ─────────────
634
+ # =============================================================================
635
+ def display_ai_samples():
636
+ ai_samples = [
637
+ {
638
+ "name": "FullTextContains",
639
+ "description": "Query using FullTextContains",
640
+ "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "bicycle")'
641
+ },
642
+ {
643
+ "name": "FullTextContainsAll",
644
+ "description": "Query using FullTextContainsAll",
645
+ "query": 'SELECT TOP 10 * FROM c WHERE FullTextContainsAll(c.text, "red", "bicycle")'
646
+ },
647
+ {
648
+ "name": "FullTextContainsAny",
649
+ "description": "Query using FullTextContainsAny",
650
+ "query": 'SELECT TOP 10 * FROM c WHERE FullTextContains(c.text, "red") AND FullTextContainsAny(c.text, "bicycle", "skateboard")'
651
+ },
652
+ {
653
+ "name": "FullTextScore",
654
+ "description": "Query using FullTextScore (order by relevance)",
655
+ "query": 'SELECT TOP 10 * FROM c ORDER BY RANK FullTextScore(c.text, ["bicycle", "mountain"])'
656
+ },
657
+ {
658
+ "name": "Vector Search with Score",
659
+ "description": "Example vector search snippet",
660
+ "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5)\nfor result, score in results:\n print(result.json(), score)'
661
+ },
662
+ {
663
+ "name": "Vector Search with Filtering",
664
+ "description": "Example vector search with a filter",
665
+ "query": 'pre_filter = {"conditions": [{"property": "metadata.page", "operator": "$eq", "value": 0}]}\nresults = vector_search.similarity_search_with_score(query="Your query", k=5, pre_filter=pre_filter)'
666
+ },
667
+ {
668
+ "name": "Hybrid Search",
669
+ "description": "Example hybrid search snippet",
670
+ "query": 'results = vector_search.similarity_search_with_score(query="Your query", k=5, query_type=CosmosDBQueryType.HYBRID)'
671
+ }
672
+ ]
673
+ st.sidebar.markdown("### 🤖 AI Samples")
674
+ st.sidebar.info("🚀 Get started with our AI samples! Time free access to get started today.")
675
+ sample_names = [sample["name"] for sample in ai_samples]
676
+ selected_sample_name = st.sidebar.selectbox("Select an AI Sample", sample_names)
677
+ selected_sample = next((s for s in ai_samples if s["name"] == selected_sample_name), None)
678
+ if selected_sample:
679
+ st.sidebar.markdown(f"**{selected_sample['name']}**: {selected_sample['description']}")
680
+ lang = "sql" if "FullText" in selected_sample["name"] else "python"
681
+ st.sidebar.code(selected_sample["query"], language=lang)
682
+
683
+ # =============================================================================
684
+ # ───────────── NEW ITEM & FIELD FUNCTIONS
685
+ # =============================================================================
686
+ def new_item_default(container):
687
+ new_id = generate_unique_id()
688
+ default_doc = {
689
+ "id": new_id,
690
+ "pk": new_id,
691
+ "name": "New Sample Document",
692
+ "content": "Start editing your document here...",
693
+ "timestamp": datetime.now().isoformat(),
694
+ "type": "sample"
695
+ }
696
+ success, message = insert_record(container, default_doc)
697
+ if success:
698
+ st.success("New sample document created! ✨")
699
+ return default_doc
700
+ else:
701
+ st.error("Error creating new item: " + message)
702
+ return None
703
+
704
+ def auto_save_edit():
705
+ try:
706
+ edited_str = st.session_state.doc_editor
707
+ edited_doc = json.loads(edited_str)
708
+ container = st.session_state.current_container
709
+ container.upsert_item(edited_doc)
710
+ st.success("Auto-saved! 💾")
711
+ except Exception as e:
712
+ st.error(f"Auto-save error: {str(e)}")
713
+
714
+ def add_field_to_doc():
715
+ key = st.session_state.new_field_key
716
+ value = st.session_state.new_field_value
717
+ try:
718
+ doc = json.loads(st.session_state.doc_editor)
719
+ doc[key] = value
720
+ st.session_state.doc_editor = json.dumps(doc, indent=2)
721
+ auto_save_edit()
722
+ st.success(f"Added field {key} 👍")
723
+ except Exception as e:
724
+ st.error(f"Error adding field: {str(e)}")
725
+
726
+ # =============================================================================
727
+ # ───────────── VECTOR SEARCH INTERFACE (Simple keyword search)
728
+ # =============================================================================
729
+ def vector_keyword_search(keyword, container):
730
+ try:
731
+ query = f"SELECT * FROM c WHERE CONTAINS(c.content, '{keyword}')"
732
+ results = list(container.query_items(query=query, enable_cross_partition_query=True))
733
+ return results
734
+ except Exception as e:
735
+ st.error(f"Vector search error: {str(e)}")
736
+ return []
737
+
738
+ # =============================================================================
739
+ # ───────────── NEW AI MODALITY RECORD TEMPLATES
740
+ # =============================================================================
741
+ def new_ai_record(container):
742
+ new_id = generate_unique_id()
743
+ default_doc = {
744
+ "id": new_id,
745
+ "pk": new_id,
746
+ "name": "AI Modality Record",
747
+ "function_url": "https://example.com/function",
748
+ "input_text": "### Input (markdown)\n\nType your input here.",
749
+ "output_text": "### Output (markdown)\n\nResult will appear here.",
750
+ "timestamp": datetime.now().isoformat(),
751
+ "type": "ai_modality"
752
+ }
753
+ success, message = insert_record(container, default_doc)
754
+ if success:
755
+ st.success("New AI modality record created! 💡")
756
+ return default_doc
757
+ else:
758
+ st.error("Error creating AI record: " + message)
759
+ return None
760
+
761
+ def new_links_record(container):
762
+ new_id = generate_unique_id()
763
+ links_md = "\n".join([f"- {link['emoji']} [{link['title']}]({link['url']})" for link in external_links])
764
+ default_doc = {
765
+ "id": new_id,
766
+ "pk": new_id,
767
+ "name": "Portal Links Record",
768
+ "function_url": "",
769
+ "input_text": links_md,
770
+ "output_text": "",
771
+ "timestamp": datetime.now().isoformat(),
772
+ "type": "ai_modality"
773
+ }
774
+ success, message = insert_record(container, default_doc)
775
+ if success:
776
+ st.success("New Portal Links record created! 🔗")
777
+ return default_doc
778
+ else:
779
+ st.error("Error creating links record: " + message)
780
+ return None
781
+
782
+ # =============================================================================
783
+ # ───────────── LANGCHAIN FUNCTIONS (Witty emoji comments)
784
+ # =============================================================================
785
+ def display_langchain_functions():
786
+ functions = [
787
+ {"name": "OpenAIEmbeddings", "comment": "🔮 Creates embeddings using OpenAI – pure magic!"},
788
+ {"name": "AzureCosmosDBNoSqlVectorSearch", "comment": "🚀 Performs vector search on Cosmos DB – superfast and smart!"},
789
+ {"name": "RecursiveCharacterTextSplitter", "comment": "✂️ Slices text into manageable chunks – like a pro chef!"}
790
+ ]
791
+ st.sidebar.markdown("### 🤖 Langchain Functions")
792
+ for func in functions:
793
+ st.sidebar.write(f"{func['name']}: {func['comment']}")
794
+
795
+ # =============================================================================
796
+ # ───────────── OPTIONAL: SIDEBAR DATA GRID (Records with formatted timestamps)
797
+ # =============================================================================
798
+ # (This feature is now integrated above via show_sidebar_data_grid().)
799
+
800
+ # =============================================================================
801
+ # ───────────── ASYNC TTS & ARXIV FUNCTIONS (Optional Features)
802
+ # =============================================================================
803
+ import asyncio
804
+ import edge_tts
805
+ from streamlit_marquee import streamlit_marquee
806
+ from collections import Counter
807
+
808
+ class PerformanceTimer:
809
+ def __init__(self, operation_name: str):
810
+ self.operation_name = operation_name
811
+ self.start_time = None
812
+ def __enter__(self):
813
+ self.start_time = time.time()
814
+ return self
815
+ def __exit__(self, exc_type, exc_val, exc_tb):
816
+ pass
817
+
818
+ async def async_edge_tts_generate(text: str, voice: str, rate: int = 0, pitch: int = 0, file_format: str = "mp3"):
819
+ with PerformanceTimer("tts_generation") as timer:
820
+ text = text.replace("\n", " ").strip()
821
+ if not text:
822
+ return None, 0
823
+ cache_key = f"{text[:100]}_{voice}_{rate}_{pitch}_{file_format}"
824
+ if cache_key in st.session_state.get('audio_cache', {}):
825
+ return st.session_state['audio_cache'][cache_key], 0
826
+ try:
827
+ rate_str = f"{rate:+d}%"
828
+ pitch_str = f"{pitch:+d}Hz"
829
+ communicate = edge_tts.Communicate(text, voice, rate=rate_str, pitch=pitch_str)
830
+ timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
831
+ filename = f"audio_{timestamp}_{random.randint(1000, 9999)}.{file_format}"
832
+ await communicate.save(filename)
833
+ st.session_state.setdefault('audio_cache', {})[cache_key] = filename
834
+ return filename, time.time() - timer.start_time
835
+ except Exception as e:
836
+ st.error(f"Error generating audio: {str(e)}")
837
+ return None, 0
838
+
839
+ def speak_with_edge_tts(text, voice="en-US-AriaNeural", rate=0, pitch=0, file_format="mp3"):
840
+ result = asyncio.run(async_edge_tts_generate(text, voice, rate, pitch, file_format))
841
+ if isinstance(result, tuple):
842
+ return result[0]
843
+ return result
844
+
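+ # Illustrative, uncalled example: generate speech for a short string and offer playback
+ # and download via play_and_download_audio (defined just below). The sample text and
+ # default voice are placeholders.
+ def _example_tts_playback():
+     audio_path = speak_with_edge_tts("Hello from GitCosmos", voice="en-US-AriaNeural")
+     if audio_path:
+         play_and_download_audio(audio_path, file_type="mp3")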
845
+ async def async_save_qa_with_audio(question: str, answer: str):
846
+ with PerformanceTimer("qa_save") as timer:
847
+ md_file = generate_filename(question, "md")
+ create_file(md_file, question, answer)
848
+ audio_file = None
849
+ if st.session_state.get('enable_audio', True):
850
+ audio_text = f"{question}\n\nAnswer: {answer}"
851
+ audio_file, _ = await async_edge_tts_generate(audio_text, voice=st.session_state.get('tts_voice', "en-US-AriaNeural"), file_format=st.session_state.get('audio_format', "mp3"))
852
+ return md_file, audio_file, time.time() - timer.start_time, 0
853
+
854
+ def save_qa_with_audio(question, answer, voice=None):
855
+ if not voice:
856
+ voice = st.session_state.get('tts_voice', "en-US-AriaNeural")
857
+ md_file = generate_filename(question, "md")
+ create_file(md_file, question, answer)
858
+ audio_text = f"{question}\n\nAnswer: {answer}"
859
+ audio_file = speak_with_edge_tts(audio_text, voice=voice, file_format=st.session_state.get('audio_format', "mp3"))
860
+ return md_file, audio_file
861
+
862
+ def play_and_download_audio(file_path, file_type="mp3"):
863
+ if file_path and os.path.exists(file_path):
864
+ st.audio(file_path)
865
+ dl_link = create_download_link_with_cache(file_path, file_type=file_type)
866
+ st.markdown(dl_link, unsafe_allow_html=True)
867
+
868
+ def create_download_link_with_cache(file_path: str, file_type: str = "mp3") -> str:
869
+ cache_key = f"dl_{file_path}"
870
+ if cache_key in st.session_state.get('download_link_cache', {}):
871
+ return st.session_state['download_link_cache'][cache_key]
872
+ try:
873
+ with open(file_path, "rb") as f:
874
+ b64 = base64.b64encode(f.read()).decode()
875
+ filename = os.path.basename(file_path)
876
+ if file_type == "mp3":
877
+ link = f'<a href="data:audio/mpeg;base64,{b64}" download="{filename}">🎵 Download {filename}</a>'
878
+ elif file_type == "wav":
879
+ link = f'<a href="data:audio/wav;base64,{b64}" download="{filename}">🔊 Download {filename}</a>'
880
+ elif file_type == "md":
881
+ link = f'<a href="data:text/markdown;base64,{b64}" download="{filename}">📝 Download {filename}</a>'
882
+ else:
883
+ link = f'<a href="data:application/octet-stream;base64,{b64}" download="{filename}">Download {filename}</a>'
884
+ st.session_state.setdefault('download_link_cache', {})[cache_key] = link
885
+ return link
886
+ except Exception as e:
887
+ st.error(f"Error creating download link: {str(e)}")
888
+ return ""
889
+
890
+ # =============================================================================
891
+ # ───────────── RESEARCH / ARXIV FUNCTIONS (Optional Features)
892
+ # =============================================================================
893
+ def parse_arxiv_refs(ref_text: str):
894
+ if not ref_text:
895
+ return []
896
+ results = []
897
+ current_paper = {}
898
+ lines = ref_text.split('\n')
899
+ for i, line in enumerate(lines):
900
+ if line.count('|') == 2:
901
+ if current_paper:
902
+ results.append(current_paper)
903
+ if len(results) >= 20:
904
+ break
905
+ try:
906
+ header_parts = line.strip('* ').split('|')
907
+ date = header_parts[0].strip()
908
+ title = header_parts[1].strip()
909
+ url_match = re.search(r'(https://arxiv.org/\S+)', line)
910
+ url = url_match.group(1) if url_match else f"paper_{len(results)}"
911
+ current_paper = {
912
+ 'date': date,
913
+ 'title': title,
914
+ 'url': url,
915
+ 'authors': '',
916
+ 'summary': '',
917
+ 'full_audio': None,
918
+ 'download_base64': '',
919
+ }
920
+ except Exception as e:
921
+ st.warning(f"Error parsing paper header: {str(e)}")
922
+ current_paper = {}
923
+ continue
924
+ elif current_paper:
925
+ if not current_paper['authors']:
926
+ current_paper['authors'] = line.strip('* ')
927
+ else:
928
+ if current_paper['summary']:
929
+ current_paper['summary'] += ' ' + line.strip()
930
+ else:
931
+ current_paper['summary'] = line.strip()
932
+ if current_paper:
933
+ results.append(current_paper)
934
+ return results[:20]
935
+
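+ # Illustrative example input: parse_arxiv_refs expects each paper to start with a header
+ # line holding exactly two "|" separators (date | title | arxiv url), followed by an
+ # authors line and free-form summary lines. The reference text below is made up.
+ _EXAMPLE_REF_TEXT = """* 2024-05-01 | An Example Paper Title | https://arxiv.org/abs/2405.00001
+ Jane Doe, John Smith
+ A short summary sentence for the example paper."""
+ # papers = parse_arxiv_refs(_EXAMPLE_REF_TEXT)  # would yield one parsed entry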
936
+ def create_paper_links_md(papers):
937
+ lines = ["# Paper Links\n"]
938
+ for i, p in enumerate(papers, start=1):
939
+ lines.append(f"{i}. **{p['title']}** — [Arxiv Link]({p['url']})")
940
+ return "\n".join(lines)
941
+
942
+ def generate_pdf_link(url: str) -> str:
943
+ if "abs" in url:
944
+ pdf_url = url.replace("abs", "pdf")
945
+ if not pdf_url.endswith(".pdf"):
946
+ pdf_url += ".pdf"
947
+ return pdf_url
948
+ return url
949
+
950
+ def generate_5min_feature_markdown(paper: dict) -> str:
951
+ title = paper.get('title', '')
952
+ summary = paper.get('summary', '')
953
+ authors = paper.get('authors', '')
954
+ date = paper.get('date', '')
955
+ url = paper.get('url', '')
956
+ pdf_link = generate_pdf_link(url)
957
+ title_wc = len(title.split())
958
+ summary_wc = len(summary.split())
959
+ high_info_terms = [term for term in summary.split()[:5]]
960
+ terms_str = ", ".join(high_info_terms)
961
+ rouge_score = round((len(high_info_terms) / max(len(summary.split()), 1)) * 100, 2)
962
+ mermaid_code = "```mermaid\nflowchart TD\n"
963
+ for i in range(len(high_info_terms) - 1):
964
+ mermaid_code += f' T{i+1}["{high_info_terms[i]}"] --> T{i+2}["{high_info_terms[i+1]}"]\n'
965
+ mermaid_code += "```"
966
+ md = f"""
967
+ ## {title}
968
+
969
+ **Authors:** {authors}
970
+ **Date:** {date}
971
+ **Word Count (Title):** {title_wc} | **Word Count (Summary):** {summary_wc}
972
+
973
+ **Links:** [Abstract]({url}) | [PDF]({pdf_link})
974
+
975
+ **High Info Terms:** {terms_str}
976
+ **ROUGE Score:** {rouge_score}%
977
+
978
+ ### Mermaid Graph of Key Concepts
979
+ {mermaid_code}
980
+
981
+ ---
982
+ """
983
+ return md
984
+
985
+ def create_detailed_paper_md(papers: list) -> str:
986
+ md_parts = ["# Detailed Research Paper Summary\n"]
987
+ for idx, paper in enumerate(papers, start=1):
988
+ md_parts.append(generate_5min_feature_markdown(paper))
989
+ return "\n".join(md_parts)
990
+
991
+ # =============================================================================
992
+ # ───────────── MAIN AI LOOKUP FUNCTION (Optional Features) ─────────────
994
+ # =============================================================================
995
+ def perform_ai_lookup(q, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=False, useArxiv=True, useArxivAudio=False):
996
+ start = time.time()
997
+ ai_constitution = """
998
+ You are a medical and machine learning review board expert...
999
+ """
1000
+ # 1) Claude API call
1001
+ import anthropic
1002
+ client = anthropic.Anthropic(api_key=os.getenv("ANTHROPIC_API_KEY_3"))
1003
+ user_input = q
1004
+ response = client.messages.create(
1005
+ model="claude-3-sonnet-20240229",
1006
+ max_tokens=1000,
1007
+ messages=[{"role": "user", "content": user_input}]
1008
+ )
1009
+ st.write("Claude's reply 🧠:")
1010
+ st.markdown(response.content[0].text)
1011
+ result = response.content[0].text
1012
+ create_file(generate_filename(q, "md"), q, result)
1013
+ md_file, audio_file = save_qa_with_audio(q, result)
1014
+ st.subheader("📝 Main Response Audio")
1015
+ play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))
1016
+ if useArxiv:
1017
+ q = q + result
1018
+ st.write('Running Arxiv RAG with Claude inputs.')
1019
+ client = Client("awacke1/Arxiv-Paper-Search-And-QA-RAG-Pattern")
1020
+ refs = client.predict(q, 10, "Semantic Search", "mistralai/Mixtral-8x7B-Instruct-v0.1", api_name="/update_with_rag_md")[0]
1021
+ result = f"🔎 {q}\n\n{refs}"
1022
+ md_file, audio_file = save_qa_with_audio(q, result)
1023
+ st.subheader("📝 Main Response Audio")
1024
+ play_and_download_audio(audio_file, st.session_state.get('audio_format', "mp3"))
1025
+ papers = parse_arxiv_refs(refs)
1026
+ if papers:
1027
+ paper_links = create_paper_links_md(papers)
1028
+ links_file = generate_filename(q, "md")
+ create_file(links_file, q, paper_links)
1029
+ st.markdown(paper_links)
1030
+ detailed_md = create_detailed_paper_md(papers)
1031
+ detailed_file = generate_filename(q, "md")
+ create_file(detailed_file, q, detailed_md)
1032
+ st.markdown(detailed_md)
1033
+ if useArxivAudio:
1034
+ asyncio.run(async_edge_tts_generate("Sample text", st.session_state.get('tts_voice', "en-US-AriaNeural")))
1035
+ st.write("Displaying Papers:")
1036
+ # (Optional: call functions to display papers)
1037
+ else:
1038
+ st.warning("No papers found.")
1039
+ response2 = client.messages.create(
1040
+ model="claude-3-sonnet-20240229",
1041
+ max_tokens=1000,
1042
+ messages=[{"role": "user", "content": q + '\n\nUse the reference papers below to answer the question by creating a python streamlit app.py and requirements.txt with working code.'}]
1043
+ )
1044
+ r2 = response2.content[0].text
1045
+ st.write("Claude's reply 🧠:")
1046
+ st.markdown(r2)
1047
+ elapsed = time.time() - start
1048
+ st.write(f"**Total Elapsed:** {elapsed:.2f} s")
1049
+ return result
1050
+
1051
+ # =============================================================================
1052
+ # ───────────── MAIN FUNCTION ─────────────
1053
+ # =============================================================================
1054
+ def main():
1055
+ st.markdown("### 🐙 GitCosmos - Cosmos & Git Hub")
1056
+ st.markdown(f"[🔗 Portal]({CosmosDBUrl})")
1057
+ if "chat_history" not in st.session_state:
1058
+ st.session_state.chat_history = []
1059
+ st.session_state.setdefault("current_container", None)
1060
+ if Key:
1061
+ st.session_state.primary_key = Key
1062
+ st.session_state.logged_in = True
1063
+ else:
1064
+ st.error("Missing Cosmos Key 🔑❌")
1065
+ return
1066
+ st.sidebar.markdown("## 🛠️ Item Management")
1067
+ if st.sidebar.button("New Item"):
1068
+ if st.session_state.get("current_container"):
1069
+ new_doc = new_item_default(st.session_state.current_container)
1070
+ if new_doc:
1071
+ st.session_state.doc_editor = json.dumps(new_doc, indent=2)
1072
+ else:
1073
+ st.warning("No container selected!")
1074
+ st.sidebar.text_input("New Field Key", key="new_field_key")
1075
+ st.sidebar.text_input("New Field Value", key="new_field_value")
1076
+ if st.sidebar.button("Add Field"):
1077
+ if "doc_editor" in st.session_state:
1078
+ add_field_to_doc()
1079
+ else:
1080
+ st.warning("No document loaded to add a field.")
1081
+ if st.sidebar.button("New AI Record"):
1082
+ if st.session_state.get("current_container"):
1083
+ new_ai_record(st.session_state.current_container)
1084
+ else:
1085
+ st.warning("No container selected!")
1086
+ if st.sidebar.button("New Links Record"):
1087
+ if st.session_state.get("current_container"):
1088
+ new_links_record(st.session_state.current_container)
1089
+ else:
1090
+ st.warning("No container selected!")
1091
+ st.sidebar.markdown("## 🔍 Vector Search")
1092
+ search_keyword = st.sidebar.text_input("Search Keyword", key="vector_search_keyword")
1093
+ if st.sidebar.button("Search"):
1094
+ if st.session_state.get("current_container"):
1095
+ results = vector_keyword_search(search_keyword, st.session_state.current_container)
1096
+ st.sidebar.write(f"Found {len(results)} results:")
1097
+ for res in results:
1098
+ st.sidebar.code(json.dumps(res, indent=2), language="json")
1099
+ else:
1100
+ st.warning("No container selected for search!")
1101
+ show_sidebar_data_grid()
1102
+ display_langchain_functions()
1103
+ try:
1104
+ if st.session_state.get("client") is None:
1105
+ st.session_state.client = CosmosClient(ENDPOINT, credential=st.session_state.primary_key)
1106
+ st.sidebar.title("🐙 Navigator")
1107
+ databases = get_databases(st.session_state.client)
1108
+ selected_db = st.sidebar.selectbox("🗃️ DB", databases)
1109
+ st.markdown(CosmosDBUrl)
1110
+ if selected_db != st.session_state.get("selected_database"):
1111
+ st.session_state.selected_database = selected_db
1112
+ st.session_state.selected_container = None
1113
+ st.session_state.selected_document_id = None
1114
+ st.session_state.current_index = 0
1115
+ st.rerun()
1116
+ if st.session_state.selected_database:
1117
+ database = st.session_state.client.get_database_client(st.session_state.selected_database)
1118
+ if "show_new_container_form" not in st.session_state:
1119
+ st.session_state.show_new_container_form = False
1120
+ if st.sidebar.button("🆕 New Container"):
1121
+ st.session_state.show_new_container_form = True
1122
+ if st.session_state.show_new_container_form:
1123
+ with st.sidebar.form("new_container_form"):
1124
+ new_container_id = st.text_input("Container ID", value="aiml-container")
1125
+ new_partition_key = st.text_input("Partition Key", value="/pk")
1126
+ new_analytical = st.checkbox("Enable Analytical Store", value=True)
1127
+ submitted = st.form_submit_button("Create Container")
1128
+ if submitted:
1129
+ analytical_ttl = -1 if new_analytical else None
1130
+ new_container = create_new_container(database, new_container_id, new_partition_key, analytical_storage_ttl=analytical_ttl)
1131
+ if new_container:
1132
+ st.success(f"Container '{new_container_id}' created.")
1133
+ default_id = generate_unique_id()
1134
+ default_item = {
1135
+ "id": default_id,
1136
+ "pk": default_id,
1137
+ "name": "Default Image Prompt",
1138
+ "prompt": "Enter your image prompt here",
1139
+ "timestamp": datetime.now().isoformat(),
1140
+ "type": "image_prompt"
1141
+ }
1142
+ insert_success, insert_message = insert_record(new_container, default_item)
1143
+ if insert_success:
1144
+ st.info("Default templated item created in new container.")
1145
+ else:
1146
+ st.error(f"Default item insertion error: {insert_message}")
1147
+ st.session_state.show_new_container_form = False
1148
+ st.session_state.new_container_created = new_container_id
1149
+ st.rerun()
1150
+ containers = get_containers(database)
1151
+ if "new_container_created" in st.session_state and st.session_state.new_container_created not in containers:
1152
+ containers.append(st.session_state.new_container_created)
1153
+ selected_container = st.sidebar.selectbox("📁 Container", containers)
1154
+ if selected_container != st.session_state.get("selected_container"):
1155
+ st.session_state.selected_container = selected_container
1156
+ st.session_state.selected_document_id = None
1157
+ st.session_state.current_index = 0
1158
+ st.rerun()
1159
+ if st.session_state.selected_container:
1160
+ container = database.get_container_client(st.session_state.selected_container)
1161
+ st.session_state.current_container = container
1162
+ if st.sidebar.button("📦 Export"):
1163
+ download_link = archive_current_container(st.session_state.selected_database, st.session_state.selected_container, st.session_state.client)
1164
+ if download_link.startswith('<a'):
1165
+ st.markdown(download_link, unsafe_allow_html=True)
1166
+ else:
1167
+ st.error(download_link)
1168
+ documents = get_documents(container)
1169
+ total_docs = len(documents)
1170
+ num_docs = st.slider("Docs", 1, 20, 1)
1171
+ documents_to_display = documents[:num_docs] if total_docs > num_docs else documents
1172
+ st.sidebar.info(f"Showing {len(documents_to_display)} docs")
1173
+ view_options = ['Markdown', 'Code', 'Run AI', 'Clone', 'New']
1174
+ selected_view = st.sidebar.selectbox("View", view_options, index=1)
1175
+ if selected_view == 'Markdown':
1176
+ st.markdown("#### 📄 Markdown")
1177
+ if documents:
1178
+ doc = documents[st.session_state.current_index]
1179
+ content = json.dumps(doc, indent=2)
1180
+ st.markdown(f"```json\n{content}\n```")
1181
+ col_prev, col_next = st.columns(2)
1182
+ with col_prev:
1183
+ if st.button("⬅️") and st.session_state.current_index > 0:
1184
+ st.session_state.current_index -= 1
1185
+ st.rerun()
1186
+ with col_next:
1187
+ if st.button("➡️") and st.session_state.current_index < total_docs - 1:
1188
+ st.session_state.current_index += 1
1189
+ st.rerun()
1190
+ elif selected_view == 'Code':
1191
+ st.markdown("#### 💻 Code Editor")
1192
+ if documents:
1193
+ doc = documents[st.session_state.current_index]
1194
+ if "doc_editor" not in st.session_state:
1195
+ st.session_state.doc_editor = json.dumps(doc, indent=2)
1196
+ edited = st.text_area("Edit JSON", value=st.session_state.doc_editor, height=300, key="doc_editor", on_change=lambda: auto_save_edit())
1197
+ col_prev, col_next = st.columns(2)
1198
+ with col_prev:
1199
+ if st.button("⬅️") and st.session_state.current_index > 0:
1200
+ st.session_state.current_index -= 1
1201
+ st.rerun()
1202
+ with col_next:
1203
+ if st.button("➡️") and st.session_state.current_index < total_docs - 1:
1204
+ st.session_state.current_index += 1
1205
+ st.rerun()
1206
+ col_save, col_delete = st.columns(2)
1207
+ with col_save:
1208
+ if st.button("💾 Save", key=f'save_{st.session_state.current_index}'):
1209
+ try:
1210
+ updated_doc = json.loads(edited)
1211
+ container.upsert_item(body=updated_doc)
1212
+ st.success(f"Saved {updated_doc['id']}")
1213
+ st.rerun()
1214
+ except Exception as e:
1215
+ st.error(f"Save err: {str(e)}")
1216
+ with col_delete:
1217
+ if st.button("🗑️ Delete", key=f'delete_{st.session_state.current_index}'):
1218
+ try:
1219
+ current_doc = json.loads(edited)
1220
+ success, message = delete_record(container, current_doc)
1221
+ if success:
1222
+ st.success(message)
1223
+ st.rerun()
1224
+ else:
1225
+ st.error(message)
1226
+ except Exception as e:
1227
+ st.error(f"Delete err: {str(e)}")
1228
+ if "delete_log" in st.session_state and st.session_state.delete_log:
1229
+ st.subheader("Delete Log")
1230
+ for log_entry in st.session_state.delete_log[-5:]:
1231
+ st.write(log_entry)
1232
+ elif selected_view == 'Run AI':
1233
+ st.markdown("#### 🤖 Run AI")
1234
+ ai_query = st.text_area("Enter your query for ArXiv search:", key="arxiv_query", height=100)
1235
+ if st.button("Send"):
1236
+ st.session_state.last_query = ai_query
1237
+ perform_ai_lookup(ai_query, vocal_summary=True, extended_refs=False, titles_summary=True, full_audio=True, useArxiv=True, useArxivAudio=False)
1238
+ elif selected_view == 'Clone':
1239
+ st.markdown("#### 📄 Clone")
1240
+ if documents:
1241
+ doc = documents[st.session_state.current_index]
1242
+ st.markdown(f"Original ID: {doc.get('id', '')}")
1243
+ new_id = st.text_input("New ID", value=generate_unique_id(), key='new_clone_id')
1244
+ new_name = st.text_input("New Name", value=f"Clone_{new_id[:8]}", key='new_clone_name')
1245
+ new_doc = {'id': new_id, 'pk': new_id, 'name': new_name, **{k: v for k, v in doc.items() if k not in ['id', 'name', 'pk', '_rid', '_self', '_etag', '_attachments', '_ts']}}
1246
+ doc_str = st.text_area("Edit JSON", value=json.dumps(new_doc, indent=2), height=300, key='clone_preview')
1247
+ col1, col2 = st.columns(2)
1248
+ with col1:
1249
+ if st.button("🔄 Regenerate"):
1250
+ new_id = generate_unique_id()
1251
+ st.session_state.new_clone_id = new_id
1252
+ st.rerun()
1253
+ with col2:
1254
+ if st.button("💾 Save Clone"):
1255
+ try:
1256
+ final_doc = json.loads(doc_str)
1257
+ for field in ['_rid', '_self', '_etag', '_attachments', '_ts']:
1258
+ final_doc.pop(field, None)
1259
+ container.create_item(body=final_doc)
1260
+ st.success(f"Cloned {final_doc['id']}")
1261
+ st.rerun()
1262
+ except Exception as e:
1263
+ st.error(f"Clone err: {str(e)}")
1264
+ col_prev, col_next = st.columns(2)
1265
+ with col_prev:
1266
+ if st.button("⬅️") and st.session_state.current_index > 0:
1267
+ st.session_state.current_index -= 1
1268
+ st.rerun()
1269
+ with col_next:
1270
+ if st.button("➡️") and st.session_state.current_index < total_docs - 1:
1271
+ st.session_state.current_index += 1
1272
+ st.rerun()
1273
+ elif selected_view == 'New':
1274
+ st.markdown("#### ➕ New Doc")
1275
+ if st.button("🤖 Auto-Gen"):
1276
+ auto_id = generate_unique_id()
+ auto_doc = {
+ "id": auto_id,
+ "pk": auto_id,
1279
+ "name": f"Auto {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}",
1280
+ "content": "Auto-generated record.",
1281
+ "timestamp": datetime.now().isoformat()
1282
+ }
1283
+ success, message = insert_record(container, auto_doc)
1284
+ if success:
1285
+ st.success(message)
1286
+ st.rerun()
1287
+ else:
1288
+ st.error(message)
1289
+ else:
1290
+ new_id = st.text_input("ID", value=generate_unique_id(), key='new_id')
1291
+ default_doc = {
1292
+ "id": new_id,
1293
+ "pk": new_id,
1294
+ "name": "New Doc",
1295
+ "content": "",
1296
+ "timestamp": datetime.now().isoformat()
1297
+ }
1298
+ new_doc_str = st.text_area("JSON", value=json.dumps(default_doc, indent=2), height=300)
1299
+ if st.button("➕ Create"):
1300
+ try:
1301
+ new_doc = json.loads(new_doc_str)
1303
+ new_doc['id'] = new_id
1304
+ new_doc['pk'] = new_id
1305
+ success, message = insert_record(container, new_doc)
1306
+ if success:
1307
+ st.success(f"Created {new_doc['id']}")
1308
+ st.rerun()
1309
+ else:
1310
+ st.error(message)
1311
+ except Exception as e:
1312
+ st.error(f"Create err: {str(e)}")
1313
+ st.subheader(f"📊 {st.session_state.selected_container}")
1314
+ if documents_to_display:
1315
+ df = pd.DataFrame(documents_to_display)
1316
+ st.dataframe(df)
1317
+ else:
1318
+ st.info("No docs.")
1319
+ update_file_management_section()
1320
+ except exceptions.CosmosHttpResponseError as e:
1321
+ st.error(f"Cosmos error: {str(e)} 🚨")
1322
+ except Exception as e:
1323
+ st.error(f"Error: {str(e)} 😱")
1324
+ if st.session_state.logged_in and st.sidebar.button("🚪 Logout"):
1325
+ st.markdown("#### 🚪 Logout")
1326
+ st.session_state.logged_in = False
1327
+ st.session_state.selected_records = []
1328
+ st.session_state.client = None
1329
+ st.session_state.selected_database = None
1330
+ st.session_state.selected_container = None
1331
+ st.session_state.selected_document_id = None
1332
+ st.session_state.current_index = 0
1333
+ st.rerun()
1334
+ show_sidebar_data_grid()
1335
+
1336
+ if __name__ == "__main__":
1337
+ main()