import json
import logging
import os

import gradio as gr
from dotenv import load_dotenv

# Import analysis pipeline helpers
from analysis_utils import (check_cache_and_download, check_endpoint_status,
                            fetch_and_validate_code, generate_and_parse_tldr,
                            generate_detailed_report, generate_summary_report,
                            render_data_details_markdown, render_tldr_markdown,
                            upload_results)
# Import general utils
from utils import list_cached_spaces
# LLM interface calls, prompt formatting, and report cache I/O are all wrapped
# by the analysis_utils helpers imported above.
# Configure logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Load environment variables from .env file
# This is important to ensure API keys and endpoints are loaded before use
load_dotenv()

# --- Constants ---
HF_TOKEN = os.getenv("HF_TOKEN")
ENDPOINT_NAME = "qwen2-5-coder-32b-instruct-pmf"
DATASET_ID = "yjernite/spaces-privacy-reports"
CACHE_INFO_MSG = (
    "\n\n*(Report retrieved from cache)*"  # Still needed for dropdown cache hit message
)
DEFAULT_SELECTION = "HuggingFaceTB/SmolVLM2"

# TRUNCATION_WARNING now defined and used within analysis_utils
# TRUNCATION_WARNING = """**⚠️ Warning:** The input data (code and/or prior analysis) was too long for the AI model's context limit and had to be truncated. The analysis below may be incomplete or based on partial information.\n\n---\n\n"""

ERROR_503_USER_MESSAGE = """It appears that the analysis model endpoint is currently down or starting up.

You have a few options:

* **Wait & Retry:** Try clicking "Get Space Report" again in ~3-5 minutes. Endpoints often scale down to save resources and take a short time to wake up.
* **Select Cached Report:** Use the dropdown above to view a report for a Space that has already been analyzed.
* **Request Analysis:** If the error persists, please [open an issue or discussion](https://huggingface.co/spaces/yjernite/space-privacy/discussions) in the Space's Community tab requesting analysis for your target Space ID. We can run the job manually when the endpoint is available.
"""


def _run_live_analysis(space_id: str, progress=gr.Progress(track_tqdm=True)):
    """
    Performs the full analysis pipeline using helper functions from analysis_utils.

    Yields tuples of Gradio updates.
    """
    total_steps = 9  # includes the TLDR generation step
    current_step = 0
    summary_report = ""
    privacy_report = ""
    tldr_data = None
    tldr_markdown_content = "*TLDR loading...*"
    data_details_content = (
        "*Data details loading...*"  # Placeholder for the data details panel
    )
    # Initial message before first step
    tldr_status_message = "*Starting analysis...*"

    # --- Step 1: Check Cache ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Checking cache..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(value="*Checking cache...*", visible=True),
        gr.update(value="Checking cache for existing reports...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    cache_result = check_cache_and_download(space_id, DATASET_ID, HF_TOKEN)
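    # cache_result is a dict keyed by "status" ("cache_hit", "cache_miss", or
    # "cache_error"), with the report fields present on a hit -- contract assumed
    # from the branches handled below.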
    if cache_result["status"] == "cache_hit":
        progress(total_steps / total_steps, desc="Complete (from cache)")
        # Try to parse and render TLDR from cache; default both renders so a
        # cache hit without a TLDR cannot raise a NameError
        tldr_json_str = cache_result.get("tldr_json_str")
        rendered_tldr = "*TLDR not found in cache.*"
        rendered_data_details = "*Data details not found in cache.*"
        if tldr_json_str:
            try:
                cached_tldr_data = json.loads(tldr_json_str)
                # Render both parts
                rendered_tldr = render_tldr_markdown(cached_tldr_data, space_id)
                rendered_data_details = render_data_details_markdown(cached_tldr_data)
            except Exception as parse_err:
                logging.warning(
                    f"Failed to parse cached TLDR JSON for {space_id}: {parse_err}"
                )
                rendered_tldr = "*Error parsing cached TLDR.*"
                rendered_data_details = (
                    "*Could not load data details due to parsing error.*"
                )
        yield (
            gr.update(value=rendered_tldr, visible=True),
            gr.update(value=rendered_data_details, visible=True),
            gr.update(value=cache_result["summary"], visible=True),
            gr.update(value=cache_result["privacy"], visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        return  # End generation successfully from cache
    elif cache_result["status"] == "cache_error":
        # Display final error in TLDR field
        tldr_status_message = (
            f"*Cache download failed. {cache_result.get('ui_message', '')}*"
        )
        data_details_content = "*Data details unavailable due to cache error.*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value=cache_result["ui_message"], visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        # Still continue to live analysis if cache download fails
    elif cache_result["status"] == "cache_miss":
        tldr_status_message = f"*{progress_desc} - Cache miss.*"  # Update status
        data_details_content = "*Generating report...*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value="Cache miss. Starting live analysis...", visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
    elif "error_message" in cache_result:
        # Display final error in TLDR field
        tldr_status_message = (
            f"*Cache check failed. {cache_result.get('error_message', '')}*"
        )
        data_details_content = "*Data details unavailable due to cache error.*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(
                value=f"Cache check failed: {cache_result.get('error_message', 'Unknown error')}. Proceeding with live analysis...",
                visible=True,
            ),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        # Still continue if cache check fails
    # --- Step 2: Check Endpoint Status ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Checking endpoint..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Checking analysis model endpoint status...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    endpoint_result = check_endpoint_status(
        ENDPOINT_NAME, HF_TOKEN, ERROR_503_USER_MESSAGE
    )
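    # endpoint_result reports "status" == "error" (with a user-facing "ui_message")
    # when the endpoint is down or still scaling up; any other status falls through
    # to the next step -- assumed from the handling below.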
    if endpoint_result["status"] == "error":
        progress(total_steps / total_steps, desc="Endpoint Error")
        # Display final error in TLDR field
        tldr_markdown_content = endpoint_result["ui_message"]
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )
        return
    # --- Step 3: Fetch Code Files ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Fetching code..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Fetching code files from the Space...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    code_result = fetch_and_validate_code(space_id)
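    # code_result carries "status" plus a "code_files" payload on success and a
    # "ui_message" on failure -- shape assumed from the usage just below.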
    if code_result["status"] == "error":
        progress(total_steps / total_steps, desc="Code Fetch Error")
        # Display final error in TLDR field
        tldr_markdown_content = (
            f"**Error:** {code_result.get('ui_message', 'Failed to fetch code.')}"
        )
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="Analysis Canceled", visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True, open=False),
        )
        return
    code_files = code_result["code_files"]

    # --- Step 4: Generate DETAILED Privacy Report (LLM Call 1) ---
    current_step += 1
    progress_desc = (
        f"Step {current_step}/{total_steps}: Generating privacy report (AI Call 1)..."
    )
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(
            value="Generating detailed privacy report (AI Call 1)...", visible=True
        ),
        gr.update(value="Generating detailed privacy report via AI...", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )
    privacy_result = generate_detailed_report(
        space_id, code_files, ERROR_503_USER_MESSAGE
    )
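    # privacy_result: on success "report" holds the generated markdown; on error a
    # "ui_message" is surfaced (ERROR_503_USER_MESSAGE is passed in for the 503
    # case) -- contract assumed from the handling below.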
    if privacy_result["status"] == "error":
        progress(total_steps / total_steps, desc="Privacy Report Error")
        # Display final error in TLDR field
        tldr_markdown_content = f"**Error:** {privacy_result.get('ui_message', 'Failed during detailed report generation.')}"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )
        return
    privacy_report = privacy_result["report"]

    # Update UI with successful detailed report
    yield (
        gr.update(value=tldr_status_message, visible=True),  # Still show progress
        gr.update(),
        gr.update(
            value="Detailed privacy report generated. Proceeding...", visible=True
        ),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )

    # --- Step 5: Fetch Model Descriptions (Placeholder/Optional) ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Extracting model info..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    logging.info(progress_desc + " (Placeholder)")
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Extracting model info...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    # model_ids = extract_hf_model_ids(code_files)  # utils function not imported
    # model_descriptions = get_model_descriptions(model_ids)  # utils function not imported
    # Add model_descriptions to context if needed for summary prompt later

    # --- Step 6: Generate Summary + Highlights Report (LLM Call 2) ---
    current_step += 1
    progress_desc = (
        f"Step {current_step}/{total_steps}: Generating summary (AI Call 2)..."
    )
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Generating summary & highlights (AI Call 2)...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    summary_result = generate_summary_report(
        space_id, code_files, privacy_report, ERROR_503_USER_MESSAGE
    )
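    # summary_result distinguishes a 503 during summary generation
    # ("error_503_summary") from other summary failures ("error_summary");
    # only "success" carries a "report".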
    if (
        summary_result["status"] == "error_503_summary"
        or summary_result["status"] == "error_summary"
    ):
        progress(total_steps / total_steps, desc="Summary Report Error")
        # Display error in TLDR, show partial results below
        tldr_markdown_content = f"**Error:** {summary_result.get('ui_message', 'Failed during summary generation.')}"
        data_details_content = "*Data details may be incomplete.*"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value=summary_result["ui_message"], visible=True),
            gr.update(value=privacy_report, visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=True),
        )
        return
    elif summary_result["status"] != "success":
        progress(total_steps / total_steps, desc="Summary Report Error")
        # Display error in TLDR, show partial results below
        tldr_markdown_content = f"**Error:** Unexpected error generating summary: {summary_result.get('ui_message', 'Unknown')}"
        data_details_content = "*Data details unavailable.*"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(
                value=f"Unexpected error generating summary: {summary_result.get('ui_message', 'Unknown')}",
                visible=True,
            ),
            gr.update(value=privacy_report, visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=True),
        )
        return
    summary_report = summary_result["report"]

    # Update UI with successful summary report before TLDR generation
    tldr_status_message = (
        f"*{progress_desc} - Success. Generating TLDR...*"  # Update status
    )
    data_details_content = "*Generating data details...*"
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )

    # --- Step 7: Generate TLDR ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Generating TLDR summary..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    tldr_data = None  # Reset tldr_data before attempt
    try:
        # Call the combined helper function from analysis_utils
        tldr_data = generate_and_parse_tldr(privacy_report, summary_report)
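        # Expected contract: a dict of parsed TLDR fields on success, None on
        # failure (assumed from the fallback handling below).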
        if tldr_data:
            logging.info(f"Successfully generated and parsed TLDR for {space_id}.")
            tldr_markdown_content = render_tldr_markdown(tldr_data, space_id)
            data_details_content = render_data_details_markdown(tldr_data)
        else:
            logging.warning(
                f"Failed to generate or parse TLDR for {space_id}. Proceeding without it."
            )
            tldr_markdown_content = "*TLDR generation failed.*"
            data_details_content = "*Data details generation failed.*"
    except Exception as tldr_err:
        # Defensive catch in case generate_and_parse_tldr raises instead of
        # returning None on failure
        logging.error(
            f"Unexpected error during TLDR generation step call for {space_id}: {tldr_err}"
        )
        tldr_markdown_content = "*Error during TLDR generation step.*"
        data_details_content = "*Error generating data details.*"
        tldr_data = None  # Ensure it's None on error

    # Update UI including the generated (or failed) TLDR before upload
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(),
        gr.update(),
        gr.update(visible=True, open=False),
        gr.update(),
        gr.update(),
    )

    # --- Step 8: Upload to Cache ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Uploading to cache..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"  # Display final action in TLDR field
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(),
        gr.update(value="Uploading results to cache...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    upload_needed = (
        cache_result["status"] != "cache_hit"
        and cache_result["status"] != "cache_error"
    )
    if upload_needed:
        # Persist all three artifacts, including the parsed TLDR JSON
        upload_result = upload_results(
            space_id,
            summary_report,
            privacy_report,
            DATASET_ID,
            HF_TOKEN,
            tldr_json_data=tldr_data,
        )
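        # upload_results returns a "status" of "error" or "skipped" on the paths
        # handled below; any other status is treated as success (assumed contract).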
        if upload_result["status"] == "error":
            logging.error(
                f"Cache upload failed: {upload_result.get('message', 'Unknown error')}"
            )
            # Non-critical, don't stop the UI, just log
        elif upload_result["status"] == "skipped":
            logging.info(f"Cache upload skipped: {upload_result.get('reason', '')}")
    else:
        logging.info(
            "Skipping cache upload as results were loaded from cache or cache check failed."
        )

    # Refresh the full UI (including the final TLDR) after the upload attempt
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    # --- Step 9: Final Update ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Analysis Complete!"
    progress(current_step / total_steps, desc=progress_desc)
    logging.info(f"Analysis complete for {space_id}.")

    # Yield the final state again to ensure the UI is correct after any upload
    # messages, displaying the final TLDR and data details
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )


# --- Input Handling Wrapper ---
def get_space_report_wrapper(
    selected_cached_space: str | None,
    new_space_id: str | None,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Wrapper that decides whether to fetch a cached report or run live analysis,
    based on the Dropdown and Textbox inputs.

    Yields tuples of Gradio updates.
    """
    target_space_id = None
    source = "new"  # Assume new input unless dropdown is chosen

    # Prioritize new_space_id if provided
    if new_space_id and new_space_id.strip():
        target_space_id = new_space_id.strip()
        if target_space_id == selected_cached_space:
            source = "dropdown_match"  # User typed an ID that exists in the dropdown
        else:
            source = "new"
    elif selected_cached_space:
        target_space_id = selected_cached_space
        source = "dropdown"

    if not target_space_id:
        # Yield 7 updates
        yield (
            gr.update(value="*Please provide a Space ID.*", visible=True),
            gr.update(value="", visible=False),
            gr.update(
                value="Please select an existing report or enter a new Space ID.",
                visible=True,
            ),
            gr.update(value="", visible=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=False),
        )
        return

    if "/" not in target_space_id:
        # Yield 7 updates
        yield (
            gr.update(value="*Invalid Space ID format.*", visible=True),
            gr.update(value="", visible=False),
            gr.update(
                value=f"Invalid Space ID format: '{target_space_id}'. Use 'owner/name'.",
                visible=True,
            ),
            gr.update(value="", visible=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=False),
        )
        return
    logging.info(f"Request received for: '{target_space_id}' (Source: {source})")

    if source == "dropdown":
        progress(0.1, desc="Fetching selected cached report...")
        # Yield 7 updates (initial placeholder)
        yield (
            gr.update(value="*Loading TLDR...*", visible=True),
            gr.update(value="*Loading data details...*", visible=True),
            gr.update(value="Fetching selected cached report...", visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        cache_result = check_cache_and_download(target_space_id, DATASET_ID, HF_TOKEN)
        if cache_result["status"] == "cache_hit":
            logging.info(
                f"Successfully displayed cached reports for selected '{target_space_id}'."
            )
            progress(1.0, desc="Complete (from cache)")
            # The cache-hit message is appended to the report text within the helper,
            # so the cached text can be used directly here.
            # Parse and render the TLDR if available, defaulting both renders so a
            # missing TLDR cannot raise a NameError
            tldr_json_str = cache_result.get("tldr_json_str")
            rendered_tldr = "*TLDR not found in cache.*"
            rendered_data_details = "*Data details not found in cache.*"
            if tldr_json_str:
                try:
                    cached_tldr_data = json.loads(tldr_json_str)
                    rendered_tldr = render_tldr_markdown(
                        cached_tldr_data, target_space_id
                    )
                    rendered_data_details = render_data_details_markdown(
                        cached_tldr_data
                    )
                except Exception as parse_err:
                    logging.warning(
                        f"Failed to parse cached TLDR JSON for {target_space_id}: {parse_err}"
                    )
                    rendered_tldr = "*Error parsing cached TLDR.*"
                    rendered_data_details = (
                        "*Could not load data details due to parsing error.*"
                    )
            yield (
                gr.update(value=rendered_tldr, visible=True),
                gr.update(value=rendered_data_details, visible=True),
                gr.update(value=cache_result["summary"], visible=True),
                gr.update(value=cache_result["privacy"], visible=True),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
            )
        else:  # Cache miss or error for a dropdown selection is an error state
            error_msg = cache_result.get(
                "ui_message",
                f"Failed to find or download cached report for selected '{target_space_id}'.",
            )
            logging.error(error_msg)
            progress(1.0, desc="Error")
            yield (
                gr.update(value="*TLDR load failed.*", visible=True),
                gr.update(value="*Data details load failed.*", visible=True),
                gr.update(value=error_msg, visible=True),
                gr.update(value="", visible=False),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
                gr.update(visible=False),
            )
        return  # Stop after handling dropdown source
    # --- Live Analysis or Check Cache for New Input ---
    # If the ID came from the textbox OR was a dropdown match, run the full live
    # analysis pipeline, which includes its own cache check at the beginning.
    else:  # source == "new" or source == "dropdown_match"
        # Relay the intermediate updates from the generator
        for update_tuple in _run_live_analysis(target_space_id, progress):
            yield update_tuple


# --- Load Initial Data Function (for demo.load) ---
def load_cached_list():
    """Fetches the list of cached spaces and determines the default selection."""
    logging.info("Running demo.load: Fetching list of cached spaces...")
    # Use os.getenv here directly, as HF_TOKEN might be loaded after initial import
    token = os.getenv("HF_TOKEN")
    cached_list = list_cached_spaces(DATASET_ID, token)
    default_value = DEFAULT_SELECTION if DEFAULT_SELECTION in cached_list else None
    if not cached_list:
        logging.warning(
            "No cached spaces found or failed to fetch list during demo.load."
        )
    # Return an update object for the dropdown using gr.update()
    return gr.update(choices=cached_list, value=default_value)


# --- Gradio Interface Definition ---
# Use HTML/CSS for centering the title
TITLE = "<div style='text-align: center;'><h1>🤗 Space Privacy Analyzer 🕵️</h1></div>\n<div style='text-align: center;'><h4>Automatic code and data transfer review powered by <a href='https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct' target='_blank'>Qwen2.5-Coder-32B-Instruct</a></h4></div>"

DESCRIPTION = """
### Hugging Face 🤗 Space - Privacy & Data Check

[Hugging Face 🤗 Spaces](https://huggingface.co/spaces) offer a convenient way to build and share code demos online, especially for leveraging and exploring AI systems.

In most cases, the code for these demos is open source, which provides a unique opportunity to **examine how privacy and data transfers are managed**.

This demo leverages a code analysis model ([Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)) to help explore privacy questions in two steps:

1. Obtain and **parse the code** of a Space to identify:
    - data inputs,
    - AI model use,
    - API calls,
    - data transfers.
2. Generate a summary of the Space's function and highlight **key privacy points**.

Use the dropdown menu below to explore the [reports generated for some popular Spaces](https://huggingface.co/datasets/yjernite/spaces-privacy-reports/tree/main), or enter a new Space ID to query your own 👇

*Please note the following limitations:*

- *The model may miss important details in the code, especially when it leverages Docker files or external libraries.*
- *This app uses the base Qwen Coder model without specific adaptation to the task. We'd love to discuss how to improve this; if you want to participate, [feel free to open a discussion!](https://huggingface.co/spaces/yjernite/space-privacy/discussions)*
"""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(TITLE)  # Renders the centered HTML title

    with gr.Row():
        with gr.Column(scale=1):  # Left column for inputs
            description_accordion = gr.Accordion(
                "What Privacy Questions do 🤗 Spaces Raise? Click here for Demo Description 👇",
                open=False,
                visible=True,
            )
            with description_accordion:
                gr.Markdown(DESCRIPTION)
            cached_spaces_dropdown = gr.Dropdown(
                label="Select Existing Report",
                info="Select a Space whose report has been previously generated.",
                choices=[],  # Initialize empty; populated by demo.load
                value=None,
            )
            space_id_input = gr.Textbox(
                label="Or Enter New Space ID",
                placeholder="owner/space-name",
                info="Enter a new Space ID to analyze (takes precedence over selection).",
            )
            analyze_button = gr.Button("Get Space Report", variant="primary", scale=1)
        with gr.Column(scale=1):  # Right column for outputs
            # TLDR heading and placeholder, always visible
            gr.Markdown("### Privacy TLDR 🕵️\n", visible=True)
            tldr_markdown = gr.Markdown(
                "*Select or enter a Space ID to get started.*", visible=True
            )
            # Accordions are visible but closed by default
            data_types_accordion = gr.Accordion(
                "Data Types at Play", open=False, visible=True
            )
            with data_types_accordion:
                data_details_markdown = gr.Markdown("*Data details will appear here.*")
            summary_accordion = gr.Accordion(
                "Summary & Privacy Highlights",
                open=False,
                visible=True,
            )
            privacy_accordion = gr.Accordion(
                "Detailed Privacy Analysis Report",
                open=False,
                visible=True,
            )
            with summary_accordion:
                summary_markdown = gr.Markdown(
                    "Enter or select a Space ID and click Get Report.",
                    show_copy_button=True,
                )
            with privacy_accordion:
                privacy_markdown = gr.Markdown(
                    "Detailed report will appear here.", show_copy_button=True
                )

    # --- Event Listeners ---
    # Load event to populate the dropdown when the UI loads for a user session
    demo.load(fn=load_cached_list, inputs=None, outputs=cached_spaces_dropdown)

    # Button click event; this outputs list fixes the 7-tuple ordering used by
    # every yield in the handlers above
    analyze_button.click(
        fn=get_space_report_wrapper,
        inputs=[cached_spaces_dropdown, space_id_input],
        outputs=[
            tldr_markdown,
            data_details_markdown,
            summary_markdown,
            privacy_markdown,
            data_types_accordion,
            summary_accordion,
            privacy_accordion,
        ],
        show_progress="full",
    )

# --- Application Entry Point ---
if __name__ == "__main__":
    logging.info("Starting Gradio application...")
    demo.launch()