import json
import logging
import os

import gradio as gr
from dotenv import load_dotenv

# Import analysis pipeline helpers
from analysis_utils import (check_cache_and_download, check_endpoint_status,
                            fetch_and_validate_code, generate_and_parse_tldr,
                            generate_detailed_report, generate_summary_report,
                            render_data_details_markdown, render_tldr_markdown,
                            upload_results)
# Import general utils
from utils import list_cached_spaces
# LLM interface calls, prompt formatting, and report cache I/O are all wrapped
# by the analysis_utils helpers imported above.
# Configure logging
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Load environment variables from .env file
# This is important to ensure API keys and endpoints are loaded before use
load_dotenv()

# --- Constants ---
HF_TOKEN = os.getenv("HF_TOKEN")
ENDPOINT_NAME = "qwen2-5-coder-32b-instruct-pmf"
DATASET_ID = "yjernite/spaces-privacy-reports"
CACHE_INFO_MSG = (
    "\n\n*(Report retrieved from cache)*"  # Still needed for dropdown cache hit message
)
DEFAULT_SELECTION = "HuggingFaceTB/SmolVLM2"

# TRUNCATION_WARNING now defined and used within analysis_utils
# TRUNCATION_WARNING = """**⚠️ Warning:** The input data (code and/or prior analysis) was too long for the AI model's context limit and had to be truncated. The analysis below may be incomplete or based on partial information.\n\n---\n\n"""

ERROR_503_USER_MESSAGE = """It appears that the analysis model endpoint is currently down or starting up.

You have a few options:

* **Wait & Retry:** Try clicking "Get Space Report" again in ~3-5 minutes. Endpoints often scale down to save resources and take a short time to wake up.
* **Select Cached Report:** Use the dropdown above to view a report for a Space that has already been analyzed.
* **Request Analysis:** If the error persists, please [open an issue or discussion](https://huggingface.co/spaces/yjernite/space-privacy/discussions) in the Space's Community tab requesting analysis for your target Space ID. We can run the job manually when the endpoint is available.
"""


def _run_live_analysis(space_id: str, progress=gr.Progress(track_tqdm=True)):
    """
    Performs the full analysis pipeline using helper functions from analysis_utils.

    Yields tuples of Gradio updates.
    """
    total_steps = 9  # includes the TLDR generation step
    current_step = 0
    summary_report = ""
    privacy_report = ""
    tldr_data = None
    tldr_markdown_content = "*TLDR loading...*"
    data_details_content = (
        "*Data details loading...*"  # Placeholder for the data details panel
    )
    # Initial message before first step
    tldr_status_message = "*Starting analysis...*"

    # --- Step 1: Check Cache ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Checking cache..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(value="*Checking cache...*", visible=True),
        gr.update(value="Checking cache for existing reports...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    cache_result = check_cache_and_download(space_id, DATASET_ID, HF_TOKEN)
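    # cache_result is a dict keyed by "status" ("cache_hit", "cache_miss", or
    # "cache_error"), with the report fields present on a hit -- contract assumed
    # from the branches handled below.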
    if cache_result["status"] == "cache_hit":
        progress(total_steps / total_steps, desc="Complete (from cache)")
        # Try to parse and render TLDR from cache; default both renders so a
        # cache hit without a TLDR cannot raise a NameError
        tldr_json_str = cache_result.get("tldr_json_str")
        rendered_tldr = "*TLDR not found in cache.*"
        rendered_data_details = "*Data details not found in cache.*"
        if tldr_json_str:
            try:
                cached_tldr_data = json.loads(tldr_json_str)
                # Render both parts
                rendered_tldr = render_tldr_markdown(cached_tldr_data, space_id)
                rendered_data_details = render_data_details_markdown(cached_tldr_data)
            except Exception as parse_err:
                logging.warning(
                    f"Failed to parse cached TLDR JSON for {space_id}: {parse_err}"
                )
                rendered_tldr = "*Error parsing cached TLDR.*"
                rendered_data_details = (
                    "*Could not load data details due to parsing error.*"
                )
        yield (
            gr.update(value=rendered_tldr, visible=True),
            gr.update(value=rendered_data_details, visible=True),
            gr.update(value=cache_result["summary"], visible=True),
            gr.update(value=cache_result["privacy"], visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        return  # End generation successfully from cache
    elif cache_result["status"] == "cache_error":
        # Display final error in TLDR field
        tldr_status_message = (
            f"*Cache download failed. {cache_result.get('ui_message', '')}*"
        )
        data_details_content = "*Data details unavailable due to cache error.*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value=cache_result["ui_message"], visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        # Still continue to live analysis if cache download fails
    elif cache_result["status"] == "cache_miss":
        tldr_status_message = f"*{progress_desc} - Cache miss.*"  # Update status
        data_details_content = "*Generating report...*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value="Cache miss. Starting live analysis...", visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
    elif "error_message" in cache_result:
        # Display final error in TLDR field
        tldr_status_message = (
            f"*Cache check failed. {cache_result.get('error_message', '')}*"
        )
        data_details_content = "*Data details unavailable due to cache error.*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(
                value=f"Cache check failed: {cache_result.get('error_message', 'Unknown error')}. Proceeding with live analysis...",
                visible=True,
            ),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        # Still continue if cache check fails
    # --- Step 2: Check Endpoint Status ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Checking endpoint..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Checking analysis model endpoint status...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    endpoint_result = check_endpoint_status(
        ENDPOINT_NAME, HF_TOKEN, ERROR_503_USER_MESSAGE
    )
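    # endpoint_result reports "status" == "error" (with a user-facing "ui_message")
    # when the endpoint is down or still scaling up; any other status falls through
    # to the next step -- assumed from the handling below.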
    if endpoint_result["status"] == "error":
        progress(total_steps / total_steps, desc="Endpoint Error")
        # Display final error in TLDR field
        tldr_markdown_content = endpoint_result["ui_message"]
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )
        return
    # --- Step 3: Fetch Code Files ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Fetching code..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Fetching code files from the Space...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    code_result = fetch_and_validate_code(space_id)
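    # code_result carries "status" plus a "code_files" payload on success and a
    # "ui_message" on failure -- shape assumed from the usage just below.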
    if code_result["status"] == "error":
        progress(total_steps / total_steps, desc="Code Fetch Error")
        # Display final error in TLDR field
        tldr_markdown_content = (
            f"**Error:** {code_result.get('ui_message', 'Failed to fetch code.')}"
        )
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="Analysis Canceled", visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True, open=False),
        )
        return
    code_files = code_result["code_files"]

    # --- Step 4: Generate DETAILED Privacy Report (LLM Call 1) ---
    current_step += 1
    progress_desc = (
        f"Step {current_step}/{total_steps}: Generating privacy report (AI Call 1)..."
    )
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(
            value="Generating detailed privacy report (AI Call 1)...", visible=True
        ),
        gr.update(value="Generating detailed privacy report via AI...", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )
    privacy_result = generate_detailed_report(
        space_id, code_files, ERROR_503_USER_MESSAGE
    )
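    # privacy_result: on success "report" holds the generated markdown; on error a
    # "ui_message" is surfaced (ERROR_503_USER_MESSAGE is passed in for the 503
    # case) -- contract assumed from the handling below.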
    if privacy_result["status"] == "error":
        progress(total_steps / total_steps, desc="Privacy Report Error")
        # Display final error in TLDR field
        tldr_markdown_content = f"**Error:** {privacy_result.get('ui_message', 'Failed during detailed report generation.')}"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )
        return
    privacy_report = privacy_result["report"]

    # Update UI with successful detailed report
    yield (
        gr.update(value=tldr_status_message, visible=True),  # Still show progress
        gr.update(),
        gr.update(
            value="Detailed privacy report generated. Proceeding...", visible=True
        ),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )

    # --- Step 5: Fetch Model Descriptions (Placeholder/Optional) ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Extracting model info..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    logging.info(progress_desc + " (Placeholder)")
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Extracting model info...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    # model_ids = extract_hf_model_ids(code_files)  # utils function not imported
    # model_descriptions = get_model_descriptions(model_ids)  # utils function not imported
    # Add model_descriptions to context if needed for summary prompt later

    # --- Step 6: Generate Summary + Highlights Report (LLM Call 2) ---
    current_step += 1
    progress_desc = (
        f"Step {current_step}/{total_steps}: Generating summary (AI Call 2)..."
    )
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Generating summary & highlights (AI Call 2)...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    summary_result = generate_summary_report(
        space_id, code_files, privacy_report, ERROR_503_USER_MESSAGE
    )
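    # summary_result distinguishes a 503 during summary generation
    # ("error_503_summary") from other summary failures ("error_summary");
    # only "success" carries a "report".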
    if (
        summary_result["status"] == "error_503_summary"
        or summary_result["status"] == "error_summary"
    ):
        progress(total_steps / total_steps, desc="Summary Report Error")
        # Display error in TLDR, show partial results below
        tldr_markdown_content = f"**Error:** {summary_result.get('ui_message', 'Failed during summary generation.')}"
        data_details_content = "*Data details may be incomplete.*"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value=summary_result["ui_message"], visible=True),
            gr.update(value=privacy_report, visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=True),
        )
        return
    elif summary_result["status"] != "success":
        progress(total_steps / total_steps, desc="Summary Report Error")
        # Display error in TLDR, show partial results below
        tldr_markdown_content = f"**Error:** Unexpected error generating summary: {summary_result.get('ui_message', 'Unknown')}"
        data_details_content = "*Data details unavailable.*"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(
                value=f"Unexpected error generating summary: {summary_result.get('ui_message', 'Unknown')}",
                visible=True,
            ),
            gr.update(value=privacy_report, visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=True),
        )
        return
    summary_report = summary_result["report"]

    # Update UI with successful summary report before TLDR generation
    tldr_status_message = (
        f"*{progress_desc} - Success. Generating TLDR...*"  # Update status
    )
    data_details_content = "*Generating data details...*"
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )

    # --- Step 7: Generate TLDR ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Generating TLDR summary..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    tldr_data = None  # Reset tldr_data before attempt
    try:
        # Call the combined helper function from analysis_utils
        tldr_data = generate_and_parse_tldr(privacy_report, summary_report)
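        # Expected contract: a dict of parsed TLDR fields on success, None on
        # failure (assumed from the fallback handling below).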
        if tldr_data:
            logging.info(f"Successfully generated and parsed TLDR for {space_id}.")
            tldr_markdown_content = render_tldr_markdown(tldr_data, space_id)
            data_details_content = render_data_details_markdown(tldr_data)
        else:
            logging.warning(
                f"Failed to generate or parse TLDR for {space_id}. Proceeding without it."
            )
            tldr_markdown_content = "*TLDR generation failed.*"
            data_details_content = "*Data details generation failed.*"
    except Exception as tldr_err:
        # Defensive catch in case generate_and_parse_tldr raises instead of
        # returning None on failure
        logging.error(
            f"Unexpected error during TLDR generation step call for {space_id}: {tldr_err}"
        )
        tldr_markdown_content = "*Error during TLDR generation step.*"
        data_details_content = "*Error generating data details.*"
        tldr_data = None  # Ensure it's None on error

    # Update UI including the generated (or failed) TLDR before upload
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(),
        gr.update(),
        gr.update(visible=True, open=False),
        gr.update(),
        gr.update(),
    )

    # --- Step 8: Upload to Cache ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Uploading to cache..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"  # Display final action in TLDR field
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(),
        gr.update(value="Uploading results to cache...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    upload_needed = (
        cache_result["status"] != "cache_hit"
        and cache_result["status"] != "cache_error"
    )
    if upload_needed:
        # Persist all three artifacts, including the parsed TLDR JSON
        upload_result = upload_results(
            space_id,
            summary_report,
            privacy_report,
            DATASET_ID,
            HF_TOKEN,
            tldr_json_data=tldr_data,
        )
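        # upload_results returns a "status" of "error" or "skipped" on the paths
        # handled below; any other status is treated as success (assumed contract).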
        if upload_result["status"] == "error":
            logging.error(
                f"Cache upload failed: {upload_result.get('message', 'Unknown error')}"
            )
            # Non-critical, don't stop the UI, just log
        elif upload_result["status"] == "skipped":
            logging.info(f"Cache upload skipped: {upload_result.get('reason', '')}")
    else:
        logging.info(
            "Skipping cache upload as results were loaded from cache or cache check failed."
        )

    # Refresh the full UI (including the final TLDR) after the upload attempt
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    # --- Step 9: Final Update ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Analysis Complete!"
    progress(current_step / total_steps, desc=progress_desc)
    logging.info(f"Analysis complete for {space_id}.")

    # Yield the final state again to ensure the UI is correct after any upload
    # messages, displaying the final TLDR and data details
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )


# --- Input Handling Wrapper ---
def get_space_report_wrapper(
    selected_cached_space: str | None,
    new_space_id: str | None,
    progress=gr.Progress(track_tqdm=True),
):
    """
    Wrapper that decides whether to fetch a cached report or run live analysis,
    based on the Dropdown and Textbox inputs.

    Yields tuples of Gradio updates.
    """
    target_space_id = None
    source = "new"  # Assume new input unless dropdown is chosen

    # Prioritize new_space_id if provided
    if new_space_id and new_space_id.strip():
        target_space_id = new_space_id.strip()
        if target_space_id == selected_cached_space:
            source = "dropdown_match"  # User typed an ID that exists in the dropdown
        else:
            source = "new"
    elif selected_cached_space:
        target_space_id = selected_cached_space
        source = "dropdown"

    if not target_space_id:
        # Yield 7 updates
        yield (
            gr.update(value="*Please provide a Space ID.*", visible=True),
            gr.update(value="", visible=False),
            gr.update(
                value="Please select an existing report or enter a new Space ID.",
                visible=True,
            ),
            gr.update(value="", visible=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=False),
        )
        return

    if "/" not in target_space_id:
        # Yield 7 updates
        yield (
            gr.update(value="*Invalid Space ID format.*", visible=True),
            gr.update(value="", visible=False),
            gr.update(
                value=f"Invalid Space ID format: '{target_space_id}'. Use 'owner/name'.",
                visible=True,
            ),
            gr.update(value="", visible=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=False),
        )
        return
    logging.info(f"Request received for: '{target_space_id}' (Source: {source})")

    if source == "dropdown":
        progress(0.1, desc="Fetching selected cached report...")
        # Yield 7 updates (initial placeholder)
        yield (
            gr.update(value="*Loading TLDR...*", visible=True),
            gr.update(value="*Loading data details...*", visible=True),
            gr.update(value="Fetching selected cached report...", visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        cache_result = check_cache_and_download(target_space_id, DATASET_ID, HF_TOKEN)
        if cache_result["status"] == "cache_hit":
            logging.info(
                f"Successfully displayed cached reports for selected '{target_space_id}'."
            )
            progress(1.0, desc="Complete (from cache)")
            # The cache-hit message is appended to the report text within the helper,
            # so the cached text can be used directly here.
            # Parse and render the TLDR if available, defaulting both renders so a
            # missing TLDR cannot raise a NameError
            tldr_json_str = cache_result.get("tldr_json_str")
            rendered_tldr = "*TLDR not found in cache.*"
            rendered_data_details = "*Data details not found in cache.*"
            if tldr_json_str:
                try:
                    cached_tldr_data = json.loads(tldr_json_str)
                    rendered_tldr = render_tldr_markdown(
                        cached_tldr_data, target_space_id
                    )
                    rendered_data_details = render_data_details_markdown(
                        cached_tldr_data
                    )
                except Exception as parse_err:
                    logging.warning(
                        f"Failed to parse cached TLDR JSON for {target_space_id}: {parse_err}"
                    )
                    rendered_tldr = "*Error parsing cached TLDR.*"
                    rendered_data_details = (
                        "*Could not load data details due to parsing error.*"
                    )
            yield (
                gr.update(value=rendered_tldr, visible=True),
                gr.update(value=rendered_data_details, visible=True),
                gr.update(value=cache_result["summary"], visible=True),
                gr.update(value=cache_result["privacy"], visible=True),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
            )
        else:  # Cache miss or error for a dropdown selection is an error state
            error_msg = cache_result.get(
                "ui_message",
                f"Failed to find or download cached report for selected '{target_space_id}'.",
            )
            logging.error(error_msg)
            progress(1.0, desc="Error")
            yield (
                gr.update(value="*TLDR load failed.*", visible=True),
                gr.update(value="*Data details load failed.*", visible=True),
                gr.update(value=error_msg, visible=True),
                gr.update(value="", visible=False),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
                gr.update(visible=False),
            )
        return  # Stop after handling dropdown source
    # --- Live Analysis or Check Cache for New Input ---
    # If the ID came from the textbox OR was a dropdown match, run the full live
    # analysis pipeline, which includes its own cache check at the beginning.
    else:  # source == "new" or source == "dropdown_match"
        # Relay the intermediate updates from the generator
        for update_tuple in _run_live_analysis(target_space_id, progress):
            yield update_tuple


# --- Load Initial Data Function (for demo.load) ---
def load_cached_list():
    """Fetches the list of cached spaces and determines the default selection."""
    logging.info("Running demo.load: Fetching list of cached spaces...")
    # Use os.getenv here directly, as HF_TOKEN might be loaded after initial import
    token = os.getenv("HF_TOKEN")
    cached_list = list_cached_spaces(DATASET_ID, token)
    default_value = DEFAULT_SELECTION if DEFAULT_SELECTION in cached_list else None
    if not cached_list:
        logging.warning(
            "No cached spaces found or failed to fetch list during demo.load."
        )
    # Return an update object for the dropdown using gr.update()
    return gr.update(choices=cached_list, value=default_value)


# --- Gradio Interface Definition ---
# Use HTML/CSS for centering the title
TITLE = "<div style='text-align: center;'><h1>🤗 Space Privacy Analyzer 🕵️</h1></div>\n<div style='text-align: center;'><h4>Automatic code and data transfer review powered by <a href='https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct' target='_blank'>Qwen2.5-Coder-32B-Instruct</a></h4></div>"

DESCRIPTION = """
### Hugging Face 🤗 Space - Privacy & Data Check

[Hugging Face 🤗 Spaces](https://huggingface.co/spaces) offer a convenient way to build and share code demos online, especially for leveraging and exploring AI systems.

In most cases, the code for these demos is open source, which provides a unique opportunity to **examine how privacy and data transfers are managed**.

This demo leverages a code analysis model ([Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)) to help explore privacy questions in two steps:

1. Obtain and **parse the code** of a Space to identify:
    - data inputs,
    - AI model use,
    - API calls,
    - data transfers.
2. Generate a summary of the Space's function and highlight **key privacy points**.

Use the dropdown menu below to explore the [reports generated for some popular Spaces](https://huggingface.co/datasets/yjernite/spaces-privacy-reports/tree/main), or enter a new Space ID to query your own 👇

*Please note the following limitations:*

- *The model may miss important details in the code, especially when it leverages Docker files or external libraries.*
- *This app uses the base Qwen Coder model without specific adaptation to the task. We'd love to discuss how to improve this; if you want to participate, [feel free to open a discussion!](https://huggingface.co/spaces/yjernite/space-privacy/discussions)*
"""

with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(TITLE)  # Renders the centered HTML title

    with gr.Row():
        with gr.Column(scale=1):  # Left column for inputs
            description_accordion = gr.Accordion(
                "What Privacy Questions do 🤗 Spaces Raise? Click here for Demo Description 👇",
                open=False,
                visible=True,
            )
            with description_accordion:
                gr.Markdown(DESCRIPTION)
            cached_spaces_dropdown = gr.Dropdown(
                label="Select Existing Report",
                info="Select a Space whose report has been previously generated.",
                choices=[],  # Initialize empty; populated by demo.load
                value=None,
            )
            space_id_input = gr.Textbox(
                label="Or Enter New Space ID",
                placeholder="owner/space-name",
                info="Enter a new Space ID to analyze (takes precedence over selection).",
            )
            analyze_button = gr.Button("Get Space Report", variant="primary", scale=1)
        with gr.Column(scale=1):  # Right column for outputs
            # TLDR heading and placeholder, always visible
            gr.Markdown("### Privacy TLDR 🕵️\n", visible=True)
            tldr_markdown = gr.Markdown(
                "*Select or enter a Space ID to get started.*", visible=True
            )
            # Accordions are visible but closed by default
            data_types_accordion = gr.Accordion(
                "Data Types at Play", open=False, visible=True
            )
            with data_types_accordion:
                data_details_markdown = gr.Markdown("*Data details will appear here.*")
            summary_accordion = gr.Accordion(
                "Summary & Privacy Highlights",
                open=False,
                visible=True,
            )
            privacy_accordion = gr.Accordion(
                "Detailed Privacy Analysis Report",
                open=False,
                visible=True,
            )
            with summary_accordion:
                summary_markdown = gr.Markdown(
                    "Enter or select a Space ID and click Get Report.",
                    show_copy_button=True,
                )
            with privacy_accordion:
                privacy_markdown = gr.Markdown(
                    "Detailed report will appear here.", show_copy_button=True
                )

    # --- Event Listeners ---
    # Load event to populate the dropdown when the UI loads for a user session
    demo.load(fn=load_cached_list, inputs=None, outputs=cached_spaces_dropdown)

    # Button click event; this outputs list fixes the 7-tuple ordering used by
    # every yield in the handlers above
    analyze_button.click(
        fn=get_space_report_wrapper,
        inputs=[cached_spaces_dropdown, space_id_input],
        outputs=[
            tldr_markdown,
            data_details_markdown,
            summary_markdown,
            privacy_markdown,
            data_types_accordion,
            summary_accordion,
            privacy_accordion,
        ],
        show_progress="full",
    )

# --- Application Entry Point ---
if __name__ == "__main__":
    logging.info("Starting Gradio application...")
    demo.launch()