# Source: Hugging Face Space "yjernite/space-privacy" (status: Running)
# File size: 33,334 bytes

import json
import logging
import os

import gradio as gr
from dotenv import load_dotenv
from huggingface_hub import HfApi

# Import analysis pipeline helpers
from analysis_utils import (check_cache_and_download, check_endpoint_status,
                            fetch_and_validate_code, format_tldr_prompt,
                            generate_and_parse_tldr, generate_detailed_report,
                            generate_summary_report, parse_tldr_json_response,
                            render_data_details_markdown, render_tldr_markdown,
                            upload_results)
# Import general utils
from utils import list_cached_spaces  # Added import

# Removed LLM interface imports, handled by analysis_utils
# from llm_interface import ERROR_503_DICT
# from llm_interface import parse_qwen_response, query_qwen_endpoint

# Removed prompts import, handled by analysis_utils
# from prompts import format_privacy_prompt, format_summary_highlights_prompt



# Removed specific utils imports now handled via analysis_utils
# from utils import (
#     check_report_exists,
#     download_cached_reports,
#     get_space_code_files,
#     upload_reports_to_dataset,
# )

# Configure root logging once at import time: INFO level, timestamped records.
logging.basicConfig(
    level=logging.INFO, format="%(asctime)s - %(levelname)s - %(message)s"
)

# Load environment variables from a .env file (if present) before any
# os.getenv() call below, so that HF_TOKEN can be supplied that way.
load_dotenv()

# --- Constants ---
# Hugging Face access token; None when the variable is not set.
HF_TOKEN = os.getenv("HF_TOKEN")
# Name of the inference endpoint whose status is checked before analysis.
ENDPOINT_NAME = "qwen2-5-coder-32b-instruct-pmf"
# Dataset repository used as the report cache (read and write).
DATASET_ID = "yjernite/spaces-privacy-reports"
CACHE_INFO_MSG = (
    "\n\n*(Report retrieved from cache)*"  # NOTE(review): not referenced in the visible code — confirm it is still needed
)
# Space pre-selected in the dropdown when it is present in the cached list.
DEFAULT_SELECTION = "HuggingFaceTB/SmolVLM2"

# TRUNCATION_WARNING now defined and used within analysis_utils
# TRUNCATION_WARNING = """**⚠️ Warning:** The input data (code and/or prior analysis) was too long for the AI model's context limit and had to be truncated. The analysis below may be incomplete or based on partial information.\n\n---\n\n"""

# Markdown message handed to the analysis_utils helpers (endpoint check and
# report generation); presumably shown to the user when the endpoint returns
# a 503 — confirm in analysis_utils.
ERROR_503_USER_MESSAGE = """It appears that the analysis model endpoint is currently down or starting up. 

You have a few options:

*   **Wait & Retry:** Try clicking "Get Space Report" again in ~3-5 minutes. Endpoints often scale down to save resources and take a short time to wake up.
*   **Select Cached Report:** Use the dropdown above to view a report for a Space that has already been analyzed.
*   **Request Analysis:** If the error persists, please [open an issue or discussion](https://huggingface.co/spaces/yjernite/space-privacy/discussions) in the Space's Community tab requesting analysis for your target Space ID. We can run the job manually when the endpoint is available.
"""


def _render_cached_tldr(tldr_json_str, space_id):
    """Render a cached TLDR JSON string into the TLDR and data-details text.

    Args:
        tldr_json_str: Raw JSON text taken from the cache result, or a falsy
            value when the cache entry carries no TLDR.
        space_id: Space identifier; forwarded to the TLDR renderer and used
            in the warning logged when rendering fails.

    Returns:
        A ``(tldr_markdown, data_details_markdown)`` pair. Both elements are
        always defined, including when the TLDR is missing or unparsable, so
        the caller can yield them unconditionally.
    """
    if not tldr_json_str:
        return "*TLDR not found in cache.*", "*Data details not found in cache.*"
    try:
        cached_tldr_data = json.loads(tldr_json_str)
        return (
            render_tldr_markdown(cached_tldr_data, space_id),
            render_data_details_markdown(cached_tldr_data),
        )
    except Exception as parse_err:
        # Broad on purpose: a bad cache entry must degrade to a placeholder,
        # not abort the whole request.
        logging.warning(
            f"Failed to parse cached TLDR JSON for {space_id}: {parse_err}"
        )
        return (
            "*Error parsing cached TLDR.*",
            "*Could not load data details due to parsing error.*",
        )


def _run_live_analysis(space_id: str, progress=gr.Progress(track_tqdm=True)):
    """Run the full analysis pipeline for one Space, streaming UI updates.

    This is a generator. Every yielded value is a 7-tuple of ``gr.update``
    objects, in the order of the button's ``outputs`` list: TLDR markdown,
    data-details markdown, summary markdown, privacy markdown, data-types
    accordion, summary accordion, privacy accordion.

    Pipeline steps (helpers come from ``analysis_utils``):
        1. Check the report cache; on a hit, show the cached report and stop.
        2. Check the model endpoint; stop on error.
        3. Fetch the Space's code files; stop on error.
        4. Generate the detailed privacy report; stop on error.
        5. Model-info extraction (placeholder, only logs).
        6. Generate the summary report; stop on error (the privacy report
           stays visible).
        7. Generate the TLDR; failure is non-fatal.
        8. Upload the results to the cache; failure is only logged.
        9. Final UI refresh.

    Args:
        space_id: Identifier of the Space to analyze.
        progress: Gradio progress tracker, called with the completed fraction
            and a description at each step.

    Yields:
        Tuples of seven ``gr.update`` objects as described above.
    """
    total_steps = 9  # Increased step count for TLDR generation
    current_step = 0

    # --- Step 1: Check Cache ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Checking cache..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(value="*Checking cache...*", visible=True),
        gr.update(value="Checking cache for existing reports...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    cache_result = check_cache_and_download(space_id, DATASET_ID, HF_TOKEN)

    if cache_result["status"] == "cache_hit":
        progress(1.0, desc="Complete (from cache)")
        # The helper always returns both strings, so a cache entry without
        # TLDR JSON no longer leaves the data-details text undefined.
        rendered_tldr, rendered_data_details = _render_cached_tldr(
            cache_result.get("tldr_json_str"), space_id
        )
        yield (
            gr.update(value=rendered_tldr, visible=True),
            gr.update(value=rendered_data_details, visible=True),
            gr.update(value=cache_result["summary"], visible=True),
            gr.update(value=cache_result["privacy"], visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        return  # End generation successfully from cache
    elif cache_result["status"] == "cache_error":
        # Report the failure, then fall through to live analysis.
        tldr_status_message = (
            f"*Cache download failed. {cache_result.get('ui_message', '')}*"
        )
        data_details_content = "*Data details unavailable due to cache error.*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value=cache_result["ui_message"], visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
    elif cache_result["status"] == "cache_miss":
        tldr_status_message = f"*{progress_desc} - Cache miss.*"  # Update status
        data_details_content = "*Generating report...*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value="Cache miss. Starting live analysis...", visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
    elif "error_message" in cache_result:
        # Unrecognized status carrying an error message: report and continue.
        tldr_status_message = (
            f"*Cache check failed. {cache_result.get('error_message', '')}*"
        )
        data_details_content = "*Data details unavailable due to cache error.*"
        yield (
            gr.update(value=tldr_status_message, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(
                value=f"Cache check failed: {cache_result.get('error_message', 'Unknown error')}. Proceeding with live analysis...",
                visible=True,
            ),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
    # Every non-hit outcome continues with the live pipeline below.

    # --- Step 2: Check Endpoint Status ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Checking endpoint..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Checking analysis model endpoint status...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    endpoint_result = check_endpoint_status(
        ENDPOINT_NAME, HF_TOKEN, ERROR_503_USER_MESSAGE
    )

    if endpoint_result["status"] == "error":
        progress(1.0, desc="Endpoint Error")
        # Display final error in TLDR field and hide everything else
        tldr_markdown_content = endpoint_result["ui_message"]
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )
        return

    # --- Step 3: Fetch Code Files ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Fetching code..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Fetching code files from the Space...", visible=True),
        gr.update(value="", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )
    code_result = fetch_and_validate_code(space_id)

    if code_result["status"] == "error":
        progress(1.0, desc="Code Fetch Error")
        # Display final error in TLDR field
        tldr_markdown_content = (
            f"**Error:** {code_result.get('ui_message', 'Failed to fetch code.')}"
        )
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="Analysis Canceled", visible=True),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=True, open=False),
        )
        return
    code_files = code_result["code_files"]

    # --- Step 4: Generate DETAILED Privacy Report (LLM Call 1) ---
    current_step += 1
    progress_desc = (
        f"Step {current_step}/{total_steps}: Generating privacy report (AI Call 1)..."
    )
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(
            value="Generating detailed privacy report (AI Call 1)...", visible=True
        ),
        gr.update(value="Generating detailed privacy report via AI...", visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )
    privacy_result = generate_detailed_report(
        space_id, code_files, ERROR_503_USER_MESSAGE
    )

    if privacy_result["status"] == "error":
        progress(1.0, desc="Privacy Report Error")
        # Display final error in TLDR field
        tldr_markdown_content = f"**Error:** {privacy_result.get('ui_message', 'Failed during detailed report generation.')}"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(value="", visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
            gr.update(visible=False),
        )
        return
    privacy_report = privacy_result["report"]

    # Update UI with successful detailed report
    yield (
        gr.update(value=tldr_status_message, visible=True),  # Still show progress
        gr.update(),
        gr.update(
            value="Detailed privacy report generated. Proceeding...", visible=True
        ),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )

    # --- Step 5: Fetch Model Descriptions (Placeholder/Optional) ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Extracting model info..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    logging.info(progress_desc + " (Placeholder)")
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Extracting model info...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    # model_ids = extract_hf_model_ids(code_files) # utils function not imported
    # model_descriptions = get_model_descriptions(model_ids) # utils function not imported
    # Add model_descriptions to context if needed for summary prompt later

    # --- Step 6: Generate Summary + Highlights Report (LLM Call 2) ---
    current_step += 1
    progress_desc = (
        f"Step {current_step}/{total_steps}: Generating summary (AI Call 2)..."
    )
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),  # TLDR shows progress
        gr.update(),
        gr.update(value="Generating summary & highlights (AI Call 2)...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    summary_result = generate_summary_report(
        space_id, code_files, privacy_report, ERROR_503_USER_MESSAGE
    )

    if summary_result["status"] in ("error_503_summary", "error_summary"):
        progress(1.0, desc="Summary Report Error")
        # Display error in TLDR, show partial results (privacy report) below
        tldr_markdown_content = f"**Error:** {summary_result.get('ui_message', 'Failed during summary generation.')}"
        data_details_content = "*Data details may be incomplete.*"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(value=summary_result["ui_message"], visible=True),
            gr.update(value=privacy_report, visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=True),
        )
        return
    elif summary_result["status"] != "success":
        progress(1.0, desc="Summary Report Error")
        # Any other non-success status: same layout, generic wording
        tldr_markdown_content = f"**Error:** Unexpected error generating summary: {summary_result.get('ui_message', 'Unknown')}"
        data_details_content = "*Data details unavailable.*"
        yield (
            gr.update(value=tldr_markdown_content, visible=True),
            gr.update(value=data_details_content, visible=True),
            gr.update(
                value=f"Unexpected error generating summary: {summary_result.get('ui_message', 'Unknown')}",
                visible=True,
            ),
            gr.update(value=privacy_report, visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=True),
        )
        return

    summary_report = summary_result["report"]

    # Update UI with successful summary report before TLDR generation
    tldr_status_message = (
        f"*{progress_desc} - Success. Generating TLDR...*"  # Update status
    )
    data_details_content = "*Generating data details...*"
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=True),
    )

    # --- Step 7: Generate TLDR ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Generating TLDR summary..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    tldr_data = None  # Stays None unless generation and parsing both succeed
    try:
        # Call the combined helper function from analysis_utils
        tldr_data = generate_and_parse_tldr(privacy_report, summary_report)

        if tldr_data:
            logging.info(f"Successfully generated and parsed TLDR for {space_id}.")
            tldr_markdown_content = render_tldr_markdown(tldr_data, space_id)
            data_details_content = render_data_details_markdown(tldr_data)
        else:
            logging.warning(
                f"Failed to generate or parse TLDR for {space_id}. Proceeding without it."
            )
            tldr_markdown_content = "*TLDR generation failed.*"
            data_details_content = "*Data details generation failed.*"
    except Exception as tldr_err:
        # TLDR is optional: log and continue so the reports are still shown
        logging.error(
            f"Unexpected error during TLDR generation step call for {space_id}: {tldr_err}"
        )
        tldr_markdown_content = "*Error during TLDR generation step.*"
        data_details_content = "*Error generating data details.*"
        tldr_data = None  # Do not upload a TLDR that failed to render

    # Update UI including the generated (or failed) TLDR before upload
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(),
        gr.update(),
        gr.update(visible=True, open=False),
        gr.update(),
        gr.update(),
    )

    # --- Step 8: Upload to Cache ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Uploading to cache..."
    progress(current_step / total_steps, desc=progress_desc)
    tldr_status_message = f"*{progress_desc}*"  # Display current action in TLDR field
    yield (
        gr.update(value=tldr_status_message, visible=True),
        gr.update(),
        gr.update(value="Uploading results to cache...", visible=True),
        gr.update(),
        gr.update(),
        gr.update(),
        gr.update(),
    )
    # A cache hit returned early above, so in practice this only skips the
    # upload when the earlier cache download reported an error.
    upload_needed = cache_result["status"] not in ("cache_hit", "cache_error")
    if upload_needed:
        upload_result = upload_results(
            space_id,
            summary_report,
            privacy_report,
            DATASET_ID,
            HF_TOKEN,
            tldr_json_data=tldr_data,
        )
        if upload_result["status"] == "error":
            # Non-critical, don't stop the UI, just log
            logging.error(
                f"Cache upload failed: {upload_result.get('message', 'Unknown error')}"
            )
        elif upload_result["status"] == "skipped":
            logging.info(f"Cache upload skipped: {upload_result.get('reason', '')}")
    else:
        logging.info(
            "Skipping cache upload as results were loaded from cache or cache check failed."
        )

    # Restore the TLDR (it was replaced by the upload status) and show the
    # full results now that the upload attempt has finished.
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )

    # --- Step 9: Final Update ---
    current_step += 1
    progress_desc = f"Step {current_step}/{total_steps}: Analysis Complete!"
    progress(current_step / total_steps, desc=progress_desc)
    logging.info(progress_desc + f" Analysis complete for {space_id}.")
    # Yield final state again to ensure UI is correct after the progress update
    yield (
        gr.update(value=tldr_markdown_content, visible=True),
        gr.update(value=data_details_content, visible=True),
        gr.update(value=summary_report, visible=True),
        gr.update(value=privacy_report, visible=True),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
        gr.update(visible=True, open=False),
    )


# --- Original Input Handling Wrapper (updated yields for initial errors) ---
def get_space_report_wrapper(
    selected_cached_space: str | None,
    new_space_id: str | None,
    progress=gr.Progress(track_tqdm=True),
):
    """Resolve the requested Space and stream its report to the UI.

    The textbox value takes precedence over the dropdown. A pure dropdown
    selection is served from the cache only (a miss is reported as an error);
    anything typed in the textbox goes through ``_run_live_analysis``, which
    performs its own cache check first.

    This is a generator. Every yielded value is a 7-tuple of ``gr.update``
    objects, in the order of the button's ``outputs`` list: TLDR markdown,
    data-details markdown, summary markdown, privacy markdown, data-types
    accordion, summary accordion, privacy accordion.

    Args:
        selected_cached_space: Current dropdown value, or None.
        new_space_id: Current textbox value, or None / blank.
        progress: Gradio progress tracker.

    Yields:
        Tuples of seven ``gr.update`` objects as described above.
    """
    target_space_id = None
    source = "new"  # Assume new input unless dropdown is chosen

    # Prioritize new_space_id if provided
    if new_space_id and new_space_id.strip():
        target_space_id = new_space_id.strip()
        if target_space_id == selected_cached_space:
            source = "dropdown_match"  # User typed the ID selected in the dropdown
        else:
            source = "new"
    elif selected_cached_space:
        target_space_id = selected_cached_space
        source = "dropdown"

    if not target_space_id:
        yield (
            gr.update(value="*Please provide a Space ID.*", visible=True),
            gr.update(value="", visible=False),
            gr.update(
                value="Please select an existing report or enter a new Space ID.",
                visible=True,
            ),
            gr.update(value="", visible=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=False),
        )
        return

    if "/" not in target_space_id:
        yield (
            gr.update(value="*Invalid Space ID format.*", visible=True),
            gr.update(value="", visible=False),
            gr.update(
                value=f"Invalid Space ID format: '{target_space_id}'. Use 'owner/name'.",
                visible=True,
            ),
            gr.update(value="", visible=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=False),
        )
        return

    logging.info(f"Request received for: '{target_space_id}' (Source: {source})")

    if source == "dropdown":
        progress(0.1, desc="Fetching selected cached report...")
        # Initial placeholder while the cached report is downloaded
        yield (
            gr.update(value="*Loading TLDR...*", visible=True),
            gr.update(value="*Loading data details...*", visible=True),
            gr.update(value="Fetching selected cached report...", visible=True),
            gr.update(value="", visible=True),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
            gr.update(visible=True, open=False),
        )
        cache_result = check_cache_and_download(target_space_id, DATASET_ID, HF_TOKEN)
        if cache_result["status"] == "cache_hit":
            logging.info(
                f"Successfully displayed cached reports for selected '{target_space_id}'."
            )
            progress(1.0, desc="Complete (from cache)")
            # Both placeholders are set up front so the yield below is valid
            # even when the cache entry carries no TLDR JSON.
            tldr_json_str = cache_result.get("tldr_json_str")
            rendered_tldr = "*TLDR not found in cache.*"
            rendered_data_details = "*Data details not found in cache.*"
            if tldr_json_str:
                try:
                    cached_tldr_data = json.loads(tldr_json_str)
                    rendered_tldr = render_tldr_markdown(
                        cached_tldr_data, target_space_id
                    )
                    rendered_data_details = render_data_details_markdown(
                        cached_tldr_data
                    )
                except Exception as parse_err:
                    # A bad TLDR must not hide the cached reports themselves
                    logging.warning(
                        f"Failed to parse cached TLDR JSON for {target_space_id}: {parse_err}"
                    )
                    rendered_tldr = "*Error parsing cached TLDR.*"
                    rendered_data_details = (
                        "*Could not load data details due to parsing error.*"
                    )

            yield (
                gr.update(value=rendered_tldr, visible=True),
                gr.update(value=rendered_data_details, visible=True),
                gr.update(value=cache_result["summary"], visible=True),
                gr.update(value=cache_result["privacy"], visible=True),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
            )
        else:  # Cache miss or error for a dropdown selection is an error state
            error_msg = cache_result.get(
                "ui_message",
                f"Failed to find or download cached report for selected '{target_space_id}'.",
            )
            logging.error(error_msg)
            progress(1.0, desc="Error")
            yield (
                gr.update(value="*TLDR load failed.*", visible=True),
                gr.update(value="*Data details load failed.*", visible=True),
                gr.update(value=error_msg, visible=True),
                gr.update(value="", visible=False),
                gr.update(visible=True, open=False),
                gr.update(visible=True, open=False),
                gr.update(visible=False),
            )
        return  # Stop after handling dropdown source

    # source is "new" or "dropdown_match": delegate to the live pipeline,
    # which includes its own cache check at the beginning, and relay every
    # intermediate update to the UI.
    yield from _run_live_analysis(target_space_id, progress)


# --- Load Initial Data Function (for demo.load) ---
def load_cached_list():
    """Fetch the cached Space IDs and build the dropdown update.

    Returns:
        A ``gr.update`` for the dropdown with ``choices`` set to the cached
        list (empty when nothing could be fetched) and ``value`` set to
        ``DEFAULT_SELECTION`` when it is among the choices, otherwise None.
    """
    logging.info("Running demo.load: Fetching list of cached spaces...")
    # Use os.getenv here directly as HF_TOKEN might be loaded after initial import
    token = os.getenv("HF_TOKEN")
    # Normalize a falsy result (e.g. None on failure) to an empty list so the
    # membership test below cannot raise.
    cached_list = list_cached_spaces(DATASET_ID, token) or []
    if not cached_list:
        logging.warning(
            "No cached spaces found or failed to fetch list during demo.load."
        )
    default_value = DEFAULT_SELECTION if DEFAULT_SELECTION in cached_list else None
    # Return an update object for the dropdown using gr.update()
    return gr.update(choices=cached_list, value=default_value)


# --- Gradio Interface Definition ---
# Page header as raw HTML: inline CSS centers the title and the subtitle,
# which links to the model card of the analysis model.
TITLE = "<div style='text-align: center;'><h1>🤗 Space Privacy Analyzer 🕵️</h1></div>\n<div style='text-align: center;'><h4>Automatic code Data transfer review powered by <a href='https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct' target='_blank'>Qwen2.5-Coder-32B-Instruct</a></h4></div>"

# Markdown shown inside the collapsible description accordion: what the demo
# does, how to use it, and its known limitations.
DESCRIPTION = """
### Hugging Face 🤗 Space - Privacy & Data Check

[Hugging Face 🤗 Spaces](https://huggingface.co/spaces) offer a convenient way to build and share code demos online; especially leveraging and exploring AI systems.
In most cases, the code for these demos is open source &mdash; which provides a unique opportunity to **examine how privacy and data transfers are managed**.

This demo leverages a code analysis model ([Qwen2.5-Coder-32B-Instruct](https://huggingface.co/Qwen/Qwen2.5-Coder-32B-Instruct)) to help explore privacy questions in two steps:
1. Obtain and **parse the code** of a Space to identify:
  - data inputs,
  - AI model use,
  - API calls,
  - data transfers.
2. Generate a summary of the Space's function and highlight **key privacy points**.

Use the dropdown menu below to explore the [reports generated for some popular Spaces](https://huggingface.co/datasets/yjernite/spaces-privacy-reports/tree/main), or enter a new Space ID to query your own 👇

*Please note the following limitations:*
- *The model may miss important details in the code, especially when it leverages Docker files or external libraries.*
- *This app uses the base Qwen Coder model without specific adaptation to the task. We'd love to discuss how to improve this, if you want to participate [feel free to open a discussion!](https://huggingface.co/spaces/yjernite/space-privacy/discussions)*
"""

# Build the UI: a two-column layout with inputs on the left and the report
# outputs on the right, followed by the event wiring.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown(TITLE)  # Renders the centered HTML title

    with gr.Row():
        with gr.Column(scale=1):  # Left column for inputs
            description_accordion = gr.Accordion(
                "What Privacy Questions do 🤗 Spaces Raise? Click here for Demo Description 👇",
                open=False,
                visible=True,
            )
            with description_accordion:
                gr.Markdown(DESCRIPTION)

            cached_spaces_dropdown = gr.Dropdown(
                label="Select Existing Report",
                info="Select a Space whose report has been previously generated.",
                choices=[],  # Initialize empty, will be populated by demo.load
                value=None,  # Initialize empty
            )

            space_id_input = gr.Textbox(
                label="Or Enter New Space ID",
                placeholder="owner/space-name",
                info="Enter a new Space ID to analyze (takes precedence over selection).",
            )

            analyze_button = gr.Button("Get Space Report", variant="primary", scale=1)

        with gr.Column(scale=1):  # Right column for outputs
            # Static heading, then the TLDR component that also carries
            # progress and error messages during analysis
            gr.Markdown("### Privacy TLDR  🕵️\n", visible=True)
            tldr_markdown = gr.Markdown(
                "*Select or enter a Space ID to get started.*", visible=True
            )

            # Accordions are created collapsed and visible; the click handler
            # toggles their visibility and open state through its outputs
            data_types_accordion = gr.Accordion(
                "Data Types at Play", open=False, visible=True
            )
            with data_types_accordion:
                data_details_markdown = gr.Markdown("*Data details will appear here.*")

            # Both accordions below are created first and filled afterwards
            summary_accordion = gr.Accordion(
                "Summary & Privacy Highlights",
                open=False,  # Starts collapsed
                visible=True,
            )
            privacy_accordion = gr.Accordion(
                "Detailed Privacy Analysis Report",
                open=False,  # Starts collapsed
                visible=True,
            )
            with summary_accordion:
                summary_markdown = gr.Markdown(
                    "Enter or select a Space ID and click Get Report.",
                    show_copy_button=True,
                )
            with privacy_accordion:
                privacy_markdown = gr.Markdown(
                    "Detailed report will appear here.", show_copy_button=True
                )

    # --- Event Listeners ---

    # Load event to populate the dropdown when the UI loads for a user session
    demo.load(fn=load_cached_list, inputs=None, outputs=cached_spaces_dropdown)

    # Button click event. The order of `outputs` defines the order of the
    # 7-tuples yielded by get_space_report_wrapper.
    analyze_button.click(
        fn=get_space_report_wrapper,
        inputs=[cached_spaces_dropdown, space_id_input],
        outputs=[
            tldr_markdown,
            data_details_markdown,  # Content inside the data-types accordion
            summary_markdown,
            privacy_markdown,
            data_types_accordion,  # Accordion visibility / open state
            summary_accordion,
            privacy_accordion,
        ],
        show_progress="full",
    )

# --- Application Entry Point ---

# Launch the Gradio app only when the file is executed as a script.
if __name__ == "__main__":
    logging.info("Starting Gradio application...")
    demo.launch()