diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -1,848 +1,3893 @@
+import gradio as gr
import os
-import sys
-import subprocess
-import base64
+import aiohttp
+import asyncio
+from git import Repo, GitCommandError, InvalidGitRepositoryError, NoSuchPathError
+from pathlib import Path
+from datetime import datetime, timedelta, timezone
+import shutil
import json
-import requests # Needed for potential API calls, although huggingface_hub is preferred
-from io import StringIO
-from typing import Dict, List, Tuple, Optional
-
-import streamlit as st
-import torch # Needed if using torch models directly, but pipeline handles it
-from transformers import pipeline, AutoModelForSeq2SeqLM, AutoTokenizer
-from huggingface_hub import HfApi, create_repo, upload_file, hf_hub_url # Using huggingface_hub library
-
-# Attempt to import black and pylint, handle if not installed
+import logging
+import re
+from typing import Dict, List, Optional, Tuple, Any
+import subprocess
+import plotly.express as px
+import plotly.graph_objects as go
+import time
+import random
+import pandas as pd
+from collections import Counter
+import string
+from concurrent.futures import ThreadPoolExecutor
+from hdbscan import HDBSCAN
+# --- Required Imports ---
+import hashlib
+from functools import lru_cache
+import threading
+from http.server import HTTPServer, BaseHTTPRequestHandler
+import markdown2
+import websockets
+from websockets.server import WebSocketServerProtocol
+from websockets.exceptions import ConnectionClosed, ConnectionClosedOK, WebSocketException
+# Note: ConnectionAbortedError and ConnectionResetError are Python builtins, not
+# websockets exceptions, so they must not be imported from websockets.exceptions.
+import signal # For graceful shutdown
+# ---------------------
+
+# Assuming code_editor is available, e.g., installed via pip or included locally
try:
- import black
+ from code_editor import code_editor
except ImportError:
- black = None
- st.warning("Black library not found. Code formatting will be disabled.")
-
+ logging.error("The 'code_editor' Gradio component is not installed or available.")
+ logging.error("Please install it, e.g., 'pip install gradio_code_editor'")
+ def code_editor(*args, **kwargs):
+ logging.warning("Using dummy code_editor. Code editing and collaboration will not function.")
+ # Create a dummy component that looks like a Textbox but is non-interactive
+ return gr.Textbox(label=kwargs.get('label', 'Code Editor (Unavailable)'), interactive=False, value="Error: Code editor component not found. Install 'gradio_code_editor'.", lines=10)
+
+
+# ========== Configuration ==========
+WORKSPACE = Path("./issue_workspace")
+WORKSPACE.mkdir(exist_ok=True)
+GITHUB_API = "https://api.github.com/repos"
+HF_INFERENCE_API = "https://api-inference.huggingface.co/models"
+WEBHOOK_PORT = int(os.environ.get("WEBHOOK_PORT", 8000))
+WS_PORT = int(os.environ.get("WS_PORT", 8001))
+GRADIO_PORT = int(os.environ.get("GRADIO_PORT", 7860))
+
+# Configure logging
+logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
+logger = logging.getLogger(__name__)
+# Set a higher logging level for libraries that are too verbose, e.g.:
+logging.getLogger("websockets").setLevel(logging.WARNING)
+logging.getLogger("aiohttp").setLevel(logging.WARNING)
+logging.getLogger("urllib3").setLevel(logging.WARNING) # Might be used by git or other libs
+logging.getLogger("git").setLevel(logging.WARNING)
+logging.getLogger("hdbscan").setLevel(logging.WARNING) # HDBSCAN can be chatty
+
+
+# Use ThreadPoolExecutor for any synchronous blocking operations if needed,
+# but most heavy lifting (API calls, git) is now async.
+executor = ThreadPoolExecutor(max_workers=4)
+
+
+# Example HF models (replace with your actual models)
+# Ensure these models are suitable for the tasks (text generation, embeddings)
+HF_MODELS = {
+ "Mistral-8x7B": "mistralai/Mixtral-8x7B-Instruct-v0.1",
+ "Llama-2-7B-chat": "huggingface/llama-2-7b-chat-hf",
+ "CodeLlama-34B": "codellama/CodeLlama-34b-Instruct-hf",
+ "Gemma-7B-it": "google/gemma-7b-it", # Added another option
+}
+# Embedding model ID - fixed, not user selectable
+HF_EMBEDDING_MODEL = "sentence-transformers/all-mpnet-base-v2"
+
+# Select default models defensively
+DEFAULT_MODEL_KEY = "Mistral-8x7B" if "Mistral-8x7B" in HF_MODELS else (list(HF_MODELS.keys())[0] if HF_MODELS else None)
+DEFAULT_MODEL_ID = HF_MODELS.get(DEFAULT_MODEL_KEY, None)
+
+DEFAULT_IDLE_MODEL_KEY = "Gemma-7B-it" if "Gemma-7B-it" in HF_MODELS else DEFAULT_MODEL_KEY # Prefer a smaller one if available
+DEFAULT_IDLE_MODEL_ID = HF_MODELS.get(DEFAULT_IDLE_MODEL_KEY, DEFAULT_MODEL_ID)
+
+if not HF_MODELS:
+ logger.critical("No HF models configured! AI features will be disabled.")
+elif DEFAULT_MODEL_ID is None:
+ logger.critical(f"Default model key '{DEFAULT_MODEL_KEY}' not found in configured models. AI features may be limited.")
+if DEFAULT_IDLE_MODEL_ID is None:
+ logger.warning(f"Idle model key '{DEFAULT_IDLE_MODEL_KEY}' not found or no models configured. Idle tasks may be disabled or use the default model if available.")
+
+
+# --- Idle State Configuration ---
+STALE_ISSUE_THRESHOLD_DAYS = 30
+MAX_SUMMARY_COMPUTATIONS_PER_CYCLE = 2
+MAX_CONTEXT_COMPUTATIONS_PER_CYCLE = 3
+MAX_MISSING_INFO_COMPUTATIONS_PER_CYCLE = 1
+MAX_ANALYSIS_COMPUTATIONS_PER_CYCLE = 1
+RECLUSTER_THRESHOLD = 5 # Number of significant webhook changes before re-clustering is flagged
+IDLE_PROCESSING_INTERVAL_SECONDS = 60.0 # How often the idle task loop runs
+
+# ========== Placeholder OTCodeEditor Class ==========
+# WARNING: This is a placeholder and DOES NOT implement Operational Transformation.
+# Concurrent edits WILL lead to data loss or inconsistencies.
+class OTCodeEditor:
+ """
+ Placeholder for an Operational Transformation (OT) enabled code editor backend.
+ This implementation is NOT thread-safe and does NOT handle concurrent edits correctly.
+ It merely logs received deltas and maintains a basic revision counter.
+ The actual document state is held client-side by the Gradio code_editor component.
+ A real collaborative editor would require a robust OT backend to manage
+ the authoritative document state and transform operations.
+ """
+ def __init__(self, initial_value: Dict[str, str]):
+ # In a real OT system, this would initialize the document state
+ # For this placeholder, we just store the initial files dict
+ self.files: Dict[str, str] = initial_value.copy()
+ self.revision = 0 # Basic revision counter, not used for OT logic
+ logger.debug(f"OTCodeEditor initialized with files: {list(self.files.keys())}")
+
+ def apply_delta(self, delta: Dict[str, Any]):
+ # VERY basic placeholder: This logs the delta but does NOT perform OT.
+ # It does NOT handle concurrent edits safely.
+ # In a real OT system, this would transform the delta against the current state
+ # and apply it, incrementing the revision based on successful application.
+ logger.warning(f"Placeholder apply_delta called. Delta: {str(delta)[:200]}. "
+ "WARNING: Full Operational Transformation is NOT implemented. Concurrent edits are UNSAFE.")
+ # The Gradio component holds the actual text state client-side.
+ # A real OT backend would use the delta to update its authoritative state.
+ # We only increment revision for basic tracking visibility if needed.
+ self.revision += 1
+
+ def get_content(self) -> Dict[str, str]:
+ # This is not used by the current Gradio code_editor integration,
+ # as the component holds the state client-side.
+ # In a real OT system, this would return the current authoritative document state.
+ return self.files.copy()
+
+# ========== Modern Theme ==========
try:
- from pylint import epylint as lint # epylint provides a simpler interface
-except ImportError:
- lint = None
- st.warning("Pylint library not found. Code linting will be disabled.")
-
-# --- Constants and Configuration ---
-# Add your Hugging Face API token here. Using Streamlit secrets is best practice.
-# Ensure you have a .streamlit/secrets.toml file with [huggingface] token="your_token"
-hf_token = st.secrets.get("huggingface")
-
-# Define root directory for workspace projects
-PROJECT_ROOT = os.path.join(os.path.expanduser("~"), "codecraft_workspace")
-# Define directory for saving agent prompts
-AGENT_DIRECTORY = os.path.join(os.path.expanduser("~"), "codecraft_agents")
-
-# Ensure directories exist
-os.makedirs(PROJECT_ROOT, exist_ok=True)
-os.makedirs(AGENT_DIRECTORY, exist_ok=True)
-
-# Define available code generative models (using standard pipeline compatible models)
-# NOTE: Replaced GGUF models with standard transformer models for pipeline compatibility.
-# Choose models appropriate for code generation tasks.
-AVAILABLE_CODE_GENERATIVE_MODELS = {
- "Salesforce/codegen-350M-mono": "Salesforce/codegen-350M-mono",
- "gpt2": "gpt2", # Smaller model for quick testing
- # Add other compatible models as needed
+ theme = gr.themes.Soft(
+ primary_hue="violet",
+ secondary_hue="emerald",
+ radius_size="lg",
+ font=[gr.themes.GoogleFont("Inter"), "ui-sans-serif", "system-ui"]
+ ).set(
+ button_primary_background_fill="linear-gradient(90deg, #8B5CF6 0%, #EC4899 100%)",
+ button_primary_text_color="white",
+ block_label_text_size="lg",
+ block_label_text_weight="600",
+ block_title_text_size="lg",
+ block_title_text_weight="800",
+ panel_background_fill="white",
+ block_shadow="*shadow_drop_lg",
+ # Add some more modern touches
+ input_background_fill="#f9fafb",
+ input_border_color="#e5e7eb",
+ input_border_radius="md",
+ button_secondary_background_fill="#f3f4f6",
+ button_secondary_text_color="#374151",
+ button_secondary_border_color="#d1d5db",
+ table_border_color="#e5e7eb",
+ table_row_background_even="#f9fafb",
+ table_row_background_odd="#ffffff",
+ # Use slightly softer colors for plots if default is too bright
+ # (This might need specific Plotly config instead of theme)
+ )
+except AttributeError as e:
+ logger.warning(f"Could not apply all theme settings (might be Gradio version difference): {e}. Using default Soft theme.")
+ theme = gr.themes.Soft()
+
+# Additional UI/UX Enhancements
+custom_css = """
+/* Smooth transitions for buttons and inputs */
+button, input, select, textarea {
+ transition: background-color 0.3s ease, color 0.3s ease, border-color 0.3s ease;
}
-# Define available text translation models
-# Example: English to Spanish
-AVAILABLE_TRANSLATION_MODELS = {
- "English to Spanish": "Helsinki-NLP/opus-mt-en-es",
- # Add other language pairs/models as needed
+/* Hover effects for buttons */
+button:hover {
+ filter: brightness(1.1);
}
+/* Focus styles for inputs */
+input:focus, select:focus, textarea:focus {
+ outline: none;
+ border-color: #8B5CF6;
+ box-shadow: 0 0 5px rgba(139, 92, 246, 0.5);
+}
-# --- Global State Management ---
-# Global state to manage communication between Tool Box and Workspace Chat App
-if "chat_history" not in st.session_state:
- st.session_state.chat_history: List[Tuple[str, str]] = [] # Store tuples of (user_input, response)
-if "terminal_history" not in st.session_state:
- st.session_state.terminal_history: List[Tuple[str, str]] = [] # Store tuples of (command, output)
-if "workspace_projects" not in st.session_state:
- # Store project details: {project_name: {'path': '/path/to/project', 'files': ['file1.py', ...]}}
- st.session_state.workspace_projects: Dict[str, Dict[str, any]] = {}
-# Initialize available agents list
-if "available_agents" not in st.session_state:
- st.session_state.available_agents: List[str] = []
-# Initialize active project for workspace
-if "active_project" not in st.session_state:
- st.session_state.active_project: Optional[str] = None
-
-
-# --- Model Loading with Caching ---
-@st.cache_resource
-def load_text_generation_pipeline(model_name: str):
- """Loads a text generation pipeline and tokenizer with caching."""
- try:
- tokenizer = AutoTokenizer.from_pretrained(model_name)
- # Set pad_token if it's missing, common for generation models
- if tokenizer.pad_token is None:
- tokenizer.pad_token = tokenizer.eos_token
- model = AutoModelForSeq2SeqLM.from_pretrained(model_name) # Assuming Seq2Seq for chat, adjust if needed
- # For general text generation like GPT-2, use AutoModelForCausalLM
- # model = AutoModelForCausalLM.from_pretrained(model_name)
- pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)
- return pipe
- except Exception as e:
- st.error(f"Error loading text generation model {model_name}: {e}")
- return None
+/* Scrollbar styling for scrollable divs */
+#issue_preview_div::-webkit-scrollbar,
+#ai_output_md::-webkit-scrollbar {
+ width: 8px;
+}
-@st.cache_resource
-def load_summarization_pipeline():
- """Loads a summarization pipeline with caching."""
- try:
- return pipeline("summarization")
- except Exception as e:
- st.error(f"Error loading summarization model: {e}")
- return None
+#issue_preview_div::-webkit-scrollbar-thumb,
+#ai_output_md::-webkit-scrollbar-thumb {
+ background-color: #8B5CF6;
+ border-radius: 4px;
+}
-@st.cache_resource
-def load_sentiment_analysis_pipeline():
- """Loads a sentiment analysis pipeline with caching."""
- try:
- return pipeline("sentiment-analysis")
- except Exception as e:
- st.error(f"Error loading sentiment analysis model: {e}")
- return None
+/* Responsive layout improvements */
+@media (max-width: 768px) {
+ #config-panel {
+ flex-direction: column !important;
+ }
+ #issue_list_df {
+ font-size: 0.9em !important;
+ }
+}
-@st.cache_resource
-def load_translation_pipeline(model_name: str):
- """Loads a translation pipeline with caching."""
- try:
- return pipeline("translation", model=model_name)
- except Exception as e:
- st.error(f"Error loading translation model {model_name}: {e}")
- return None
-
-# Load common pipelines on startup
-summarizer_pipeline = load_summarization_pipeline()
-sentiment_analyzer_pipeline = load_sentiment_analysis_pipeline()
-# The chat model is used by chat_interface
-chat_pipeline = load_text_generation_pipeline("microsoft/DialoGPT-medium")
-# RAG model from original code was unused, removing for simplicity unless needed later.
-# rag_retriever = pipeline("retrieval-question-answering", model="facebook/rag-token-base")
-
-
-# --- AI Agent Class ---
-class AIAgent:
- """Represents an AI Agent with a name, description, and skills."""
- def __init__(self, name: str, description: str, skills: List[str]):
- self.name = name
- self.description = description
- self.skills = skills
- # Hugging Face API handled outside the agent for now for clarity
- # self._hf_api = None # Deprecated, use functions with explicit API handling
-
- def create_agent_prompt(self) -> str:
- """Generates a text prompt representing the agent's capabilities."""
- prompt = f"You are {self.name}, an AI agent. {self.description}\n"
- prompt += "Your skills include:\n"
- for skill in self.skills:
- prompt += f"- {skill}\n"
- prompt += "Based on the user's request, use your skills to provide a helpful response or suggest actions."
- return prompt
-
- # autonomous_build method from original code - needs significant refinement
- # Keeping a placeholder but commenting out the original logic for now.
- # A proper autonomous build would involve more sophisticated planning
- # and interaction with the available tools.
- # def autonomous_build(self, chat_history: List[Tuple[str, str]], workspace_projects: Dict[str, Dict[str, any]], project_name: str, selected_model: str):
- # """Placeholder for autonomous build logic."""
- # summary = "Autonomous build logic goes here. It would analyze chat history, project state, and use tools."
- # next_step = "Determine the next step based on analysis."
- # st.info("Autonomous build feature is under development.")
- # return summary, next_step
-
-
-# --- Agent Management Functions ---
-def save_agent_to_file(agent: AIAgent):
- """Saves the agent's prompt to a file."""
- file_path = os.path.join(AGENT_DIRECTORY, f"{agent.name}.txt")
- try:
- with open(file_path, "w") as file:
- file.write(agent.create_agent_prompt())
- # Add agent name to session state if not already there
- if agent.name not in st.session_state.available_agents:
- st.session_state.available_agents.append(agent.name)
- st.success(f"Agent '{agent.name}' saved successfully.")
- except IOError as e:
- st.error(f"Error saving agent '{agent.name}': {e}")
-
-def load_agent_prompt(agent_name: str) -> Optional[str]:
- """Loads an agent prompt from a file."""
- file_path = os.path.join(AGENT_DIRECTORY, f"{agent_name}.txt")
- if os.path.exists(file_path):
+/* Enhanced table styles */
+.gradio-dataframe-container table {
+ border-collapse: separate !important;
+ border-spacing: 0 8px !important;
+}
+
+.gradio-dataframe-container table tr {
+ background-color: #f9fafb !important;
+ border-radius: 8px !important;
+ box-shadow: 0 1px 3px rgba(0,0,0,0.1);
+}
+
+.gradio-dataframe-container table tr:hover {
+ background-color: #ede9fe !important;
+ cursor: pointer;
+}
+
+/* Button styles */
+.gr-button {
+ border-radius: 8px !important;
+ font-weight: 600 !important;
+}
+
+/* Accordion header styles */
+#ai_tools_accordion > .label {
+ font-weight: 700 !important;
+ font-size: 1.1em !important;
+ color: #6b21a8 !important;
+}
+
+/* Status bar improvements */
+#status_output_txt textarea {
+ background-color: #f3f4f6 !important;
+ border-radius: 6px !important;
+ font-family: 'Fira Code', monospace !important;
+ font-size: 0.95em !important;
+ color: #4b5563 !important;
+}
+
+/* Code editor improvements */
+#code_editor_component {
+ border: 1px solid #ddd !important;
+ border-radius: 8px !important;
+ box-shadow: 0 2px 6px rgba(139, 92, 246, 0.15);
+}
+
+/* Plot improvements */
+#stats_plot_viz, #analytics_severity_plot {
+ border-radius: 8px;
+ box-shadow: 0 2px 8px rgba(0,0,0,0.1);
+ background-color: white;
+}
+"""
+
+
+# ========== Enhanced Webhook Handler ==========
+class WebhookHandler(BaseHTTPRequestHandler):
+ manager_instance: Optional['IssueManager'] = None
+ main_loop: Optional[asyncio.AbstractEventLoop] = None # Store reference to the main asyncio loop
+
+ def do_POST(self):
+ content_length = int(self.headers.get('Content-Length', 0))
+ if content_length == 0:
+ self.send_response(400)
+ self.send_header("Content-type", "text/plain")
+ self.end_headers()
+ self.wfile.write(b"Empty payload")
+ logger.warning("Received empty webhook payload.")
+ return
+
+ try:
+ payload_bytes = self.rfile.read(content_length)
+ payload = json.loads(payload_bytes.decode('utf-8'))
+ except json.JSONDecodeError:
+ logger.error(f"Invalid JSON payload received: {payload_bytes[:500]}")
+ self.send_response(400)
+ self.send_header("Content-type", "text/plain")
+ self.end_headers()
+ self.wfile.write(b"Invalid JSON payload")
+ return
+ except Exception as e:
+ logger.error(f"Error reading webhook payload: {e}")
+ self.send_response(500)
+ self.end_headers()
+ return
+
+ event = self.headers.get('X-GitHub-Event')
+ delivery_id = self.headers.get('X-GitHub-Delivery')
+ logger.info(f"Received GitHub webhook event: {event} (Delivery ID: {delivery_id})")
+
+ if event == 'issues' and WebhookHandler.manager_instance and WebhookHandler.main_loop:
+ action = payload.get('action')
+ logger.info(f"Issue action: {action}")
+ # Handle common actions that affect issue state or content
+ if action in ['opened', 'reopened', 'closed', 'assigned', 'unassigned', 'edited', 'labeled', 'unlabeled', 'milestoned', 'demilestoned']:
+ # Check if the loop is running before scheduling
+ if WebhookHandler.main_loop.is_running():
+ # Schedule the async handler to run in the main event loop
+ # Use run_coroutine_threadsafe as this is called from a different thread
+ asyncio.run_coroutine_threadsafe(
+ WebhookHandler.manager_instance.handle_webhook_event(event, action, payload),
+ WebhookHandler.main_loop
+ )
+ logger.debug(f"Scheduled webhook processing for action '{action}' in main loop.")
+ else:
+ logger.error("Asyncio event loop is not running in the target thread for webhook. Cannot process event.")
+ # Respond with an error as processing couldn't be scheduled
+ self.send_response(500)
+ self.send_header("Content-type", "text/plain")
+ self.end_headers()
+ self.wfile.write(b"Async processing loop not available.")
+ return
+ else:
+ logger.info(f"Webhook action '{action}' received but not actively handled by current logic.")
+ elif event == 'ping':
+ logger.info("Received GitHub webhook ping.")
+ else:
+ logger.warning(f"Unhandled event type: {event} or manager/loop not initialized.")
+
+ # Always respond 200 OK for successful receipt, even if action is ignored
+ self.send_response(200)
+ self.send_header("Content-type", "text/plain")
+ self.end_headers()
+ self.wfile.write(b"OK")
+
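+# A minimal wiring sketch (hypothetical; the actual wiring lives in the main
+# execution block): the handler only schedules work, so the manager and its
+# asyncio loop must be injected before the HTTP server starts, e.g.:
+#
+#   WebhookHandler.manager_instance = manager
+#   WebhookHandler.main_loop = manager.main_loop
+#   server = HTTPServer(("0.0.0.0", WEBHOOK_PORT), WebhookHandler)
+#   threading.Thread(target=server.serve_forever, daemon=True).start()
+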
+# ========== AI-Powered Issue Manager ==========
+class IssueManager:
+ def __init__(self):
+ self.issues: Dict[int, dict] = {}
+ self.repo_url: Optional[str] = None
+ self.repo_owner: Optional[str] = None
+ self.repo_name: Optional[str] = None
+ self.repo_local_path: Optional[Path] = None
+ self.repo: Optional[Repo] = None
+ self.github_token: Optional[str] = None
+ self.hf_token: Optional[str] = None
+ self.collaborators: Dict[str, dict] = {} # {client_id: {name: str, status: str}}
+ self.points: int = 0 # Placeholder for potential gamification
+ self.severity_rules: Dict[str, List[str]] = {
+ "Critical": ["critical", "urgent", "security", "crash", "blocker", "p0", "s0"],
+ "High": ["high", "important", "error", "regression", "major", "p1", "s1"],
+ "Medium": ["medium", "bug", "performance", "minor", "p2", "s2"],
+ "Low": ["low", "documentation", "enhancement", "trivial", "feature", "p3", "s3", "chore", "refactor", "question", "help wanted"]
+ }
+ self.issue_clusters: Dict[int, List[int]] = {} # {cluster_id: [issue_index_in_list, ...]}
+ self.issue_list_for_clustering: List[dict] = [] # List of issue dicts used for the last clustering run
+ self.ws_clients: List[WebSocketServerProtocol] = []
+ self.code_editors: Dict[int, OTCodeEditor] = {} # {issue_id: OTCodeEditor_instance}
+ # Get or create the loop in the thread where the manager is initialized (main thread)
try:
- with open(file_path, "r") as file:
- agent_prompt = file.read()
- return agent_prompt
- except IOError as e:
- st.error(f"Error loading agent '{agent_name}': {e}")
- return None
- else:
- return None
-
-def create_agent_from_text(name: str, text: str):
- """Creates an AIAgent from text input and saves it."""
- if not name:
- st.warning("Please enter an agent name.")
- return
- # Assuming each line in text is a skill
- skills = [skill.strip() for skill in text.split("\n") if skill.strip()]
- if not skills:
- st.warning("Please enter some skills for the agent.")
- return
- agent = AIAgent(name, "AI agent created from text input.", skills)
- save_agent_to_file(agent)
-
-
-def chat_interface_with_agent(input_text: str, agent_name: str) -> str:
- """Chats with a specific AI agent using its loaded prompt."""
- agent_prompt = load_agent_prompt(agent_name)
- if agent_prompt is None:
- return f"Error: Agent '{agent_name}' not found."
-
- # Use a standard text generation model for agent interaction
- agent_chat_pipeline = load_text_generation_pipeline("microsoft/DialoGPT-medium") # Or another suitable model
- if agent_chat_pipeline is None:
- return "Error: Agent chat model not loaded."
+ self.main_loop = asyncio.get_running_loop()
+ logger.debug(f"IssueManager found running asyncio loop: {id(self.main_loop)}")
+ except RuntimeError:
+ self.main_loop = asyncio.new_event_loop()
+ asyncio.set_event_loop(self.main_loop)
+ logger.debug(f"IssueManager created and set new asyncio loop: {id(self.main_loop)}")
+
+        self.broadcast_task: Optional[asyncio.Task] = None
+        self.idle_task: Optional[asyncio.Task] = None
+
+        # Manual cache for AI suggestions, keyed by (issue_hash, model_key).
+        # functools.lru_cache is unsafe on coroutine functions (it would cache the
+        # coroutine object, which can only be awaited once), so a plain dict is used.
+        self._suggestion_cache: Dict[Tuple[str, str], str] = {}
+
+ # --- State for Idle Processing Results ---
+ self.precomputed_context: Dict[int, Dict[str, Any]] = {} # {issue_id: {content: str, files: list, error: str, timestamp: float}}
+ self.precomputed_summaries: Dict[int, Dict[str, Any]] = {} # {issue_id: {summary: str, error: str, timestamp: float}}
+ self.precomputed_missing_info: Dict[int, Dict[str, Any]] = {} # {issue_id: {info_needed: str, error: str, timestamp: float}}
+ self.precomputed_analysis: Dict[int, Dict[str, Any]] = {} # {issue_id: {hypothesis: str, error: str, timestamp: float}}
+ # self.code_embeddings: Dict[str, List[float]] = {} # Not currently used after clustering
+ self.potential_duplicates: Dict[int, List[int]] = {} # {issue_id: [duplicate_issue_id, ...]}
+ self.stale_issues: List[int] = [] # [issue_id, ...]
+ self.high_priority_candidates: List[int] = [] # [issue_id, ...]
+ self.last_webhook_time: float = time.time() # Track last webhook for potential future use
+ self.needs_recluster: bool = False
+ self._webhook_change_count = 0
+
+ # --- Configuration for Idle Tasks ---
+ self.idle_processing_interval = IDLE_PROCESSING_INTERVAL_SECONDS
+ self.max_context_computations_per_cycle = MAX_CONTEXT_COMPUTATIONS_PER_CYCLE
+ self.max_summary_computations_per_cycle = MAX_SUMMARY_COMPUTATIONS_PER_CYCLE
+ self.max_missing_info_computations_per_cycle = MAX_MISSING_INFO_COMPUTATIONS_PER_CYCLE
+ self.max_analysis_computations_per_cycle = MAX_ANALYSIS_COMPUTATIONS_PER_CYCLE
+ self.stale_issue_threshold_days = STALE_ISSUE_THRESHOLD_DAYS
+ self.recluster_threshold = RECLUSTER_THRESHOLD
+
+ # Shutdown signals (placeholders, set by main execution block)
+ self.stop_ws_server = None
+ self.stop_webhook_server = None
+
+ def start_broadcast_loop(self):
+ """Starts the periodic broadcast task."""
+ # Ensure task is created in the correct loop
+ if not self.main_loop.is_running():
+ logger.error("Cannot start broadcast loop: Main event loop is not running.")
+ return
+
+ if not self.broadcast_task or self.broadcast_task.done():
+ self.broadcast_task = self.main_loop.create_task(self.broadcast_collaboration_status())
+ logger.info("Started collaboration status broadcast loop.")
+ else:
+ logger.debug("Broadcast loop already running.")
+
+
+ def stop_broadcast_loop(self):
+ """Stops the periodic broadcast task."""
+ if self.broadcast_task and not self.broadcast_task.done():
+ logger.info("Stopping collaboration status broadcast loop...")
+ self.broadcast_task.cancel()
+ # Await the task to finish cancellation in an async context if needed,
+ # but cancelling is sufficient to signal it to stop.
+ self.broadcast_task = None # Clear the reference
+
+
+ def _get_issue_hash(self, issue_data: Optional[dict]) -> str:
+ """Generates a hash based on key issue content for caching AI suggestions."""
+ if not issue_data:
+ return "empty_issue_hash" # Handle cases where issue_data is None
+
+ content = f"{issue_data.get('title', '')}{issue_data.get('body', '')}{','.join(issue_data.get('labels',[]))}"
+ return hashlib.md5(content.encode('utf-8', errors='ignore')).hexdigest() # Use utf-8 and ignore errors
+
+
+    async def cached_suggestion(self, issue_hash: str, model_key: str) -> str:
+        """Retrieves or generates an AI suggestion, caching results by issue content hash.
+
+        Uses a plain dict cache (self._suggestion_cache) rather than functools.lru_cache:
+        lru_cache on an async function caches the coroutine object itself, and awaiting
+        a cached coroutine a second time raises RuntimeError.
+        """
+        cache_key = (issue_hash, model_key)
+        cached = self._suggestion_cache.get(cache_key)
+        if cached is not None:
+            logger.debug(f"Suggestion cache hit: hash={issue_hash}, model={model_key}")
+            return cached
+
+        # Find the issue data corresponding to the hash.
+        # This linear scan is inefficient for many issues, but the issues dict is
+        # usually small; a dict mapping hashes to issue IDs could be used if this
+        # ever becomes a bottleneck.
+        found_issue = None
+        for issue in self.issues.values():
+            if self._get_issue_hash(issue) == issue_hash:
+                found_issue = issue
+                break
+
+        if not found_issue:
+            logger.error(f"Could not find issue data for hash {issue_hash} in current state. Suggestion cannot be generated.")
+            return "Error: Issue data for this suggestion request (hash) not found in current state. The issue might have been updated or closed. Please re-select the issue."
+
+        if model_key not in HF_MODELS or HF_MODELS.get(model_key) is None:
+            logger.error(f"Invalid or unconfigured model key requested: {model_key}")
+            return f"Error: Invalid or unconfigured model key: {model_key}"
+
+        logger.info(f"Cache miss for issue hash {issue_hash}. Requesting suggestion from {model_key}.")
+        suggestion = await self.suggest_resolution(found_issue, model_key)
+        self._suggestion_cache[cache_key] = suggestion
+        return suggestion
+
+ async def handle_webhook_event(self, event: str, action: str, payload: dict):
+ """Processes incoming webhook events to update the issue state."""
+ logger.info(f"Processing webhook event: {event}, action: {action}")
+ issue_data = payload.get('issue')
+ repo_data = payload.get('repository')
+
+ if not issue_data or not repo_data:
+ logger.warning("Webhook payload missing 'issue' or 'repository' data.")
+ return
+
+ event_repo_url = repo_data.get('html_url')
+ # Only process events for the currently loaded repository
+ # Use .rstrip("/") on both sides for robust comparison
+ if self.repo_url is None or event_repo_url is None or event_repo_url.rstrip("/") != self.repo_url.rstrip("/"):
+ logger.info(f"Ignoring webhook event for different repository: {event_repo_url} (Current: {self.repo_url})")
+ return
+
+ issue_number = issue_data.get('number')
+ if issue_number is None: # Check explicitly for None
+ logger.warning("Webhook issue data missing 'number'.")
+ return
+
+ needs_ui_update = False
+ significant_change = False # Flag for changes affecting clustering/content/AI caches
+
+ if action == 'closed':
+ logger.info(f"Webhook: Removing closed issue {issue_number} from active list.")
+ if issue_number in self.issues:
+ self.issues.pop(issue_number)
+ needs_ui_update = True
+ significant_change = True # Closing is a significant change
+ # Clean up associated cached/computed data
+ self.precomputed_context.pop(issue_number, None)
+ self.precomputed_summaries.pop(issue_number, None)
+ self.precomputed_missing_info.pop(issue_number, None)
+ self.precomputed_analysis.pop(issue_number, None)
+ self.potential_duplicates.pop(issue_number, None)
+ # Remove from lists if present (use list comprehension for safe removal)
+ self.stale_issues = [i for i in self.stale_issues if i != issue_number]
+ self.high_priority_candidates = [i for i in self.high_priority_candidates if i != issue_number]
+ # Remove the code editor instance for the closed issue
+ self.code_editors.pop(issue_number, None)
+ logger.debug(f"Cleaned up state for closed issue {issue_number}.")
+ else:
+ logger.debug(f"Webhook: Issue {issue_number} closed, but not found in current active list. No state change needed.")
+
+
+ elif action in ['opened', 'reopened', 'edited', 'assigned', 'unassigned', 'labeled', 'unlabeled', 'milestoned', 'demilestoned']:
+ logger.info(f"Webhook: Adding/Updating issue {issue_number} (action: {action}).")
+ processed_data = self._process_issue_data(issue_data)
+
+ old_issue = self.issues.get(issue_number)
+ # Check for changes that impact AI suggestions or clustering
+ if not old_issue or \
+ old_issue.get('body') != processed_data.get('body') or \
+ old_issue.get('title') != processed_data.get('title') or \
+ set(old_issue.get('labels', [])) != set(processed_data.get('labels', [])):
+ significant_change = True
+ logger.info(f"Significant change detected for issue {issue_number} (content/labels).")
+ # Invalidate ALL precomputed AI state on significant edit
+ self.precomputed_context.pop(issue_number, None)
+ self.precomputed_summaries.pop(issue_number, None)
+ self.precomputed_missing_info.pop(issue_number, None)
+ self.precomputed_analysis.pop(issue_number, None)
+ # Clear the entire suggestion cache on significant change
+                self._suggestion_cache.clear()
+ logger.debug("Cleared suggestion cache due to significant issue change.")
+
+ # Check if state-related fields changed (affecting idle processing lists)
+ # This check is for logging/debugging, the idle loop re-evaluates lists anyway
+ if not old_issue or \
+ old_issue.get('updated_at') != processed_data.get('updated_at') or \
+ old_issue.get('assignee') != processed_data.get('assignee') or \
+ set(old_issue.get('labels', [])) != set(processed_data.get('labels', [])) or \
+ old_issue.get('state') != processed_data.get('state'): # State change (open/reopened)
+ logger.debug(f"State-related change detected for issue {issue_number} (update time, assignee, labels, state). Idle loop will re-evaluate.")
+
+ self.issues[issue_number] = processed_data
+ needs_ui_update = True
+ else:
+ logger.info(f"Ignoring webhook action '{action}' for issue {issue_number} (already filtered).")
+
+ # --- Track changes for idle processing ---
+ if needs_ui_update:
+ self.last_webhook_time = time.time()
+ if significant_change:
+ self._increment_change_counter()
+ # Rebuild the list used for clustering immediately if a significant change occurred
+ # This list is a snapshot used by the async clustering task
+ self.issue_list_for_clustering = list(self.issues.values())
+ logger.info("Issue list for clustering updated due to significant webhook change.")
+            # Broadcast UI update notification. handle_webhook_event runs inside the
+            # main asyncio loop (scheduled via run_coroutine_threadsafe), so the task
+            # can be created directly.
+            if self.main_loop.is_running():
+                asyncio.create_task(self.broadcast_issue_update())
+ logger.debug("Scheduled issue update broadcast.")
+ else:
+ logger.warning("Main loop not running, cannot broadcast issue update.")
+
+
+ def _increment_change_counter(self):
+ """Increments change counter and sets recluster flag if threshold reached."""
+ self._webhook_change_count += 1
+ logger.debug(f"Significant change detected. Change count: {self._webhook_change_count}/{self.recluster_threshold}")
+ if self._webhook_change_count >= self.recluster_threshold:
+ self.needs_recluster = True
+ logger.info(f"Change threshold ({self.recluster_threshold}) reached. Flagging for re-clustering.")
+
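+    # A minimal sketch of the mapping performed below, assuming a typical GitHub
+    # REST API issue payload:
+    #   {"number": 42, "title": "Crash on save", "labels": [{"name": "bug"}],
+    #    "assignee": {"login": "alice"}, ...}
+    #   becomes {"id": 42, "title": "Crash on save", "labels": ["bug"], "assignee": "alice", ...}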
+ def _process_issue_data(self, issue_data: dict) -> dict:
+ """Helper to structure issue data consistently."""
+ return {
+ "id": issue_data.get('number'), # Use .get for safety
+ "title": issue_data.get('title', 'No Title Provided'),
+ "body": issue_data.get('body', ''),
+ "state": issue_data.get('state', 'unknown'),
+ "labels": sorted([label.get('name', '') for label in issue_data.get('labels', []) if isinstance(label, dict) and label.get('name')]), # Ensure labels are dicts and have name
+ "assignee": issue_data.get('assignee', {}).get('login') if issue_data.get('assignee') and isinstance(issue_data.get('assignee'), dict) else None,
+ "url": issue_data.get('html_url', '#'),
+ "created_at": issue_data.get('created_at'),
+ "updated_at": issue_data.get('updated_at'),
+ }
+
+ async def crawl_issues(self, repo_url: str, github_token: Optional[str], hf_token: Optional[str]) -> Tuple[List[List], go.Figure, str, go.Figure]:
+ """
+ Crawls issues, resets state, clones repo, clusters, starts background tasks.
+ Returns dataframe data, stats plot, status message, and analytics plot.
+ """
+ # Strip whitespace from inputs
+ repo_url = repo_url.strip() if repo_url else None
+ github_token = github_token.strip() if github_token else None
+ hf_token = hf_token.strip() if hf_token else None
+
+ # Define a default empty plot for consistent return type
+ def get_empty_plot(title="Plot"):
+ fig = go.Figure()
+ fig.update_layout(title=title, xaxis={"visible": False}, yaxis={"visible": False},
+ annotations=[{"text": "Scan needed.", "xref": "paper", "yref": "paper", "showarrow": False, "font": {"size": 16}}],
+ plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)')
+ return fig
+
+ if not repo_url or not hf_token:
+ logger.error("Repository URL and Hugging Face Token are required.")
+ empty_fig = get_empty_plot("Issue Severity Distribution")
+ return [], empty_fig, "Error: Repository URL and Hugging Face Token are required.", empty_fig
+
+ logger.info(f"Starting new issue crawl and setup for {repo_url}")
+
+ # --- Reset Manager State ---
+ # Stop background tasks first
+ self.stop_idle_processing()
+ self.stop_broadcast_loop()
+
+ self.issues = {}
+ # Clear code_editors instances
+ self.code_editors = {}
+ self.issue_clusters = {}
+ self.issue_list_for_clustering = []
+        self._suggestion_cache.clear() # Clear AI suggestion cache
+ self.precomputed_context = {}
+ self.precomputed_summaries = {}
+ self.precomputed_missing_info = {}
+ self.precomputed_analysis = {}
+ # self.code_embeddings = {} # Not used
+ self.potential_duplicates = {}
+ self.stale_issues = []
+ self.high_priority_candidates = []
+ self.needs_recluster = False
+ self._webhook_change_count = 0
+ self.last_webhook_time = time.time()
+ self.repo = None # Clear the repo object
+ self.repo_url = repo_url
+ self.github_token = github_token
+ self.hf_token = hf_token
+ logger.info("Internal state reset for new crawl.")
+
+ # --- Repository Cloning/Updating ---
+ match = re.match(r"https?://github\.com/([^/]+)/([^/]+)", self.repo_url)
+ if not match:
+ logger.error(f"Invalid GitHub URL format: {self.repo_url}")
+ empty_fig = get_empty_plot("Issue Severity Distribution")
+ return [], empty_fig, "Error: Invalid GitHub URL format. Use https://github.com/owner/repo", empty_fig
+        self.repo_owner, self.repo_name = match.groups()
+        self.repo_name = self.repo_name.removesuffix(".git")  # tolerate ".../repo.git" URLs
+ self.repo_local_path = WORKSPACE / f"{self.repo_owner}_{self.repo_name}"
- try:
- # Combine agent prompt and user input for the model
- prompt = f"{agent_prompt}\n\nUser: {input_text}\n{agent_name}:"
- # Adjust max_length and other parameters as needed for better responses
- generated_response = agent_chat_pipeline(
- prompt,
- max_length=200, # Increased max_length
- num_return_sequences=1,
- do_sample=True,
- top_k=50,
- # Add truncation to handle long prompts if necessary
- truncation=True,
- max_new_tokens=150 # Prefer max_new_tokens for generation length control
- )[0]["generated_text"]
-
- # Post-process the response to potentially remove the input prompt
- # This is a simple approach and might need refinement depending on the model
- response_prefix = f"{agent_name}:"
- if response_prefix in generated_response:
- generated_response = generated_response.split(response_prefix, 1)[1].strip()
-
- return generated_response
- except Exception as e:
- st.error(f"Error communicating with agent '{agent_name}': {e}")
- return f"Error communicating with agent: {e}"
+ try:
+ if self.repo_local_path.exists():
+ logger.info(f"Attempting to update existing repository clone at {self.repo_local_path}")
+ try:
+ self.repo = Repo(self.repo_local_path)
+ # Ensure the origin remote matches the requested URL
+ if not self.repo.remotes or 'origin' not in self.repo.remotes:
+ logger.warning(f"Existing repo at {self.repo_local_path} has no 'origin' remote. Re-cloning.")
+ if self.repo_local_path.exists(): shutil.rmtree(self.repo_local_path)
+ self.repo = Repo.clone_from(self.repo_url, self.repo_local_path, progress=lambda op, cur, tot, msg: logger.debug(f"Clone progress: {msg}"))
+ else:
+ origin = self.repo.remotes.origin
+ remote_url = next((u for u in origin.urls), None) # Get first URL
+ expected_urls = {self.repo_url, self.repo_url + ".git"}
+ if remote_url not in expected_urls:
+ logger.warning(f"Existing repo path {self.repo_local_path} has different remote URL ('{remote_url}' vs '{self.repo_url}'). Removing and re-cloning.")
+ # Remove the directory entirely before re-cloning
+ if self.repo_local_path.exists(): shutil.rmtree(self.repo_local_path)
+ self.repo = Repo.clone_from(self.repo_url, self.repo_local_path, progress=lambda op, cur, tot, msg: logger.debug(f"Clone progress: {msg}"))
+ else:
+ logger.info("Pulling latest changes...")
+                            # Bound the network operations. GitPython forwards unknown kwargs
+                            # (such as `timeout`) to git as CLI options, so use its
+                            # kill_after_timeout parameter instead.
+                            try:
+                                # Fetch first to get latest refs
+                                origin.fetch(progress=lambda op, cur, tot, msg: logger.debug(f"Fetch progress: {msg}"), kill_after_timeout=120)
+                                # Then pull
+                                origin.pull(progress=lambda op, cur, tot, msg: logger.debug(f"Pull progress: {msg}"), kill_after_timeout=120)
+                                # Unshallow if necessary
+                                if self.repo.git.rev_parse('--is-shallow-repository').strip() == 'true':
+                                    logger.info("Repository is shallow, unshallowing...")
+                                    self.repo.git.fetch('--unshallow', kill_after_timeout=300)
+ except GitCommandError as pull_err:
+ logger.error(f"Git pull/fetch error: {pull_err}. Proceeding with potentially stale local copy.")
+ except Exception as pull_err:
+ logger.exception(f"Unexpected error during git pull/fetch: {pull_err}. Proceeding with potentially stale local copy.")
+
+
+ except (InvalidGitRepositoryError, NoSuchPathError):
+ logger.warning(f"Invalid or missing Git repository at {self.repo_local_path}. Re-cloning.")
+ # Ensure directory is clean before re-cloning
+ if self.repo_local_path.exists(): shutil.rmtree(self.repo_local_path)
+ self.repo = Repo.clone_from(self.repo_url, self.repo_local_path, progress=lambda op, cur, tot, msg: logger.debug(f"Clone progress: {msg}"))
+ except GitCommandError as git_err:
+ logger.error(f"Git operation error during update: {git_err}. Trying to proceed with existing copy, but it might be stale.")
+ if not self.repo: # If repo object wasn't successfully created before the error
+ try: self.repo = Repo(self.repo_local_path)
+ except Exception: logger.error("Failed to even load existing repo after git error.")
+ except Exception as e:
+ logger.exception(f"An unexpected error occurred during repository update check: {e}")
+ # If repo object wasn't successfully created before the error
+ if not self.repo:
+ try: self.repo = Repo(self.repo_local_path)
+ except Exception: logger.error("Failed to even load existing repo after update error.")
+
+ else:
+ logger.info(f"Cloning repository {self.repo_url} to {self.repo_local_path}")
+                # Note: Repo.clone_from has no `timeout` parameter (unknown kwargs are
+                # forwarded to git as CLI options and would fail), so the initial clone
+                # is not time-bounded here.
+                self.repo = Repo.clone_from(self.repo_url, self.repo_local_path, progress=lambda op, cur, tot, msg: logger.debug(f"Clone progress: {msg}"))
-# --- Tool Functions ---
-def terminal_interface(command: str, project_path: Optional[str] = None) -> str:
- """Runs a terminal command in the specified directory."""
- if project_path and not os.path.exists(project_path):
- return f"Error: Project directory '{project_path}' does not exist."
+ logger.info("Repository clone/update process finished.")
+ if not self.repo:
+ raise Exception("Repository object could not be initialized after cloning/update.")
+
+ except GitCommandError as e:
+ logger.error(f"Failed to clone/update repository: {e}")
+ empty_fig = get_empty_plot("Issue Severity Distribution")
+ empty_fig.update_layout(annotations=[{"text": "Repo Error.", "xref": "paper", "yref": "paper", "showarrow": False, "font": {"size": 16}}])
+ return [], empty_fig, f"Error cloning/updating repository: {e}. Check URL, permissions, and network.", empty_fig
+ except Exception as e:
+ logger.exception(f"An unexpected error occurred during repository handling: {e}")
+ empty_fig = get_empty_plot("Issue Severity Distribution")
+ empty_fig.update_layout(annotations=[{"text": "Repo Error.", "xref": "paper", "yref": "paper", "showarrow": False, "font": {"size": 16}}])
+ return [], empty_fig, f"An unexpected error occurred during repo setup: {e}", empty_fig
- try:
- # Using subprocess.run with shell=True can be risky, consider alternatives
- # if security is a major concern and commands are user-provided.
- # However, for a local dev tool, it's often acceptable.
- result = subprocess.run(
- command,
- shell=True,
- capture_output=True,
- text=True, # Decode stdout/stderr as text
- cwd=project_path # Run command in project directory if specified
- )
- output = f"Command: {command}\n"
- output += f"Return Code: {result.returncode}\n\n"
- if result.stdout:
- output += "STDOUT:\n" + result.stdout
- if result.stderr:
- output += "STDERR:\n" + result.stderr
- return output
- except Exception as e:
- return f"Error executing command '{command}': {e}"
-def code_editor_interface(code: str) -> Tuple[str, str]:
- """Formats code using Black and lints using Pylint."""
- formatted_code = code
- lint_message = "Linting and Formatting Results:\n"
+ # --- Issue Fetching ---
+ api_url = f"{GITHUB_API}/{self.repo_owner}/{self.repo_name}/issues?state=open&per_page=100"
+ headers = {"Accept": "application/vnd.github.v3+json"}
+ if github_token:
+ headers["Authorization"] = f"token {github_token}"
- if black:
try:
- formatted_code = black.format_str(code, mode=black.FileMode())
- lint_message += "Black formatting applied successfully.\n"
- except black.NothingChanged:
- lint_message += "Black formatting: No changes needed.\n"
+ all_issues_data = []
+ page = 1
+ logger.info(f"Fetching open issues from GitHub API (repo: {self.repo_owner}/{self.repo_name})...")
+ async with aiohttp.ClientSession(headers=headers) as session:
+ while True:
+ paginated_url = api_url
+ logger.debug(f"Fetching URL: {paginated_url}")
+ # Use a timeout for API requests
+ try:
+ async with session.get(paginated_url, timeout=30) as response:
+ rate_limit_remaining = response.headers.get('X-RateLimit-Remaining')
+ rate_limit_reset = response.headers.get('X-RateLimit-Reset')
+ logger.debug(f"GitHub API Response Status: {response.status}, RateLimit Remaining: {rate_limit_remaining}, Reset: {rate_limit_reset}")
+
+ if response.status == 403 and rate_limit_remaining == '0':
+ reset_time = int(rate_limit_reset) if rate_limit_reset else time.time() + 60
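+                                # e.g. reset at epoch T with now = T - 100 -> wait_time = 105s (100s to reset + 5s buffer).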
+ wait_time = max(reset_time - time.time() + 5, 0) # Wait until reset time + a buffer
+ logger.warning(f"GitHub API rate limit exceeded. Waiting until {datetime.fromtimestamp(reset_time).strftime('%H:%M:%S')} ({wait_time:.0f}s)")
+ await asyncio.sleep(wait_time)
+ continue # Retry the same page
+
+ response.raise_for_status() # Raise for other 4xx/5xx errors
+
+ issues_page_data = await response.json()
+ if not issues_page_data: break # No more issues on this page
+
+ logger.info(f"Fetched page {page} with {len(issues_page_data)} items.")
+ all_issues_data.extend(issues_page_data)
+
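+                            # GitHub paginates via the Link header, e.g.:
+                            #   <https://api.github.com/...&page=2>; rel="next", <https://api.github.com/...&page=5>; rel="last"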
+ link_header = response.headers.get('Link')
+ if link_header and 'rel="next"' in link_header:
+ # Simple parsing for the next link, more robust parsing might be needed for complex headers
+ next_url_match = re.search(r'<([^>]+)>;\s*rel="next"', link_header)
+ if next_url_match:
+ # The next URL is provided directly, use it
+ api_url = next_url_match.group(1)
+ page += 1 # Increment page counter for logging, though not strictly needed for the loop logic now
+ logger.debug(f"Found next page link: {api_url}")
+ else:
+ logger.warning("Link header contains 'rel=\"next\"' but could not parse the URL. Stopping pagination.")
+ break
+ else:
+ logger.debug("No 'next' link found in Link header. Assuming last page.")
+ break
+ await asyncio.sleep(0.1) # Small delay between requests
+
+ except asyncio.TimeoutError:
+ logger.warning(f"GitHub API request timed out for page {page}. Stopping pagination early.")
+ break # Stop pagination on timeout
+ except aiohttp.ClientResponseError as e:
+ # Re-raise client response errors so the outer handler catches them
+ raise e
+ except Exception as e:
+ logger.exception(f"An unexpected error occurred during GitHub API pagination for page {page}. Stopping pagination.")
+ break # Stop pagination on unexpected error
+
+
+ logger.info(f"Total items fetched (including potential PRs): {len(all_issues_data)}")
+ # Filter out pull requests (issues with 'pull_request' key)
+ self.issues = {
+ issue_data['number']: self._process_issue_data(issue_data)
+ for issue_data in all_issues_data
+ if 'pull_request' not in issue_data and issue_data.get('number') is not None # Ensure number exists
+ }
+
+ logger.info(f"Filtered out pull requests, {len(self.issues)} actual open issues remaining.")
+
+ empty_fig = get_empty_plot("Issue Severity Distribution")
+ if not self.issues:
+ logger.warning("No open issues found for this repository.")
+ empty_fig.update_layout(annotations=[{"text": "No issues found.", "xref": "paper", "yref": "paper", "showarrow": False, "font": {"size": 16}}])
+ return [], empty_fig, "No open issues found in the repository.", empty_fig
+
+ # --- Clustering and UI Data Prep ---
+ self.issue_list_for_clustering = list(self.issues.values())
+ logger.info("Clustering issues...")
+ await self._cluster_similar_issues()
+
+ # --- Initial Idle Task Prep (Run synchronously after load) ---
+ logger.info("Identifying potential duplicates based on initial clusters...")
+ self._identify_potential_duplicates()
+ logger.info("Identifying potentially stale issues...")
+ self._identify_stale_issues()
+ logger.info("Identifying high priority candidates...")
+ self._identify_high_priority_candidates()
+
+ # --- Prepare Dataframe Output & Stats ---
+ dataframe_data = []
+ severity_counts = {"Critical": 0, "High": 0, "Medium": 0, "Low": 0, "Unknown": 0}
+ index_to_cluster_id: Dict[int, int] = {}
+ # Map issue index in the clustering list back to its cluster ID
+ for cluster_id, indices in self.issue_clusters.items():
+ for index in indices:
+ if 0 <= index < len(self.issue_list_for_clustering):
+ index_to_cluster_id[index] = cluster_id
+ else:
+ logger.warning(f"Clustering returned invalid index {index} for list of length {len(self.issue_list_for_clustering)}")
+
+ for i, issue in enumerate(self.issue_list_for_clustering):
+ severity = self._determine_severity(issue.get('labels', [])) # Use get for safety
+ severity_counts[severity] += 1
+ # Get cluster ID using the index from the clustering list
+ cluster_id = index_to_cluster_id.get(i, -1)
+ dataframe_data.append([
+ issue.get('id', 'N/A'), # Use get for safety
+ issue.get('title', 'No Title'), # Use get for safety
+ severity,
+ cluster_id if cluster_id != -1 else "N/A" # Display "N/A" for noise (-1)
+ ])
+
+ logger.info("Generating statistics plot...")
+ stats_fig = self._generate_stats_plot(severity_counts)
+
+ # --- Start Background Tasks ---
+ # Ensure tasks are created in the manager's loop
+ self.start_broadcast_loop()
+ self.start_idle_processing()
+
+ success_msg = f"Found {len(self.issues)} open issues. Clustered into {len(self.issue_clusters)} groups. Repo ready. Background analysis started."
+ logger.info(success_msg)
+ # Return both plots (stats and analytics severity are the same initially)
+ return dataframe_data, stats_fig, success_msg, stats_fig
+
+ except aiohttp.ClientResponseError as e:
+ logger.error(f"GitHub API request failed: Status={e.status}, Message='{e.message}', URL='{e.request_info.url}'")
+ error_msg = f"Error fetching issues: {e.status} - {e.message}. Check token/URL."
+ if e.status == 404: error_msg = f"Error: Repository not found at {self.repo_url}."
+ elif e.status == 401: error_msg = "Error: Invalid GitHub token or insufficient permissions for this repository."
+ elif e.status == 403:
+            rate_limit_remaining = e.headers.get('X-RateLimit-Remaining') if e.headers else None
+            rate_limit_reset = e.headers.get('X-RateLimit-Reset') if e.headers else None
+ reset_time_str = "unknown"
+ if rate_limit_reset:
+ try: reset_time_str = datetime.fromtimestamp(int(rate_limit_reset), timezone.utc).strftime('%Y-%m-%d %H:%M:%S %Z')
+ except ValueError: pass
+ error_msg = f"Error: GitHub API rate limit likely exceeded or access forbidden (Remaining: {rate_limit_remaining}). Reset time: {reset_time_str}. Check token or wait."
+ self.stop_idle_processing()
+ self.stop_broadcast_loop()
+ empty_fig = get_empty_plot("Issue Severity Distribution")
+ empty_fig.update_layout(annotations=[{"text": "API Error.", "xref": "paper", "yref": "paper", "showarrow": False, "font": {"size": 16}}])
+ return [], empty_fig, error_msg, empty_fig
+ except asyncio.TimeoutError:
+ logger.error("GitHub API request timed out.")
+ self.stop_idle_processing()
+ self.stop_broadcast_loop()
+ empty_fig = get_empty_plot("Issue Severity Distribution")
+ empty_fig.update_layout(annotations=[{"text": "API Timeout.", "xref": "paper", "yref": "paper", "showarrow": False, "font": {"size": 16}}])
+ return [], empty_fig, "Error: GitHub API request timed out.", empty_fig
except Exception as e:
- lint_message += f"Black formatting error: {e}\n"
- formatted_code = code # Revert to original if formatting fails
- else:
- lint_message += "Black not installed. Formatting skipped.\n"
+ self.stop_idle_processing()
+ self.stop_broadcast_loop()
+ logger.exception(f"An unexpected error occurred during issue crawl: {e}")
+ empty_fig = get_empty_plot("Issue Severity Distribution")
+ empty_fig.update_layout(annotations=[{"text": "Unexpected Error.", "xref": "paper", "yref": "paper", "showarrow": False, "font": {"size": 16}}])
+ return [], empty_fig, f"An unexpected error occurred: {e}", empty_fig
+
+ def _determine_severity(self, labels: List[str]) -> str:
+ """Determines issue severity based on labels using predefined rules."""
+ labels_lower = {label.lower().strip() for label in labels}
+ for severity, keywords in self.severity_rules.items():
+ if any(keyword in label for label in labels_lower for keyword in keywords):
+ return severity
+ return "Unknown"
+
+ def _generate_stats_plot(self, severity_counts: Dict[str, int]) -> go.Figure:
+ """Generates a Plotly bar chart for issue severity distribution."""
+ filtered_counts = {k: v for k, v in severity_counts.items() if v > 0}
+ if not filtered_counts:
+ fig = go.Figure()
+ fig.update_layout(title="Issue Severity Distribution", xaxis={"visible": False}, yaxis={"visible": False},
+ annotations=[{"text": "No issues to display.", "xref": "paper", "yref": "paper", "showarrow": False, "font": {"size": 16}}],
+ plot_bgcolor='rgba(0,0,0,0)', paper_bgcolor='rgba(0,0,0,0)')
+ return fig # Return the empty figure here
+
+ severities = list(filtered_counts.keys())
+ counts = list(filtered_counts.values())
+ order = ['Critical', 'High', 'Medium', 'Low', 'Unknown']
+ # Sort severities based on the predefined order
+ severities_sorted = sorted(severities, key=lambda x: order.index(x) if x in order else len(order))
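+        # e.g. ['Low', 'Critical'] sorts to ['Critical', 'Low']; labels outside `order` sort last.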
+ counts_sorted = [filtered_counts[s] for s in severities_sorted]
+
+ fig = px.bar(x=severities_sorted, y=counts_sorted, title="Issue Severity Distribution",
+ labels={'x': 'Severity', 'y': 'Number of Issues'}, color=severities_sorted,
+ color_discrete_map={'Critical': '#DC2626', 'High': '#F97316', 'Medium': '#FACC15', 'Low': '#84CC16', 'Unknown': '#6B7280'},
+ text=counts_sorted)
+ fig.update_layout(xaxis_title=None, yaxis_title="Number of Issues", plot_bgcolor='rgba(0,0,0,0)',
+ paper_bgcolor='rgba(0,0,0,0)', showlegend=False,
+ xaxis={'categoryorder':'array', 'categoryarray': order},
+ yaxis={'rangemode': 'tozero'}) # Ensure y-axis starts at 0
+ fig.update_traces(textposition='outside')
+ return fig
+
+ async def _cluster_similar_issues(self):
+ """Generates embeddings and clusters issues using HDBSCAN. Uses self.issue_list_for_clustering."""
+ if not self.issue_list_for_clustering:
+ logger.warning("Cannot cluster issues: No issues loaded or list is empty.")
+ self.issue_clusters = {}
+ self._webhook_change_count = 0 # Reset on empty list
+ self.needs_recluster = False
+ return
+ if not self.hf_token or HF_EMBEDDING_MODEL is None:
+ logger.error("Cannot cluster issues: Hugging Face token or embedding model missing.")
+ self.issue_clusters = {}
+ self._webhook_change_count = 0 # Reset on missing token/model
+ self.needs_recluster = False
+ return
+
+ num_issues = len(self.issue_list_for_clustering)
+ logger.info(f"Generating embeddings for {num_issues} issues for clustering using {HF_EMBEDDING_MODEL}...")
+ try:
+ # Use title + a snippet of the body for embedding
+ texts_to_embed = [
+ f"Title: {i.get('title','')} Body: {i.get('body','')[:500]}" # Limit body length
+ for i in self.issue_list_for_clustering
+ ]
+ embeddings = await self._generate_embeddings(texts_to_embed)
+
+ if embeddings is None or not isinstance(embeddings, list) or len(embeddings) != num_issues:
+ logger.error(f"Failed to generate valid embeddings for clustering. Expected {num_issues}, got {type(embeddings)} len {len(embeddings) if embeddings else 'N/A'}.")
+ self.issue_clusters = {}
+ self._webhook_change_count = 0 # Reset on embedding failure
+ self.needs_recluster = False
+ return
+
+ logger.info(f"Generated {len(embeddings)} embeddings. Running HDBSCAN clustering...")
+ # Adjust min_cluster_size dynamically based on issue count?
+ min_cluster_size = max(2, min(5, num_issues // 10)) # Example: min 2, max 5, or 10% of issues
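+            # e.g. 12 issues -> max(2, min(5, 1)) = 2; 40 issues -> 4; 300+ issues -> capped at 5.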
+ clusterer = HDBSCAN(min_cluster_size=min_cluster_size, metric='cosine', allow_single_cluster=True, gen_min_span_tree=True)
+ clusters = clusterer.fit_predict(embeddings)
+
+ new_issue_clusters: Dict[int, List[int]] = {}
+ noise_count = 0
+ for i, cluster_id in enumerate(clusters):
+ cluster_id_int = int(cluster_id)
+ if cluster_id_int == -1:
+ noise_count += 1
+ continue
+ if cluster_id_int not in new_issue_clusters:
+ new_issue_clusters[cluster_id_int] = []
+ new_issue_clusters[cluster_id_int].append(i)
+
+ self.issue_clusters = new_issue_clusters
+ logger.info(f"Clustering complete. Found {len(self.issue_clusters)} clusters (min size {min_cluster_size}) with {noise_count} noise points.")
+
+ # Reset the change counter and flag after successful clustering
+ self._webhook_change_count = 0
+ self.needs_recluster = False
+ logger.debug("Reset webhook change counter and recluster flag after clustering.")
+
+ except Exception as e:
+ logger.exception(f"Error during issue clustering: {e}")
+ self.issue_clusters = {}
+ self._webhook_change_count = 0 # Reset on clustering failure
+ self.needs_recluster = False
- if lint:
+
+ def _identify_potential_duplicates(self):
+ """Populates self.potential_duplicates based on self.issue_clusters and self.issue_list_for_clustering."""
+ self.potential_duplicates = {}
+ if not self.issue_clusters or not self.issue_list_for_clustering:
+ logger.debug("Skipping duplicate identification: No clusters or issue list.")
+ return
+
+ index_to_id = {}
try:
- # Pylint expects a file or module name, using StringIO as a workaround
- # This might not capture all linting issues compared to running on a file
- # A better approach for a real editor would be to save to a temp file and lint that.
- # For simplicity, using epylint.py_run which can take a string (though less common usage)
- # The typical use is lint.run(['your_module.py'], ...)
- # Let's simulate linting the string content.
- # Note: lint.py_run is for running pylint as a script, epylint.lint is better for programmatic use.
- # We need to capture stdout/stderr from the linting process.
- # This is a bit tricky with epylint's programmatic interface.
- # Let's stick closer to the original but use epylint's run function with output capture.
- # Or even simpler: call pylint via subprocess, similar to terminal_interface.
-
- # Option 1: Using subprocess (more reliable for capturing full output)
- # Create a temporary file
- temp_file_path = "temp_code_for_linting.py"
- with open(temp_file_path, "w") as f:
- f.write(code)
-
- lint_command = f"pylint --output-format=text {temp_file_path}"
- lint_result = subprocess.run(lint_command, shell=True, capture_output=True, text=True)
-
- lint_message += "\nPylint Output:\n"
- if lint_result.stdout:
- lint_message += lint_result.stdout
- if lint_result.stderr:
- # Pylint often prints to stderr for errors, but also normal messages sometimes
- # Check if it's just usage info or actual errors
- if "usage:" not in lint_result.stderr.lower():
- lint_message += "Pylint STDERR (might contain errors or warnings):\n" + lint_result.stderr
- else:
- lint_message += "Pylint STDERR (usage info or minor messages):\n" + lint_result.stderr
+ for i, issue in enumerate(self.issue_list_for_clustering):
+ issue_id = issue.get('id')
+ if issue_id is None:
+ logger.warning(f"Issue at index {i} in clustering list is missing an ID.")
+ continue
+ index_to_id[i] = issue_id
+ except Exception as e:
+ logger.error(f"Error creating index-to-ID map for duplicate check: {e}. Issue list might be inconsistent.")
+ return
+
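+ # Illustrative example (hypothetical IDs): with issue_clusters = {3: [0, 2]}
+ # and index_to_id = {0: 101, 2: 205}, this yields {101: [205], 205: [101]}.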
+ for cluster_id, indices in self.issue_clusters.items():
+ if len(indices) > 1:
+ # Get issue IDs for indices in this cluster, skipping any invalid indices
+ cluster_issue_ids = [index_to_id[i] for i in indices if i in index_to_id]
+ if len(cluster_issue_ids) > 1: # Ensure there's more than one valid issue ID in the cluster
+ for issue_id in cluster_issue_ids:
+ # For each issue in the cluster, list all *other* issues in the same cluster as potential duplicates
+ self.potential_duplicates[issue_id] = [other_id for other_id in cluster_issue_ids if other_id != issue_id]
+
+ logger.info(f"Identified potential duplicates for {len(self.potential_duplicates)} issues based on clustering.")
+
+ async def _generate_embeddings(self, texts: List[str]):
+ """Generates sentence embeddings using Hugging Face Inference API."""
+ if not self.hf_token:
+ logger.error("Hugging Face token is not set. Cannot generate embeddings.")
+ return None
+ if not texts:
+ logger.warning("Embedding generation requested with empty text list.")
+ return []
+ if HF_EMBEDDING_MODEL is None:
+ logger.error("HF Embedding model is not configured.")
+ return None
+
+ model_id = HF_EMBEDDING_MODEL # Use the fixed embedding model
+ api_url = f"{HF_INFERENCE_API}/{model_id}"
+ headers = {"Authorization": f"Bearer {self.hf_token}"}
+ timeout = aiohttp.ClientTimeout(total=180) # Increased timeout for embedding large batches
+
+ logger.info(f"Requesting embeddings from {api_url} for {len(texts)} texts.")
+ # HF Inference API has a limit on the number of inputs per request (often 512 or 1024)
+ # Batching is recommended for large lists of texts.
+ batch_size = 500 # Example batch size, adjust based on model limits if known
+ all_embeddings = []
+
+ for i in range(0, len(texts), batch_size):
+ batch_texts = texts[i:i + batch_size]
+ payload = {"inputs": batch_texts, "options": {"wait_for_model": True}}
+ logger.debug(f"Processing embedding batch {i//batch_size + 1}/{(len(texts)-1)//batch_size + 1} ({len(batch_texts)} texts)")
+
+ # Implement retry logic for batches
+ retries = 3
+ for attempt in range(retries):
+ try:
+ async with aiohttp.ClientSession(headers=headers, timeout=timeout) as session:
+ async with session.post(api_url, json=payload) as response:
+ rate_limit_remaining = response.headers.get('X-Ratelimit-Remaining')
+ logger.debug(f"HF Embedding API Response Status: {response.status}, RateLimit Remaining: {rate_limit_remaining}")
+
+ if response.status == 429: # Too Many Requests
+ # Retry-After may be delta-seconds or an HTTP-date; assume seconds, fall back to 10
+ try:
+     retry_after = int(response.headers.get('Retry-After', 10))
+ except ValueError:
+     retry_after = 10
+ logger.warning(f"HF Embedding API rate limited. Waiting for {retry_after} seconds before retry {attempt + 1}/{retries}.")
+ await asyncio.sleep(retry_after)
+ continue # Retry the same batch
+ response.raise_for_status() # Raise for other 4xx/5xx errors
+
+ result = await response.json()
+
+ if isinstance(result, list) and all(isinstance(emb, list) and all(isinstance(f, float) for f in emb) for emb in result):
+ if len(result) == len(batch_texts):
+ all_embeddings.extend(result)
+ logger.debug(f"Successfully received {len(result)} embeddings for batch.")
+ break # Batch successful, move to next batch
+ else:
+ logger.error(f"HF Embedding API returned wrong number of embeddings for batch: Got {len(result)}, expected {len(batch_texts)}.")
+ return None # Indicate failure
+ elif isinstance(result, dict) and 'error' in result:
+ error_msg = result['error']
+ estimated_time = result.get('estimated_time')
+ logger.error(f"HF Inference API embedding error on batch: {error_msg}" + (f" (Estimated time: {estimated_time}s)" if estimated_time else ""))
+ return None # Indicate failure
+ else:
+ logger.error(f"Unexpected embedding format received on batch: Type={type(result)}. Response: {str(result)[:500]}")
+ return None # Indicate failure
+
+ except asyncio.TimeoutError:
+ logger.warning(f"HF Inference API embedding request timed out after {timeout.total} seconds for batch. Retry {attempt + 1}/{retries}.")
+ if attempt < retries - 1:
+ await asyncio.sleep(5) # Wait a bit before retrying timeout
+ continue
+ else:
+ logger.error("Max retries reached for embedding batch timeout.")
+ return None # Indicate failure
+ except aiohttp.ClientResponseError as e:
+ # ClientResponseError exposes status/message/headers but not the response
+ # body (it has no .text() method), so log what is available.
+ logger.error(f"HF Inference API embedding request failed on batch: Status={e.status}, Message='{e.message}'. Retry {attempt + 1}/{retries}.")
+ if attempt < retries - 1 and e.status in [500, 502, 503, 504]: # Retry on server errors
+ await asyncio.sleep(5)
+ continue
+ else:
+ logger.error("Max retries reached or non-retryable error for embedding batch.")
+ return None # Indicate failure
+ except Exception as e:
+ logger.exception(f"Unexpected error during embedding generation on batch: {e}. Retry {attempt + 1}/{retries}.")
+ if attempt < retries - 1:
+ await asyncio.sleep(5)
+ continue
+ else:
+ logger.error("Max retries reached for unexpected error during embedding batch.")
+ return None # Indicate failure
+ else: # This else block executes if the inner loop completes without a 'break' (i.e., all retries failed)
+ logger.error(f"Failed to process embedding batch after {retries} retries.")
+ return None # Indicate failure
+
+ await asyncio.sleep(0.1) # Small delay between batches
+
+ if len(all_embeddings) == len(texts):
+ logger.info(f"Successfully generated embeddings for all {len(all_embeddings)} texts.")
+ return all_embeddings
+ else:
+ logger.error(f"Embedding generation failed partway through. Expected {len(texts)}, got {len(all_embeddings)}.")
+ return None # Indicate overall failure
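+ # Hypothetical usage: `vecs = await self._generate_embeddings(["crash on save"])`
+ # returns a list of float vectors aligned with the inputs, or None on failure.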
+
+
+ async def generate_code_patch(self, issue_number: int, model_key: str) -> dict:
+ """Generates a code patch suggestion using a selected AI model."""
+ if issue_number not in self.issues:
+ return {"error": f"Issue {issue_number} not found."}
+ if not self.hf_token:
+ return {"error": "Hugging Face token not set."}
+ if model_key not in HF_MODELS or HF_MODELS.get(model_key) is None:
+ return {"error": f"Invalid or unconfigured model key: {model_key}"}
+ if not self.repo_local_path or not self.repo:
+ return {"error": "Repository not cloned/available locally. Please scan the repository first."}
+
+ issue = self.issues[issue_number]
+ model_id = HF_MODELS[model_key]
+ logger.info(f"Generating patch for issue {issue_number} ('{issue.get('title', 'N/A')[:50]}...') using model {model_id}")
+
+ # --- Context Gathering ---
+ context_str = "Context gathering failed or not available."
+ context_source = "Error"
+ start_time_context = time.time()
+ context_data = self.precomputed_context.get(issue_number) # Use .get for safety
+
+ if context_data:
+ timestamp = context_data.get('timestamp', 0)
+ timestamp_str = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S')
+ if context_data.get("error"):
+ context_str = f"Pre-computed context retrieval failed: {context_data['error']}"
+ context_source = f"Pre-computed (Failed @ {timestamp_str})"
+ elif context_data.get("content"):
+ context_str = context_data["content"]
+ num_files = len(context_data.get('files',[]))
+ context_source = f"Pre-computed ({num_files} files @ {timestamp_str})"
+ else:
+ context_str = "Pre-computed context was empty or unavailable."
+ context_source = f"Pre-computed (Empty @ {timestamp_str})"
+ logger.info(f"Using pre-computed context for issue {issue_number} (Source: {context_source})")
+ else:
+ logger.info(f"No pre-computed context found for issue {issue_number}, computing now.")
+ context_source = "Computed On-Demand"
+ # Compute context on demand and store it
+ context_result = await self._get_code_context(issue)
+ self.precomputed_context[issue_number] = {
+ "content": context_result.get("content"),
+ "files": context_result.get("files", []),
+ "error": context_result.get("error"),
+ "timestamp": time.time()
+ }
+ if "error" in context_result and context_result["error"]:
+ context_str = f"Error retrieving context: {context_result['error']}"
+ context_source += " (Error)"
+ else:
+ context_str = context_result.get("content", "No specific context found.")
+ context_source += f" ({len(context_result.get('files',[]))} files)"
+
+ context_load_duration = time.time() - start_time_context
+ logger.info(f"Computed context on-demand in {context_load_duration:.2f}s. Source: {context_source}")
+
+ # --- Get Pre-computed Info ---
+ summary_text = self._get_precomputed_text(issue_number, self.precomputed_summaries, "summary", "Summary")
+ missing_info_text = self._get_precomputed_text(issue_number, self.precomputed_missing_info, "info_needed", "Missing Info Analysis")
+ analysis_text = self._get_precomputed_text(issue_number, self.precomputed_analysis, "hypothesis", "Preliminary Analysis")
+ duplicate_info = self._get_duplicate_info_text(issue_number)
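+ # Each helper returns either the pre-computed text or a short status marker
+ # (pending/error icon) when background processing hasn't finished for this issue.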
+
+ # --- Enhanced Prompt ---
+ # Added clear delimiters and instructions for the AI
+ prompt = f"""You are an expert software engineer AI assistant generating a minimal `diff` code patch to fix a GitHub issue.
+
+
Select an issue from the 'Issue Board' tab.
" - try: - with open(file_path, "w") as file: - file.write(code) - # Add file name to the project's file list if new - if file_name not in st.session_state.workspace_projects[project_name]['files']: - st.session_state.workspace_projects[project_name]['files'].append(file_name) + try: + issue = manager.issues[issue_num] + # Use markdown2 for rendering issue body + html_body = markdown2.markdown( + issue.get('body', '*No description provided.*') or '*No description provided.*', + extras=["fenced-code-blocks", "tables", "strike", "task_list", "code-friendly", "html-classes", "nofollow", "spoiler"] + ) - return f"Code successfully written to '{file_name}' in project '{project_name}'." - except IOError as e: - return f"Error writing code to '{file_name}': {e}" + # Generate HTML for labels + labels_html = ' '.join(f'{gr.Textbox.sanitize_html(l)}' for l in issue.get('labels', [])) or 'None' + + # Generate HTML for status indicators + status_indicators = [] + if issue_num in manager.stale_issues: + status_indicators.append(f"[Stale]") + if issue_num in manager.high_priority_candidates: + severity = manager._determine_severity(issue.get('labels', [])) + if severity == "Critical": color, bgcolor = "#ef4444", "#fee2e2" + elif severity == "High": color, bgcolor = "#f97316", "#ffedd5" + else: color, bgcolor = "#c2410c", "#fffbeb" # Should ideally not happen if logic works + status_indicators.append(f"[{severity}]") + status_html = " ".join(status_indicators) + + # --- Get Precomputed Data with Helper --- + summary_text = manager._get_precomputed_text(issue_num, manager.precomputed_summaries, "summary", "Summary") + missing_info_text = manager._get_precomputed_text(issue_num, manager.precomputed_missing_info, "info_needed", "Missing Info") + analysis_text = manager._get_precomputed_text(issue_num, manager.precomputed_analysis, "hypothesis", "Analysis") + duplicate_text = manager._get_duplicate_info_text(issue_num) # This already includes formatting and links + + # --- Format AI Sections --- + ai_sections = [] + # Only show sections if they have content or an error, not just pending state + # Use the presence of icons (❌, ⏳, ℹ️) to determine if it's just a status message + if not summary_text.startswith("⏳") and not summary_text.startswith("(ℹ️"): + color = "#f0e6ff" if not summary_text.startswith("❌") else "#fee2e2" + border_color = "#ddd6fe" if not summary_text.startswith("❌") else "#fecaca" + ai_sections.append(f""" +Error generating preview for issue {issue_num}. Check logs.
" + + async def get_ai_suggestion_wrapper(issue_num: Optional[int], model_key: str, progress=gr.Progress()) -> str: + """UI wrapper for getting AI suggestions, handles state and progress.""" + progress(0, desc="Preparing request...") + if issue_num is None or issue_num not in manager.issues: + return "⚠️ Error: Please select a valid issue first." + if not manager.hf_token: + return "🔒 Error: Hugging Face Token is not configured." + if model_key not in HF_MODELS or HF_MODELS.get(model_key) is None: + return f"⚠️ Error: Invalid or unconfigured model key selected: {model_key}" + + issue = manager.issues[issue_num] + issue_hash = manager._get_issue_hash(issue) + logger.info(f"Requesting suggestion for issue {issue_num} (hash: {issue_hash}) using model {model_key}.") + + try: + progress(0.3, desc=f"Querying {model_key}...") + # The cached_suggestion method handles the cache lookup and actual generation + suggestion = await manager.cached_suggestion(issue_hash, model_key) + progress(1, desc="Suggestion received.") + + if suggestion.lower().startswith("error:"): + return f"⚠️ {suggestion}" + else: + # Format the output clearly + return f"**💡 Suggestion based on {model_key}:**\n\n---\n{suggestion}" + except Exception as e: + logger.exception(f"Error in get_ai_suggestion_wrapper for issue {issue_num}: {e}") + return f"❌ An unexpected error occurred while getting the suggestion: {e}" + + async def get_ai_patch_wrapper(issue_num: Optional[int], model_key: str, progress=gr.Progress()) -> str: + """UI wrapper for getting AI patches, handles state and progress.""" + progress(0, desc="Preparing request...") + if issue_num is None or issue_num not in manager.issues: + return "⚠️ Error: Please select a valid issue first." + if not manager.hf_token: + return "🔒 Error: Hugging Face Token is not configured." + if not manager.repo: + return "❌ Error: Repository not loaded. Please scan the repository first." + if model_key not in HF_MODELS or HF_MODELS.get(model_key) is None: + return f"⚠️ Error: Invalid or unconfigured model key selected: {model_key}" + + logger.info(f"Requesting patch for issue {issue_num} using model {model_key}.") + progress(0.1, desc="Gathering code context (using cache if available)...") + try: + # Context is gathered inside generate_code_patch, which uses the precomputed_context cache + progress(0.4, desc=f"Querying {model_key} for patch...") + result = await manager.generate_code_patch(issue_num, model_key) + progress(1, desc="Patch result received.") + + if "error" in result: + logger.error(f"Patch generation failed for issue {issue_num}: {result['error']}") + return f"**❌ Error generating patch:**\n\n{result['error']}" + else: + model_used = result.get('model_used', model_key) + explanation = result.get('explanation', '(No explanation provided)') + patch_content = result.get('patch') + status_msg = result.get('status') # Get specific status if available + + header = f"**🩹 Patch Suggestion from {model_used}:**" + if status_msg: + header = f"**🩹 Patch Generation Result from {model_used}:** ({status_msg})" + + if patch_content: + # Escape backticks in patch content for markdown code block + patch_content_sanitized = patch_content.replace('`', '\\`') + logger.info(f"Successfully generated patch for issue {issue_num} using {model_used}.") + return f"""{header} +**Explanation:** +{explanation} +--- +**Patch:** +```diff +{patch_content_sanitized} +```""" + else: + logger.warning(f"AI provided explanation but no patch for issue {issue_num}. 
Explanation: {explanation}") + return f"""{header} +**Explanation:** +{explanation} +--- +**(No valid diff block generated)**""" + + except Exception as e: + logger.exception(f"Error in get_ai_patch_wrapper for issue {issue_num}: {e}") + return f"❌ An unexpected error occurred while generating the patch: {e}" + + async def handle_issue_select(evt: gr.SelectData): + """ + Handles issue selection in the Dataframe: updates preview, loads code context into editor. + Reads the selected issue ID from the event data. + Returns updates for multiple components. + """ + # Default state for deselection or error + default_updates = { + # Update the hidden state element first + "selected_issue_id_hidden": gr.update(value=""), + "issue_preview_html": gr.update(value="Select an issue from the 'Issue Board' tab.
"), + # Clear the code editor + "code_edit_component": gr.update(value={"placeholder.txt": "# Select an issue to load relevant code context."}, interactive=True, language="text"), + "ai_output_display": gr.update(value="*AI suggestions and patches will appear here after selecting an issue and action.*"), + "copy_patch_btn": gr.update(visible=False), # Hide copy button on selection change + } + + # Check if a row was actually selected and has a value + if evt.index is None or not hasattr(evt, 'value') or not evt.value or evt.value[0] is None: + logger.info("Issue deselected or invalid selection event.") + # Return default updates to clear the UI + return default_updates + try: + # The first column (index 0) is the Issue ID + selected_id = int(evt.value[0]) + logger.info(f"Issue selected via Dataframe: ID {selected_id}") + + if selected_id not in manager.issues: + logger.error(f"Selected issue ID {selected_id} not found in manager's issue list.") + return { + **default_updates, + "issue_preview_html": gr.update(value=f"Error: Issue {selected_id} not found in the current list. Try re-scanning.
"), + "selected_issue_id_hidden": gr.update(value=""), # Ensure hidden state is cleared + } + + issue_data = manager.issues[selected_id] + files_content: Dict[str, str] = {} + context_source_msg = "Loading context..." + context_load_start = time.time() + + # --- Load or Compute Code Context --- + # Prioritize pre-computed context if available + context_data = manager.precomputed_context.get(selected_id) # Use .get for safety + + if context_data: + timestamp = context_data.get('timestamp', 0) + timestamp_str = datetime.fromtimestamp(timestamp).strftime('%Y-%m-%d %H:%M:%S') + if context_data.get("error"): + context_source_msg = f"Pre-computed (Failed @ {timestamp_str})" + files_content["error_context.txt"] = f"# Error loading pre-computed context:\n# {context_data['error']}" + elif context_data.get("files"): + context_source_msg = f"Pre-computed ({len(context_data['files'])} files @ {timestamp_str})" + logger.info(f"Loading {len(context_data['files'])} files from pre-computed context for issue {selected_id}: {context_data['files']}") + loaded_count = 0 + for file_path_str in context_data["files"]: + full_path = manager.repo_local_path / file_path_str + try: + if full_path.is_file(): + # Use resolved path for robust reading + resolved_full_path = full_path.resolve() + # Double-check it's still in the repo path after resolve + if resolved_full_path.is_relative_to(manager.repo_local_path.resolve()): + files_content[file_path_str] = resolved_full_path.read_text(encoding='utf-8', errors='ignore') + loaded_count += 1 + else: + logger.warning(f"Pre-computed file path {file_path_str} resolved outside repo during load for issue {selected_id}.") + files_content[file_path_str] = f"# File path resolved outside repository: {file_path_str}" + else: + logger.warning(f"Pre-computed file path {file_path_str} not found or not a file during load for issue {selected_id}.") + files_content[file_path_str] = f"# File not found or not a file: {file_path_str}" + except Exception as e: + logger.warning(f"Error reading pre-computed file {full_path} for issue {selected_id}: {e}") + files_content[file_path_str] = f"# Error reading file: {e}" + if loaded_count == 0 and context_data["files"]: + files_content["error_reading_files.txt"] = "# Precomputed context found file references, but failed to read any file content." 
+ else: + context_source_msg = f"Pre-computed (No files found @ {timestamp_str})" + files_content[f"issue_{selected_id}_context.md"] = context_data.get("content", "# No specific code context found (pre-computed).") + else: + logger.info(f"Context not pre-computed for issue {selected_id}, computing on demand for editor.") + context_source_msg = "Computed On-Demand" + context_result = await manager._get_code_context(issue_data) + + # Store the newly computed context for future use (e.g., patch generation) + manager.precomputed_context[selected_id] = { + "content": context_result.get("content"), + "files": context_result.get("files", []), + "error": context_result.get("error"), + "timestamp": time.time() + } + + if "error" in context_result and context_result["error"]: + context_source_msg += f" (Error: {context_result['error']})" + files_content["error_context.txt"] = f"# Error loading context on demand:\n# {context_result['error']}" + elif context_result.get("files"): + context_source_msg += f" ({len(context_result['files'])} files)" + logger.info(f"Loading {len(context_result['files'])} files computed on-demand for issue {selected_id}: {context_result['files']}") + loaded_count = 0 + for file_path_str in context_result["files"]: + full_path = manager.repo_local_path / file_path_str + try: + if full_path.is_file(): + resolved_full_path = full_path.resolve() + if resolved_full_path.is_relative_to(manager.repo_local_path.resolve()): + files_content[file_path_str] = resolved_full_path.read_text(encoding='utf-8', errors='ignore') + loaded_count +=1 + else: + logger.warning(f"On-demand file path {file_path_str} resolved outside repo during load for issue {selected_id}.") + files_content[file_path_str] = f"# File path resolved outside repository: {file_path_str}" + else: + logger.warning(f"On-demand file path {file_path_str} not found or not a file during load for issue {selected_id}.") + files_content[file_path_str] = f"# File not found or not a file: {file_path_str}" + except Exception as e: + logger.warning(f"Error reading on-demand file {full_path} for issue {selected_id}: {e}") + files_content[file_path_str] = f"# Error reading file: {e}" + if loaded_count == 0 and context_result["files"]: + files_content["error_reading_files.txt"] = "# Context computation found file references, but failed to read any file content." + else: + context_source_msg += " (No files found)" + files_content[f"issue_{selected_id}_context.md"] = context_result.get("content", "# No specific code context found.") + + context_load_duration = time.time() - context_load_start + logger.info(f"Context loading for editor took {context_load_duration:.2f}s. Source: {context_source_msg}") + + if not files_content: + files_content["placeholder.txt"] = f"# No relevant files found or context failed to load for issue {selected_id}." 
+ + # Initialize or update the OT editor instance for this issue + manager.code_editors[selected_id] = OTCodeEditor(initial_value=files_content) + logger.info(f"Initialized/Updated OT editor state for issue {selected_id} with files: {list(files_content.keys())}") + + # Determine the initial language hint for the editor based on the first file extension + initial_language = "text" # Default to plain text + if files_content: + first_file_name = list(files_content.keys())[0] + # Extract extension (part after the last dot) + _, ext = os.path.splitext(first_file_name) + if ext: + # Remove leading dot and convert to lowercase + ext = ext[1:].lower() + # Map common extensions to ACE editor modes (approximate) + # Check available modes in ACE editor documentation if needed + lang_map = { + 'py': 'python', 'js': 'javascript', 'jsx': 'javascript', 'ts': 'typescript', 'tsx': 'typescript', + 'java': 'java', 'c': 'c_cpp', 'cpp': 'c_cpp', 'h': 'c_cpp', 'hpp': 'c_cpp', 'cs': 'csharp', + 'go': 'golang', 'rs': 'rust', 'php': 'php', 'rb': 'ruby', 'html': 'html', 'css': 'css', + 'scss': 'scss', 'less': 'less', 'md': 'markdown', 'yaml': 'yaml', 'yml': 'yaml', 'json': 'json', + 'toml': 'toml', 'sh': 'sh', 'bash': 'sh', 'zsh': 'sh', 'sql': 'sql', 'xml': 'xml', + 'dockerfile': 'dockerfile', 'makefile': 'makefile', 'txt': 'text', 'log': 'text', + 'conf': 'text', 'cfg': 'text', 'ini': 'ini' + } + initial_language = lang_map.get(ext, 'text') + + + # Prepare updates for all relevant UI components + updates = { + # Update the hidden state element with the selected ID (as string) + "selected_issue_id_hidden": gr.update(value=str(selected_id)), + "issue_preview_html": gr.update(value=generate_issue_preview(selected_id)), + "code_edit_component": gr.update(value=files_content, interactive=True, language=initial_language), + "ai_output_display": gr.update(value=f"*Context loaded ({context_source_msg}). Ready for AI actions or editing.*"), + "copy_patch_btn": gr.update(visible=False), # Hide copy button on new selection + } + return updates + + except (ValueError, TypeError, IndexError, KeyError, AttributeError) as e: + logger.exception(f"Error processing issue selection event (evt.value: {getattr(evt, 'value', 'N/A')}): {e}") + # Return default state plus an error message + return { + **default_updates, + "issue_preview_html": gr.update(value="Error loading issue details. Please check logs and try again.
"), + "code_edit_component": gr.update(value={"error.txt": f"# Error loading code context for selection.\n# Error: {e}"}, interactive=False, language="text"), + "ai_output_display": gr.update(value="*Error processing selection. See logs.*"), + "selected_issue_id_hidden": gr.update(value=""), # Ensure hidden state is cleared + "copy_patch_btn": gr.update(visible=False), # Hide copy button on error + } + + # --- Analytics Helper --- + def update_cluster_analytics(mgr: IssueManager): + """Generates data for the cluster analytics dataframe.""" + if not mgr.issue_clusters or not mgr.issue_list_for_clustering: + return [["N/A", 0, "No clusters found"]] + + cluster_data = [] + # Expanded stop words list (lowercase) + stop_words = set("a an the is are was were be been being has have had do does did will would shall should can could may might must it its this that these those there their then than and or but if because as at by for with without about against between into through during before after above below to from up down in out on off over under again further then once here there when where why how all any both each few more most other some such no nor not only own same so than too very s t can will just don should now d ll m o re ve yo ain aren couldn didn doesn isn it's mustn ouren shan shouldn wasn weren won wouldn".split()) + # Add common code/issue terms (lowercase) + stop_words.update({'issue', 'bug', 'error', 'fix', 'feat', 'chore', 'refactor', 'docs', 'test', 'file', 'line', 'code', 'when', 'user', 'report', 'problem', 'need', 'want', 'get', 'use', 'try', 'make', 'add', 'remove', 'change', 'update', 'create', 'build', 'run', 'start', 'stop', 'click', 'button', 'page', 'app', 'data', 'system', 'service', 'component', 'module', 'function', 'class', 'method', 'variable', 'value', 'string', 'number', 'list', 'dict', 'object', 'array', 'true', 'false', 'null', 'none', 'type', 'size', 'color', 'style', 'width', 'height', 'margin', 'padding', 'border', 'display', 'position', 'float', 'clear', 'overflow', 'index', 'key', 'value', 'id', 'name', 'text', 'html', 'css', 'js', 'py', 'java', 'c', 'cpp', 'h', 'hpp', 'go', 'rs', 'php', 'rb', 'json', 'yaml', 'xml', 'sql', 'log', 'config', 'setup', 'install', 'version', 'release', 'branch', 'commit', 'pull', 'request', 'merge', 'diff', 'patch', 'context', 'suggestion', 'analysis', 'missing', 'information', 'duplicate', 'potential', 'cluster', 'severity', 'label', 'assignee', 'state', 'title', 'body', 'url', 'created', 'updated', 'time', 'day', 'week', 'month', 'year', 'ago', 'now', 'new', 'old', 'different', 'same', 'similar', 'group', 'count', 'distribution', 'analytics', 'overview', 'studio', 'board', 'resolution', 'collaborative', 'editor', 'context', 'aware', 'assistance', 'tools', 'output', 'patch', 'steps', 'approach', 'plan', 'identify', 'propose', 'recommendation', 'workflow', 'standard', 'important', 'critical', 'high', 'medium', 'low', 'unknown', 'none', 'needed', 'analysis', 'hypothesis', 'preliminary', 'relevant', 'area', 'investigation', 'implementation', 'testing', 'next'}) + + + def get_top_keywords(indices): + text = "" + for idx in indices: + # Ensure index is valid for the current issue list snapshot + if 0 <= idx < len(mgr.issue_list_for_clustering): + issue = mgr.issue_list_for_clustering[idx] + text += issue.get('title', '') + " " + issue.get('body', '')[:500] + " " # Use more body text + text = text.lower() + # Remove punctuation + text = text.translate(str.maketrans('', '', string.punctuation)) + # Split into words and filter + 
words = [w for w in text.split() if len(w) > 2 and w not in stop_words] # Min word length 3 + if not words: return "N/A" + counts = Counter(words) + # Get top 5 most common words + top_5 = [word for word, count in counts.most_common(5)] + return ", ".join(top_5) + + # Sort clusters by size (descending) + sorted_clusters = sorted(mgr.issue_clusters.items(), key=lambda item: len(item[1]), reverse=True) + + # Limit to top N clusters for display + top_n_clusters = 10 # Display top 10 clusters + cluster_data = [] + for cluster_id, indices in sorted_clusters[:top_n_clusters]: + keywords = get_top_keywords(indices) + cluster_data.append([cluster_id, len(indices), keywords]) + + if not cluster_data: + return [["N/A", 0, "No clusters found"]] + + return cluster_data + + # --- Gradio Blocks --- + with gr.Blocks(theme=theme, title="AI Issue Resolver Pro", css=""" + #collab-list .collab-item { margin-bottom: 4px; font-size: 0.9em; } + .gradio-container { max-width: 1600px !important; } + #issue_preview_div { max-height: 80vh; overflow-y: auto; } /* Ensure preview is scrollable */ + #ai_output_md { max-height: 60vh; overflow-y: auto; } /* Ensure AI output is scrollable */ + #code_editor_component { min-height: 500px; height: 70vh; } /* Give editor more space */ + #ai_tools_accordion > .label { background-color: #f0f9ff; } /* Light blue background for tools header */ + #ai_tools_accordion.closed > .label { background-color: #f0f9ff; } /* Keep background when closed */ + #ai_tools_accordion > .label:hover { background-color: #e0f2fe; } /* Hover effect */ + #ai_tools_accordion > .label > .icon { color: #0ea5e9; } /* Tool icon color */ + .panel { border: 1px solid #e5e7eb; border-radius: 8px; padding: 15px; background-color: #f9fafb; } /* Styled panel */ + #config-panel { margin-bottom: 20px; } + #status_output_txt textarea { font-family: monospace; font-size: 0.9em; background-color: #eef; } /* Style status bar */ + .gradio-dataframe-container { overflow-x: auto; } /* Ensure dataframe is scrollable horizontally */ + #issue_list_df table { width: 100%; } /* Make dataframe table use full width */ + #issue_list_df th, #issue_list_df td { white-space: nowrap; overflow: hidden; text-overflow: ellipsis; } /* Prevent text wrapping in table headers/cells */ + #issue_list_df td:nth-child(1) { width: 80px; min-width: 80px; } /* Fix ID column width */ + #issue_list_df td:nth-child(2) { width: 60%; max-width: 400px; } /* Give Title column more space */ + #issue_list_df td:nth-child(3) { width: 15%; min-width: 100px; } /* Severity */ + #issue_list_df td:nth-child(4) { width: 15%; min-width: 80px; } /* Cluster */ + .gradio-dataframe-container table td { cursor: pointer; } /* Indicate rows are clickable */ + + + """) as demo_app: + gr.Markdown(""" +Collaborative Issue Resolution Powered by AI
+Select an issue from the 'Issue Board' tab.
", elem_id="issue_preview_div") + + with gr.Accordion("🛠️ AI Assistance Tools", open=True, elem_id="ai_tools_accordion"): + suggest_btn = gr.Button("🧠 Suggest Resolution Steps", icon="💡", elem_id="suggest_btn", interactive=bool(HF_MODELS)) + patch_btn = gr.Button("📝 Generate Code Patch", icon="🩹", elem_id="patch_btn", interactive=bool(HF_MODELS)) + + gr.Markdown("### AI Output") + # Use a Markdown component with scrollability + ai_output_display = gr.Markdown(value="*AI suggestions and patches will appear here...*", elem_id="ai_output_md") + with gr.Row(): + copy_patch_btn = gr.Button("📋 Copy Patch", elem_id="copy_patch_btn", visible=False) # Initially hidden + clear_ai_output_btn = gr.Button("🧹 Clear Output", elem_id="clear_ai_output_btn") + + + with gr.Column(scale=2, min_width=600): + gr.Markdown("### Collaborative Code Editor (Context-Aware)") + gr.Markdown("⚠️ Warning: Real-time collaborative editing is experimental and may lose data with simultaneous edits. Use with caution and save work frequently!
") + # Initialize with placeholder content + code_edit_component = code_editor( + label="Code Context / Editor", + language="python", # Default language, can be changed based on file extension in JS + interactive=True, + elem_id="code_editor_component", + value={"placeholder.txt": "# Select an issue to load relevant code context."} + ) -def get_built_space_files(project_name: str) -> Dict[str, str]: - """Gathers files from the specified project to prepare for deployment.""" - if not project_name or project_name not in st.session_state.workspace_projects: - st.error(f"Error: Project '{project_name}' does not exist or is not selected.") - return {} + with gr.Tab("📈 Analytics", id="analytics", elem_id="tab-analytics"): + gr.Markdown("### Repository Analytics") + with gr.Row(equal_height=False): + with gr.Column(scale=1): + gr.Markdown("#### Issue Severity Distribution") + # Initialize with empty plot, will be updated after crawl + analytics_severity_plot = gr.Plot(label="Severity Distribution (Analytics)", elem_id="analytics_severity_plot", value=manager._generate_stats_plot({})) + with gr.Column(scale=1): + gr.Markdown("#### Issue Cluster Analysis (Top Clusters)") + cluster_info_df = gr.Dataframe( + headers=["Cluster ID", "Issue Count", "Top Keywords (Example)"], + datatype=["number", "number", "str"], + value=[["Scan a repository to see cluster data.", 0, ""]], # Initial placeholder data + label="Issue Clusters", elem_id="cluster_info_df", + interactive=False, # Make this dataframe non-interactive + row_count=(5, "dynamic"), + col_count=(3, "fixed"), + wrap=True + ) + gr.Markdown("*(Analytics update after scanning the repository. More detailed analytics could be added.)*") + + # --- Event Handlers --- + # The crawl button updates the issue list, stats plot, status, and analytics plot + crawl_btn.click( + fn=manager.crawl_issues, + inputs=[repo_url, github_token, hf_token], + outputs=[issue_list, stats_plot, status_output, analytics_severity_plot], + api_name="crawl_issues", + show_progress="full" + ).then( + # After crawl_issues completes, update the cluster analytics dataframe + fn=lambda: update_cluster_analytics(manager), + inputs=[], + outputs=[cluster_info_df] + ) - project_path = st.session_state.workspace_projects[project_name]['path'] - files_to_deploy: Dict[str, str] = {} + # The issue list selection updates the preview, code editor, AI output area, and hidden state + issue_list.select( + fn=handle_issue_select, + # Pass the event data, which contains the selected row's value (including ID) + inputs=[gr.SelectData()], + outputs=[selected_issue_id_hidden, issue_preview_html, code_edit_component, ai_output_display, copy_patch_btn], + show_progress="minimal", + # Trigger the JS function to update the tracked issue ID and editor listeners + # This is handled by the MutationObserver and reading the hidden input value in JS + ) - try: - # Simple approach: include all files in the project directory - # A more sophisticated approach might filter or ask the user which files to include - for root, _, files in os.walk(project_path): - for file in files: - file_path = os.path.join(root, file) - # Get the relative path from the project root - relative_path = os.path.relpath(file_path, project_path) - with open(file_path, "r", encoding="utf-8") as f: - files_to_deploy[relative_path] = f.read() - return files_to_deploy - except Exception as e: - st.error(f"Error gathering files for deployment from project '{project_name}': {e}") - return {} + # AI Suggestion button + suggest_btn.click( + 
fn=get_ai_suggestion_wrapper, + inputs=[selected_issue_id_hidden, model_select], # Read selected issue ID from hidden state + outputs=[ai_output_display], + api_name="suggest_resolution", + show_progress="full" + ).then( + # After getting suggestion, hide the copy patch button + fn=lambda: gr.update(visible=False), + inputs=[], + outputs=[copy_patch_btn] + ) + # AI Patch button + patch_btn.click( + fn=get_ai_patch_wrapper, + inputs=[selected_issue_id_hidden, model_select], # Read selected issue ID from hidden state + outputs=[ai_output_display], + api_name="generate_patch", + show_progress="full" + ).then( + # After getting patch, check if it contains a diff block and show the copy button if so + # Use a lambda to check the output text + fn=lambda output_text: gr.update(visible="```diff" in output_text), + inputs=[ai_output_display], + outputs=[copy_patch_btn] + ) -def deploy_project_to_hugging_face(project_name: str, space_name: str, space_description: str, public: bool = True): - """Deploys the specified project to a new Hugging Face Space.""" - if not hf_token: - st.error("Hugging Face API token not found in Streamlit secrets.") - return "Deployment failed: Hugging Face token missing." + # Clear AI Output button + clear_ai_output_btn.click( + fn=lambda: ["*AI suggestions and patches will appear here...*", gr.update(visible=False)], + inputs=[], + outputs=[ai_output_display, copy_patch_btn] + ) - if not project_name or project_name not in st.session_state.workspace_projects: - return f"Deployment failed: Project '{project_name}' does not exist or is not selected." + # --- JavaScript for WebSocket Communication and UI Interaction --- + def web_socket_js(ws_port, gradio_port): + # Generate a unique client ID on page load + # Note: This JS is generated *once* when the Gradio app is created. + # The actual client ID is generated and logged when the JS runs in the browser. + # The Python-side logging here is just for context during app startup. + temp_client_id_placeholder = f"client_{hashlib.sha1(os.urandom(16)).hexdigest()[:8]}" + logger.info(f"Generating JS with placeholder Client ID: {temp_client_id_placeholder}") + + return f""" + + """ + # The _js parameter injects the JavaScript code into the Gradio page + demo_app.load(_js=web_socket_js(WS_PORT, GRADIO_PORT), fn=None, inputs=None, outputs=None) + + return demo_app + +# ========== WebSocket Server Logic ========== +async def handle_ws_connection(websocket: WebSocketServerProtocol, path: str, manager: IssueManager): + """Handles incoming WebSocket connections and messages for collaboration.""" + # Generate a client ID and attach it to the websocket object + client_id = f"client_{hashlib.sha1(os.urandom(16)).hexdigest()[:8]}" + setattr(websocket, 'client_id', client_id) + remote_addr = websocket.remote_address + logger.info(f"WebSocket client connected: {remote_addr} assigned ID {client_id}") + + # Add the new client to the list of active clients + manager.ws_clients.append(websocket) + logger.info(f"Client list size: {len(manager.ws_clients)}") try: - api = HfApi() - # Create the Space repository - repo_id = f"{api.whoami()['name']}/{space_name}" # Use the authenticated user's name - create_repo(repo_id=repo_id, space_sdk="streamlit", repo_type="space", private=not public) - st.success(f"Hugging Face Space repository '{repo_id}' created.") - - # Gather files from the project - files_to_deploy = get_built_space_files(project_name) - if not files_to_deploy: - return "Deployment failed: No files found in the project." 
- - # Upload files to the Space - for file_path_in_repo, file_content in files_to_deploy.items(): - # Need to write content to a temporary file to use upload_file - temp_upload_path = f"temp_upload_{os.path.basename(file_path_in_repo)}" - with open(temp_upload_path, "w", encoding="utf-8") as f: - f.write(file_content) - - upload_file( - path_or_fileobj=temp_upload_path, - path_in_repo=file_path_in_repo, - repo_id=repo_id, - repo_type="space" - ) - os.remove(temp_upload_path) # Clean up temp file - st.write(f"Uploaded: {file_path_in_repo}") - - st.success(f"Project '{project_name}' successfully deployed to Hugging Face Space: https://huggingface.co/spaces/{repo_id}") - return f"Deployment successful: https://huggingface.co/spaces/{repo_id}" + # Wait for the first message (expected to be 'join') or other messages + async for message in websocket: + try: + # Ensure message is bytes or string before decoding/parsing + if isinstance(message, bytes): + message = message.decode('utf-8') + if not isinstance(message, str): + logger.warning(f"Received non-string/bytes message from {client_id}: {message!r}") + continue + + data = json.loads(message) + msg_type = data.get("type") + # Use the client_id assigned by the server, not one sent by the client, for security + sender_id = client_id + + logger.debug(f"Received WS message type '{msg_type}' from {sender_id} ({remote_addr})") + + if msg_type == "join": + # Store collaborator info when they explicitly join + # Use the name provided by the client, default if not provided + client_name = data.get("name", f"User_{sender_id[:4]}") + # Ensure name is a string and not too long + client_name = str(client_name)[:50] if client_name else f"User_{sender_id[:4]}" + + if sender_id in manager.collaborators: + # Update existing entry if client reconnects or sends join again + manager.collaborators[sender_id].update({"name": client_name, "status": "Connected"}) + logger.info(f"Client {sender_id} ({client_name}) updated status to Connected.") + else: + # Add new entry + manager.collaborators[sender_id] = {"name": client_name, "status": "Connected"} + logger.info(f"Client {sender_id} ({client_name}) joined collaboration. Current collaborators: {list(manager.collaborators.keys())}") + + # Broadcast updated status list to all clients + await manager.broadcast_collaboration_status_once() + + elif msg_type == "code_update": + issue_num = data.get("issue_num") + delta_str = data.get("delta") + # Ensure data is valid and sender ID matches + if issue_num is not None and delta_str is not None and sender_id == client_id: + # FIX: Corrected call - handle_ws_connection is already async, just await the async manager method + await manager.handle_code_editor_update(int(issue_num), delta_str, sender_id) + else: + logger.warning(f"Invalid or unauthorized 'code_update' message from {sender_id}: Missing issue_num/delta or sender mismatch. 
Data: {str(data)[:200]}") + + elif msg_type == "status_update": + status = data.get("status", "Idle") + # Only update status for the client ID that sent the message + if sender_id == client_id: + # Ensure status is a string and not too long + status = str(status)[:100] if status else "Idle" + + if sender_id in manager.collaborators: + manager.collaborators[sender_id]["status"] = status + # Broadcast updated status list + await manager.broadcast_collaboration_status_once() + else: + # This might happen if 'join' wasn't received first, or state is out of sync + logger.warning(f"Received status update from client {sender_id} not in collaborator list. Adding/Updating with default name.") + manager.collaborators[sender_id] = {"name": f"User_{sender_id[:4]} (Re-added)", "status": status} + await manager.broadcast_collaboration_status_once() + else: + logger.warning(f"Unauthorized status update from {sender_id} attempting to update status for another client. Ignoring.") - except Exception as e: - st.error(f"Error deploying to Hugging Face Space: {e}") - return f"Deployment failed: {e}" - - -# --- Streamlit App Layout --- -st.title("AI Agent and Workspace") - -# Sidebar navigation -st.sidebar.title("Navigation") -app_mode = st.sidebar.selectbox( - "Choose the app mode", ["AI Agent Creator", "Tool Box", "Workspace Chat App"]) - -# --- AI Agent Creator Mode --- -if app_mode == "AI Agent Creator": - st.header("Create an AI Agent") - - st.subheader("Create Agent from Text") - agent_name_input = st.text_input("Enter agent name:", key="create_agent_name") - text_input_skills = st.text_area("Enter skills (one per line):", key="create_agent_skills") - if st.button("Create Agent", key="create_agent_button"): - create_agent_from_text(agent_name_input, text_input_skills) - - st.subheader("Available Agents") - if st.session_state.available_agents: - st.write(", ".join(st.session_state.available_agents)) - else: - st.info("No agents created yet.") - - -# --- Tool Box Mode --- -elif app_mode == "Tool Box": - st.header("AI-Powered Tools") - - # Chat Interface - st.subheader("General Chat with CodeCraft") - chat_input_tb = st.text_area("Enter your message:", key="chat_input_tb") - if st.button("Send Message", key="send_chat_tb_button"): - if chat_input_tb: - with st.spinner("CodeCraft is thinking..."): - chat_response = chat_interface(chat_input_tb) - st.session_state.chat_history.append((f"Tool Box User: {chat_input_tb}", f"CodeCraft: {chat_response}")) - st.write(f"CodeCraft: {chat_response}") - - # Terminal Interface - st.subheader("Terminal") - terminal_input_tb = st.text_input("Enter a command:", key="terminal_input_tb") - if st.button("Run Command", key="run_terminal_tb_button"): - if terminal_input_tb: - with st.spinner(f"Running '{terminal_input_tb}'..."): - terminal_output = terminal_interface(terminal_input_tb) - st.session_state.terminal_history.append((f"Tool Box Command: {terminal_input_tb}", terminal_output)) - st.code(terminal_output, language="bash") - # Auto-scroll might be needed for long outputs, not native to Streamlit text_area/code - - # Code Editor Interface - st.subheader("Code Editor") - code_editor_tb = st.text_area("Write your code:", height=300, key="code_editor_tb") - if st.button("Format & Lint Code", key="format_lint_code_tb_button"): - if code_editor_tb: - with st.spinner("Formatting and linting code..."): - formatted_code, lint_message = code_editor_interface(code_editor_tb) - st.subheader("Formatted Code") - st.code(formatted_code, language="python") - st.subheader("Linting 
Results") - st.text(lint_message) # Use st.text for preformatted output - - # Text Summarization Tool - st.subheader("Summarize Text") - text_to_summarize_tb = st.text_area("Enter text to summarize:", key="summarize_text_tb") - if st.button("Summarize Text", key="summarize_button_tb"): - if text_to_summarize_tb: - with st.spinner("Summarizing..."): - summary_tb = summarize_text(text_to_summarize_tb) - st.write(f"Summary: {summary_tb}") - - # Sentiment Analysis Tool - st.subheader("Sentiment Analysis") - sentiment_text_tb = st.text_area("Enter text for sentiment analysis:", key="sentiment_text_tb") - if st.button("Analyze Sentiment", key="analyze_sentiment_button_tb"): - if sentiment_text_tb: - with st.spinner("Analyzing sentiment..."): - sentiment_tb = sentiment_analysis(sentiment_text_tb) - st.write(f"Sentiment: {sentiment_tb}") - - # Text Translation Tool (Code Translation) - st.subheader("Translate Text/Code") - code_to_translate_tb = st.text_area("Enter text or code to translate:", key="translate_code_tb") - source_language_tb = st.text_input("Enter source language (e.g., 'English'):", key="source_language_tb") - target_language_tb = st.text_input("Enter target language (e.g., 'Spanish'):", key="target_language_tb") - # Provide available models as a hint - st.info(f"Available translation pairs (Source to Target): {list(AVAILABLE_TRANSLATION_MODELS.keys())}") - if st.button("Translate", key="translate_button_tb"): - if code_to_translate_tb and source_language_tb and target_language_tb: - with st.spinner("Translating..."): - translated_code_tb = translate_code( - code_to_translate_tb, source_language_tb, target_language_tb - ) - st.subheader("Translated Text/Code") - # Attempt to guess language for highlighting, default to text - language_hint = target_language_tb.lower() if target_language_tb.lower() in ['python', 'javascript', 'json'] else 'text' - st.code(translated_code_tb, language=language_hint) + else: + logger.warning(f"Unknown WebSocket message type '{msg_type}' received from {sender_id} ({remote_addr}). 
Message: {str(message)[:200]}") - # Code Generation - st.subheader("Code Generation") - code_idea_tb = st.text_area("Describe your code idea:", key="code_idea_tb") - selected_model_tb = st.selectbox( - "Select a code-generative model", list(AVAILABLE_CODE_GENERATIVE_MODELS.keys()), key="select_codegen_model_tb" - ) - if st.button("Generate Code", key="generate_code_button_tb"): - if code_idea_tb and selected_model_tb: - model_name_for_gen = AVAILABLE_CODE_GENERATIVE_MODELS[selected_model_tb] - with st.spinner(f"Generating code using {selected_model_tb}..."): - generated_code_tb = generate_code(code_idea_tb, model_name_for_gen) - st.subheader("Generated Code") - st.code(generated_code_tb, language="python") # Assuming generated code is Python - - -# --- Workspace Chat App Mode --- -elif app_mode == "Workspace Chat App": - st.header("Workspace and AI Assistant") - - # Project Workspace Selection/Creation - st.subheader("Manage Project") - # Allow selecting an existing project or creating a new one - project_options = list(st.session_state.workspace_projects.keys()) - project_options.insert(0, "Create New Project...") # Add option to create new - - selected_project_action = st.selectbox( - "Select a project or create a new one:", project_options, key="select_project_action" - ) + except json.JSONDecodeError: + logger.error(f"Received invalid JSON over WebSocket from {client_id} ({remote_addr}): {str(message)[:200]}...") + except Exception as e: + logger.exception(f"Error processing WebSocket message from {client_id} ({remote_addr}): {e}") - if selected_project_action == "Create New Project...": - new_project_name = st.text_input("Enter name for the new project:", key="new_project_name_input") - if st.button("Create and Select Project", key="create_select_project_button"): - if new_project_name: - status = workspace_interface(new_project_name) - st.success(status) - # After creation, the workspace_interface function sets active_project - st.rerun() # Rerun to update the project selectbox - else: - st.warning("Please enter a name for the new project.") - else: - # User selected an existing project - st.session_state.active_project = selected_project_action - st.info(f"Active project: **{st.session_state.active_project}**") - - active_project_name = st.session_state.active_project - - if active_project_name: - # Display current project files - st.subheader(f"Files in '{active_project_name}'") - if active_project_name in st.session_state.workspace_projects and st.session_state.workspace_projects[active_project_name]['files']: - for file in st.session_state.workspace_projects[active_project_name]['files']: - st.write(f"- {file}") + # Catch standard socket exceptions for disconnects + except (ConnectionClosed, ConnectionClosedOK, ConnectionAbortedError, ConnectionResetError, WebSocketException) as e: + logger.info(f"WebSocket client {client_id} ({remote_addr}) disconnected: Type={type(e).__name__}, Code={getattr(e, 'code', 'N/A')}, Reason='{getattr(e, 'reason', 'N/A')}'") + except Exception as e: + logger.exception(f"Unexpected error in WebSocket handler for {client_id} ({remote_addr}): {e}") + finally: + # Ensure cleanup happens regardless of how the loop exits + logger.info(f"Cleaning up connection for client {client_id} ({remote_addr})") + # Pass the websocket object itself for removal + # Schedule this in the main loop if not already in it + if manager.main_loop.is_running(): + manager.main_loop.call_soon_threadsafe(manager.remove_ws_client, websocket) else: - st.info("No files in this project 
yet.") + logger.warning("Main loop not running, cannot schedule final client removal.") + +async def start_websocket_server(manager: IssueManager, port: int): + """Starts the WebSocket server.""" + # The handler needs the manager instance + handler_with_manager = lambda ws, path: handle_ws_connection(ws, path, manager) + server = None + # Use a Future to signal when the server should stop + stop_event = asyncio.Future() - # Add Code to Workspace - st.subheader("Add/Update Code File") - file_to_edit = st.selectbox( - "Select a file to edit or enter a new name:", - ["Enter New File Name..."] + st.session_state.workspace_projects.get(active_project_name, {}).get('files', []), - key="select_file_to_edit" + # Add a method to signal the server to stop from outside the async function + # This is needed for graceful shutdown from the main thread + manager.stop_ws_server = lambda: stop_event.set_result(True) if not stop_event.done() else None + + try: + # Start the websockets server + server = await websockets.serve( + handler_with_manager, + "0.0.0.0", # Listen on all interfaces + port, + ping_interval=20, # Send ping every 20 seconds + ping_timeout=20 # Close connection if no pong received within 20 seconds ) + logger.info(f"WebSocket server started successfully on ws://0.0.0.0:{port}") + + # Wait until the stop_event is set (signaled from main thread or shutdown) + await stop_event + + except OSError as e: + logger.error(f"Failed to start WebSocket server on port {port}: {e}. Is the port already in use?") + # Signal the main loop to stop gracefully if possible, or re-raise + # If this happens during startup, main thread will catch it and trigger shutdown. + # No need to explicitly call shutdown_handler here, the main thread's except/finally will handle it. + raise SystemExit(f"WebSocket Port {port} unavailable. 
Application cannot start.") from e + except asyncio.CancelledError: + logger.info("WebSocket server task cancelled.") + except Exception as e: + logger.exception(f"An unexpected error occurred starting or running the WebSocket server: {e}") + # Ensure the stop event is set so the await completes + if not stop_event.done(): stop_event.set_result(True) + raise # Re-raise to potentially stop the main loop + + finally: + if server: + logger.info("Attempting to stop WebSocket server...") + server.close() # Signal server to close + await server.wait_closed() # Wait for server to finish closing connections + logger.info("WebSocket server stopped.") + # Ensure the stop event is completed even if there was an error before await + if not stop_event.done(): + stop_event.set_result(True) + + +def run_webhook_server(manager: IssueManager, port: int, main_loop: asyncio.AbstractEventLoop): + """Starts the HTTP webhook server in a separate thread.""" + # Pass the manager instance and the main asyncio loop reference to the handler class + WebhookHandler.manager_instance = manager + WebhookHandler.main_loop = main_loop + httpd = None + + # Add a method to signal the webhook server thread to stop + # This is needed for graceful shutdown from the main thread + # Use a simple flag and check it periodically, or rely on httpd.shutdown() + # httpd.shutdown() is the standard way for BaseHTTPRequestHandler servers + manager.stop_webhook_server = lambda: httpd.shutdown() if httpd else None - if file_to_edit == "Enter New File Name...": - file_name_to_add = st.text_input("Enter the new file name (e.g., 'utils.py'):", key="new_file_name_input") - else: - file_name_to_add = file_to_edit # Use the selected file name - # Load existing code if editing a file - initial_code = "" - if file_to_edit != "Enter New File Name..." and active_project_name in st.session_state.workspace_projects: + try: + server_address = ("0.0.0.0", port) + # Create the HTTP server instance + httpd = HTTPServer(server_address, WebhookHandler) + logger.info(f"Webhook HTTP server starting on http://0.0.0.0:{port}") + # Start serving requests (this call blocks the thread) + httpd.serve_forever() + except OSError as e: + logger.error(f"Failed to start Webhook server on port {port}: {e}. Is the port already in use?") + # If the server fails to start, signal the main loop to stop + if main_loop.is_running(): + # Use call_soon_threadsafe as this is in a different thread + main_loop.call_soon_threadsafe(main_loop.stop) + except Exception as e: + logger.exception(f"Unexpected error in Webhook server thread: {e}") + # If an unexpected error occurs, signal the main loop to stop + if main_loop.is_running(): + main_loop.call_soon_threadsafe(main_loop.stop) + finally: + if httpd: + logger.info("Shutting down Webhook HTTP server...") + # This method stops the serve_forever() loop + # It needs to be called from a different thread than the one running serve_forever() + # The manager.stop_webhook_server lambda is designed for this. + # If this finally block is reached due to an exception *within* serve_forever(), + # httpd.shutdown() might not be needed or might fail, but calling it is safer. 
-        code_to_add_wca = st.text_area(
-            "Enter code for the file:",
-            value=initial_code,
-            height=300,
-            key="code_to_add_wca"
-        )
-        if st.button("Save Code to Project", key="save_code_button_wca"):
-            if active_project_name and file_name_to_add and code_to_add_wca:
-                status = add_code_to_workspace(
-                    active_project_name, code_to_add_wca, file_name_to_add
-                )
-                # Log the action in terminal history for context
-                st.session_state.terminal_history.append(
-                    (f"Saved code to {active_project_name}/{file_name_to_add}", status)
-                )
-                st.success(status)
-                st.rerun() # Rerun to update the file list selectbox
-            else:
-                st.warning("Please select/enter a project, file name, and code.")
-
-        # Terminal Interface with Project Context
-        st.subheader(f"Terminal in '{active_project_name}'")
-        terminal_input_wca = st.text_input("Enter a command:", key="terminal_input_wca")
-        if st.button("Run Command in Project", key="run_terminal_button_wca"):
-            if active_project_name and terminal_input_wca:
-                project_path = st.session_state.workspace_projects[active_project_name]['path']
-                with st.spinner(f"Running '{terminal_input_wca}' in {active_project_name}..."):
-                    terminal_output_wca = terminal_interface(terminal_input_wca, project_path)
-                st.session_state.terminal_history.append((f"Workspace Command ({active_project_name}): {terminal_input_wca}", terminal_output_wca))
-                st.code(terminal_output_wca, language="bash")
-            else:
-                st.warning("Please select an active project and enter a command.")
-
-        # Chat Interface for Guidance (using general chatbot)
-        st.subheader("Chat with CodeCraft for Guidance")
-        chat_input_wca = st.text_area("Enter your message for guidance:", key="chat_input_wca")
-        if st.button("Get Guidance", key="get_guidance_button_wca"):
-            if chat_input_wca:
-                with st.spinner("CodeCraft is thinking..."):
-                    chat_response_wca = chat_interface(chat_input_wca)
-                st.session_state.chat_history.append((f"Workspace User: {chat_input_wca}", f"CodeCraft: {chat_response_wca}"))
-                st.write(f"CodeCraft: {chat_response_wca}")
-
-        # Chat with AI Agents (within Workspace context)
-        st.subheader("Chat with AI Agent (Project Context)")
-        if st.session_state.available_agents:
-            selected_agent_wca = st.selectbox(
-                "Select an AI agent to chat with:",
-                st.session_state.available_agents,
-                key="select_agent_wca"
-            )
-            agent_chat_input_wca = st.text_area("Enter your message for the agent:", key="agent_chat_input_wca")
-            if st.button("Send to Agent", key="send_to_agent_button_wca"):
-                if agent_chat_input_wca and selected_agent_wca:
-                    with st.spinner(f"Sending message to {selected_agent_wca}..."):
-                        agent_chat_response_wca = chat_interface_with_agent(
-                            agent_chat_input_wca, selected_agent_wca
-                        )
-                    st.session_state.chat_history.append(
-                        (f"User to {selected_agent_wca}: {agent_chat_input_wca}", f"{selected_agent_wca}: {agent_chat_response_wca}")
-                    )
st.write(f"{selected_agent_wca}: {agent_chat_response_wca}") - else: - st.warning("Please select an agent and enter a message.") +# ========== Main Execution ========== +if __name__ == "__main__": + print("--- Acknowledging potential TensorFlow/CUDA warnings ---") + print("If you see warnings like 'Could not load dynamic library...' or related to CUDA/GPU,") + print("they are often harmless if you are not using a local GPU-accelerated model.") + print("Hugging Face Inference API calls run remotely and do not require local GPU setup.") + print("--- Starting Application ---") + + # Get or create the event loop for the main thread + # This loop will run the async tasks (WS server, idle tasks, broadcast) + try: + loop = asyncio.get_event_loop() + logger.info(f"Using existing asyncio loop in main thread: {id(loop)}") + except RuntimeError: + loop = asyncio.new_event_loop() + asyncio.set_event_loop(loop) + logger.info(f"Created and set new asyncio loop in main thread: {id(loop)}") + + manager = IssueManager() + + # Start the webhook server in a separate thread + webhook_thread = threading.Thread( + target=run_webhook_server, + args=(manager, WEBHOOK_PORT, loop), # Pass the main loop to the webhook thread + name="WebhookServerThread", + daemon=True # Allow the main program to exit even if this thread is running + ) + webhook_thread.start() + + # Create the Gradio UI + app = create_ui(manager) + + # Start the WebSocket server as an asyncio task in the main loop + ws_task = loop.create_task(start_websocket_server(manager, WS_PORT)) + + # Start the manager's background tasks (idle processing, broadcast) + # These tasks are created in the manager's loop (which is the main loop here) + manager.start_idle_processing() + manager.start_broadcast_loop() + + + def shutdown_handler(signum, frame): + """Graceful shutdown logic.""" + logger.info(f"Signal {signum} received. Initiating shutdown...") + # Signal the WebSocket server to stop + if hasattr(manager, 'stop_ws_server') and manager.stop_ws_server: + manager.stop_ws_server() + # Signal the webhook server to stop + if hasattr(manager, 'stop_webhook_server') and manager.stop_webhook_server: + manager.stop_webhook_server() + # Stop manager's internal async tasks + manager.stop_idle_processing() + manager.stop_broadcast_loop() + + # Stop the main asyncio loop + if loop.is_running(): + logger.info("Stopping main asyncio loop...") + # Use call_soon_threadsafe because signal handlers run in a different context + loop.call_soon_threadsafe(loop.stop) else: - st.info("No AI agents available. Create one in the 'AI Agent Creator' tab.") + logger.warning("Main asyncio loop not running during shutdown handler.") - # Code Generation (within Workspace context) - st.subheader("Generate Code for Project") - code_idea_wca = st.text_area("Describe the code you need:", key="code_idea_wca") - selected_model_wca = st.selectbox( - "Select a code-generative model:", list(AVAILABLE_CODE_GENERATIVE_MODELS.keys()), key="select_codegen_model_wca" + # Add signal handlers for graceful shutdown (e.g., Ctrl+C) + try: + # For Unix-like systems + loop.add_signal_handler(signal.SIGINT, shutdown_handler, signal.SIGINT, None) + loop.add_signal_handler(signal.SIGTERM, shutdown_handler, signal.SIGTERM, None) + logger.info("Added signal handlers for SIGINT and SIGTERM.") + except NotImplementedError: + # Signal handlers are not available on Windows + logger.warning("Signal handlers not available on this platform (likely Windows). 
Ctrl+C may not be graceful.") + # On Windows, KeyboardInterrupt is usually caught directly in the main thread + + + # Launch Gradio app + # Use prevent_thread_lock=True to run Gradio server in a separate thread, + # freeing the main thread to run the asyncio loop. + try: + logger.info(f"Launching Gradio app on port {GRADIO_PORT}...") + app.launch( + server_name="0.0.0.0", + server_port=GRADIO_PORT, + share=False, # Set to True to share publicly (requires auth token usually) + debug=True, # Keep debug=True for development logs + inbrowser=True, + prevent_thread_lock=True # Run Gradio server in a separate thread ) - if st.button("Generate Code for Project", key="generate_code_button_wca"): - if code_idea_wca and selected_model_wca and active_project_name: - model_name_for_gen = AVAILABLE_CODE_GENERATIVE_MODELS[selected_model_wca] - with st.spinner(f"Generating code using {selected_model_wca}..."): - generated_code_wca = generate_code(code_idea_wca, model_name_for_gen) - st.subheader("Generated Code (Copy to File)") - st.code(generated_code_wca, language="python") # Assuming Python - - # Optional: Provide a button to add this generated code to a file - # st.button("Add Generated Code to File", key="add_generated_code_button") - # This would require selecting a file name and calling add_code_to_workspace - - - # Automate Build Process (Placeholder - needs specific logic) - st.subheader("Automate Project Actions") - st.info("This section is for triggering automated actions (e.g., run tests, deploy). Specific automation logic needs to be defined.") - # Example: A simple build command button - build_command_wca = st.text_input("Enter build/run command (e.g., 'streamlit run app.py'):", key="build_command_wca") - if st.button("Run Automated Command", key="run_automated_command_button"): - if active_project_name and build_command_wca: - project_path = st.session_state.workspace_projects[active_project_name]['path'] - with st.spinner(f"Running automated command '{build_command_wca}' in {active_project_n \ No newline at end of file + logger.info("Gradio app launched in separate thread.") + + # Run the main asyncio loop indefinitely to keep async tasks running + logger.info("Running main asyncio loop...") + loop.run_forever() # This blocks the main thread until loop.stop() is called + + except KeyboardInterrupt: + logger.info("KeyboardInterrupt received in main thread.") + # If signal handlers are not implemented (e.g., Windows), KeyboardInterrupt + # is caught here. Trigger shutdown manually. + # Check if signal handlers were likely not registered by checking if signal.SIGINT exists + if not hasattr(signal, 'SIGINT'): + shutdown_handler(None, None) # Call shutdown logic + + except SystemExit as e: + logger.error(f"SystemExit received: {e}") + # SystemExit might be raised by port binding errors etc. + # The source of SystemExit might have already triggered loop.stop() or shutdown. + # Ensure cleanup happens if not already started. + # Check if the loop is still running; if so, stop it. + if loop.is_running(): + logger.info("SystemExit caught before loop stopped, triggering shutdown.") + shutdown_handler(None, None) + else: + logger.info("SystemExit caught, but loop already stopped. 
+    except KeyboardInterrupt:
+        logger.info("KeyboardInterrupt received in main thread.")
+        # If loop-level signal handlers could not be registered (e.g., on Windows),
+        # Ctrl+C lands here instead, so trigger the shutdown logic manually.
+        if not signal_handlers_registered:
+            shutdown_handler(None, None)
+
+    except SystemExit as e:
+        logger.error(f"SystemExit received: {e}")
+        # SystemExit may be raised by port-binding errors etc. Its source may have
+        # already triggered loop.stop(); ensure cleanup happens if not.
+        if loop.is_running():
+            logger.info("SystemExit caught before loop stopped, triggering shutdown.")
+            shutdown_handler(None, None)
+        else:
+            logger.info("SystemExit caught, but loop already stopped. Proceeding with final cleanup.")
+
+    except Exception as e:
+        logger.exception(f"An unexpected error occurred in the main thread: {e}")
+        # Trigger graceful shutdown on unexpected errors
+        shutdown_handler(None, None)
+
+    finally:
+        logger.info("Main thread exiting finally block.")
+        # Give background tasks a chance to finish their cancellation/cleanup.
+        logger.info("Running loop briefly to complete pending tasks (e.g., cancellations)...")
+        try:
+            # Gather remaining tasks (like ws_task and the manager's idle/broadcast
+            # tasks); asyncio.all_tasks() returns the tasks of the given loop.
+            pending_tasks = [task for task in asyncio.all_tasks(loop=loop) if not task.done()]
+            if pending_tasks:
+                try:
+                    # Note: asyncio.wait() does not raise TimeoutError; on timeout it
+                    # returns the still-pending tasks instead, so check that set.
+                    done, pending = loop.run_until_complete(
+                        asyncio.wait(pending_tasks, timeout=5, return_when=asyncio.ALL_COMPLETED)
+                    )
+                    if pending:
+                        logger.warning(f"Timed out waiting for {len(pending)} pending tasks to complete.")
+                    else:
+                        logger.info("Completed pending tasks.")
+                except Exception as e:
+                    logger.error(f"Error during final loop run_until_complete: {e}")
+            else:
+                logger.info("No pending tasks to complete.")
+        except Exception as e:
+            logger.error(f"Error checking pending tasks: {e}")
+
+        # Wait for the webhook thread to join. It is a daemon thread, so it would be
+        # killed on exit anyway, but an explicit shutdown()/join is cleaner. Use a
+        # small timeout in case shutdown() hangs.
+        if webhook_thread.is_alive():
+            logger.info("Waiting for webhook thread to join...")
+            webhook_thread.join(timeout=2)  # Wait up to 2 seconds
+
+        # Close the loop
+        if not loop.is_closed():
+            logger.info("Closing asyncio loop.")
+            loop.close()
+
+        logger.info("Application shutdown complete.")
\ No newline at end of file