# `spaces` is deliberately kept as the very first import, as in the original
# ordering. NOTE(review): the ZeroGPU documentation recommends importing it
# before any CUDA-initialising package -- confirm before re-sorting.
import spaces

import html
import logging
import threading
import time
from typing import Dict, Iterator, List, Tuple

import gradio as gr
from gradio.themes.utils import sizes

from main import run_repository_ranking  # Import the repository ranking function

# ---------------------------
# Global Logging Buffer Setup
# ---------------------------
# Formatted log lines captured from the root logger; guarded by LOG_BUFFER_LOCK.
# The buffer is process-global, so overlapping requests share (and clear) it.
LOG_BUFFER: List[str] = []
LOG_BUFFER_LOCK = threading.Lock()

# Seconds to wait between two polls of LOG_BUFFER while the worker runs.
_POLL_INTERVAL_SECONDS = 0.5

# Number of ranked repositories rendered in the results table.
_MAX_DISPLAYED_RESULTS = 10


class BufferLogHandler(logging.Handler):
    """Logging handler that appends each formatted record to LOG_BUFFER."""

    def emit(self, record: logging.LogRecord) -> None:
        log_entry = self.format(record)
        with LOG_BUFFER_LOCK:
            LOG_BUFFER.append(log_entry)


root_logger = logging.getLogger()
# Install the handler only once, even if this module is executed again.
if not any(isinstance(h, BufferLogHandler) for h in root_logger.handlers):
    handler = BufferLogHandler()
    formatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    handler.setFormatter(formatter)
    root_logger.addHandler(handler)


def filter_logs(logs: List[str]) -> List[str]:
    """Collapse runs of "HTTP Request:" log lines into one placeholder line.

    Every consecutive run of lines containing "HTTP Request:" is replaced by a
    single "Fetching repositories..." entry; all other lines pass through
    unchanged and in order.
    """
    filtered: List[str] = []
    last_was_fetching = False
    for log in logs:
        if "HTTP Request:" in log:
            if not last_was_fetching:
                filtered.append("Fetching repositories...")
                last_was_fetching = True
        else:
            filtered.append(log)
            last_was_fetching = False
    return filtered


def parse_result_to_html(raw_result: str) -> str:
    """Render the raw output of run_repository_ranking as an HTML table.

    The text is split on the "Final Rank:" marker. For each entry, the first
    line is the rank and every following "key: value" line becomes a field.
    Only the top 10 results are displayed. All values are HTML-escaped because
    they originate from external repository metadata.

    Returns a short "no repositories" paragraph when no entry is found
    (including when ``raw_result`` is empty or ``None``).
    """
    # The text before the first marker is not an entry, hence the [1:...] slice.
    entries = (raw_result or "").strip().split("Final Rank:")
    entries = entries[1:1 + _MAX_DISPLAYED_RESULTS]
    if not entries:
        return "<p>No repositories found for your query.</p>"

    rows: List[str] = []
    for entry in entries:
        lines = entry.strip().split("\n")
        data: Dict[str, str] = {"Final Rank": lines[0].strip()}
        for line in lines[1:]:
            if ": " in line:
                key, val = line.split(": ", 1)
                data[key.strip()] = val.strip()

        # NOTE(review): the "Link" key is inferred from the table's column
        # header -- confirm against the output of run_repository_ranking.
        link = data.get("Link", "")
        if link.startswith(("http://", "https://")):
            link_cell = (
                f'<a href="{html.escape(link)}" target="_blank" '
                'rel="noopener noreferrer">GitHub</a>'
            )
        else:
            # Never emit an href for a missing or non-http(s) value.
            link_cell = "GitHub"

        rows.append(
            "<tr>"
            f"<td>{html.escape(data.get('Final Rank', ''))}</td>"
            f"<td>{html.escape(data.get('Title', ''))}</td>"
            f"<td>{link_cell}</td>"
            f"<td>{html.escape(data.get('Combined Score', ''))}</td>"
            "</tr>"
        )

    return (
        '<table style="width: 100%; border-collapse: collapse;">'
        "<thead><tr>"
        "<th>Rank</th><th>Title</th><th>Link</th><th>Combined Score</th>"
        "</tr></thead>"
        "<tbody>" + "".join(rows) + "</tbody>"
        "</table>"
    )


# ---------------------------
# GPU-enabled Wrapper for Repository Ranking
# ---------------------------
@spaces.GPU(duration=180)
def gpu_run_repo(topic: str):
    """Run the repository ranking for ``topic`` under the spaces.GPU decorator."""
    return run_repository_ranking(topic)


def run_lite_workflow(topic, result_container):
    """Thread target: run the ranking and store the outcome in ``result_container``.

    On success the result is stored under "raw_result". On failure the
    exception is logged and stored under "error", so the caller can report it
    instead of mistaking a crash for an empty result.
    """
    try:
        result_container["raw_result"] = gpu_run_repo(topic)
    except Exception as exc:  # boundary of the worker thread
        logging.getLogger(__name__).exception("Repository ranking failed")
        result_container["error"] = exc


def stream_lite_workflow(topic) -> Iterator[Tuple[str, str]]:
    """Run the workflow in a background thread and stream its progress.

    Yields ``(status, detail_html)`` pairs: one per batch of new log lines
    while the worker runs, then a final pair carrying either the results table
    or a failure message.
    """
    with LOG_BUFFER_LOCK:
        LOG_BUFFER.clear()

    result_container: dict = {}
    workflow_thread = threading.Thread(
        target=run_lite_workflow, args=(topic, result_container)
    )
    workflow_thread.start()

    last_index = 0
    while True:
        # Sample liveness BEFORE draining: if the worker had already finished
        # here, the drain below sees everything it logged and we can stop.
        still_running = workflow_thread.is_alive()
        with LOG_BUFFER_LOCK:
            new_logs = LOG_BUFFER[last_index:]
            last_index = len(LOG_BUFFER)
        if new_logs:
            filtered_logs = filter_logs(new_logs)
            status_msg = filtered_logs[-1]
            # Log text is arbitrary, so escape it before embedding in HTML.
            detail_msg = "<br>".join(html.escape(entry) for entry in filtered_logs)
            yield status_msg, detail_msg
        if not still_running:
            break
        time.sleep(_POLL_INTERVAL_SECONDS)
    workflow_thread.join()

    if "error" in result_container:
        yield (
            "Workflow failed",
            "<p>The ranking workflow failed. Please try again later.</p>",
        )
        return

    raw_result = result_container.get("raw_result", "No results returned.")
    yield "", parse_result_to_html(raw_result)


def lite_runner(topic: str) -> Iterator[Tuple[str, str]]:
    """Gradio callback: acknowledge the request, then stream the workflow."""
    # Provide immediate feedback upon button press.
    yield "Workflow started", "<p>Processing your request. Please wait...</p>"
    yield from stream_lite_workflow(topic)


# ---------------------------
# App UI Setup Using Gradio Soft Theme with Centered Layout
# ---------------------------
_APP_CSS = """
/* Center header and footer */
#header { text-align: center; margin-bottom: 20px; }
#main-container { max-width: 800px; margin: auto; }
#footer { text-align: center; margin-top: 20px; }
"""

_HEADER_MARKDOWN = """
# DeepGit Lite

✨ DeepGit Lite is the lightweight pro version of DeepGit.<br>
It harnesses advanced deep semantic search to explore GitHub repositories and deliver curated results.<br>
Under the hood, it leverages a hybrid ranking approach combining dense retrieval, BM25 scoring, and cross-encoder re-ranking for optimal discovery.<br>
If the agent returns no repositories found, it means no chain was invoked due to GPU unavailability. Please duplicate the space and re-run.

🚀 Check out the full DeepGit version on GitHub and ⭐ Star DeepGit on GitHub!
"""

with gr.Blocks(
    theme=gr.themes.Soft(text_size=sizes.text_md),
    title="DeepGit Lite",
    css=_APP_CSS,
) as demo:
    gr.Markdown(_HEADER_MARKDOWN, elem_id="header")

    with gr.Column(elem_id="main-container"):
        research_input = gr.Textbox(
            label="Research Query",
            placeholder="Enter your research topic here, e.g., 'data augmentation pipelines for LLM fine-tuning'",
            lines=3,
        )
        run_button = gr.Button("Run DeepGit Lite", variant="primary")
        status_display = gr.Markdown(label="Status")
        detail_display = gr.HTML(label="Results")

    # The button click and the textbox submit trigger the same streaming
    # callback, exposed under two distinct API names.
    run_button.click(
        fn=lite_runner,
        inputs=[research_input],
        outputs=[status_display, detail_display],
        api_name="deepgit_lite",
        show_progress=True,
    )
    research_input.submit(
        fn=lite_runner,
        inputs=[research_input],
        outputs=[status_display, detail_display],
        api_name="deepgit_lite_submit",
        show_progress=True,
    )

    # NOTE(review): the footer markup was not visible in the reviewed source;
    # this placeholder only preserves the #footer CSS hook -- restore content.
    gr.HTML("""<div id="footer"></div>""")

demo.queue(max_size=10).launch()