Spaces:

Leeps
/

best-model-search

Sleeping

File size: 6,355 Bytes

from smolagents import CodeAgent, InferenceClientModel, GradioUI, tool
import os
from huggingface_hub import HfApi, login, snapshot_download
import requests
from typing import List, Dict
from typing import Optional, Union
from pathlib import Path

# Fetch the Hub credential from the process environment; refuse to start when it is missing or empty.
HF_TOKEN = os.environ.get("HUGGINGFACE_API_KEY")
if HF_TOKEN is None or HF_TOKEN == "":
    raise RuntimeError("HUGGINGFACE_API_KEY environment variable is not set")

# Hand the credential to huggingface_hub's login().
login(token=HF_TOKEN)

@tool
def leaderboard_search(query: str) -> str:
    """
    Search Hugging Face Spaces specifically in the model benchmarking category.

    The query is sent to the Spaces listing API once per benchmarking keyword
    ("arena", "leaderboard", "benchmark") and the hits are merged. A space that
    matches more than one keyword is listed only once.

    Args:
        query: The search query to find relevant model benchmarking spaces

    Returns:
        A formatted string containing search results with space names, descriptions, and additional information, or a message stating that nothing was found or describing the error
    """
    api_url = "https://huggingface.co/api/spaces"
    search_words = ["arena", "leaderboard", "benchmark"]
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}

    results = []
    seen_ids = set()  # ids already formatted, so keyword overlap does not duplicate entries

    try:
        for word in search_words:
            params = {
                "search": f"{query} {word}",
                "full": True,  # Get full information
            }

            # A timeout keeps an unresponsive endpoint from blocking the agent forever.
            response = requests.get(api_url, params=params, headers=headers, timeout=30)
            # Fail on HTTP errors here rather than iterating over an error payload
            # as if it were a list of spaces.
            response.raise_for_status()

            for space in response.json() or []:
                raw_id = space.get("id")
                if raw_id is not None:
                    if raw_id in seen_ids:
                        continue
                    seen_ids.add(raw_id)
                space_id = raw_id if raw_id is not None else "Unknown"

                # Space ids look like "author/name"; fall back gracefully otherwise.
                if "/" in space_id:
                    author, space_name = space_id.split("/", 1)
                else:
                    author, space_name = "Unknown", space_id
                likes = space.get("likes", 0)

                # cardData may be absent, None, or lack individual keys.
                card = space.get("cardData") or {}
                title = card.get("title") or space_name
                description = card.get("short_description") or "No description available"

                # Create formatted result string
                results.append(
                    f"🚀 **{title}** ({space_id})\n"
                    f"   👤 Author: {author}\n"
                    f"   📝 {description}\n"
                    f"   ❤️ Likes: {likes}\n"
                    f"   🔗 URL: https://huggingface.co/spaces/{space_id}\n"
                )

        if not results:
            return f"No model benchmarking spaces found for query: '{query}'"

        return "\n".join(results)
    except requests.exceptions.RequestException as e:
        return f"Error searching Hugging Face Spaces: {str(e)}"
    except Exception as e:
        # Tool boundary: report the failure to the agent as text instead of raising.
        return f"Unexpected error: {str(e)}"

@tool
def get_space_content(space_id: str) -> str:
    """
    Fetch the full HTML content of a Hugging Face Space webpage.

    Args:
        space_id: The id of the Hugging Face Space (e.g., "owner/my-awesome-space") to view online

    Returns:
        The raw HTML of the page when the server answers with status 200, otherwise a message containing the status code or the error
    """
    try:
        url = f"https://huggingface.co/spaces/{space_id}"
        # A timeout keeps an unresponsive server from blocking the agent forever.
        response = requests.get(url, timeout=30)
        if response.status_code == 200:
            return response.text  # Return raw HTML
        return f"Failed to fetch page for '{space_id}', status code: {response.status_code}"
    except Exception as e:
        # Tool boundary: report the failure to the agent as text instead of raising.
        return f"Error fetching content for '{space_id}': {e}"
    
@tool
def download_space_files(
    space_id: str,
    force_download: bool = False,
    max_workers: int = 8
) -> str:
    """
    Download all files from a Hugging Face Space (snapshot).

    Args:
        space_id: e.g. "owner/my-awesome-space"
        force_download: redownload even if cached
        max_workers: parallel downloads

    Returns:
        A message with the number of files and the local folder path where the space's files now live, or a failure message.
    """
    try:
        folder = snapshot_download(
            repo_id=space_id,
            repo_type="space",
            force_download=force_download,
            max_workers=max_workers
        )
        # rglob("*") also yields directories; count regular files only so the
        # reported number matches what the message claims.
        file_count = sum(1 for entry in Path(folder).rglob("*") if entry.is_file())
        return (
            f"✔️ Downloaded {file_count} files from space `{space_id}`\n"
            f"📂 Local path: {folder}"
        )
    except Exception as e:
        # Tool boundary: report the failure to the agent as text instead of raising.
        return f"❌ Failed to download space `{space_id}`: {e}"
    

@tool
def get_file_from_space(space_id: str, file_path: str) -> str:
    """
    Get a specific file from a Hugging Face Space.

    Args:
        space_id: The Hugging Face Space ID
        file_path: Path to the file in the space

    Returns:
        The file content or error message
    """
    try:
        # Drop leading slashes so the URL does not end up with "main//...".
        relative_path = file_path.lstrip("/")
        url = f"https://huggingface.co/spaces/{space_id}/raw/main/{relative_path}"
        # A timeout keeps an unresponsive server from blocking the agent forever.
        response = requests.get(
            url,
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            timeout=30,
        )

        if response.status_code == 200:
            return f"Content of {file_path} from {space_id}:\n\n{response.text}"
        # Include the status code so the caller can tell a missing file from an auth problem.
        return (
            f"Couldn't retrieve {file_path} from {space_id} "
            f"(status code: {response.status_code})"
        )

    except Exception as e:
        # Tool boundary: report the failure to the agent as text instead of raising.
        return f"Error: {str(e)}"

# Build the agent with all four Space tools: search, page fetch, single-file fetch and snapshot download.
model = InferenceClientModel()
agent = CodeAgent(
    tools=[leaderboard_search, get_space_content, get_file_from_space, download_space_files],
    additional_authorized_imports=["json", "requests", "pandas"],
    model=model,
    add_base_tools=False,
    # The tool names mentioned here must match the registered tool functions above.
    description=(
        "Your job is to find the best possible model for a given task based on relevant leaderboards or arenas. "
        "You will be provided with a task description, and you should use the leaderboard_search tool to find relevant leaderboards or arenas. "
        "If you want to inspect the contents of a particular Space (e.g., README or code), use the get_space_content tool. "
        "Respond with a list of the top models, including their names, scores, and links to their leaderboard pages."
    ),
)

# Start the Gradio chat interface for the agent.
GradioUI(agent).launch()