Spaces:
Sleeping
Sleeping
File size: 6,355 Bytes
056beb8 24b90be 056beb8 f1f9e43 056beb8 24b90be 056beb8 24b90be 056beb8 a9dde73 056beb8 a9dde73 056beb8 a9dde73 056beb8 a9dde73 056beb8 a9dde73 056beb8 f1f9e43 a9dde73 f1f9e43 a9dde73 f1f9e43 056beb8 24b90be 056beb8 f1f9e43 240890c 056beb8 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
from smolagents import CodeAgent, InferenceClientModel, GradioUI, tool
import os
from huggingface_hub import HfApi, login, snapshot_download
import requests
from typing import List, Dict
from typing import Optional, Union
from pathlib import Path
# The Hugging Face access token is supplied through the environment; abort
# start-up immediately when it is missing or empty.
HF_TOKEN = os.environ.get("HUGGINGFACE_API_KEY")
if not HF_TOKEN:
    raise RuntimeError("HUGGINGFACE_API_KEY environment variable is not set")

# Hand the token to huggingface_hub via login().
login(token=HF_TOKEN)
@tool
def leaderboard_search(query: str) -> str:
    """
    Search Hugging Face Spaces specifically in the model benchmarking category.

    Args:
        query: The search query to find relevant model benchmarking spaces

    Returns:
        A formatted string containing search results with space names, descriptions, and additional information
    """
    api_url = "https://huggingface.co/api/spaces"
    # The query is combined with each of these keywords in turn to bias the
    # search towards benchmarking Spaces.
    search_words = ["arena", "leaderboard", "benchmark"]
    headers = {"Authorization": f"Bearer {HF_TOKEN}"}
    results = []
    # A Space can match more than one keyword; remember ids already reported
    # so each Space appears only once in the output.
    seen_ids = set()

    try:
        for word in search_words:
            params = {
                "search": f"{query} {word}",
                "full": True,  # Get full information
            }
            # Bound the wait so an unresponsive server cannot stall the agent.
            response = requests.get(
                api_url, params=params, headers=headers, timeout=30
            )
            # Turn HTTP error statuses into a RequestException instead of
            # treating an error payload as a list of spaces.
            response.raise_for_status()
            spaces = response.json()
            if not spaces:
                continue  # Skip if no spaces found for this search word

            for space in spaces:
                space_id = space.get("id") or "Unknown"
                if space_id in seen_ids:
                    continue
                seen_ids.add(space_id)

                # Ids look like "owner/name"; fall back gracefully otherwise.
                owner, slash, name = space_id.partition("/")
                author = owner if slash else "Unknown"
                space_name = name if slash else space_id
                likes = space.get("likes", 0)

                # cardData may be absent, None, or lack individual fields;
                # fall back to the Space name / a placeholder in every case.
                card = space.get("cardData") or {}
                title = card.get("title") or space_name
                description = (
                    card.get("short_description") or "No description available"
                )

                results.append(
                    f"π **{title}** ({space_id})\n"
                    f" π€ Author: {author}\n"
                    f" π {description}\n"
                    f" β€οΈ Likes: {likes}\n"
                    f" π URL: https://huggingface.co/spaces/{space_id}\n"
                )

        if not results:
            return f"No model benchmarking spaces found for query: '{query}'"
        return "\n".join(results)
    except requests.exceptions.RequestException as e:
        return f"Error searching Hugging Face Spaces: {str(e)}"
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Unexpected error: {str(e)}"
@tool
def get_space_content(space_id: str) -> str:
    """
    Fetch the full HTML content of a Hugging Face Space webpage.

    Args:
        space_id: The id of the Hugging Face Space (e.g., "owner/my-awesome-space") to view online

    Returns:
        The raw HTML of the Space page on success, otherwise a message describing the failure
    """
    try:
        url = f"https://huggingface.co/spaces/{space_id}"
        # Bound the wait so an unresponsive server cannot stall the agent;
        # a timeout is reported through the generic handler below.
        response = requests.get(url, timeout=30)
        if response.status_code == 200:
            return response.text  # Return raw HTML
        return f"Failed to fetch page for '{space_id}', status code: {response.status_code}"
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error fetching content for '{space_id}': {e}"
@tool
def download_space_files(
    space_id: str,
    force_download: bool = False,
    max_workers: int = 8
) -> str:
    """
    Download all files from a Hugging Face Space (snapshot).

    Args:
        space_id: e.g. "owner/my-awesome-space"
        force_download: redownload even if cached
        max_workers: parallel downloads

    Returns:
        The local folder path where the space's files now live.
    """
    try:
        folder = snapshot_download(
            repo_id=space_id,
            repo_type="space",
            force_download=force_download,
            max_workers=max_workers,
        )
        # rglob("*") yields directories as well as files; count only regular
        # files so the reported number is not inflated by folder entries.
        file_count = sum(1 for entry in Path(folder).rglob("*") if entry.is_file())
        return (
            f"βοΈ Downloaded {file_count} files from space `{space_id}`\n"
            f"π Local path: {folder}"
        )
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"β Failed to download space `{space_id}`: {e}"
@tool
def get_file_from_space(space_id: str, file_path: str) -> str:
    """
    Get a specific file from a Hugging Face Space.

    Args:
        space_id: The Hugging Face Space ID
        file_path: Path to the file in the space

    Returns:
        The file content or error message
    """
    try:
        url = f"https://huggingface.co/spaces/{space_id}/raw/main/{file_path}"
        # Bound the wait so an unresponsive server cannot stall the agent;
        # a timeout is reported through the generic handler below.
        response = requests.get(
            url,
            headers={"Authorization": f"Bearer {HF_TOKEN}"},
            timeout=30,
        )
        if response.status_code == 200:
            return f"Content of {file_path} from {space_id}:\n\n{response.text}"
        # Include the status so a missing file (404) can be told apart from
        # an authorization problem (401/403).
        return (
            f"Couldn't retrieve {file_path} from {space_id}"
            f" (status code: {response.status_code})"
        )
    except Exception as e:
        # Tool boundary: report the failure as text rather than raising.
        return f"Error: {str(e)}"
# Build the agent with all four tools defined in this file: leaderboard
# search, Space page fetch, single-file fetch, and full snapshot download.
model = InferenceClientModel()
agent = CodeAgent(
    tools=[leaderboard_search, get_space_content, get_file_from_space, download_space_files],
    additional_authorized_imports=["json", "requests", "pandas"],
    model=model,
    add_base_tools=False,
    # The description refers to the tools by their registered function names.
    description=(
        "Your job is to find the best possible model for a given task based on "
        "relevant leaderboards or arenas. You will be provided with a task "
        "description, and you should use the leaderboard_search tool to find "
        "relevant leaderboards or arenas. If you want to inspect the contents "
        "of a particular Space (e.g., README or code), use the "
        "get_space_content, get_file_from_space, or download_space_files tool. "
        "Respond with a list of the top models, including their names, scores, "
        "and links to their leaderboard pages."
    ),
)

# Start the Gradio front end for the agent.
GradioUI(agent).launch()