Spaces:

smolagents
/

smolagents-leaderboard

Running

App Files Files Community

m-ric HF Staff committed on Feb 27

Commit

3fe1351

verified ·

1 Parent(s): acea508

Update app.py

Browse files

Files changed (1) hide show

app.py +75 -177

app.py CHANGED Viewed

@@ -1,185 +1,83 @@
-import React, { useState, useEffect } from 'react';
-import { chain } from 'lodash';
-import './App.css';
-const ScoreBar = ({ score }) => {
-  if (score === undefined || score === null) return null;
-  const percentage = score <= 1 ? score * 100 : score;
-  const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
-  const backgroundColor = `hsl(${hue}, 80%, 50%)`;
-  return (
-    <div className="score-bar">
-      <div
-        className="score-fill"
-        style={{
-          width: `${percentage}%`,
-          backgroundColor
-        }}
-      />
-      <span className="score-text">
-        {percentage.toFixed(1)}%
-      </span>
-    </div>
-  );
-};
-const App = () => {
-  const [allData, setAllData] = useState([]);
-  const [loading, setLoading] = useState(true);
-  const [error, setError] = useState(null);
-  const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
-  const [searchQuery, setSearchQuery] = useState('');
-  const [showVanilla, setShowVanilla] = useState(true);
-  const [showToolCalling, setShowToolCalling] = useState(false);
-  useEffect(() => {
-    const fetchData = async () => {
-      try {
-        setLoading(true);
-        // Fetch all data from API
-        const response = await fetch('https://smolagents-smolagents-llm-leaderboard.hf.space/api/results');
-        if (!response.ok) {
-          throw new Error(`HTTP error! status: ${response.status}`);
-        }
-        const jsonData = await response.json();
-        setAllData(jsonData);
-      } catch (err) {
-        console.error('Error fetching data:', err);
-        setError(err.message);
-      } finally {
-        setLoading(false);
-      }
-    };
-    fetchData();
-  }, []);
-  const handleSort = (key) => {
-    const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc';
-    setSortConfig({ key, direction });
-  };
-  // Filter data based on selected action type
-  const getFilteredData = () => {
-    const actionType = showToolCalling ? 'tool-calling' : 'code';
-    return allData.filter(item => item.source === actionType);
-  };
-  // Get vanilla score for a model
-  const getVanillaScore = (modelId, metric) => {
-    const vanillaEntry = allData.find(item =>
-      item.model_id === modelId && item.source === 'vanilla'
-    );
-    return vanillaEntry?.scores[metric];
-  };
-  const filteredAndSortedData = chain(getFilteredData())
-    .filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase()))
-    .orderBy(
-      [item => {
-        if (sortConfig.key === 'model') {
-          return item.model_id;
-        }
-        return item.scores[sortConfig.key] || 0;
-      }],
-      [sortConfig.direction]
-    )
-    .value();
-  if (loading) return <div className="container">Loading benchmark results...</div>;
-  if (error) return <div className="container" style={{color: 'red'}}>Error: {error}</div>;
-  return (
-    <div className="container">
-      <div className="header">
-        <h1 className="title">Smolagents Leaderboard</h1>
-        <p className="subtitle">How do different LLMs compare for powering agents?</p>
-        <p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co/datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p>
-      </div>
-      <div className="search-container">
-        <div className="search-with-options">
-          <input
-            type="text"
-            className="search-input"
-            placeholder="Search models..."
-            value={searchQuery}
-            onChange={(e) => setSearchQuery(e.target.value)}
-          />
-          <div className="options-container">
-            <label className="option-label">
-              <input
-                type="checkbox"
-                checked={showVanilla}
-                onChange={() => setShowVanilla(!showVanilla)}
-              />
-              Show Vanilla Scores
-            </label>
-            <label className="option-label">
-              <input
-                type="checkbox"
-                checked={showToolCalling}
-                onChange={() => setShowToolCalling(!showToolCalling)}
-              />
-              Show Tool-Calling Scores
-            </label>
-          </div>
-        </div>
-      </div>
-      <div className="table-container">
-        <table>
-          <thead>
-            <tr>
-              <th onClick={() => handleSort('model')}>
-                Model {sortConfig.key === 'model' && (
-                  sortConfig.direction === 'desc' ? '↓' : '↑'
-                )}
-              </th>
-              {["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => (
-                <th key={benchmark} onClick={() => handleSort(benchmark)}>
-                  {benchmark} {sortConfig.key === benchmark && (
-                    sortConfig.direction === 'desc' ? '↓' : '↑'
-                  )}
-                </th>
-              ))}
-            </tr>
-          </thead>
-          <tbody>
-            {filteredAndSortedData.map((item, index) => (
-              <tr key={index}>
-                <td className="model-cell">
-                  <div className="model-name">{item.model_id}</div>
-                  {showVanilla && (
-                    <div className="vanilla-name">
-                      {`vanilla: ${getVanillaScore(item.model_id, 'Average')?.toFixed(1) || 'N/A'}%`}
-                    </div>
-                  )}
-                </td>
-                {["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
-                  <td key={metric}>
-                    <ScoreBar score={item.scores[metric]} />
-                    {showVanilla && getVanillaScore(item.model_id, metric) !== undefined && (
-                      <ScoreBar score={getVanillaScore(item.model_id, metric)} />
-                    )}
-                  </td>
-                ))}
-              </tr>
-            ))}
-          </tbody>
-        </table>
-      </div>
-      <div className="legend">
-        <p><strong>Agent type:</strong> {showToolCalling ? 'Tool-Calling' : 'Code'}{showVanilla ? ' (with Vanilla comparison)' : ''}</p>
-      </div>
-    </div>
-  );
-};
-export default App;

+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from fastapi.staticfiles import StaticFiles
+import numpy as np
+import argparse
+import os
+from datasets import load_dataset
+# Default bind address and port are read from the environment so they can be
+# overridden without editing the code.
+HOST = os.environ.get("API_URL", "0.0.0.0")
+# Environment values are strings; normalise so PORT is an int in both cases.
+PORT = int(os.environ.get("PORT", 7860))
+parser = argparse.ArgumentParser()
+parser.add_argument("--host", default=HOST)
+parser.add_argument("--port", type=int, default=PORT)
+# Reload stays enabled by default. A store_true flag whose default is already
+# True can never yield False, so --no-reload is the switch that disables it.
+parser.add_argument("--reload", action="store_true", default=True)
+parser.add_argument("--no-reload", dest="reload", action="store_false")
+parser.add_argument("--ssl_certfile")
+parser.add_argument("--ssl_keyfile")
+# NOTE: arguments are parsed at import time, not only when run as a script.
+args = parser.parse_args()
+app = FastAPI()
+# NOTE(review): wildcard origins are combined with allow_credentials=True;
+# confirm credentialed cross-origin requests are really needed for this API.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+@app.get("/api/results")
+async def get_results():
+    """Return per-model accuracy scores computed from the results dataset.
+
+    Loads the "train" split of the ``smolagents/results`` dataset, groups the
+    rows by ``model_id`` and, for every model, averages the ``acc`` column
+    once per distinct ``source`` value and once over all of the model's rows
+    (stored under the key "Average").
+
+    Returns:
+        A list of dicts shaped like
+        ``{"model_id": ..., "scores": {<source>: <mean acc>, ..., "Average": <mean acc>}}``.
+        The list is empty when the dataset has no rows.
+
+    Raises:
+        HTTPException: with status 500 and the error message as detail when
+            loading or processing the dataset fails.
+    """
+    # Imported here so the error path below cannot itself fail with a NameError.
+    import traceback
+    from fastapi import HTTPException
+    # NOTE(review): the dataset is loaded on every request inside an async
+    # handler; presumably this is a blocking call - consider caching the result.
+    try:
+        # Load the dataset
+        dataset = load_dataset("smolagents/results")
+        # Convert the train split to a pandas object for processing
+        data = dataset["train"].to_pandas()
+        # Log some info to help debug
+        print("Dataset loaded, shape:", data.shape)
+        print("Columns:", data.columns)
+        # iloc[0] raises IndexError on an empty frame, so only preview real rows.
+        if not data.empty:
+            print("First row:", data.iloc[0])
+        # Process the data to group by model and calculate scores
+        processed_data = []
+        grouped = data.groupby('model_id')
+        for model_id, group in grouped:
+            model_data = {
+                'model_id': model_id,
+                'scores': {}
+            }
+            # Calculate scores for each source
+            for source in group['source'].unique():
+                source_data = group[group['source'] == source]
+                avg_acc = source_data['acc'].mean()
+                model_data['scores'][source] = float(avg_acc)
+            # Cast like the per-source values so every score is a plain float.
+            model_data['scores']["Average"] = float(group["acc"].mean())
+            processed_data.append(model_data)
+        return processed_data
+    except Exception as e:
+        # Print the error message and the full traceback to the logs
+        print("Error occurred:", str(e))
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=str(e)) from e
+# Mounted at the root path after the API route above has been registered;
+# files are served from the "static" directory with StaticFiles' html mode on.
+app.mount("/", StaticFiles(directory="static", html=True), name="static")
+if __name__ == "__main__":
+    import uvicorn
+    # Echo the parsed command-line options before starting the server.
+    print(args)
+    server_options = {
+        "host": args.host,
+        "port": args.port,
+        "reload": args.reload,
+        "ssl_certfile": args.ssl_certfile,
+        "ssl_keyfile": args.ssl_keyfile,
+    }
+    # The application is passed as an import string rather than as an object.
+    uvicorn.run("app:app", **server_options)