m-ric HF Staff committed on
Commit
3fe1351
·
verified ·
1 Parent(s): acea508

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +75 -177
app.py CHANGED
@@ -1,185 +1,83 @@
1
- import React, { useState, useEffect } from 'react';
2
- import { chain } from 'lodash';
3
- import './App.css';
 
 
 
 
 
4
 
5
- const ScoreBar = ({ score }) => {
6
- if (score === undefined || score === null) return null;
7
-
8
- const percentage = score <= 1 ? score * 100 : score;
9
- const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
10
- const backgroundColor = `hsl(${hue}, 80%, 50%)`;
11
-
12
- return (
13
- <div className="score-bar">
14
- <div
15
- className="score-fill"
16
- style={{
17
- width: `${percentage}%`,
18
- backgroundColor
19
- }}
20
- />
21
- <span className="score-text">
22
- {percentage.toFixed(1)}%
23
- </span>
24
- </div>
25
- );
26
- };
27
 
28
- const App = () => {
29
- const [allData, setAllData] = useState([]);
30
- const [loading, setLoading] = useState(true);
31
- const [error, setError] = useState(null);
32
- const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
33
- const [searchQuery, setSearchQuery] = useState('');
34
- const [showVanilla, setShowVanilla] = useState(true);
35
- const [showToolCalling, setShowToolCalling] = useState(false);
36
 
37
- useEffect(() => {
38
- const fetchData = async () => {
39
- try {
40
- setLoading(true);
 
41
 
42
- // Fetch all data from API
43
- const response = await fetch('https://smolagents-smolagents-llm-leaderboard.hf.space/api/results');
44
- if (!response.ok) {
45
- throw new Error(`HTTP error! status: ${response.status}`);
46
- }
47
- const jsonData = await response.json();
48
- setAllData(jsonData);
49
- } catch (err) {
50
- console.error('Error fetching data:', err);
51
- setError(err.message);
52
- } finally {
53
- setLoading(false);
54
- }
55
- };
56
-
57
- fetchData();
58
- }, []);
59
-
60
- const handleSort = (key) => {
61
- const direction = sortConfig.key === key && sortConfig.direction === 'desc' ? 'asc' : 'desc';
62
- setSortConfig({ key, direction });
63
- };
64
-
65
- // Filter data based on selected action type
66
- const getFilteredData = () => {
67
- const actionType = showToolCalling ? 'tool-calling' : 'code';
68
- return allData.filter(item => item.source === actionType);
69
- };
70
-
71
- // Get vanilla score for a model
72
- const getVanillaScore = (modelId, metric) => {
73
- const vanillaEntry = allData.find(item =>
74
- item.model_id === modelId && item.source === 'vanilla'
75
- );
76
- return vanillaEntry?.scores[metric];
77
- };
78
 
79
- const filteredAndSortedData = chain(getFilteredData())
80
- .filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase()))
81
- .orderBy(
82
- [item => {
83
- if (sortConfig.key === 'model') {
84
- return item.model_id;
85
- }
86
- return item.scores[sortConfig.key] || 0;
87
- }],
88
- [sortConfig.direction]
89
- )
90
- .value();
91
 
92
- if (loading) return <div className="container">Loading benchmark results...</div>;
93
- if (error) return <div className="container" style={{color: 'red'}}>Error: {error}</div>;
94
 
95
- return (
96
- <div className="container">
97
- <div className="header">
98
- <h1 className="title">Smolagents Leaderboard</h1>
99
- <p className="subtitle">How do different LLMs compare for powering agents?</p>
100
- <p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co/datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p>
101
- </div>
102
-
103
- <div className="search-container">
104
- <div className="search-with-options">
105
- <input
106
- type="text"
107
- className="search-input"
108
- placeholder="Search models..."
109
- value={searchQuery}
110
- onChange={(e) => setSearchQuery(e.target.value)}
111
- />
112
-
113
- <div className="options-container">
114
- <label className="option-label">
115
- <input
116
- type="checkbox"
117
- checked={showVanilla}
118
- onChange={() => setShowVanilla(!showVanilla)}
119
- />
120
- Show Vanilla Scores
121
- </label>
122
-
123
- <label className="option-label">
124
- <input
125
- type="checkbox"
126
- checked={showToolCalling}
127
- onChange={() => setShowToolCalling(!showToolCalling)}
128
- />
129
- Show Tool-Calling Scores
130
- </label>
131
- </div>
132
- </div>
133
- </div>
134
-
135
- <div className="table-container">
136
- <table>
137
- <thead>
138
- <tr>
139
- <th onClick={() => handleSort('model')}>
140
- Model {sortConfig.key === 'model' && (
141
- sortConfig.direction === 'desc' ? '↓' : '↑'
142
- )}
143
- </th>
144
- {["Average", "GAIA", "MATH", "SimpleQA"].map(benchmark => (
145
- <th key={benchmark} onClick={() => handleSort(benchmark)}>
146
- {benchmark} {sortConfig.key === benchmark && (
147
- sortConfig.direction === 'desc' ? '↓' : '↑'
148
- )}
149
- </th>
150
- ))}
151
- </tr>
152
- </thead>
153
- <tbody>
154
- {filteredAndSortedData.map((item, index) => (
155
- <tr key={index}>
156
- <td className="model-cell">
157
- <div className="model-name">{item.model_id}</div>
158
- {showVanilla && (
159
- <div className="vanilla-name">
160
- {`vanilla: ${getVanillaScore(item.model_id, 'Average')?.toFixed(1) || 'N/A'}%`}
161
- </div>
162
- )}
163
- </td>
164
- {["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
165
- <td key={metric}>
166
- <ScoreBar score={item.scores[metric]} />
167
- {showVanilla && getVanillaScore(item.model_id, metric) !== undefined && (
168
- <ScoreBar score={getVanillaScore(item.model_id, metric)} />
169
- )}
170
- </td>
171
- ))}
172
- </tr>
173
- ))}
174
- </tbody>
175
- </table>
176
- </div>
177
-
178
- <div className="legend">
179
- <p><strong>Agent type:</strong> {showToolCalling ? 'Tool-Calling' : 'Code'}{showVanilla ? ' (with Vanilla comparison)' : ''}</p>
180
- </div>
181
- </div>
182
- );
183
- };
184
 
185
- export default App;
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import JSONResponse
4
+ from fastapi.staticfiles import StaticFiles
5
+ import numpy as np
6
+ import argparse
7
+ import os
8
+ from datasets import load_dataset
9
 
10
+ HOST = os.environ.get("API_URL", "0.0.0.0")
11
+ PORT = os.environ.get("PORT", 7860)
12
+ parser = argparse.ArgumentParser()
13
+ parser.add_argument("--host", default=HOST)
14
+ parser.add_argument("--port", type=int, default=PORT)
15
+ parser.add_argument("--reload", action="store_true", default=True)
16
+ parser.add_argument("--ssl_certfile")
17
+ parser.add_argument("--ssl_keyfile")
18
+ args = parser.parse_args()
 
 
 
 
 
 
 
 
 
 
 
 
 
19
 
20
+ app = FastAPI()
21
+ app.add_middleware(
22
+ CORSMiddleware,
23
+ allow_origins=["*"],
24
+ allow_credentials=True,
25
+ allow_methods=["*"],
26
+ allow_headers=["*"],
27
+ )
28
 
29
+ @app.get("/api/results")
30
+ async def get_results():
31
+ try:
32
+ # Load the dataset
33
+ dataset = load_dataset("smolagents/results")
34
 
35
+ # Convert to list for processing
36
+ data = dataset["train"].to_pandas()
37
+
38
+ # Log some info to help debug
39
+ print("Dataset loaded, shape:", data.shape)
40
+ print("Columns:", data.columns)
41
+ print("First row:", data.iloc[0])
42
+
43
+ # Process the data to group by model and calculate scores
44
+ processed_data = []
45
+ grouped = data.groupby('model_id')
46
+
47
+ for model_id, group in grouped:
48
+ model_data = {
49
+ 'model_id': model_id,
50
+ 'scores': {}
51
+ }
52
+
53
+ # Calculate scores for each source
54
+ for source in group['source'].unique():
55
+ source_data = group[group['source'] == source]
56
+ avg_acc = source_data['acc'].mean()
57
+ model_data['scores'][source] = float(avg_acc)
58
+ model_data['scores']["Average"] = group["acc"].mean()
59
+
60
+ processed_data.append(model_data)
61
+
62
+ return processed_data
63
+
64
+ except Exception as e:
65
+ # Print the full error traceback to your logs
66
+ print("Error occurred:", str(e))
67
+ raise HTTPException(status_code=500, detail=str(e))
 
 
 
68
 
 
 
 
 
 
 
 
 
 
 
 
 
69
 
 
 
70
 
71
+ app.mount("/", StaticFiles(directory="static", html=True), name="static")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
 
73
+ if __name__ == "__main__":
74
+ import uvicorn
75
+ print(args)
76
+ uvicorn.run(
77
+ "app:app",
78
+ host=args.host,
79
+ port=args.port,
80
+ reload=args.reload,
81
+ ssl_certfile=args.ssl_certfile,
82
+ ssl_keyfile=args.ssl_keyfile,
83
+ )