Update frontend/src/App.js
Browse files- frontend/src/App.js +117 -29
frontend/src/App.js
CHANGED
@@ -2,46 +2,90 @@ import React, { useState, useEffect } from 'react';
|
|
2 |
import { chain } from 'lodash';
|
3 |
import './App.css';
|
4 |
|
5 |
-
const ScoreBar = ({ score }) => {
|
6 |
if (score === undefined || score === null) return null;
|
7 |
|
8 |
const percentage = score <= 1 ? score * 100 : score;
|
9 |
const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
|
10 |
const backgroundColor = `hsl(${hue}, 80%, 50%)`;
|
11 |
|
|
|
|
|
|
|
|
|
12 |
return (
|
13 |
-
<div className="score-
|
14 |
-
<div
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
</div>
|
25 |
);
|
26 |
};
|
27 |
|
28 |
const App = () => {
|
29 |
const [data, setData] = useState([]);
|
|
|
|
|
30 |
const [loading, setLoading] = useState(true);
|
31 |
const [error, setError] = useState(null);
|
32 |
const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
|
33 |
const [searchQuery, setSearchQuery] = useState('');
|
|
|
|
|
34 |
|
35 |
useEffect(() => {
|
36 |
const fetchData = async () => {
|
37 |
try {
|
38 |
setLoading(true);
|
39 |
-
|
40 |
-
|
41 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
42 |
}
|
43 |
-
const
|
44 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
45 |
} catch (err) {
|
46 |
console.error('Error fetching data:', err);
|
47 |
setError(err.message);
|
@@ -57,7 +101,21 @@ const App = () => {
|
|
57 |
setSortConfig({ key, direction });
|
58 |
};
|
59 |
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
.filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase()))
|
62 |
.orderBy(
|
63 |
[item => {
|
@@ -80,17 +138,39 @@ const App = () => {
|
|
80 |
<p className="subtitle">How do different LLMs compare for powering agents?</p>
|
81 |
<p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co/datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p>
|
82 |
</div>
|
83 |
-
|
84 |
-
<div className="
|
85 |
-
<
|
86 |
-
|
87 |
-
|
88 |
-
|
89 |
-
|
90 |
-
|
91 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
</div>
|
93 |
-
|
94 |
<div className="table-container">
|
95 |
<table>
|
96 |
<thead>
|
@@ -115,7 +195,11 @@ const App = () => {
|
|
115 |
<td className="model-name">{item.model_id}</td>
|
116 |
{["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
|
117 |
<td key={metric}>
|
118 |
-
<ScoreBar
|
|
|
|
|
|
|
|
|
119 |
</td>
|
120 |
))}
|
121 |
</tr>
|
@@ -123,6 +207,10 @@ const App = () => {
|
|
123 |
</tbody>
|
124 |
</table>
|
125 |
</div>
|
|
|
|
|
|
|
|
|
126 |
</div>
|
127 |
);
|
128 |
};
|
|
|
2 |
import { chain } from 'lodash';
|
3 |
import './App.css';
|
4 |
|
5 |
+
const ScoreBar = ({ score, vanillaScore, showVanilla = true }) => {
|
6 |
if (score === undefined || score === null) return null;
|
7 |
|
8 |
const percentage = score <= 1 ? score * 100 : score;
|
9 |
const hue = Math.min(percentage * 1.2, 120); // Maps 0-100% to 0-120 (red to green)
|
10 |
const backgroundColor = `hsl(${hue}, 80%, 50%)`;
|
11 |
|
12 |
+
const vanillaPercentage = vanillaScore <= 1 ? vanillaScore * 100 : vanillaScore;
|
13 |
+
const vanillaHue = Math.min(vanillaPercentage * 1.2, 120);
|
14 |
+
const vanillaBackgroundColor = `hsl(${vanillaHue}, 80%, 50%)`;
|
15 |
+
|
16 |
return (
|
17 |
+
<div className="score-container">
|
18 |
+
<div className="score-bar">
|
19 |
+
<div
|
20 |
+
className="score-fill"
|
21 |
+
style={{
|
22 |
+
width: `${percentage}%`,
|
23 |
+
backgroundColor
|
24 |
+
}}
|
25 |
+
/>
|
26 |
+
<span className="score-text">
|
27 |
+
{percentage.toFixed(1)}%
|
28 |
+
</span>
|
29 |
+
</div>
|
30 |
+
|
31 |
+
{showVanilla && vanillaScore !== undefined && vanillaScore !== null && (
|
32 |
+
<div className="score-bar vanilla-bar">
|
33 |
+
<div
|
34 |
+
className="score-fill"
|
35 |
+
style={{
|
36 |
+
width: `${vanillaPercentage}%`,
|
37 |
+
backgroundColor: vanillaBackgroundColor
|
38 |
+
}}
|
39 |
+
/>
|
40 |
+
<span className="score-text vanilla-text">
|
41 |
+
{vanillaPercentage.toFixed(1)}% <small>vanilla</small>
|
42 |
+
</span>
|
43 |
+
</div>
|
44 |
+
)}
|
45 |
</div>
|
46 |
);
|
47 |
};
|
48 |
|
49 |
const App = () => {
|
50 |
const [data, setData] = useState([]);
|
51 |
+
const [vanillaData, setVanillaData] = useState([]);
|
52 |
+
const [toolCallingData, setToolCallingData] = useState([]);
|
53 |
const [loading, setLoading] = useState(true);
|
54 |
const [error, setError] = useState(null);
|
55 |
const [sortConfig, setSortConfig] = useState({ key: 'Average', direction: 'desc' });
|
56 |
const [searchQuery, setSearchQuery] = useState('');
|
57 |
+
const [showVanilla, setShowVanilla] = useState(true);
|
58 |
+
const [showToolCalling, setShowToolCalling] = useState(false);
|
59 |
|
60 |
useEffect(() => {
|
61 |
const fetchData = async () => {
|
62 |
try {
|
63 |
setLoading(true);
|
64 |
+
|
65 |
+
// Fetch code agent data (default)
|
66 |
+
const codeResponse = await fetch('https://smolagents-smolagents-llm-leaderboard.hf.space/api/results');
|
67 |
+
if (!codeResponse.ok) {
|
68 |
+
throw new Error(`HTTP error! status: ${codeResponse.status}`);
|
69 |
+
}
|
70 |
+
const codeData = await codeResponse.json();
|
71 |
+
setData(codeData);
|
72 |
+
|
73 |
+
// Fetch vanilla data
|
74 |
+
const vanillaResponse = await fetch('https://smolagents-smolagents-llm-leaderboard.hf.space/api/results?agent_action_type=vanilla');
|
75 |
+
if (!vanillaResponse.ok) {
|
76 |
+
throw new Error(`HTTP error! status: ${vanillaResponse.status}`);
|
77 |
}
|
78 |
+
const vanillaJsonData = await vanillaResponse.json();
|
79 |
+
setVanillaData(vanillaJsonData);
|
80 |
+
|
81 |
+
// Fetch tool-calling data
|
82 |
+
const toolCallingResponse = await fetch('https://smolagents-smolagents-llm-leaderboard.hf.space/api/results?agent_action_type=tool-calling');
|
83 |
+
if (!toolCallingResponse.ok) {
|
84 |
+
throw new Error(`HTTP error! status: ${toolCallingResponse.status}`);
|
85 |
+
}
|
86 |
+
const toolCallingJsonData = await toolCallingResponse.json();
|
87 |
+
setToolCallingData(toolCallingJsonData);
|
88 |
+
|
89 |
} catch (err) {
|
90 |
console.error('Error fetching data:', err);
|
91 |
setError(err.message);
|
|
|
101 |
setSortConfig({ key, direction });
|
102 |
};
|
103 |
|
104 |
+
// Choose the dataset that backs the table: the tool-calling results when that
// option is enabled, otherwise the default code-agent results.
const getActiveData = () => (showToolCalling ? toolCallingData : data);
|
111 |
+
|
112 |
+
/**
 * Find the vanilla score of a model for one metric.
 *
 * @param {string} modelId - `model_id` to look up in `vanillaData`.
 * @param {string} metric - Key into the matched entry's `scores` object.
 * @returns {*} The stored score, or `undefined` when the model has no vanilla
 *   entry, the entry has no `scores` object, or the metric is absent.
 */
const getVanillaScore = (modelId, metric) => {
  const vanillaModel = vanillaData.find((item) => item.model_id === modelId);
  // `scores` is guarded too: this runs for every table cell during render, so
  // a single entry without `scores` would otherwise throw and blank the page.
  return vanillaModel?.scores?.[metric];
};
|
117 |
+
|
118 |
+
const filteredAndSortedData = chain(getActiveData())
|
119 |
.filter(item => item.model_id.toLowerCase().includes(searchQuery.toLowerCase()))
|
120 |
.orderBy(
|
121 |
[item => {
|
|
|
138 |
<p className="subtitle">How do different LLMs compare for powering agents?</p>
|
139 |
<p className="subtitle">Uses <a target="_blank" href="https://github.com/huggingface/smolagents">smolagents</a> with <a target="_blank" href="https://huggingface.co/datasets/smolagents/benchmark-v1">smolagents benchmark</a>.</p>
|
140 |
</div>
|
141 |
+
|
142 |
+
<div className="filters-container">
|
143 |
+
<div className="search-container">
|
144 |
+
<input
|
145 |
+
type="text"
|
146 |
+
className="search-input"
|
147 |
+
placeholder="Search models..."
|
148 |
+
value={searchQuery}
|
149 |
+
onChange={(e) => setSearchQuery(e.target.value)}
|
150 |
+
/>
|
151 |
+
</div>
|
152 |
+
|
153 |
+
<div className="options-container">
|
154 |
+
<label className="option-label">
|
155 |
+
<input
|
156 |
+
type="checkbox"
|
157 |
+
checked={showVanilla}
|
158 |
+
onChange={() => setShowVanilla(!showVanilla)}
|
159 |
+
/>
|
160 |
+
Show Vanilla Scores
|
161 |
+
</label>
|
162 |
+
|
163 |
+
<label className="option-label">
|
164 |
+
<input
|
165 |
+
type="checkbox"
|
166 |
+
checked={showToolCalling}
|
167 |
+
onChange={() => setShowToolCalling(!showToolCalling)}
|
168 |
+
/>
|
169 |
+
Show Tool-Calling Scores
|
170 |
+
</label>
|
171 |
+
</div>
|
172 |
</div>
|
173 |
+
|
174 |
<div className="table-container">
|
175 |
<table>
|
176 |
<thead>
|
|
|
195 |
<td className="model-name">{item.model_id}</td>
|
196 |
{["Average", "GAIA", "MATH", "SimpleQA"].map(metric => (
|
197 |
<td key={metric}>
|
198 |
+
<ScoreBar
|
199 |
+
score={item.scores[metric]}
|
200 |
+
vanillaScore={getVanillaScore(item.model_id, metric)}
|
201 |
+
showVanilla={showVanilla}
|
202 |
+
/>
|
203 |
</td>
|
204 |
))}
|
205 |
</tr>
|
|
|
207 |
</tbody>
|
208 |
</table>
|
209 |
</div>
|
210 |
+
|
211 |
+
<div className="legend">
|
212 |
+
<p><strong>Agent types:</strong> {showToolCalling ? 'Tool-Calling' : 'Code'}{showVanilla ? ' (with Vanilla comparison)' : ''}</p>
|
213 |
+
</div>
|
214 |
</div>
|
215 |
);
|
216 |
};
|