<!doctype html>
<!--
  The text "Spaces: / Running / Running" preceded the root element in this file.
  It appears to be hosting-page status text rather than page content, so it is
  kept here as a comment: left as bare text it would render at the top of the
  page and keep the doctype from being the first thing the parser sees.
-->
<html lang="en">
<head>
<!-- Character encoding is declared first so it is seen before any text content. -->
<meta charset="UTF-8">
<!-- Lay the page out at device width instead of a zoomed-out desktop width on phones. -->
<meta name="viewport" content="width=device-width, initial-scale=1">
<title>Model Benchmark: First Grade Math</title>
<style>
  /*
   * Page stylesheet for the benchmark report.
   * Colours are defined once as custom properties on :root and referenced
   * through var() everywhere else.
   */
  :root {
    --primary: #4a6fa5;
    --secondary: #6b8cbe;
    --accent: #ff6b6b;
    --background: #f5f7fa;
    --card-bg: #ffffff;
    --text-dark: #2d3748;
    --text-light: #4a5568;
    --success: #48bb78;
    --error: #e53e3e;
  }

  /* ---- Page shell ---- */
  body {
    font-family: 'Segoe UI', Roboto, -apple-system, BlinkMacSystemFont, sans-serif;
    background: var(--background);
    margin: 0;
    padding: 0;
    color: var(--text-dark);
    line-height: 1.6;
  }

  /* Centred content column, capped at 1000px. */
  .container {
    max-width: 1000px;
    margin: 0 auto;
    padding: 40px 20px;
  }

  /* Gradient banner holding the page title and subtitle. */
  header {
    background: linear-gradient(135deg, var(--primary), var(--secondary));
    color: white;
    padding: 30px 0;
    border-radius: 12px;
    margin-bottom: 30px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
  }

  /* ---- Headings ---- */
  h1 {
    margin: 0;
    font-size: 2.4rem;
    font-weight: 700;
  }

  /* position: relative anchors the accent underline drawn by h2::after. */
  h2 {
    color: var(--primary);
    font-size: 1.8rem;
    margin-top: 40px;
    margin-bottom: 20px;
    position: relative;
    padding-bottom: 10px;
  }

  /* Short accent-coloured bar under each section heading. */
  h2::after {
    content: '';
    position: absolute;
    bottom: 0;
    left: 0;
    width: 60px;
    height: 4px;
    background: var(--accent);
    border-radius: 2px;
  }

  /* ---- Chart panel ---- */
  .chart-container {
    background: var(--card-bg);
    border-radius: 12px;
    padding: 30px;
    box-shadow: 0 8px 20px rgba(0,0,0,0.06);
    margin: 30px 0;
  }

  /* Images scale down with their container and keep their aspect ratio. */
  img {
    max-width: 100%;
    height: auto;
    border-radius: 8px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
  }

  /* ---- Model panel and cards ---- */
  .model-info {
    background: var(--card-bg);
    border-radius: 12px;
    padding: 30px;
    box-shadow: 0 8px 20px rgba(0,0,0,0.06);
  }

  /* Wrapping flex row: cards drop to a new line when they no longer fit. */
  .model-cards {
    display: flex;
    flex-wrap: wrap;
    gap: 20px;
    justify-content: center;
    margin-top: 20px;
  }

  /* Roughly three cards per row; min-width stops them shrinking below 250px. */
  .model-card {
    background: var(--card-bg);
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 10px rgba(0,0,0,0.05);
    border-left: 5px solid var(--primary);
    width: calc(33% - 20px);
    min-width: 250px;
    transition: transform 0.2s, box-shadow 0.2s;
  }

  /* Lift the card slightly on hover. */
  .model-card:hover {
    transform: translateY(-5px);
    box-shadow: 0 8px 15px rgba(0,0,0,0.1);
  }

  .model-name {
    font-weight: 700;
    font-size: 1.2rem;
    color: var(--primary);
    margin-bottom: 10px;
  }

  /* ---- Per-card statistics ---- */
  .stats {
    display: flex;
    justify-content: space-between;
    margin-top: 15px;
  }

  /* Each stat takes an equal share of the row. */
  .stat {
    text-align: center;
    flex: 1;
  }

  .stat-value {
    font-size: 1.5rem;
    font-weight: 700;
  }

  .correct {
    color: var(--success);
  }

  .incorrect {
    color: var(--error);
  }

  .stat-label {
    font-size: 0.85rem;
    color: var(--text-light);
  }

  /* Grey track; overflow: hidden clips the fill to the rounded corners. */
  .accuracy-bar {
    height: 8px;
    background: #f0f0f0;
    border-radius: 4px;
    margin-top: 10px;
    overflow: hidden;
  }

  /* Coloured portion of the track; its width is set inline per card. */
  .accuracy-fill {
    height: 100%;
    background: var(--primary);
    border-radius: 4px;
  }

  /* ---- Footer ---- */
  footer {
    margin-top: 40px;
    text-align: center;
    color: var(--text-light);
    font-size: 0.9rem;
  }
</style>
</head>
<body>
<!-- Single centred column: title banner, chart panel, per-model result cards, footer. -->
<div class="container">
  <header>
    <h1>Model Benchmark: First Grade Math</h1>
    <p>Performance comparison across 1,000 questions per model</p>
  </header>

  <main>
    <section class="chart-container">
      <h2>Performance Overview</h2>
      <img src="download.png" alt="Chart comparing model performance on the first grade math benchmark">
    </section>

    <section class="model-info">
      <h2>Models Benchmarked</h2>
      <!--
        One card per model. Each card shows the correct and incorrect counts
        out of 1,000 questions and a bar whose fill width is the accuracy
        (correct / 1,000). The bar is purely visual, so it carries a text
        alternative with the same percentage for assistive technology.
      -->
      <div class="model-cards">
        <div class="model-card">
          <div class="model-name">FlameF0X/MathGPT2</div>
          <div class="stats">
            <div class="stat">
              <div class="stat-value correct">763</div>
              <div class="stat-label">Correct</div>
            </div>
            <div class="stat">
              <div class="stat-value incorrect">237</div>
              <div class="stat-label">Incorrect</div>
            </div>
          </div>
          <div class="accuracy-bar" role="img" aria-label="Accuracy: 76.3%">
            <div class="accuracy-fill" style="width: 76.3%"></div>
          </div>
        </div>

        <div class="model-card">
          <div class="model-name">FlameF0X/Muffin-2.9b-1C25</div>
          <div class="stats">
            <div class="stat">
              <div class="stat-value correct">9</div>
              <div class="stat-label">Correct</div>
            </div>
            <div class="stat">
              <div class="stat-value incorrect">991</div>
              <div class="stat-label">Incorrect</div>
            </div>
          </div>
          <div class="accuracy-bar" role="img" aria-label="Accuracy: 0.9%">
            <div class="accuracy-fill" style="width: 0.9%"></div>
          </div>
        </div>

        <div class="model-card">
          <div class="model-name">FlameF0X/MuffinFace-2</div>
          <div class="stats">
            <div class="stat">
              <div class="stat-value correct">8</div>
              <div class="stat-label">Correct</div>
            </div>
            <div class="stat">
              <div class="stat-value incorrect">992</div>
              <div class="stat-label">Incorrect</div>
            </div>
          </div>
          <div class="accuracy-bar" role="img" aria-label="Accuracy: 0.8%">
            <div class="accuracy-fill" style="width: 0.8%"></div>
          </div>
        </div>
      </div>
    </section>
  </main>

  <footer>
    <p>Benchmark results as of <time datetime="2025-04">April 2025</time></p>
  </footer>
</div>
</body>
</html>