<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <!-- Lay the page out at the device width so the wrapping card row works on small screens. -->
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>Model Benchmark: First Grade Math</title>

<style>
  /*
   * Styles for the benchmark report page.
   * Colours are defined once as custom properties on :root and referenced
   * with var() by the rules below.
   */
  :root {
    --primary: #4a6fa5;
    --secondary: #6b8cbe;
    --accent: #ff6b6b;
    --background: #f5f7fa;
    --card-bg: #ffffff;
    --text-dark: #2d3748;
    --text-light: #4a5568;
    --success: #48bb78;
    --error: #e53e3e;
  }

  /* ---- Page frame ---- */

  body {
    font-family: 'Segoe UI', Roboto, -apple-system, BlinkMacSystemFont, sans-serif;
    background: var(--background);
    margin: 0;
    padding: 0;
    color: var(--text-dark);
    line-height: 1.6;
  }

  /* Centred column, capped at 1000px wide. */
  .container {
    max-width: 1000px;
    margin: 0 auto;
    padding: 40px 20px;
  }

  /* Gradient banner at the top of the page. */
  header {
    background: linear-gradient(135deg, var(--primary), var(--secondary));
    color: white;
    padding: 30px 0;
    border-radius: 12px;
    margin-bottom: 30px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.1);
  }

  /* ---- Headings ---- */

  h1 {
    margin: 0;
    font-size: 2.4rem;
    font-weight: 700;
  }

  /* position: relative makes the h2 the containing block for its ::after bar. */
  h2 {
    color: var(--primary);
    font-size: 1.8rem;
    margin-top: 40px;
    margin-bottom: 20px;
    position: relative;
    padding-bottom: 10px;
  }

  /* Short accent-coloured bar pinned to the bottom-left of every h2. */
  h2::after {
    content: '';
    position: absolute;
    bottom: 0;
    left: 0;
    width: 60px;
    height: 4px;
    background: var(--accent);
    border-radius: 2px;
  }

  /* ---- Chart panel ---- */

  .chart-container {
    background: var(--card-bg);
    border-radius: 12px;
    padding: 30px;
    box-shadow: 0 8px 20px rgba(0,0,0,0.06);
    margin: 30px 0;
  }

  /* Images never overflow their parent; height follows the scaled width. */
  img {
    max-width: 100%;
    height: auto;
    border-radius: 8px;
    box-shadow: 0 4px 12px rgba(0,0,0,0.08);
  }

  /* ---- Model result cards ---- */

  .model-info {
    background: var(--card-bg);
    border-radius: 12px;
    padding: 30px;
    box-shadow: 0 8px 20px rgba(0,0,0,0.06);
  }

  /* Wrapping flex row that holds the individual cards. */
  .model-cards {
    display: flex;
    flex-wrap: wrap;
    gap: 20px;
    justify-content: center;
    margin-top: 20px;
  }

  /* About a third of the row each, but never narrower than 250px. */
  .model-card {
    background: var(--card-bg);
    border-radius: 10px;
    padding: 20px;
    box-shadow: 0 4px 10px rgba(0,0,0,0.05);
    border-left: 5px solid var(--primary);
    width: calc(33% - 20px);
    min-width: 250px;
    transition: transform 0.2s, box-shadow 0.2s;
  }

  /* Lift the card slightly while the pointer is over it. */
  .model-card:hover {
    transform: translateY(-5px);
    box-shadow: 0 8px 15px rgba(0,0,0,0.1);
  }

  .model-name {
    font-weight: 700;
    font-size: 1.2rem;
    color: var(--primary);
    margin-bottom: 10px;
  }

  /* Row of equally sized stat columns inside a card. */
  .stats {
    display: flex;
    justify-content: space-between;
    margin-top: 15px;
  }

  .stat {
    text-align: center;
    flex: 1;
  }

  .stat-value {
    font-size: 1.5rem;
    font-weight: 700;
  }

  /* Colour modifiers applied alongside .stat-value. */
  .correct {
    color: var(--success);
  }

  .incorrect {
    color: var(--error);
  }

  .stat-label {
    font-size: 0.85rem;
    color: var(--text-light);
  }

  /* Grey track; overflow: hidden clips the fill to the rounded corners. */
  .accuracy-bar {
    height: 8px;
    background: #f0f0f0;
    border-radius: 4px;
    margin-top: 10px;
    overflow: hidden;
  }

  /* Coloured fill; its width is set inline on each card. */
  .accuracy-fill {
    height: 100%;
    background: var(--primary);
    border-radius: 4px;
  }

  /* ---- Footer ---- */

  footer {
    margin-top: 40px;
    text-align: center;
    color: var(--text-light);
    font-size: 0.9rem;
  }
</style>

</head>
<body>
<div class="container">

<!-- Page banner: report title plus a one-line summary of the benchmark size. -->
<header>
  <h1>Model Benchmark: First Grade Math</h1>
  <p>Performance comparison across 1,000 questions per model</p>
</header>

<!-- Overview panel: section heading and the chart image (path is relative to this page). -->
<div class="chart-container">
  <h2>Performance Overview</h2>
  <!-- NOTE(review): add intrinsic width/height attributes once the image's pixel size is known, to reserve layout space. -->
  <img src="download.png" alt="Model Benchmark Chart">
</div>

<div class="model-info">
<h2>Models Benchmarked</h2>
<div class="model-cards">

<!-- Result card for FlameF0X/MathGPT2: 763 correct and 237 incorrect answers. -->
<div class="model-card">
  <div class="model-name">FlameF0X/MathGPT2</div>
  <div class="stats">
    <div class="stat">
      <div class="stat-value correct">763</div>
      <div class="stat-label">Correct</div>
    </div>
    <div class="stat">
      <div class="stat-value incorrect">237</div>
      <div class="stat-label">Incorrect</div>
    </div>
  </div>
  <!-- The fill width (763 of 1,000) is only visual, so the bar is labelled for assistive technology. -->
  <div class="accuracy-bar" role="img" aria-label="Accuracy: 76.3%">
    <div class="accuracy-fill" style="width: 76.3%"></div>
  </div>
</div>

<!-- Result card for FlameF0X/Muffin-2.9b-1C25: 9 correct and 991 incorrect answers. -->
<div class="model-card">
  <div class="model-name">FlameF0X/Muffin-2.9b-1C25</div>
  <div class="stats">
    <div class="stat">
      <div class="stat-value correct">9</div>
      <div class="stat-label">Correct</div>
    </div>
    <div class="stat">
      <div class="stat-value incorrect">991</div>
      <div class="stat-label">Incorrect</div>
    </div>
  </div>
  <!-- The fill width (9 of 1,000) is only visual, so the bar is labelled for assistive technology. -->
  <div class="accuracy-bar" role="img" aria-label="Accuracy: 0.9%">
    <div class="accuracy-fill" style="width: 0.9%"></div>
  </div>
</div>

<!-- Result card for FlameF0X/MuffinFace-2: 8 correct and 992 incorrect answers. -->
<div class="model-card">
  <div class="model-name">FlameF0X/MuffinFace-2</div>
  <div class="stats">
    <div class="stat">
      <div class="stat-value correct">8</div>
      <div class="stat-label">Correct</div>
    </div>
    <div class="stat">
      <div class="stat-value incorrect">992</div>
      <div class="stat-label">Incorrect</div>
    </div>
  </div>
  <!-- The fill width (8 of 1,000) is only visual, so the bar is labelled for assistive technology. -->
  <div class="accuracy-bar" role="img" aria-label="Accuracy: 0.8%">
    <div class="accuracy-fill" style="width: 0.8%"></div>
  </div>
</div>

</div>
</div>

<!-- Page footer: as-of date for the results, also exposed in machine-readable form. -->
<footer>
  <p>Benchmark results as of <time datetime="2025-04">April 2025</time></p>
</footer>

</div>
</body>
</html>