metacritical committed
Commit 5852be1 · verified · 1 Parent(s): 63c5240

default more papers.

Files changed (1)
  1. index.html +31 -44
index.html CHANGED
@@ -10,35 +10,6 @@
 <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
 <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.3/css/bulma.min.css">
 <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
-
-<style>
-.publication-title {
-  color: #363636;
-}
-.paper-card {
-  margin-bottom: 2rem;
-  transition: transform 0.2s;
-}
-.paper-card:hover {
-  transform: translateY(-5px);
-}
-.coming-soon-badge {
-  background-color: #3273dc;
-  color: white;
-  padding: 0.25rem 0.75rem;
-  border-radius: 4px;
-  font-size: 0.8rem;
-  margin-left: 1rem;
-}
-.paper-description {
-  color: #4a4a4a;
-  margin-top: 0.5rem;
-}
-.release-date {
-  color: #7a7a7a;
-  font-size: 0.9rem;
-}
-</style>
 </head>
 <body>
 
@@ -61,32 +32,32 @@
 <div class="columns is-centered">
 <div class="column is-10">
 
-<!-- DeepSeekLLM -->
+<!-- Native Sparse Attention -->
 <div class="card paper-card">
 <div class="card-content">
 <h3 class="title is-4">
-DeepSeekLLM: Scaling Open-Source Language Models with Longer-termism
+<a href="https://arxiv.org/abs/2502.11089">Native Sparse Attention: Hardware-Aligned and Natively Trainable Sparse Attention</a>
 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
 </h3>
-<p class="release-date">Released: November 29, 2023</p>
+<p class="release-date">Released: February 2025</p>
 <p class="paper-description">
-This foundational paper explores scaling laws and the trade-offs between data and model size,
-establishing the groundwork for subsequent models.
+Introduces a new approach to sparse attention that is both hardware-efficient and natively trainable,
+improving the performance of large language models.
 </p>
 </div>
 </div>
 
-<!-- DeepSeek-V2 -->
+<!-- DeepSeek-R1 -->
 <div class="card paper-card">
 <div class="card-content">
 <h3 class="title is-4">
-DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model
+DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning
 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
 </h3>
-<p class="release-date">Released: May 2024</p>
+<p class="release-date">Released: January 20, 2025</p>
 <p class="paper-description">
-Introduces a Mixture-of-Experts (MoE) architecture, enhancing performance while reducing
-training costs by 42%. Emphasizes strong performance characteristics and efficiency improvements.
+The R1 model builds on previous work to enhance reasoning capabilities through large-scale
+reinforcement learning, competing directly with leading models like OpenAI's o1.
 </p>
 </div>
 </div>
@@ -106,17 +77,17 @@
 </div>
 </div>
 
-<!-- DeepSeek-R1 -->
+<!-- DeepSeek-V2 -->
 <div class="card paper-card">
 <div class="card-content">
 <h3 class="title is-4">
-DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning
+DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model
 <span class="coming-soon-badge">Deep Dive Coming Soon</span>
 </h3>
-<p class="release-date">Released: January 20, 2025</p>
+<p class="release-date">Released: May 2024</p>
 <p class="paper-description">
-The R1 model builds on previous work to enhance reasoning capabilities through large-scale
-reinforcement learning, competing directly with leading models like OpenAI's o1.
+Introduces a Mixture-of-Experts (MoE) architecture, enhancing performance while reducing
+training costs by 42%. Emphasizes strong performance characteristics and efficiency improvements.
 </p>
 </div>
 </div>
@@ -136,6 +107,22 @@
 </div>
 </div>
 
+<!-- DeepSeekLLM -->
+<div class="card paper-card">
+<div class="card-content">
+<h3 class="title is-4">
+DeepSeekLLM: Scaling Open-Source Language Models with Longer-termism
+<span class="coming-soon-badge">Deep Dive Coming Soon</span>
+</h3>
+<p class="release-date">Released: November 29, 2023</p>
+<p class="paper-description">
+This foundational paper explores scaling laws and the trade-offs between data and model size,
+establishing the groundwork for subsequent models.
+</p>
+</div>
+</div>
+
+<!-- Papers without specific dates -->
 <!-- DeepSeek-Prover -->
 <div class="card paper-card">
 <div class="card-content">