<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description" content="DeepSeek Papers: Advancing Open-Source Language Models">
  <meta name="keywords" content="DeepSeek, LLM, AI, Research">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>DeepSeek Papers: Advancing Open-Source Language Models</title>

  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.3/css/bulma.min.css">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">
</head>
<body>

<section class="hero is-light">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">DeepSeek Papers</h1>
          <h2 class="subtitle is-3">Advancing Open-Source Language Models</h2>
        </div>
      </div>
    </div>
  </div>
</section>

<section class="section">
  <div class="container is-max-desktop">
    <div class="content">
      <div class="columns is-centered">
        <div class="column is-10">
          
          <!-- Native Sparse Attention -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2502.11089">Native Sparse Attention: Hardware-Aligned and Natively Trainable Sparse Attention</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: February 2025</p>
              <p class="paper-description">
                Introduces a sparse attention mechanism that is aligned with modern hardware and trainable
                end to end, enabling efficient long-context training and inference without sacrificing model quality.
              </p>
            </div>
          </div>

          <!-- DeepSeek-R1 -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2501.12948">DeepSeek-R1: Incentivizing Reasoning Capability in LLMs via Reinforcement Learning</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: January 20, 2025</p>
              <p class="paper-description">
                Built on DeepSeek-V3, the R1 model enhances reasoning capability through large-scale
                reinforcement learning, achieving performance comparable to OpenAI's o1 on reasoning benchmarks.
              </p>
            </div>
          </div>

          <!-- DeepSeek-V3 -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2412.19437">DeepSeek-V3 Technical Report</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: December 2024</p>
              <p class="paper-description">
                Scales a sparse Mixture-of-Experts (MoE) model to 671 billion total parameters, with 37 billion
                activated per token, using FP8 mixed-precision training and algorithm-hardware co-design to keep training costs low.
              </p>
            </div>
          </div>

          <!-- DeepSeek-V2 -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2405.04434">DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: May 2024</p>
              <p class="paper-description">
                Combines Multi-head Latent Attention (MLA) with the DeepSeekMoE architecture, cutting training
                costs by 42.5% and the KV cache by 93.3% relative to DeepSeek 67B while improving performance.
              </p>
            </div>
          </div>

          <!-- DeepSeekMath -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2402.03300">DeepSeekMath: Pushing the Limits of Mathematical Reasoning in Open Language Models</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: April 2024</p>
              <p class="paper-description">
                Presents methods for improving mathematical reasoning in open LLMs, including continued pre-training
                on math-focused web data and Group Relative Policy Optimization (GRPO), a memory-efficient reinforcement learning variant of PPO.
              </p>
            </div>
          </div>

          <!-- DeepSeekLLM -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2401.02954">DeepSeek LLM: Scaling Open-Source Language Models with Longtermism</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: November 29, 2023</p>
              <p class="paper-description">
                This foundational paper explores scaling laws and the trade-offs between data and model size, 
                establishing the groundwork for subsequent models.
              </p>
            </div>
          </div>

          <!-- DeepSeek-Prover -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2405.14333">DeepSeek-Prover: Advancing Theorem Proving in LLMs through Large-Scale Synthetic Data</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
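              <p class="release-date">Released: May 2024</p>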
              <p class="paper-description">
                Enhances theorem proving in language models by generating large-scale synthetic proof data for
                the Lean 4 proof assistant, improving results on automated theorem-proving benchmarks such as miniF2F.
              </p>
            </div>
          </div>

          <!-- DeepSeek-Coder-V2 -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2406.11931">DeepSeek-Coder-V2: Breaking the Barrier of Closed-Source Models in Code Intelligence</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
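              <p class="release-date">Released: June 2024</p>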
              <p class="paper-description">
                An open-source Mixture-of-Experts code model that reaches performance comparable to closed-source
                models such as GPT-4 Turbo on coding and math benchmarks, supporting 338 programming languages and a 128K context length.
              </p>
            </div>
          </div>

          <!-- DeepSeekMoE -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                <a href="https://arxiv.org/abs/2401.06066">DeepSeekMoE: Towards Ultimate Expert Specialization in Mixture-of-Experts Language Models</a>
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
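              <p class="release-date">Released: January 2024</p>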
              <p class="paper-description">
                Proposes fine-grained expert segmentation and shared expert isolation to drive expert
                specialization in MoE models, achieving comparable performance with significantly less computation.
              </p>
            </div>
          </div>

        </div>
      </div>
    </div>
  </div>
</section>

<footer class="footer">
  <div class="container">
    <div class="content has-text-centered">
      <p>
        This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">
        Creative Commons Attribution-ShareAlike 4.0 International License</a>.
      </p>
    </div>
  </div>
</footer>

</body>
</html>