<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8">
  <meta name="description" content="DeepSeek Papers: Advancing Open-Source Language Models">
  <meta name="keywords" content="DeepSeek, LLM, AI, Research">
  <meta name="viewport" content="width=device-width, initial-scale=1">
  <title>DeepSeek Papers: Advancing Open-Source Language Models</title>

  <link href="https://fonts.googleapis.com/css?family=Google+Sans|Noto+Sans|Castoro" rel="stylesheet">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/bulma/0.9.3/css/bulma.min.css">
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0/css/all.min.css">

  <style>
    .publication-title {
      color: #363636;
    }
    .paper-card {
      margin-bottom: 2rem;
      transition: transform 0.2s;
    }
    .paper-card:hover {
      transform: translateY(-5px);
    }
    .coming-soon-badge {
      background-color: #3273dc;
      color: white;
      padding: 0.25rem 0.75rem;
      border-radius: 4px;
      font-size: 0.8rem;
      margin-left: 1rem;
    }
    .paper-description {
      color: #4a4a4a;
      margin-top: 0.5rem;
    }
    .release-date {
      color: #7a7a7a;
      font-size: 0.9rem;
    }
  </style>
</head>
<body>

<section class="hero is-light">
  <div class="hero-body">
    <div class="container is-max-desktop">
      <div class="columns is-centered">
        <div class="column has-text-centered">
          <h1 class="title is-1 publication-title">DeepSeek Papers</h1>
          <h2 class="subtitle is-3">Advancing Open-Source Language Models</h2>
        </div>
      </div>
    </div>
  </div>
</section>

<section class="section">
  <div class="container is-max-desktop">
    <div class="content">
      <div class="columns is-centered">
        <div class="column is-10">
          
          <!-- DeepSeek LLM -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                DeepSeek LLM: Scaling Open-Source Language Models with Longtermism
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: November 29, 2023</p>
              <p class="paper-description">
                This foundational paper explores scaling laws and the trade-offs between data and model size, 
                establishing the groundwork for subsequent models.
              </p>
            </div>
          </div>

          <!-- DeepSeek-V2 -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                DeepSeek-V2: A Strong, Economical, and Efficient Mixture-of-Experts Language Model
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: May 2024</p>
              <p class="paper-description">
                Introduces a Mixture-of-Experts (MoE) architecture together with Multi-head Latent Attention, 
                cutting training costs by 42.5% relative to DeepSeek 67B while strengthening performance and inference efficiency.
              </p>
            </div>
          </div>

          <!-- Continue with other papers... -->
          <div class="card paper-card">
            <div class="card-content">
              <h3 class="title is-4">
                DeepSeek-V3 Technical Report
                <span class="coming-soon-badge">Deep Dive Coming Soon</span>
              </h3>
              <p class="release-date">Released: December 2024</p>
              <p class="paper-description">
                Scales a sparse MoE network to 671 billion total parameters (37 billion activated per token), 
                using FP8 mixed-precision training and high-performance computing (HPC) co-design strategies.
              </p>
            </div>
          </div>

          <!-- Add remaining papers following the same pattern -->

        </div>
      </div>
    </div>
  </div>
</section>

<footer class="footer">
  <div class="container">
    <div class="content has-text-centered">
      <p>
        This website is licensed under a <a rel="license" href="http://creativecommons.org/licenses/by-sa/4.0/">
        Creative Commons Attribution-ShareAlike 4.0 International License</a>.
      </p>
    </div>
  </div>
</footer>

</body>
</html>