Spaces:
Running
Running
Change order a bit
Browse files - index.html +52 -52
index.html
CHANGED
@@ -213,58 +213,6 @@ class BertModel(PreTrainedModel):
|
|
213 |
</ul>
|
214 |
</section>
|
215 |
|
216 |
-
<section>
|
217 |
-
<h2>Beyond Transformers: Ecosystem Reuse</h2>
|
218 |
-
<p><strong>Transformers</strong> makes modeling easy. <strong>vLLM</strong> makes inference fast.</p>
|
219 |
-
|
220 |
-
<div style="display: flex; gap: 2rem; margin-top: 2rem;">
|
221 |
-
<div style="flex: 1;">
|
222 |
-
<p><strong>🔧 Prototype with Transformers:</strong></p>
|
223 |
-
<pre><code class="language-python" data-trim data-noescape>
|
224 |
-
from transformers import pipeline
|
225 |
-
|
226 |
-
pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B")
|
227 |
-
print(pipe("The future of AI is")[0]["generated_text"])
|
228 |
-
</code></pre>
|
229 |
-
</div>
|
230 |
-
<div style="flex: 1;">
|
231 |
-
<img src="assets/vLLM-Full-Logo.png" alt="vLLM Illustration" style="border-radius: 1rem; box-shadow: 0 0 12px #000; width: 100%;" />
|
232 |
-
</div>
|
233 |
-
</div>
|
234 |
-
</section>
|
235 |
-
<section>
|
236 |
-
<h2>Deploy with vLLM — No Rewrite Needed</h2>
|
237 |
-
<p><strong>vLLM</strong> supports <code>transformers</code> models out of the box. </p>
|
238 |
-
|
239 |
-
<p>Just specify <code>model_impl="transformers"</code> if needed:</p>
|
240 |
-
|
241 |
-
<pre><code class="language-python" data-trim data-noescape>
|
242 |
-
from vllm import LLM, SamplingParams
|
243 |
-
|
244 |
-
llm = LLM(model="meta-llama/Llama-3.2-1B", model_impl="transformers")
|
245 |
-
params = SamplingParams(max_tokens=20)
|
246 |
-
outputs = llm.generate("The future of AI is", sampling_params=params)
|
247 |
-
print(outputs[0].outputs[0].text)
|
248 |
-
</code></pre>
|
249 |
-
<p class="fragment">We also support SGLang now, along with thousands of other libraries! </p>
|
250 |
-
|
251 |
-
</section>
|
252 |
-
<section>
|
253 |
-
<h2 style="margin-bottom: 1rem;">
|
254 |
-
Transformers × PyTorch — Enabling the Community
|
255 |
-
</h2>
|
256 |
-
<img src="assets/transformers_as_ref.png" alt="Transformers as Reference"
|
257 |
-
style="
|
258 |
-
width: 120%;
|
259 |
-
height: 110%;
|
260 |
-
object-fit: cover;
|
261 |
-
margin-left: -2.5%;
|
262 |
-
margin-top: -2.5%;
|
263 |
-
" />
|
264 |
-
</section>
|
265 |
-
|
266 |
-
|
267 |
-
|
268 |
<!-- 8 · Paradigms come at a cost -->
|
269 |
<section>
|
270 |
<h2>Paradigms Come at a Cost</h2>
|
@@ -625,6 +573,58 @@ model = AutoModelForConditionalGeneration.from_pretrained("Qwen/Qwen3-8B")
|
|
625 |
<iframe src="assets/model_growth.html" width="80%" height="600" style="border:none;"></iframe>
|
626 |
</section>
|
627 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
628 |
<section>
|
629 |
<h2>Takeaways & The Future</h2>
|
630 |
<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 1rem; margin-top: 1.5rem;">
|
|
|
213 |
</ul>
|
214 |
</section>
|
215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
216 |
<!-- 8 · Paradigms come at a cost -->
|
217 |
<section>
|
218 |
<h2>Paradigms Come at a Cost</h2>
|
|
|
573 |
<iframe src="assets/model_growth.html" width="80%" height="600" style="border:none;"></iframe>
|
574 |
</section>
|
575 |
|
576 |
+
|
577 |
+
|
578 |
+
<section>
|
579 |
+
<h2>Beyond Transformers: Ecosystem Reuse</h2>
|
580 |
+
<p><strong>Transformers</strong> makes modeling easy. <strong>vLLM</strong> makes inference fast.</p>
|
581 |
+
|
582 |
+
<div style="display: flex; gap: 2rem; margin-top: 2rem;">
|
583 |
+
<div style="flex: 1;">
|
584 |
+
<p><strong>🔧 Prototype with Transformers:</strong></p>
|
585 |
+
<pre><code class="language-python" data-trim data-noescape>
|
586 |
+
from transformers import pipeline
|
587 |
+
|
588 |
+
pipe = pipeline("text-generation", model="meta-llama/Llama-3.2-1B")
|
589 |
+
print(pipe("The future of AI is")[0]["generated_text"])
|
590 |
+
</code></pre>
|
591 |
+
</div>
|
592 |
+
<div style="flex: 1;">
|
593 |
+
<img src="assets/vLLM-Full-Logo.png" alt="vLLM Illustration" style="border-radius: 1rem; box-shadow: 0 0 12px #000; width: 100%;" />
|
594 |
+
</div>
|
595 |
+
</div>
|
596 |
+
</section>
|
597 |
+
<section>
|
598 |
+
<h2>Deploy with vLLM — No Rewrite Needed</h2>
|
599 |
+
<p><strong>vLLM</strong> supports <code>transformers</code> models out of the box. </p>
|
600 |
+
|
601 |
+
<p>Just specify <code>model_impl="transformers"</code> if needed:</p>
|
602 |
+
|
603 |
+
<pre><code class="language-python" data-trim data-noescape>
|
604 |
+
from vllm import LLM, SamplingParams
|
605 |
+
|
606 |
+
llm = LLM(model="meta-llama/Llama-3.2-1B", model_impl="transformers")
|
607 |
+
params = SamplingParams(max_tokens=20)
|
608 |
+
outputs = llm.generate("The future of AI is", sampling_params=params)
|
609 |
+
print(outputs[0].outputs[0].text)
|
610 |
+
</code></pre>
|
611 |
+
<p class="fragment">We also support SGLang now, along with thousands of other libraries! </p>
|
612 |
+
|
613 |
+
</section>
|
614 |
+
<section>
|
615 |
+
<h2 style="margin-bottom: 1rem;">
|
616 |
+
Transformers × PyTorch — Enabling the Community
|
617 |
+
</h2>
|
618 |
+
<img src="assets/transformers_as_ref.png" alt="Transformers as Reference"
|
619 |
+
style="
|
620 |
+
width: 120%;
|
621 |
+
height: 110%;
|
622 |
+
object-fit: cover;
|
623 |
+
margin-left: -2.5%;
|
624 |
+
margin-top: -2.5%;
|
625 |
+
" />
|
626 |
+
</section>
|
627 |
+
|
628 |
<section>
|
629 |
<h2>Takeaways & The Future</h2>
|
630 |
<div style="display: grid; grid-template-columns: repeat(2, 1fr); gap: 1rem; margin-top: 1.5rem;">
|