Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Commit
·
f6e13e9
1
Parent(s):
e122263
thinking toggle
Browse files
PROMPTS.md
CHANGED
@@ -70,3 +70,9 @@ System: {{preprompt}}\nUser:{{#each messages}}{{#ifUser}}{{content}}\nFalcon:{{/
|
|
70 |
```env
|
71 |
{{#each messages}}{{#ifUser}}<start_of_turn>user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<end_of_turn>\n<start_of_turn>model\n{{/ifUser}}{{#ifAssistant}}{{content}}<end_of_turn>\n{{/ifAssistant}}{{/each}}
|
72 |
```
|
|
|
|
|
|
|
|
|
|
|
|
|
|
70 |
```env
|
71 |
{{#each messages}}{{#ifUser}}<start_of_turn>user\n{{#if @first}}{{#if @root.preprompt}}{{@root.preprompt}}\n{{/if}}{{/if}}{{content}}<end_of_turn>\n<start_of_turn>model\n{{/ifUser}}{{#ifAssistant}}{{content}}<end_of_turn>\n{{/ifAssistant}}{{/each}}
|
72 |
```
|
73 |
+
|
74 |
+
## Qwen3
|
75 |
+
|
76 |
+
```env
|
77 |
+
{{#if @root.preprompt}}<|im_start|>system\n{{@root.preprompt}}<|im_end|>\n{{/if}}{{#each messages}}{{#ifUser}}<|im_start|>user\n{{content}}<|im_end|>\n<|im_start|>assistant\n{{/ifUser}}{{#ifAssistant}}{{content}}<|im_end|>\n{{/ifAssistant}}{{/each}}
|
78 |
+
```
|
src/lib/components/chat/ChatInput.svelte
CHANGED
@@ -6,6 +6,7 @@
|
|
6 |
import IconInternet from "$lib/components/icons/IconInternet.svelte";
|
7 |
import IconImageGen from "$lib/components/icons/IconImageGen.svelte";
|
8 |
import IconPaperclip from "$lib/components/icons/IconPaperclip.svelte";
|
|
|
9 |
import { useSettingsStore } from "$lib/stores/settings";
|
10 |
import { webSearchParameters } from "$lib/stores/webSearchParameters";
|
11 |
import {
|
@@ -37,6 +38,7 @@
|
|
37 |
modelIsMultimodal?: boolean;
|
38 |
children?: import("svelte").Snippet;
|
39 |
onPaste?: (e: ClipboardEvent) => void;
|
|
|
40 |
}
|
41 |
|
42 |
let {
|
@@ -51,6 +53,7 @@
|
|
51 |
modelIsMultimodal = false,
|
52 |
children,
|
53 |
onPaste,
|
|
|
54 |
}: Props = $props();
|
55 |
|
56 |
const onFileChange = async (e: Event) => {
|
@@ -68,7 +71,7 @@
|
|
68 |
let textareaElement: HTMLTextAreaElement | undefined = $state();
|
69 |
let isCompositionOn = $state(false);
|
70 |
|
71 |
-
const dispatch = createEventDispatcher<{ submit:
|
72 |
|
73 |
onMount(() => {
|
74 |
if (!isVirtualKeyboard()) {
|
@@ -121,8 +124,11 @@
|
|
121 |
!isVirtualKeyboard() &&
|
122 |
value.trim() !== ""
|
123 |
) {
|
124 |
-
event.preventDefault();
|
125 |
-
|
|
|
|
|
|
|
126 |
}
|
127 |
}
|
128 |
|
@@ -158,6 +164,9 @@
|
|
158 |
let showExtraTools = $derived(modelHasTools && !assistant);
|
159 |
|
160 |
let showNoTools = $derived(!showWebSearch && !showImageGen && !showFileUpload && !showExtraTools);
|
|
|
|
|
|
|
161 |
</script>
|
162 |
|
163 |
<div class="flex min-h-full flex-1 flex-col" onpaste={onPaste}>
|
@@ -227,6 +236,33 @@
|
|
227 |
</button>
|
228 |
</HoverTooltip>
|
229 |
{/if}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
230 |
{#if showImageGen}
|
231 |
<HoverTooltip
|
232 |
label="Generate images"
|
|
|
6 |
import IconInternet from "$lib/components/icons/IconInternet.svelte";
|
7 |
import IconImageGen from "$lib/components/icons/IconImageGen.svelte";
|
8 |
import IconPaperclip from "$lib/components/icons/IconPaperclip.svelte";
|
9 |
+
import IconThinking from "$lib/components/icons/IconThinking.svelte";
|
10 |
import { useSettingsStore } from "$lib/stores/settings";
|
11 |
import { webSearchParameters } from "$lib/stores/webSearchParameters";
|
12 |
import {
|
|
|
38 |
modelIsMultimodal?: boolean;
|
39 |
children?: import("svelte").Snippet;
|
40 |
onPaste?: (e: ClipboardEvent) => void;
|
41 |
+
showThinking?: boolean;
|
42 |
}
|
43 |
|
44 |
let {
|
|
|
53 |
modelIsMultimodal = false,
|
54 |
children,
|
55 |
onPaste,
|
56 |
+
showThinking = false,
|
57 |
}: Props = $props();
|
58 |
|
59 |
const onFileChange = async (e: Event) => {
|
|
|
71 |
let textareaElement: HTMLTextAreaElement | undefined = $state();
|
72 |
let isCompositionOn = $state(false);
|
73 |
|
74 |
+
const dispatch = createEventDispatcher<{ submit: { text: string } }>();
|
75 |
|
76 |
onMount(() => {
|
77 |
if (!isVirtualKeyboard()) {
|
|
|
124 |
!isVirtualKeyboard() &&
|
125 |
value.trim() !== ""
|
126 |
) {
|
127 |
+
event.preventDefault();
|
128 |
+
// Only append the /think or /no_think switch when the thinking toggle is shown (showThinking); otherwise send the plain trimmed text.
const textToSend = !showThinking ? value.trim() : thinkingIsOn
|
129 |
+
? `${value.trim()} /think`
|
130 |
+
: `${value.trim()} /no_think`;
|
131 |
+
dispatch("submit", { text: textToSend });
|
132 |
}
|
133 |
}
|
134 |
|
|
|
164 |
let showExtraTools = $derived(modelHasTools && !assistant);
|
165 |
|
166 |
let showNoTools = $derived(!showWebSearch && !showImageGen && !showFileUpload && !showExtraTools);
|
167 |
+
|
168 |
+
let thinkingIsOn = $state(false);
|
169 |
+
|
170 |
</script>
|
171 |
|
172 |
<div class="flex min-h-full flex-1 flex-col" onpaste={onPaste}>
|
|
|
236 |
</button>
|
237 |
</HoverTooltip>
|
238 |
{/if}
|
239 |
+
{#if showThinking}
|
240 |
+
<HoverTooltip
|
241 |
+
label="Thinking"
|
242 |
+
position="top"
|
243 |
+
TooltipClassNames="text-xs !text-left !w-auto whitespace-nowrap !py-1 !mb-0 max-sm:hidden {thinkingIsOn
|
244 |
+
? 'hidden'
|
245 |
+
: ''}"
|
246 |
+
>
|
247 |
+
<button
|
248 |
+
class="base-tool"
|
249 |
+
class:active-tool={thinkingIsOn}
|
250 |
+
disabled={loading}
|
251 |
+
onclick={(e) => {
|
252 |
+
e.preventDefault();
|
253 |
+
thinkingIsOn = !thinkingIsOn;
|
254 |
+
}}
|
255 |
+
>
|
256 |
+
<IconThinking classNames="text-xl" />
|
257 |
+
{#if thinkingIsOn}
|
258 |
+
Thinking
|
259 |
+
{:else}
|
260 |
+
Not Thinking
|
261 |
+
{/if}
|
262 |
+
|
263 |
+
</button>
|
264 |
+
</HoverTooltip>
|
265 |
+
{/if}
|
266 |
{#if showImageGen}
|
267 |
<HoverTooltip
|
268 |
label="Generate images"
|
src/lib/components/chat/ChatMessage.svelte
CHANGED
@@ -313,7 +313,7 @@
|
|
313 |
<p
|
314 |
class="disabled w-full appearance-none whitespace-break-spaces text-wrap break-words bg-inherit px-5 py-3.5 text-gray-500 dark:text-gray-400"
|
315 |
>
|
316 |
-
{message.content.trim()}
|
317 |
</p>
|
318 |
{:else}
|
319 |
<form
|
@@ -329,7 +329,7 @@
|
|
329 |
class="w-full whitespace-break-spaces break-words rounded-xl bg-gray-100 px-5 py-3.5 text-gray-500 *:h-max dark:bg-gray-800 dark:text-gray-400"
|
330 |
rows="5"
|
331 |
bind:this={editContentEl}
|
332 |
-
value={message.content.trim()}
|
333 |
onkeydown={handleKeyDown}
|
334 |
required
|
335 |
></textarea>
|
|
|
313 |
<p
|
314 |
class="disabled w-full appearance-none whitespace-break-spaces text-wrap break-words bg-inherit px-5 py-3.5 text-gray-500 dark:text-gray-400"
|
315 |
>
|
316 |
+
{message.content.trim().replace(/\s*\/(no_)?think$/, '')}
|
317 |
</p>
|
318 |
{:else}
|
319 |
<form
|
|
|
329 |
class="w-full whitespace-break-spaces break-words rounded-xl bg-gray-100 px-5 py-3.5 text-gray-500 *:h-max dark:bg-gray-800 dark:text-gray-400"
|
330 |
rows="5"
|
331 |
bind:this={editContentEl}
|
332 |
+
value={message.content.trim().replace(/\s*\/(no_)?think$/, '')}
|
333 |
onkeydown={handleKeyDown}
|
334 |
required
|
335 |
></textarea>
|
src/lib/components/chat/ChatWindow.svelte
CHANGED
@@ -73,6 +73,9 @@
|
|
73 |
let isSharedRecently = $state(false);
|
74 |
let editMsdgId: Message["id"] | null = $state(null);
|
75 |
let pastedLongContent = $state(false);
|
|
|
|
|
|
|
76 |
|
77 |
beforeNavigate(() => {
|
78 |
if (page.params.id) {
|
@@ -88,11 +91,19 @@
|
|
88 |
continue: { id: Message["id"] };
|
89 |
}>();
|
90 |
|
91 |
-
const handleSubmit = () => {
|
|
|
|
|
|
|
|
|
|
|
|
|
92 |
if (loading) return;
|
93 |
-
|
|
|
|
|
94 |
message = "";
|
95 |
-
}
|
96 |
|
97 |
let lastTarget: EventTarget | null = null;
|
98 |
|
@@ -438,6 +449,7 @@
|
|
438 |
disabled={isReadOnly || lastIsError}
|
439 |
modelHasTools={currentModel.tools}
|
440 |
modelIsMultimodal={currentModel.multimodal}
|
|
|
441 |
/>
|
442 |
{/if}
|
443 |
|
|
|
73 |
let isSharedRecently = $state(false);
|
74 |
let editMsdgId: Message["id"] | null = $state(null);
|
75 |
let pastedLongContent = $state(false);
|
76 |
+
let showThinking = $derived(
|
77 |
+
currentModel.name === "Qwen/Qwen3_8B"
|
78 |
+
);
|
79 |
|
80 |
beforeNavigate(() => {
|
81 |
if (page.params.id) {
|
|
|
91 |
continue: { id: Message["id"] };
|
92 |
}>();
|
93 |
|
94 |
+
// const handleSubmit = () => {
|
95 |
+
// if (loading) return;
|
96 |
+
// dispatch("message", message);
|
97 |
+
// message = "";
|
98 |
+
// };
|
99 |
+
|
100 |
+
function handleSubmit(ev?: CustomEvent<{ text: string }>) {
|
101 |
if (loading) return;
|
102 |
+
|
103 |
+
const content = ev?.detail?.text ?? message;
|
104 |
+
dispatch("message", content);
|
105 |
message = "";
|
106 |
+
}
|
107 |
|
108 |
let lastTarget: EventTarget | null = null;
|
109 |
|
|
|
449 |
disabled={isReadOnly || lastIsError}
|
450 |
modelHasTools={currentModel.tools}
|
451 |
modelIsMultimodal={currentModel.multimodal}
|
452 |
+
showThinking={showThinking}
|
453 |
/>
|
454 |
{/if}
|
455 |
|
src/lib/components/icons/IconThinking.svelte
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<script lang="ts">
|
2 |
+
interface Props {
|
3 |
+
classNames?: string;
|
4 |
+
}
|
5 |
+
let { classNames = "" }: Props = $props();
|
6 |
+
</script>
|
7 |
+
|
8 |
+
<svg
|
9 |
+
class={classNames}
|
10 |
+
xmlns="http://www.w3.org/2000/svg"
|
11 |
+
width="20"
|
12 |
+
height="20"
|
13 |
+
viewBox="0 0 24 24"
|
14 |
+
fill="none"
|
15 |
+
stroke="currentColor"
|
16 |
+
stroke-width="1.5"
|
17 |
+
stroke-linecap="round"
|
18 |
+
stroke-linejoin="round"
|
19 |
+
>
|
20 |
+
|
21 |
+
<path
|
22 |
+
stroke="none"
|
23 |
+
d="M0 0h24v24H0z"
|
24 |
+
fill="none"
|
25 |
+
/>
|
26 |
+
<path
|
27 |
+
d="M15.5 13a3.5 3.5 0 0 0 -3.5 3.5v1a3.5 3.5 0 0 0 7 0v-1.8" /><path d="M8.5 13a3.5 3.5 0 0 1 3.5 3.5v1a3.5 3.5 0 0 1 -7 0v-1.8" /><path d="M17.5 16a3.5 3.5 0 0 0 0 -7h-.5" /><path d="M19 9.3v-2.8a3.5 3.5 0 0 0 -7 0" /><path d="M6.5 16a3.5 3.5 0 0 1 0 -7h.5" /><path d="M5 9.3v-2.8a3.5 3.5 0 0 1 7 0v10" /></svg>
|
tgi_deploy.sh
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
# Abort if the checkout is missing so the commands below never run in the wrong directory.
cd text-generation-inference || exit 1
|
3 |
+
conda create -n tgi python=3.11
|
4 |
+
eval "$(/home/user/miniconda3/bin/conda shell.bash hook)"
|
5 |
+
conda install -c conda-forge pkg-config openssl
|
6 |
+
|
7 |
+
|
8 |
+
conda activate tgi
|
9 |
+
export OPENSSL_DIR=$CONDA_PREFIX && \
|
10 |
+
export OPENSSL_INCLUDE_DIR=$CONDA_PREFIX/include && \
|
11 |
+
export OPENSSL_LIB_DIR=$CONDA_PREFIX/lib && \
|
12 |
+
export PKG_CONFIG_PATH=$CONDA_PREFIX/lib/pkgconfig
|
13 |
+
export PYTHONPATH=/home/user/miniconda3/envs/tgi/lib/python3.11/site-packages
|
14 |
+
export LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH
|
15 |
+
# -f replaces an existing link so re-running the script does not fail with "File exists".
ln -sf /usr/lib/x86_64-linux-gnu/libnvidia-ml.so.1 /home/user/libnvidia-ml.so
|
16 |
+
|
17 |
+
nohup text-generation-launcher --model-id HuggingFaceH4/zephyr-7b-beta -p 7860 &> qwen2.log &
|
18 |
+
|
19 |
+
PYTHONPATH=/home/user/:$PYTHONPATH \
|
20 |
+
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH \
|
21 |
+
text-generation-launcher \
|
22 |
+
--model-id HuggingFaceH4/zephyr-7b-beta \
|
23 |
+
--disable-custom-kernels \
|
24 |
+
-p 7860
|
25 |
+
|
26 |
+
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH \
|
27 |
+
text-generation-launcher \
|
28 |
+
--model-id HuggingFaceH4/zephyr-7b-beta \
|
29 |
+
-p 7860
|
30 |
+
|
31 |
+
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH text-generation-launcher --model-id Qwen/Qwen2.5-VL-7B-Instruct -p 7860
|
32 |
+
LD_LIBRARY_PATH=/home/user/:$LD_LIBRARY_PATH text-generation-launcher --model-id Qwen/Qwen3-8B -p 7860
|
33 |
+
|
34 |
+
|
35 |
+
text-generation-launcher \
|
36 |
+
--model-id HuggingFaceH4/zephyr-7b-beta \
|
37 |
+
--disable-custom-kernels
|
38 |
+
|
39 |
+
|
40 |
+
# To run the server in the background, use:
|
41 |
+
nohup text-generation-launcher \
|
42 |
+
--model-id mistralai/Mistral-7B-v0.1 \
|
43 |
+
--port 8080 \
|
44 |
+
--max-batch-prefill-tokens 2048 \
|
45 |
+
--max-batch-total-tokens 4096 \
|
46 |
+
--max-input-length 4096 \
|
47 |
+
--max-total-tokens 8192 \
|
48 |
+
--max-batch-size 32 \
|
49 |
+
--max-waiting-tokens 20 \
|
50 |
+
--hostname 0.0.0.0 \
|
51 |
+
--cuda-memory-fraction 0.95 \
|
52 |
+
--max-concurrent-requests 128 \
|
53 |
+
--trust-remote-code \
|
54 |
+
--json-output > tgi.log 2>&1 &
|
55 |
+
|
56 |
+
|
57 |
+
# To stop the server, use:
|
58 |
+
ps aux | grep text-generation-launcher
|
59 |
+
pkill -f text-generation-launcher
|
60 |
+
kill -9 $(nvidia-smi | grep python | awk '{ print $5 }')
|
61 |
+
|
62 |
+
|
63 |
+
|
64 |
+
curl https://jdelavande-dev-tgi.hf.space/generate \
|
65 |
+
-X POST \
|
66 |
+
-H "Content-Type: application/json" \
|
67 |
+
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'
|
68 |
+
|
69 |
+
curl https://jdelavande-dev-tgi2.hf.space/ \
|
70 |
+
-X POST \
|
71 |
+
-H "Content-Type: application/json" \
|
72 |
+
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'
|
73 |
+
|
74 |
+
curl localhost:7860/generate \
|
75 |
+
-X POST \
|
76 |
+
-H "Content-Type: application/json" \
|
77 |
+
-d '{"inputs":"Bonjour !", "parameters":{"max_new_tokens":20}}'
|