jdelavande HF Staff committed on
Commit
145a107
·
1 Parent(s): 4f485a7

add streaming of energy, J conversion

Browse files
src/lib/components/chat/ChatMessage.svelte CHANGED
@@ -28,6 +28,7 @@
28
  import OpenReasoningResults from "./OpenReasoningResults.svelte";
29
  import Alternatives from "./Alternatives.svelte";
30
  import Vote from "./Vote.svelte";
 
31
 
32
  interface Props {
33
  message: Message;
@@ -198,24 +199,13 @@
198
  class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
199
  >
200
  <MarkdownRenderer content={message.content} sources={webSearchSources} />
201
- {#if message.metadata?.energy_wh || message.metadata?.duration_seconds}
202
- <div class="mt-2 flex gap-2">
203
- {#if message.metadata?.energy_wh}
204
- <div
205
- class="text-xs text-gray-400 dark:text-gray-500 bg-gray-100 dark:bg-gray-800 px-3 py-1 rounded w-fit"
206
- >
207
- {message.metadata.energy_wh.toFixed(4)} Wh {#if message.metadata?.model_name !== "Qwen/Qwen2.5-VL-7B-Instruct"} (estimated)
208
- {/if}
209
- </div>
210
- {/if}
211
- {#if message.metadata?.duration_seconds}
212
- <div
213
- class="text-xs text-gray-400 dark:text-gray-500 bg-gray-100 dark:bg-gray-800 px-3 py-1 rounded w-fit"
214
- >
215
- {message.metadata.duration_seconds} sec
216
- </div>
217
- {/if}
218
- </div>
219
  {/if}
220
 
221
  </div>
 
28
  import OpenReasoningResults from "./OpenReasoningResults.svelte";
29
  import Alternatives from "./Alternatives.svelte";
30
  import Vote from "./Vote.svelte";
31
+ import EnergyDisplay from './EnergyDisplay.svelte';
32
 
33
  interface Props {
34
  message: Message;
 
199
  class="prose max-w-none dark:prose-invert max-sm:prose-sm prose-headings:font-semibold prose-h1:text-lg prose-h2:text-base prose-h3:text-base prose-pre:bg-gray-800 dark:prose-pre:bg-gray-900"
200
  >
201
  <MarkdownRenderer content={message.content} sources={webSearchSources} />
202
+ {#if message.metadata?.duration_seconds || message.metadata?.energy_wh_sim}
203
+ <EnergyDisplay
204
+ energyWh={message.metadata?.energy_wh}
205
+ energyWhSim={message.metadata?.energy_wh_sim}
206
+ durationSeconds={message.metadata?.duration_seconds}
207
+ />
208
+
 
 
 
 
 
 
 
 
 
 
 
209
  {/if}
210
 
211
  </div>
src/lib/components/chat/ChatWindow.svelte CHANGED
@@ -354,11 +354,13 @@
354
  class="dark:via-gray-80 pointer-events-none absolute inset-x-0 bottom-0 z-0 mx-auto flex w-full max-w-3xl flex-col items-center justify-center bg-gradient-to-t from-white via-white/80 to-white/0 px-3.5 py-4 dark:border-gray-800 dark:from-gray-900 dark:to-gray-900/0 max-md:border-t max-md:bg-white max-md:dark:bg-gray-900 sm:px-5 md:py-8 xl:max-w-4xl [&>*]:pointer-events-auto"
355
  >
356
  {#if messages.length > 0}
357
- {@const totalEnergy = messages.reduce((total, msg) => total + (msg.metadata?.energy_wh || 0), 0)}
 
358
  {#if totalEnergy > 0}
359
  <div class="mb-4 flex items-center justify-center">
360
  <div class="text-xs text-gray-400 dark:text-gray-500 bg-gray-100 dark:bg-gray-800 px-3 py-1 rounded">
361
- Total Energy: {totalEnergy.toFixed(4)} Wh (about {((totalEnergy / 19) * 100).toFixed(2)}% of charging a phone)
 
362
  </div>
363
  </div>
364
  {/if}
 
354
  class="dark:via-gray-80 pointer-events-none absolute inset-x-0 bottom-0 z-0 mx-auto flex w-full max-w-3xl flex-col items-center justify-center bg-gradient-to-t from-white via-white/80 to-white/0 px-3.5 py-4 dark:border-gray-800 dark:from-gray-900 dark:to-gray-900/0 max-md:border-t max-md:bg-white max-md:dark:bg-gray-900 sm:px-5 md:py-8 xl:max-w-4xl [&>*]:pointer-events-auto"
355
  >
356
  {#if messages.length > 0}
357
+ {@const totalEnergy = messages.reduce((total, msg) => total + (msg.metadata?.energy_wh || msg.metadata?.energy_wh_sim || 0), 0)}
358
+ {@const isNotEstimated = typeof messages.at(-1)?.metadata?.energy_wh === "number" && messages.at(-1).metadata.energy_wh !== 0}
359
  {#if totalEnergy > 0}
360
  <div class="mb-4 flex items-center justify-center">
361
  <div class="text-xs text-gray-400 dark:text-gray-500 bg-gray-100 dark:bg-gray-800 px-3 py-1 rounded">
362
+ Total Energy: {totalEnergy.toFixed(4)} Wh {#if !isNotEstimated} (estimated) {/if}
363
+ (about {((totalEnergy / 19) * 100).toFixed(2)}% of charging a phone)
364
  </div>
365
  </div>
366
  {/if}
src/lib/components/chat/EnergyDisplay.svelte ADDED
@@ -0,0 +1,102 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <script lang="ts">
2
+ export let energyWh: number | undefined = undefined;
3
+ export let energyWhSim: number | undefined = undefined;
4
+ export let durationSeconds: number | undefined = undefined;
5
+
6
+ let showJoules = false;
7
+ let showTooltip = false;
8
+
9
+ const isEstimated = !(typeof energyWh === 'number' && energyWh !== 0);
10
+ const energyToDisplay = isEstimated ? energyWhSim : energyWh;
11
+
12
+ function convertToJoules(wh: number): number {
13
+ return wh * 3600;
14
+ }
15
+ </script>
16
+
17
+ <style>
18
+ .energy-box {
19
+ transition: transform 0.2s ease;
20
+ cursor: pointer;
21
+ position: relative;
22
+ }
23
+ .energy-box:hover {
24
+ transform: scale(1.05);
25
+ }
26
+ .tooltip {
27
+ position: absolute;
28
+ top: 100%;
29
+ left: 0;
30
+ z-index: 10;
31
+ margin-top: 0.25rem;
32
+ padding: 0.5rem;
33
+ background-color: #f3f4f6;
34
+ color: #1f2937;
35
+ font-size: 0.75rem;
36
+ border-radius: 0.25rem;
37
+ box-shadow: 0 0 10px rgba(0, 0, 0, 0.1);
38
+ width: max-content;
39
+ max-width: 400px;
40
+ }
41
+ .info-button {
42
+ transition: transform 0.2s ease;
43
+ cursor: pointer;
44
+ position: relative;
45
+ }
46
+ .info-button:hover {
47
+ transform: scale(1.2);
48
+ }
49
+ </style>
50
+
51
+ {#if durationSeconds || energyToDisplay}
52
+ <div class="mt-2 flex gap-2 items-center relative">
53
+
54
+ <!-- Info button -->
55
+ <div
56
+ class="relative"
57
+ on:mouseover={() => (showTooltip = true)}
58
+ on:mouseleave={() => (showTooltip = false)}>
59
+ <button
60
+ class="text-xs text-gray-500 dark:text-gray-500 bg-gray-100 dark:bg-gray-800 px-2 py-1 rounded-full info-button">
61
+
62
+ </button>
63
+ </div>
64
+
65
+
66
+ <!-- Energy Box -->
67
+ {#if energyToDisplay}
68
+ <div
69
+ class="text-xs text-gray-500 dark:text-gray-500 bg-gray-100 dark:bg-gray-800 px-3 py-1 rounded w-fit energy-box"
70
+ on:click={() => (showJoules = !showJoules)}
71
+ >
72
+ {#if showJoules}
73
+ {convertToJoules(energyToDisplay).toFixed(2)} J {isEstimated ? "(estimated)" : ""}
74
+ {:else}
75
+ {energyToDisplay.toFixed(4)} Wh {isEstimated ? "(estimated)" : ""}
76
+ {/if}
77
+ </div>
78
+ {/if}
79
+
80
+
81
+ <!-- Duration -->
82
+ {#if durationSeconds}
83
+ <div
84
+ class="text-xs text-gray-500 dark:text-gray-500 bg-gray-100 dark:bg-gray-800 px-3 py-1 rounded w-fit"
85
+ >
86
+ {durationSeconds} sec
87
+ </div>
88
+ {/if}
89
+
90
+
91
+ <!-- Tooltip -->
92
+ {#if showTooltip}
93
+ <div class="tooltip">
94
+ {#if isEstimated}
95
+ Estimated energy consumption based on the average GPU power and inference duration. Use the Qwen/Qwen2.5-VL-7B-Instruct model for exact results.
96
+ {:else}
97
+ Energy consumption measured directly on the GPU during inference.
98
+ {/if}
99
+ </div>
100
+ {/if}
101
+ </div>
102
+ {/if}
src/lib/server/textGeneration/generate.ts CHANGED
@@ -128,38 +128,15 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
128
  webSources: output.webSources,
129
  };
130
 
131
- // simulation of metadata
132
- const durationInSeconds = (new Date().getTime() - startTime.getTime()) / 1000;
133
-
134
- // LLama 3.1 8B uses 17.38 Wh for 1000 queries according to https://huggingface.co/spaces/AIEnergyScore/Leaderboard
135
-
136
- const energyUsedwh_sim = 50 * (durationInSeconds / 3600); // Using P = 50W (H100 can use up to 700W)
137
- console.log("energyUsedwh_sim", energyUsedwh_sim);
138
- let energyUsedwh = 0;
139
- console.log("output", output);
140
- if (output.energy_consumption === undefined) {
141
- energyUsedwh = energyUsedwh_sim;
142
- } else {
143
- // if the model has energy consumption, we use it instead
144
- energyUsedwh = output.energy_consumption / 1000 / 3600; // converting from mJ to Wh
145
  }
146
- console.log("energyUsedwh", energyUsedwh);
147
- console.log("model.name", model.name);
148
- yield {
149
- type: MessageUpdateType.Metadata,
150
- key: "energy_wh",
151
- value: energyUsedwh,
152
- };
153
- yield {
154
- type: MessageUpdateType.Metadata,
155
- key: "duration_seconds",
156
- value: durationInSeconds,
157
- };
158
- yield {
159
- type: MessageUpdateType.Metadata,
160
- key: "model_name",
161
- value: model.name,
162
- };
163
 
164
  continue;
165
  }
@@ -222,6 +199,24 @@ Do not use prefixes such as Response: or Answer: when answering to the user.`,
222
  yield { type: MessageUpdateType.Stream, token: output.token.text };
223
  }
224
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
225
  // abort check
226
  const date = AbortedGenerations.getInstance().getList().get(conv._id.toString());
227
  if (date && date > promptedAt) break;
 
128
  webSources: output.webSources,
129
  };
130
 
131
+ if (output.energy_consumption !== undefined) {
132
+ const energyUsedwh = output.energy_consumption / 1000 / 3600; // converting from mJ to Wh (mJ -> J -> Wh)
133
+ console.log("energyUsedwh", energyUsedwh);
134
+ yield {
135
+ type: MessageUpdateType.Metadata,
136
+ key: "energy_wh",
137
+ value: energyUsedwh,
138
+ };
 
 
 
 
 
 
139
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
140
 
141
  continue;
142
  }
 
199
  yield { type: MessageUpdateType.Stream, token: output.token.text };
200
  }
201
 
202
+ if (!output.token.special) {
203
+ // simulation of metadata
204
+ const durationInSeconds = (new Date().getTime() - startTime.getTime()) / 1000;
205
+ const energyUsedwh_sim = 55 * (durationInSeconds / 3600); // Using P = 55W (H100 can use up to 700W)
206
+ console.log("energyUsedwh_sim", energyUsedwh_sim);
207
+ console.log("model.name", model.name);
208
+ yield {
209
+ type: MessageUpdateType.Metadata,
210
+ key: "energy_wh_sim",
211
+ value: energyUsedwh_sim,
212
+ };
213
+ yield {
214
+ type: MessageUpdateType.Metadata,
215
+ key: "duration_seconds",
216
+ value: durationInSeconds,
217
+ };
218
+ }
219
+
220
  // abort check
221
  const date = AbortedGenerations.getInstance().getList().get(conv._id.toString());
222
  if (date && date > promptedAt) break;
src/routes/conversation/[id]/+page.svelte CHANGED
@@ -328,6 +328,15 @@
328
  if (update.subtype === MessageReasoningUpdateType.Stream) {
329
  messageToWriteTo.reasoning += update.token;
330
  }
 
 
 
 
 
 
 
 
 
331
  }
332
  }
333
  } catch (err) {
 
328
  if (update.subtype === MessageReasoningUpdateType.Stream) {
329
  messageToWriteTo.reasoning += update.token;
330
  }
331
+
332
+ } else if (update.type === MessageUpdateType.Metadata) {
333
+ if (!messageToWriteTo.metadata) messageToWriteTo.metadata = {};
334
+ messageToWriteTo.metadata = {
335
+ ...messageToWriteTo.metadata,
336
+ [update.key]: update.value,
337
+ };
338
+
339
+
340
  }
341
  }
342
  } catch (err) {