randomUser69696 committed on
Commit b40e24a · 1 Parent(s): c88adec

testing out

.gradio/flagged/dataset1.csv ADDED
@@ -0,0 +1,2 @@
1
+ name,output,timestamp
2
+ yash,Hello yash!!,2025-03-17 01:21:30.390134
.ipynb_checkpoints/app-checkpoint.ipynb ADDED
@@ -0,0 +1,396 @@
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": null,
6
+ "metadata": {},
7
+ "outputs": [],
8
+ "source": [
9
+ "from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer\n",
10
+ "import torch"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": null,
16
+ "metadata": {},
17
+ "outputs": [],
18
+ "source": [
19
+ "model_id = \"google/gemma-3-12b-it\""
20
+ ]
21
+ },
22
+ {
23
+ "cell_type": "code",
24
+ "execution_count": null,
25
+ "metadata": {
26
+ "scrolled": true
27
+ },
28
+ "outputs": [],
29
+ "source": [
30
+ "\n",
31
+ "\n",
32
+ "processor = AutoProcessor.from_pretrained(model_id, padding_side=\"left\")\n",
33
+ "model = Gemma3ForConditionalGeneration.from_pretrained(\n",
34
+ " model_id, device_map=\"auto\", torch_dtype=torch.bfloat16, attn_implementation=\"eager\"\n",
35
+ ")"
36
+ ]
37
+ },
38
+ {
39
+ "cell_type": "code",
40
+ "execution_count": null,
41
+ "metadata": {},
42
+ "outputs": [
43
+ {
44
+ "data": {
45
+ "application/vnd.jupyter.widget-view+json": {
46
+ "model_id": "97908a20851e455c895464dc7228dbb6",
47
+ "version_major": 2,
48
+ "version_minor": 0
49
+ },
50
+ "text/plain": [
51
+ "Fetching 2 files: 0%| | 0/2 [00:00<?, ?it/s]"
52
+ ]
53
+ },
54
+ "metadata": {},
55
+ "output_type": "display_data"
56
+ },
57
+ {
58
+ "data": {
59
+ "application/vnd.jupyter.widget-view+json": {
60
+ "model_id": "c8011adeae5247f89612a93c3ff9e963",
61
+ "version_major": 2,
62
+ "version_minor": 0
63
+ },
64
+ "text/plain": [
65
+ "model-00001-of-00002.safetensors: 2%|1 | 83.9M/4.96G [00:00<?, ?B/s]"
66
+ ]
67
+ },
68
+ "metadata": {},
69
+ "output_type": "display_data"
70
+ },
71
+ {
72
+ "data": {
73
+ "application/vnd.jupyter.widget-view+json": {
74
+ "model_id": "b604d0bd3be24ddda117cc36a8752c8e",
75
+ "version_major": 2,
76
+ "version_minor": 0
77
+ },
78
+ "text/plain": [
79
+ "model-00002-of-00002.safetensors: 3%|3 | 126M/3.64G [00:00<?, ?B/s]"
80
+ ]
81
+ },
82
+ "metadata": {},
83
+ "output_type": "display_data"
84
+ },
85
+ {
86
+ "name": "stderr",
87
+ "output_type": "stream",
88
+ "text": [
89
+ "Error while downloading from https://cdn-lfs-us-1.hf.co/repos/83/76/8376859a3a783fbbf8c6b8aff73e386e0379657f480bf946d481f9a936d4ceab/fdde0e5aa5ced0fa203b3d50f4ab78168b7e3a3e08c6349f5cc9326666e1bb13?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00002-of-00002.safetensors%3B+filename%3D%22model-00002-of-00002.safetensors%22%3B&Expires=1742502952&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0MjUwMjk1Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzgzLzc2LzgzNzY4NTlhM2E3ODNmYmJmOGM2YjhhZmY3M2UzODZlMDM3OTY1N2Y0ODBiZjk0NmQ0ODFmOWE5MzZkNGNlYWIvZmRkZTBlNWFhNWNlZDBmYTIwM2IzZDUwZjRhYjc4MTY4YjdlM2EzZTA4YzYzNDlmNWNjOTMyNjY2NmUxYmIxMz9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=h9yYRgT8Ui5-lDdTma5cMxNLJQn3a5qkQb1B9ICLWd2hPe68qnwhgkSyyze48E9LkP%7EstZozqs-6jQFBLFnbShc098FQdwXXw4NonedJMgex2loENeJ4Juaw7JVdhyUo1J1k8BM0j3t97kO%7ESsL2XN7mJ-DdqQqr6bbD5KmD3kxwaEMOt0Bo9EXSt%7EPUxFV1v9Z91m2XrJjjmgLKZEOY05K8p15-3VVhIFlQe1PcGVb72lpp3nMaXgPFoMJVY6NUVKIhszzJDFc6JhBfeO31Rb6Q9YmO-mm8duFSRCxUhqYzVMx-E4MN0iJO3lMrK0aTDXm6Ps8Fk1v8Rk4Bj6auew__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.hf.co', port=443): Read timed out.\n",
90
+ "Trying to resume download...\n",
91
+ "Error while downloading from https://cdn-lfs-us-1.hf.co/repos/83/76/8376859a3a783fbbf8c6b8aff73e386e0379657f480bf946d481f9a936d4ceab/eb5fd5e97ddd07b56778733e9653c07312529cb00980a318fc3e1c4e3b5a8f1f?response-content-disposition=inline%3B+filename*%3DUTF-8%27%27model-00001-of-00002.safetensors%3B+filename%3D%22model-00001-of-00002.safetensors%22%3B&Expires=1742502952&Policy=eyJTdGF0ZW1lbnQiOlt7IkNvbmRpdGlvbiI6eyJEYXRlTGVzc1RoYW4iOnsiQVdTOkVwb2NoVGltZSI6MTc0MjUwMjk1Mn19LCJSZXNvdXJjZSI6Imh0dHBzOi8vY2RuLWxmcy11cy0xLmhmLmNvL3JlcG9zLzgzLzc2LzgzNzY4NTlhM2E3ODNmYmJmOGM2YjhhZmY3M2UzODZlMDM3OTY1N2Y0ODBiZjk0NmQ0ODFmOWE5MzZkNGNlYWIvZWI1ZmQ1ZTk3ZGRkMDdiNTY3Nzg3MzNlOTY1M2MwNzMxMjUyOWNiMDA5ODBhMzE4ZmMzZTFjNGUzYjVhOGYxZj9yZXNwb25zZS1jb250ZW50LWRpc3Bvc2l0aW9uPSoifV19&Signature=Vr9B3wdQIVtGgOocVA7IUl3gNH6pmGYUaeZ4TnqxQz7uO3BAXFQDsIcffFn1hePw-huNkRXwKBLMiPc7wD%7E6nAi4INXD7ydOfb8NDcIuzk0TZfHm0d4RheWz14OWuD4gohXh1QAa%7EF7dxn0X2OvOJ1C8hPlElE6G8g9NtGYncfWCASAkjsmFhyGMDg8i5MAt5hU5oscdJ0gpeckc1OjRNdqEISslG2jHSiTBeEn18UIyLMB3OGSxuaiguPoGPFpBUCTIkeSkUFbWCnnwOLkO7gD6hLCVw9F%7EiBZN4r92VdBlWCeRb6A%7ELH46Vo480H742XAACEEdXrqHSI2VWq9xAA__&Key-Pair-Id=K24J24Z295AEI9: HTTPSConnectionPool(host='cdn-lfs-us-1.hf.co', port=443): Read timed out.\n",
92
+ "Trying to resume download...\n"
93
+ ]
94
+ }
95
+ ],
96
+ "source": [
97
+ "from transformers import pipeline\n",
98
+ "import torch\n",
99
+ "\n",
100
+ "pipe = pipeline(\n",
101
+ " \"image-text-to-text\",\n",
102
+ " model=\"google/gemma-3-4b-it\",\n",
103
+ " device=\"cuda\",\n",
104
+ " torch_dtype=torch.bfloat16,\n",
105
+ " cache_dir=\"F:\\\\huggingface_cache\"\n",
106
+ ")\n"
107
+ ]
108
+ },
109
+ {
110
+ "cell_type": "code",
111
+ "execution_count": null,
112
+ "metadata": {},
113
+ "outputs": [],
114
+ "source": [
115
+ "# pip install accelerate\n",
116
+ "print(\"Hi\")\n",
117
+ "from transformers import AutoProcessor, Gemma3ForConditionalGeneration\n",
118
+ "import requests\n",
119
+ "import torch\n",
120
+ "from PIL import Image\n",
121
+ "\n",
122
+ "print(\"Done\")\n",
123
+ "model_id = \"google/gemma-3-4b-it\""
124
+ ]
125
+ },
126
+ {
127
+ "cell_type": "code",
128
+ "execution_count": null,
129
+ "metadata": {
130
+ "scrolled": true
131
+ },
132
+ "outputs": [],
133
+ "source": [
134
+ "pip install bitsandbytes\n"
135
+ ]
136
+ },
137
+ {
138
+ "cell_type": "code",
139
+ "execution_count": null,
140
+ "metadata": {},
141
+ "outputs": [],
142
+ "source": [
143
+ "import torch\n",
144
+ "from transformers import Gemma3ForConditionalGeneration, AutoProcessor\n",
145
+ "from transformers import BitsAndBytesConfig\n",
146
+ "device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
147
+ "print(f\"Using device: {device}\")\n",
148
+ "\n",
149
+ "model_id = \"google/gemma-3-4b-it\"\n",
150
+ "\n",
151
+ "\n",
152
+ "quantization_config = BitsAndBytesConfig(load_in_8bit=True)\n",
153
+ "# Load the model and move it to the correct device\n",
154
+ "model = Gemma3ForConditionalGeneration.from_pretrained(\n",
155
+ " model_id,\n",
156
+ " cache_dir=\"F:\\\\huggingface_cache\",\n",
157
+ " device_map=\"auto\", # Automatically assigns layers to available devices\n",
158
+ " quantization_config=quantization_config\n",
159
+ ").eval()\n",
160
+ "\n",
161
+ "# Load the processor\n",
162
+ "processor = AutoProcessor.from_pretrained(model_id, cache_dir=\"F:\\\\huggingface_cache\")"
163
+ ]
164
+ },
165
+ {
166
+ "cell_type": "code",
167
+ "execution_count": null,
168
+ "metadata": {},
169
+ "outputs": [],
170
+ "source": [
171
+ "messages = [\n",
172
+ " {\n",
173
+ " \"role\": \"system\",\n",
174
+ " \"content\": [{\"type\": \"text\", \"text\": \"You are a helpful assistant.\"}]\n",
175
+ " },\n",
176
+ " {\n",
177
+ " \"role\": \"user\",\n",
178
+ " \"content\": [\n",
179
+ " \n",
180
+ " {\"type\": \"text\", \"text\": \"Whats the color of sky?.\"}\n",
181
+ " ]\n",
182
+ " }\n",
183
+ "]"
184
+ ]
185
+ },
186
+ {
187
+ "cell_type": "code",
188
+ "execution_count": null,
189
+ "metadata": {},
190
+ "outputs": [],
191
+ "source": [
192
+ "inputs = processor.apply_chat_template(\n",
193
+ " messages, add_generation_prompt=True, tokenize=True,\n",
194
+ " return_dict=True, return_tensors=\"pt\"\n",
195
+ ").to(model.device, dtype=torch.bfloat16)"
196
+ ]
197
+ },
198
+ {
199
+ "cell_type": "code",
200
+ "execution_count": null,
201
+ "metadata": {},
202
+ "outputs": [],
203
+ "source": [
204
+ "input_len = inputs[\"input_ids\"].shape[-1]"
205
+ ]
206
+ },
207
+ {
208
+ "cell_type": "code",
209
+ "execution_count": null,
210
+ "metadata": {},
211
+ "outputs": [],
212
+ "source": [
213
+ "with torch.inference_mode():\n",
214
+ " generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)\n",
215
+ " generation = generation[0][input_len:]\n"
216
+ ]
217
+ },
218
+ {
219
+ "cell_type": "code",
220
+ "execution_count": null,
221
+ "metadata": {},
222
+ "outputs": [],
223
+ "source": [
224
+ "\n",
225
+ "with torch.inference_mode():\n",
226
+ " generation = model.generate(**inputs, max_new_tokens=100, do_sample=False)\n",
227
+ " generation = generation[0][input_len:]\n",
228
+ "\n",
229
+ "decoded = processor.decode(generation, skip_special_tokens=True)\n",
230
+ "print(decoded)\n",
231
+ "\n",
232
+ "# **Overall Impression:** The image is a close-up shot of a vibrant garden scene, \n",
233
+ "# focusing on a cluster of pink cosmos flowers and a busy bumblebee. \n",
234
+ "# It has a slightly soft, natural feel, likely captured in daylight.\n"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": null,
240
+ "metadata": {},
241
+ "outputs": [],
242
+ "source": []
243
+ },
244
+ {
245
+ "cell_type": "code",
246
+ "execution_count": null,
247
+ "metadata": {},
248
+ "outputs": [],
249
+ "source": [
250
+ "access_token='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'"
251
+ ]
252
+ },
253
+ {
254
+ "cell_type": "code",
255
+ "execution_count": null,
256
+ "metadata": {},
257
+ "outputs": [],
258
+ "source": []
259
+ },
260
+ {
261
+ "cell_type": "code",
262
+ "execution_count": null,
263
+ "metadata": {},
264
+ "outputs": [],
265
+ "source": []
266
+ },
267
+ {
268
+ "cell_type": "code",
269
+ "execution_count": null,
270
+ "metadata": {},
271
+ "outputs": [],
272
+ "source": []
273
+ },
274
+ {
275
+ "cell_type": "code",
276
+ "execution_count": null,
277
+ "metadata": {},
278
+ "outputs": [],
279
+ "source": []
280
+ },
281
+ {
282
+ "cell_type": "code",
283
+ "execution_count": null,
284
+ "metadata": {},
285
+ "outputs": [],
286
+ "source": []
287
+ },
288
+ {
289
+ "cell_type": "code",
290
+ "execution_count": null,
291
+ "metadata": {},
292
+ "outputs": [],
293
+ "source": []
294
+ },
295
+ {
296
+ "cell_type": "code",
297
+ "execution_count": null,
298
+ "metadata": {},
299
+ "outputs": [],
300
+ "source": []
301
+ },
302
+ {
303
+ "cell_type": "code",
304
+ "execution_count": null,
305
+ "metadata": {},
306
+ "outputs": [],
307
+ "source": []
308
+ },
309
+ {
310
+ "cell_type": "code",
311
+ "execution_count": null,
312
+ "metadata": {},
313
+ "outputs": [],
314
+ "source": [
315
+ "# Use a pipeline as a high-level helper\n",
316
+ "from transformers import pipeline\n",
317
+ "\n",
318
+ "messages = [\n",
319
+ " {\"role\": \"user\", \"content\": \"Who are you?\"},\n",
320
+ "]\n",
321
+ "pipe = pipeline(\"image-text-to-text\", model=\"google/gemma-3-4b-it\")\n",
322
+ "pipe(messages)"
323
+ ]
324
+ },
325
+ {
326
+ "cell_type": "code",
327
+ "execution_count": null,
328
+ "metadata": {},
329
+ "outputs": [],
330
+ "source": []
331
+ },
332
+ {
333
+ "cell_type": "code",
334
+ "execution_count": null,
335
+ "metadata": {},
336
+ "outputs": [],
337
+ "source": []
338
+ },
339
+ {
340
+ "cell_type": "code",
341
+ "execution_count": null,
342
+ "metadata": {},
343
+ "outputs": [],
344
+ "source": []
345
+ },
346
+ {
347
+ "cell_type": "code",
348
+ "execution_count": null,
349
+ "metadata": {},
350
+ "outputs": [],
351
+ "source": []
352
+ },
353
+ {
354
+ "cell_type": "code",
355
+ "execution_count": null,
356
+ "metadata": {},
357
+ "outputs": [],
358
+ "source": []
359
+ },
360
+ {
361
+ "cell_type": "code",
362
+ "execution_count": null,
363
+ "metadata": {},
364
+ "outputs": [],
365
+ "source": []
366
+ },
367
+ {
368
+ "cell_type": "code",
369
+ "execution_count": null,
370
+ "metadata": {},
371
+ "outputs": [],
372
+ "source": []
373
+ }
374
+ ],
375
+ "metadata": {
376
+ "kernelspec": {
377
+ "display_name": "hf_env_kernel",
378
+ "language": "python",
379
+ "name": "hf_env"
380
+ },
381
+ "language_info": {
382
+ "codemirror_mode": {
383
+ "name": "ipython",
384
+ "version": 3
385
+ },
386
+ "file_extension": ".py",
387
+ "mimetype": "text/x-python",
388
+ "name": "python",
389
+ "nbconvert_exporter": "python",
390
+ "pygments_lexer": "ipython3",
391
+ "version": "3.12.6"
392
+ }
393
+ },
394
+ "nbformat": 4,
395
+ "nbformat_minor": 4
396
+ }
app.py CHANGED
@@ -1,7 +1,141 @@
 
1
  import gradio as gr
2
3
  def greet(name):
4
- return "Hello " + name + "!!"
5
 
6
  demo = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- demo.launch()
 
1
+ from transformers import AutoProcessor, Gemma3ForConditionalGeneration
2
  import gradio as gr
3
+ # from PIL import Image
4
+ # import requests
5
+ # import torch
6
+ # import os
7
+ # from transformers import Gemma3ForConditionalGeneration, AutoProcessor
8
+ # print("hey")
9
+ # # Set the cache directory
10
+ # cache_dir = "F:\\huggingface_cache"
11
 
12
+ # # Set environment variables for good measure
13
+ # # os.environ["TRANSFORMERS_CACHE"] = cache_dir
14
+ # # os.environ["HF_HOME"] = cache_dir
15
+ # # os.environ["HUGGINGFACE_HUB_CACHE"] = cache_dir
16
+
17
+ # # Model ID
18
+ # model_id = "gemma3:latest"
19
+
20
+ # from ollama import chat
21
+ # from ollama import ChatResponse
22
+
23
+ # def _get_response(message):
24
+ # messages = [
25
+ # {
26
+ # 'role': 'user',
27
+ # 'content': message,
28
+ # },
29
+ # ]
30
+ # response: ChatResponse = chat(model=model_id, messages=messages)
31
+ # return response.message.content
32
+
33
+
34
+
35
+ # import requests
36
+ # import base64
37
+ # # Function to encode image to Base64
38
+ # def encode_image_to_base64(image_path):
39
+ # with open(image_path, "rb") as image_file:
40
+ # return base64.b64encode(image_file.read()).decode("utf-8")
41
+
42
+ # def image_process():
43
+ # image_path = r"F:\HF\gemma-examples\WhatsApp Image 2025-03-21 at 10.05.06 PM.jpeg" # Replace with your image path
44
+
45
+ # # Encode the image
46
+ # image_base64 = encode_image_to_base64(image_path)
47
+
48
+ # # Ollama API endpoint
49
+ # OLLAMA_URL = "http://localhost:11434/api/generate"
50
+
51
+ # # Payload for the API request
52
+ # payload = {
53
+ # "model": model_id, # Specify the model version
54
+ # "prompt": "Given image is a handwritten text in english language, read it carefully and extract all the text mentioned in it.",
55
+ # "images": [image_base64], # List of Base64-encoded images
56
+ # "stream": False
57
+ # }
58
+
59
+ # # Headers for the request
60
+ # headers = {
61
+ # "Content-Type": "application/json"
62
+ # }
63
+
64
+ # # Send the POST request
65
+ # response = requests.post(OLLAMA_URL, json=payload, headers=headers)
66
+
67
+ # # Check the response
68
+ # if response.status_code == 200:
69
+ # data = response.json()
70
+ # print("Response from Gemma 3:")
71
+ # print(data.get("response", "No response field in the API response."))
72
+ # else:
73
+ # print(f"Error: {response.status_code}")
74
+ # print(response.text)
75
+ # return response.text
76
+
77
+ # # Path to your image
78
+
79
+
80
+
81
+ # def _hit_endpoint(name):
82
+ # import requests
83
+ # import json
84
+
85
+ # # Define the URL of the Ollama server
86
+ # OLLAMA_URL = "http://localhost:11434/api/generate"
87
+
88
+ # # Define the request payload
89
+ # payload = {
90
+ # "model": model_id, # Change this to your desired model
91
+ # "prompt": name,
92
+ # "stream": False
93
+ # }
94
+
95
+ # # Make the request
96
+ # response = requests.post(OLLAMA_URL, json=payload)
97
+
98
+ # # Parse and print the response
99
+ # if response.status_code == 200:
100
+ # data = response.json()
101
+ # print(data["response"]) # Extracting the generated text
102
+ # return data["response"]
103
+ # else:
104
+ # print(f"Error: {response.status_code} - {response.text}")
105
+ # return "An error occurred!"
106
+ import os
107
+ import torch
108
+ from transformers import AutoProcessor, Gemma3ForConditionalGeneration, TextIteratorStreamer
109
+ model_id = os.getenv("MODEL_ID", "google/gemma-3-12b-it")
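+ # Load the chat processor and the Gemma 3 model once at startup; device_map="auto" spreads layers across available devices.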
110
+ processor = AutoProcessor.from_pretrained(model_id, padding_side="left")
111
+ model = Gemma3ForConditionalGeneration.from_pretrained(
112
+ model_id, device_map="auto", torch_dtype=torch.bfloat16, attn_implementation="eager"
113
+ )
114
+
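+ # Generate a reply for a single user message using the Gemma 3 chat template.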
115
+ def run_fn(message):
116
+ messages = []
117
+
118
+ messages.append({"role": "user", "content": (message)})
119
+
120
+ inputs = processor.apply_chat_template(
121
+ messages,
122
+ add_generation_prompt=True,
123
+ tokenize=True,
124
+ return_dict=True,
125
+ return_tensors="pt",
126
+ ).to(device=model.device, dtype=torch.bfloat16)
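+ # A streamer is created for token-by-token output, but generate() below is called synchronously, so the reply is only returned once generation finishes.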
127
+ streamer = TextIteratorStreamer(processor, timeout=30.0, skip_prompt=True, skip_special_tokens=True)
128
+ max_new_tokens = 100
129
+ generate_kwargs = dict(
130
+ inputs,
131
+ streamer=streamer,
132
+ max_new_tokens=max_new_tokens,
133
+ )
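+ # Run generation and decode only the newly generated tokens so the Gradio text output gets a plain string.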
134
+ outputs = model.generate(**generate_kwargs)
135
+ return processor.decode(outputs[0][inputs["input_ids"].shape[-1]:], skip_special_tokens=True)
136
+ # return None
137
  def greet(name):
138
+ return run_fn(name)
139
 
140
  demo = gr.Interface(fn=greet, inputs="text", outputs="text")
141
+ demo.launch()