Ruurd committed
Commit 4b15ccd · 1 Parent(s): 166106f

Generate with enter

Initialize with small Llama model

Files changed (1)
  1. app.py +30 -9
app.py CHANGED

@@ -2,7 +2,8 @@ import os
 import torch
 import gradio as gr
 import spaces
-from transformers import AutoTokenizer, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
+
 
 # Use a global variable to hold the current model and tokenizer
 current_model = None
@@ -30,16 +31,33 @@ def load_model_on_selection(model_name, progress=gr.Progress(track_tqdm=False)):
 def generate_text(prompt):
     global current_model, current_tokenizer
     if current_model is None or current_tokenizer is None:
-        return "⚠️ No model loaded yet. Please select a model first."
-    current_model.to('cuda')
+        yield "⚠️ No model loaded yet. Please select a model first."
+
+    current_model.to("cuda")
     inputs = current_tokenizer(prompt, return_tensors="pt").to(current_model.device)
-    outputs = current_model.generate(**inputs, max_new_tokens=256)
-    return current_tokenizer.decode(outputs[0], skip_special_tokens=True)
+
+    output_ids = []
+    streamer_output = ""
+
+    def token_streamer():
+        for token_id in current_model.generate(
+            **inputs,
+            max_new_tokens=256,
+            do_sample=False,
+            return_dict_in_generate=True,
+            output_scores=False
+        ).sequences[0]:
+            output_ids.append(token_id.item())
+            yield current_tokenizer.decode(output_ids, skip_special_tokens=True)
+
+    for partial_output in token_streamer():
+        yield partial_output
+
 
 # Model options
 model_choices = [
-    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
     "meta-llama/Llama-3.2-3B-Instruct",
+    "deepseek-ai/DeepSeek-R1-Distill-Llama-8B",
     "google/gemma-7b"
 ]
 
@@ -51,14 +69,17 @@ with gr.Blocks() as demo:
     model_status = gr.Textbox(label="Model Status", interactive=False)
 
     input_text = gr.Textbox(label="Input Clinical Text")
-    output_text = gr.Textbox(label="Generated Output")
-
     generate_btn = gr.Button("Generate")
 
+    output_text = gr.Textbox(label="Generated Output")
+
     # Load model on dropdown change
     model_selector.change(fn=load_model_on_selection, inputs=model_selector, outputs=model_status)
 
     # Generate with current model
-    generate_btn.click(fn=generate_text, inputs=input_text, outputs=output_text)
+    generate_btn.click(fn=generate_text, inputs=input_text, outputs=output_text, stream=True)
+    input_text.submit(fn=generate_text, inputs=input_text, outputs=output_text, stream=True)
+
 
+load_model_on_selection("meta-llama/Llama-3.2-3B-Instruct")
 demo.launch()
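
A note on the streaming approach in this revision: `current_model.generate(...)` runs to completion before `.sequences[0]` can be iterated, so `token_streamer` re-decodes an already finished generation rather than streaming tokens as they are produced, and the newly imported `TextStreamer` is never used. Gradio also streams the output of a generator function on its own; as far as I know, its `.click()`/`.submit()` listeners take no `stream=True` keyword. A minimal sketch of token-by-token streaming with `transformers.TextIteratorStreamer` follows; it is an assumption on my part, not what this commit does, and it reuses the model name and widget labels from app.py above.

# Sketch under assumptions: token-by-token streaming via TextIteratorStreamer,
# reusing the model name and widget labels from app.py above.
from threading import Thread

import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

current_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-3B-Instruct")
current_model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-3.2-3B-Instruct", device_map="auto"
)

def generate_text(prompt):
    if current_model is None or current_tokenizer is None:
        yield "⚠️ No model loaded yet. Please select a model first."
        return  # stop here instead of falling through to a None model

    inputs = current_tokenizer(prompt, return_tensors="pt").to(current_model.device)
    # The streamer yields decoded text chunks while generate() is still running.
    streamer = TextIteratorStreamer(
        current_tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    # generate() blocks, so run it in a background thread and drain the streamer here.
    Thread(
        target=current_model.generate,
        kwargs=dict(**inputs, max_new_tokens=256, do_sample=False, streamer=streamer),
    ).start()

    partial = ""
    for chunk in streamer:
        partial += chunk
        yield partial  # Gradio re-renders the output textbox on every yield

with gr.Blocks() as demo:
    input_text = gr.Textbox(label="Input Clinical Text")
    generate_btn = gr.Button("Generate")
    output_text = gr.Textbox(label="Generated Output")

    # Generator outputs stream on their own; no stream=True keyword is needed.
    generate_btn.click(fn=generate_text, inputs=input_text, outputs=output_text)
    input_text.submit(fn=generate_text, inputs=input_text, outputs=output_text)

demo.launch()

Here `generate()` runs in the background so the main thread can push partial text to the textbox as it arrives, and the `return` after the warning `yield` keeps the function from continuing with a `None` model, which the committed version would do.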