IST199655 committed
Commit dfc584d · 1 Parent(s): cb2fe42

Update app.py

Files changed (1): app.py (+129 −61)
app.py CHANGED
@@ -6,86 +6,154 @@ import os
 Copied from inference in colab notebook
 """
 
-from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
-from threading import Thread
 
 # Load model and tokenizer globally to avoid reloading for every request
-base_model = "google-t5/t5-small"
 model_path = "Mat17892/t5small_enfr_opus"
 
-# Load tokenizer
-tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=True, legacy=False)
 
-# Load the base model (google-t5/t5-small)
-base_model = AutoModelForSeq2SeqLM.from_pretrained(base_model, token=os.getenv("huggingface_token"))
 
-# Load the LoRA adapter on top of the base model
-from peft import PeftModel
-model = PeftModel.from_pretrained(base_model, model_path, token=os.getenv("huggingface_token"))
 
 def respond(
     message: str,
     history: list[tuple[str, str]],
     system_message: str,
-    max_tokens: int,
-    temperature: float,
-    top_p: float,
 ):
-    # Combine the system message and chat history into a single prompt
-    messages = [{"role": "system", "content": system_message}]
-    for val in history:
-        if val[0]:
-            messages.append({"role": "user", "content": val[0]})
-        if val[1]:
-            messages.append({"role": "assistant", "content": val[1]})
-    messages.append({"role": "user", "content": message})
-
-    # Tokenize the messages
-    inputs = tokenizer.apply_chat_template(
-        messages,
-        tokenize=True,
-        add_generation_prompt=True,  # must be set for generation
-        return_tensors="pt",
     )
-    # Generate tokens incrementally
-    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
-    generation_kwargs = {
-        "input_ids": inputs,
-        "max_new_tokens": max_tokens,
-        "temperature": temperature,
-        "top_p": top_p,
-        "do_sample": True,
-        "streamer": streamer,
-    }
-    thread = Thread(target=model.generate, kwargs=generation_kwargs)
-    thread.start()
-
-    # Yield the growing response as tokens arrive
-    response = ""
-    for token in streamer:
-        response += token
-        yield response
 
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
-demo = gr.ChatInterface(
-    respond,
-    additional_inputs=[
-        gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
-        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
-        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
-        gr.Slider(
-            minimum=0.1,
-            maximum=1.0,
-            value=0.95,
-            step=0.05,
-            label="Top-p (nucleus sampling)",
-        ),
-    ],
-)
 
 if __name__ == "__main__":
     demo.launch()
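Note: the removed lines above loaded the google-t5/t5-small base and attached the LoRA adapter from Mat17892/t5small_enfr_opus at startup via peft; the added half of the diff, which follows, instead loads that repo directly as a full seq2seq checkpoint. A minimal sketch of how such an adapter could be merged and saved once, offline, so the app no longer needs peft at runtime (merge_and_unload is the peft API for this; the output directory is a hypothetical example):

    # Sketch, not part of the commit: one-time merge of the LoRA adapter.
    from transformers import AutoModelForSeq2SeqLM
    from peft import PeftModel

    base = AutoModelForSeq2SeqLM.from_pretrained("google-t5/t5-small")
    lora = PeftModel.from_pretrained(base, "Mat17892/t5small_enfr_opus")
    merged = lora.merge_and_unload()  # folds the adapter weights into the base
    merged.save_pretrained("t5small_enfr_opus_merged")  # hypothetical output dir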
 
 Copied from inference in colab notebook
 """
 
+from transformers import pipeline
 
 # Load model and tokenizer globally to avoid reloading for every request
 model_path = "Mat17892/t5small_enfr_opus"
 
+# translator = pipeline("translation_xx_to_yy", model=model_path)
 
+# def respond(
+#     message: str,
+#     history: list[tuple[str, str]],
+#     system_message: str,
+#     max_tokens: int,
+#     temperature: float,
+#     top_p: float,
+# ):
+#     message = "translate English to French: " + message
 
+#     response = translator(message)[0]
+#     yield response["translation_text"]
+
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, TextIteratorStreamer
+import threading
+
+tokenizer = AutoTokenizer.from_pretrained(model_path)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_path)
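Note: the commented-out pipeline variant keeps the placeholder task name "translation_xx_to_yy" from the transformers docs. For this English-to-French model the concrete task would be "translation_en_to_fr"; a minimal sketch, assuming the fine-tuned checkpoint still carries t5-small's task_specific_params (if it does not, the "translate English to French: " prefix must be prepended manually):

    # Sketch of the pipeline path with the placeholder filled in.
    from transformers import pipeline

    translator = pipeline("translation_en_to_fr", model="Mat17892/t5small_enfr_opus")
    print(translator("How are you?")[0]["translation_text"])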
 
 def respond(
     message: str,
     history: list[tuple[str, str]],
     system_message: str,
+    max_tokens: int = 128,
+    temperature: float = 1.0,
+    top_p: float = 1.0,
 ):
+    # Build the T5 input: task prefix (system message) plus the source text
+    input_text = system_message + " " + message
+    input_ids = tokenizer(input_text, return_tensors="pt").input_ids
+
+    # Set up the streamer
+    streamer = TextIteratorStreamer(tokenizer, skip_special_tokens=True)
+
+    # Generate in a background thread so tokens can be consumed as they arrive
+    generation_thread = threading.Thread(
+        target=model.generate,
+        kwargs={
+            "input_ids": input_ids,
+            "max_new_tokens": max_tokens,
+            "do_sample": True,
+            "temperature": temperature,
+            "top_p": top_p,
+            "streamer": streamer,
+        },
     )
+    generation_thread.start()
+
+    # Stream the output progressively, one decoded chunk at a time
+    for token in streamer:
+        yield token
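Note: dropping skip_prompt=True here is safe. For an encoder-decoder model such as T5, generate() emits only decoder tokens, so the input never appears in the stream; skip_prompt matters mainly for decoder-only models, which echo their prompt.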
 
 
 """
 For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
 """
+# demo = gr.ChatInterface(
+#     respond,
+#     additional_inputs=[
+#         gr.Textbox(value="You are a friendly Chatbot.", label="System message"),
+#         gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
+#         gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
+#         gr.Slider(
+#             minimum=0.1,
+#             maximum=1.0,
+#             value=0.95,
+#             step=0.05,
+#             label="Top-p (nucleus sampling)",
+#         ),
+#     ],
+# )
+
+# Run the full translation and return it as one string
+def respond_google_translate(
+    source_text,
+    system_message,
+    max_tokens,
+    temperature,
+    top_p,
+):
+    # Drain the respond generator and collect the final response
+    result = ""
+    for token in respond(
+        message=source_text,
+        history=[],
+        system_message=system_message,
+        max_tokens=max_tokens,
+        temperature=temperature,
+        top_p=top_p,
+    ):
+        result += token  # accumulate the streamed tokens
+    return result
+
+# Define the interface
+with gr.Blocks() as demo:
+    gr.Markdown("# Google Translate-like Interface")
+
+    with gr.Row():
+        with gr.Column():
+            source_textbox = gr.Textbox(
+                placeholder="Enter text in English...",
+                label="Source Text (English)",
+                lines=5,
+            )
+        with gr.Column():
+            translated_textbox = gr.Textbox(
+                placeholder="Translation will appear here...",
+                label="Translated Text (French)",
+                lines=5,
+                interactive=False,
+            )
 
+    translate_button = gr.Button("Translate")
+
+    with gr.Accordion("Advanced Settings", open=False):
+        system_message_input = gr.Textbox(
+            value="translate English to French:",
+            label="System message",
+        )
+        max_tokens_slider = gr.Slider(
+            minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"
+        )
+        temperature_slider = gr.Slider(
+            minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"
+        )
+        top_p_slider = gr.Slider(
+            minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p (nucleus sampling)"
+        )
+
+    # Wire the button to the translation function
+    translate_button.click(
+        respond_google_translate,
+        inputs=[
+            source_textbox,
+            system_message_input,
+            max_tokens_slider,
+            temperature_slider,
+            top_p_slider,
+        ],
+        outputs=translated_textbox,
+    )
 
 if __name__ == "__main__":
     demo.launch()
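Note: translate_button.click runs respond_google_translate to completion, so the token streaming inside respond is invisible to the user. Gradio click handlers may also be generators, in which case each yield progressively updates the output component; a minimal sketch reusing the same inputs and output (the function name is hypothetical):

    # Sketch: stream partial translations into translated_textbox instead.
    def respond_streaming(source_text, system_message, max_tokens, temperature, top_p):
        partial = ""
        for token in respond(source_text, [], system_message, max_tokens, temperature, top_p):
            partial += token
            yield partial  # Gradio renders each partial string as it arrives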