sandz7 committed on
Commit 14cd22f
·
1 Parent(s): 7061b48

Returned the streamer into bot_comms and just yielded the text
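In outline, llama_generation now starts generate() on a background thread and returns its TextIteratorStreamer, and bot_comms simply iterates it and yields each text chunk. A self-contained sketch of that flow, under the assumptions that the checkpoint name is a placeholder (app.py loads its own model and tokenizer) and that history handling is elided:

from threading import Thread
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

model_id = "meta-llama/Meta-Llama-3-8B-Instruct"   # placeholder checkpoint, not necessarily app.py's
llama_tokenizer = AutoTokenizer.from_pretrained(model_id)
llama_model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

def llama_generation(input_text: str, temperature: float, max_new_tokens: int):
    input_ids = llama_tokenizer.apply_chat_template(
        [{"role": "user", "content": input_text}], return_tensors="pt"
    ).to(llama_model.device)
    streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0,
                                    skip_prompt=True, skip_special_tokens=True)
    generate_kwargs = dict(input_ids=input_ids, streamer=streamer,
                           max_new_tokens=max_new_tokens,
                           do_sample=True, temperature=temperature)
    if temperature == 0:
        generate_kwargs["do_sample"] = False   # greedy decoding when temperature is 0
    # run generate() in a thread so the caller can read the streamer as tokens arrive
    Thread(target=llama_model.generate, kwargs=generate_kwargs).start()
    return streamer

def bot_comms(input_text: str, temperature: float, max_new_tokens: int):
    streamer = llama_generation(input_text, temperature, max_new_tokens)
    for text in streamer:
        yield text   # stream each chunk instead of accumulating a joined list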

Files changed (1): app.py (+16, -96)
app.py CHANGED
@@ -77,8 +77,7 @@ def gpt_generation(input: str,
 def llama_generation(input_text: str,
                      history: list,
                      temperature: float,
-                     max_new_tokens: int,
-                     mode: str):
+                     max_new_tokens: int):
     """
     Pass input texts, tokenize, output and back to text.
     """
@@ -111,56 +110,7 @@ def llama_generation(input_text: str,
     thread = Thread(target=llama_model.generate, kwargs=generate_kwargs)
     thread.start()
 
-    # outputs = []
-    # for text in streamer:
-    #     outputs.append(text)
-    #     yield "".join(outputs)
-
-    text = [text for text in streamer]
-    output_text = output_list(text)
-    print("llama mode was on.")
-    return output_text, streamer
-
-    # conversation = []
-    # for user, assistant in history:
-    #     conversation.extend([{"role": "user", "content": user}, {"role": "assistant", "content": assistant}])
-    # conversation.append({"role": "user", "content": input_text})
-
-    # input_ids = llama_tokenizer.apply_chat_template(conversation, return_tensors='pt').to(llama_model.device)
-
-    # streamer = TextIteratorStreamer(llama_tokenizer, timeout=10.0, skip_prompt=True, skip_special_tokens=True)
-
-    # # generation arguments to pass in llm generate() eventually
-    # generate_kwargs = dict(
-    #     input_ids=input_ids,
-    #     streamer=streamer,
-    #     max_new_tokens=max_new_tokens,
-    #     do_sample=True,
-    #     temperature=temperature,
-    #     eos_token_id=terminators[0]
-    # )
-
-    # # This makes a greedy generation when temperature is passed to 0 (selects the next token sequence generated by model regardless). Selects each token with the highest probability
-    # if temperature == 0:
-    #     generate_kwargs["do_sample"] = False
-
-    # # Place the generation in a thread so we can access it.
-    # # place the function as target and place the kwargs next as the kwargs
-    # thread = Thread(target=llama_model.generate, kwargs=generate_kwargs)
-    # thread.start()
-
-    # llama_outputs = [text for text in streamer]
-    # output_text = output_list(llama_outputs)
-    # stream = gpt_generation(input=input_text, llama_output=output_text)
-    # print("loki mode was on.")
-    # return stream
-    # outputs = []
-    # for chunk in stream:
-    #     if chunk.choices[0].delta.content is not None:
-    #         text = chunk.choices[0].delta.content
-    #         outputs.append(text)
-    #         yield "".join(outputs)
-
+    return streamer
 
 def check_cuda():
     if torch.cuda.is_available():
@@ -171,13 +121,6 @@ def check_cuda():
 first_time = True
 llm_mode = ""
 
-# # Async generator function
-# async def async_generator():
-#     for i in range(5):
-#         # Simulate an asynchronous operation
-#         await asyncio.sleep(1)
-#         yield i
-
 def bot_comms(input_text: str,
               history: list,
               temperature: float,
@@ -211,20 +154,12 @@ def bot_comms(input_text: str,
         yield "Done. GPT-3.5-turbo is ready for your questions! πŸƒ"
 
     if llm_mode == "switch to llama":
-        output_text, streamer = llama_generation(input_text=input_text,
-                                                 history=history,
-                                                 temperature=temperature,
-                                                 max_new_tokens=max_new_tokens,
-                                                 mode="llama")
-        outputs = []
+        streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
         for text in streamer:
-            outputs.append(text)
-            yield "".join(outputs)
+            yield text
 
     if llm_mode == "switch to gpt-4o":
-        stream = gpt_generation(input=input_text,
-                                llama_output="",
-                                mode="gpt-4o")
+        stream = gpt_generation(input=input_text, llama_output="", mode="gpt-4o")
         outputs = []
         print("gpt-4o only about to answer.")
         for chunk in stream:
@@ -234,9 +169,7 @@ def bot_comms(input_text: str,
                 yield "".join(outputs)
 
     if llm_mode == "switch to gpt-3.5-turbo":
-        stream = gpt_generation(input=input_text,
-                                llama_output="",
-                                mode="gpt-3.5-turbo")
+        stream = gpt_generation(input=input_text, llama_output="", mode="gpt-3.5-turbo")
         outputs = []
         print("gpt-3.5-turbo is about to answer.")
         for chunk in stream:
@@ -245,31 +178,18 @@ def bot_comms(input_text: str,
                 outputs.append(text)
                 yield "".join(outputs)
 
-    if llm_mode is None:
-        output_text, streamer = llama_generation(input_text=input_text,
-                                                 history=history,
-                                                 temperature=temperature,
-                                                 max_new_tokens=max_new_tokens,
-                                                 mode="loki")
-        stream = gpt_generation(input=input_text,
-                                llama_output=output_text,
-                                mode="gpt-4o")
+    if llm_mode is None or llm_mode == "":
+        streamer = llama_generation(input_text=input_text, history=history, temperature=temperature, max_new_tokens=max_new_tokens)
+        output_text = output_list([text for text in streamer])
+        stream = gpt_generation(input=input_text, llama_output=output_text, mode="gpt-4o")
 
         outputs = []
-        print("Loki is activate to answer")
-        for text in stream:
-            outputs.append(text)
-            yield "".join(outputs)
-
-        # # Integration in your existing code
-        # async def main():
-        #     async for value in async_generator():
-        #         print(value)
-        #     # Add your existing logic here, e.g., call bot_comms with appropriate arguments
-        #     await bot_comms(input_text="example", history=[], temperature=0.5, max_new_tokens=128)
-
-        # Rune async function
-        # asyncio.run(main())
+        print("Loki is activated to answer")
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                text = chunk.choices[0].delta.content
+                outputs.append(text)
+                yield "".join(outputs)
 
 chatbot=gr.Chatbot(height=600, label="Loki AI")
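The default (Loki) path now drains the llama streamer into a single string via output_list and feeds it to the GPT call as context, then yields the streamed GPT chunks. A sketch of that chaining, assuming an openai>=1.x client and treating gpt_generation's internals as a plain streamed chat completion (the function name gpt_stream and the exact prompt wiring are hypothetical; app.py's gpt_generation may differ):

from openai import OpenAI

client = OpenAI()  # assumes OPENAI_API_KEY is set in the environment

def gpt_stream(user_input: str, llama_output: str, model: str = "gpt-4o"):
    # hypothetical stand-in for app.py's gpt_generation: stream a chat completion
    # that sees both the user input and the Llama draft answer
    return client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": f"Llama draft answer: {llama_output}"},
            {"role": "user", "content": user_input},
        ],
        stream=True,
    )

def loki_reply(user_input: str, streamer):
    # drain the TextIteratorStreamer into one string, then stream GPT's answer
    llama_output = "".join(text for text in streamer)
    outputs = []
    for chunk in gpt_stream(user_input, llama_output):
        if chunk.choices[0].delta.content is not None:
            outputs.append(chunk.choices[0].delta.content)
            yield "".join(outputs)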