sandz7 committed
Commit 67cc1ee · 1 Parent(s): 7b1925c

added more custom commands in bot comms

Files changed (1)
1. app.py +92 -37
app.py CHANGED
@@ -12,6 +12,35 @@ space_id = "sandz7"
 # Authenticate with hf api
 api = HfApi()
 
+# switch hardware function
+def space_hardware_config(instance_size: str="gpu",
+                          instance_type: str="1xL4",
+                          vcpus: int=8,
+                          memory: int=30):
+    """
+    This will manually select what hardware we'll use in the space.
+    """
+
+    # Get Space
+    space = api.get_repo(space_id)
+
+    # Hardware Configuration
+    space.config["compute"] = {
+        "instance_type": instance_type,
+        "instance_size": instance_size,
+        "disk_size": 50,
+        "vcpus": vcpus, # number of virtual CPU's
+        "memory": memory # amount of memory in gb
+    }
+
+    # Save updated space config
+    api.push_to_hub(space)
+
+    return "Hardware configuration successfull. Check the cuda command."
+
+# Automatically place to the standard config we need for loki
+space_hardware_config()
+
 TOKEN = os.getenv('HF_AUTH_TOKEN')
 login(token=TOKEN,
       add_to_git_credential=False)
@@ -34,31 +63,6 @@ terminators = [
     llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
 
-# switch hardware function
-def space_hardware_config(instance_size: str,
-                          instance_type: str,
-                          vcpus: int,
-                          memory: int):
-    """
-    This will manually select what hardware we'll use in the space.
-    """
-
-    # Get Space
-    space = api.get_repo(space_id)
-
-    # Hardware Configuration
-    space.config["compute"] = {
-        "instance_type": instance_type,
-        "instance_size": instance_size,
-        "disk_size": 50,
-        "vcpus": vcpus, # number of virtual CPU's
-        "memory": memory # amount of memory in gb
-    }
-
-    # Save updated space config
-    api.push_to_hub(space)
-
-    return "Hardware configuration successfull. Check the cuda command."
-
 
 # The output
 def output_list(output: list):
@@ -79,19 +83,24 @@ def gpt_generation(input: str,
     Passes the llama output and all input,
     returns the stream, so we can yield it in final generation.
     """
+    if llama_output is not None:
+        base_prompt = '''Here is the users question:\n\n {llama_input}\n\n
+        Llama3 LLM gave the user this response:\n\n {llama_output}\n
+        Answer the users question with the help of Llama3, if Llama3 response wasn't accurate,
+        than ignore it's output and give your's alone.'''
 
-    base_prompt = '''Here is the users question:\n\n {llama_input}\n\n
-    Llama3 LLM gave the user this response:\n\n {llama_output}\n
-    Answer the users question with the help of Llama3, if Llama3 response wasn't accurate,
-    than ignore it's output and give your's alone.'''
+        prompt = base_prompt.format(llama_input=input, llama_output=llama_output)
+    else:
+        base_prompt = '''Here is the users question:\n\n {llama_input}\n\n
+        Respond in a thorough and complete way.'''
 
-    prompt = base_prompt.format(llama_input=input, llama_output=llama_output)
+        prompt = base_prompt.format(llama_input=input)
 
     # Setup the client
     client = OpenAI(api_key=API_KEY)
 
     stream = client.chat.completions.create(
-        model="gpt-4o",
+        model=mode,
         messages=[{"role": "system", "content": "You are a helpful assistant called 'Loki'."},
                   {"role": "user", "content": prompt}],
         stream=True,
@@ -200,6 +209,7 @@ def check_cuda():
     return "No GPU is being used right now."
 
 first_time = True
+llm_mode = ""
 
 def bot_comms(input_text: str,
               history: list,
@@ -209,11 +219,30 @@
     The connection between gradio and the LLM's
     """
     global first_time
+    global llm_mode
+
+    if input_text == "mode":
+        if llm_mode == "":
+            return "The mode is currently at Loki Default mode"
+        else:
+            return f"The current mode: {llm_mode}"
 
     if input_text == "check cuda":
         return check_cuda()
 
     if input_text == "switch to llama":
+        llm_mode = input_text
+        return "Got it! Llama is now activate for your questions only 🦙"
+
+    if input_text == "switch to gpt-4o":
+        llm_mode = input_text
+        return "Understood! GPT-4o is now hearing your responses only 👾"
+
+    if input_text == "switch to gpt-3.5-turbo":
+        llm_mode = input_text
+        return "Done. GPT-3.5-turbo is ready for your questions! 🏃"
+
+    if llm_mode == "switch to llama":
         streamer = loki_generation(input_text=input_text,
                                    history=history,
                                    temperature=temperature,
@@ -223,12 +252,12 @@
         for text in streamer:
             outputs.append(text)
            yield "".join(outputs)
-
-    if input_text == "switch to gpt-4o":
-        space_hardware_config(instance_size="gpu",
-                              instance_type="1xL4",
-                              vcpus=8,
-                              memory=30)
+
+    if llm_mode == "switch to gpt-4o":
+        space_hardware_config(instance_size="cpu",
+                              instance_type="basic",
+                              vcpus=2,
+                              memory=16)
         stream = gpt_generation(input=input_text,
                                 llama_output="",
                                 mode="gpt-4o")
@@ -240,6 +269,32 @@
                 outputs.append(text)
                 yield "".join(outputs)
 
+    if llm_mode == "switch to gpt-3.5-turbo":
+        space_hardware_config(instance_size="cpu",
+                              instance_type="basic",
+                              vcpus=2,
+                              memory=16)
+        stream = gpt_generation(input=input_text,
+                                llama_output="",
+                                mode="gpt-3.5-turbo")
+        outputs = []
+        print("gpt-3.5-turbo is about to answer.")
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                text = chunk.choices[0].delta.content
+                outputs.append(text)
+                yield "".join(outputs)
+
+    if llm_mode is None:
+        stream = loki_generation(input_text=input_text,
+                                 history=history,
+                                 temperature=temperature,
+                                 max_new_tokens=max_new_tokens)
+        outputs = []
+        print("Loki is activate to answer")
+        for text in stream:
+            outputs.append(text)
+            yield "".join(outputs)
 
 chatbot=gr.Chatbot(height=600, label="Loki AI")
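A note on the new space_hardware_config helper: huggingface_hub's HfApi has no get_repo or push_to_hub(space) methods, and Space hardware is not set through a repo config dict, so this helper would raise AttributeError as soon as the module-level space_hardware_config() call runs. A minimal sketch of the same idea against the documented request_space_hardware endpoint, assuming space_id holds a full Space repo id (e.g. "user/space") that the token can write to; the default hardware value here is an assumption:

    from huggingface_hub import HfApi, SpaceHardware

    api = HfApi()

    def space_hardware_config(hardware: SpaceHardware = SpaceHardware.T4_SMALL) -> str:
        # request_space_hardware submits the change; the Space restarts on
        # the new hardware once the request is processed.
        api.request_space_hardware(repo_id=space_id, hardware=hardware)
        return "Hardware change requested. Run 'check cuda' once the Space restarts."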
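In gpt_generation, the new llama_output is not None branch is taken even when bot_comms passes llama_output="" (an empty string, not None), so the prompt claims Llama3 responded when it did not. A sketch keyed on truthiness instead, with the same two prompts (wording lightly copyedited):

    if llama_output:
        prompt = (f"Here is the user's question:\n\n{input}\n\n"
                  f"Llama3 gave the user this response:\n\n{llama_output}\n"
                  "Answer the user's question with Llama3's help; if its "
                  "response wasn't accurate, ignore it and answer on your own.")
    else:
        prompt = (f"Here is the user's question:\n\n{input}\n\n"
                  "Respond in a thorough and complete way.")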
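In bot_comms, llm_mode is initialised to "" and only ever assigned strings, so the closing if llm_mode is None fallback is unreachable and the Loki default mode never answers. Since the mode checks are independent if statements, testing for the empty string restores the default path; a sketch of that branch:

    if llm_mode == "":  # llm_mode is never None, so test the empty string
        stream = loki_generation(input_text=input_text,
                                 history=history,
                                 temperature=temperature,
                                 max_new_tokens=max_new_tokens)
        outputs = []
        print("Loki is active to answer")
        for text in stream:
            outputs.append(text)
            yield "".join(outputs)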
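Finally, both GPT branches call space_hardware_config(...) to drop the Space to CPU hardware immediately before streaming a reply, but a hardware change restarts the Space and would cut the response off mid-stream. A sketch that requests the change only when the runtime is not already on the target hardware, using the documented get_space_runtime call (the repo id is again an assumption):

    from huggingface_hub import SpaceHardware

    runtime = api.get_space_runtime(space_id)
    if runtime.hardware != SpaceHardware.CPU_BASIC:
        # This restarts the Space, so schedule it between requests,
        # not inside the generator that is answering the user.
        api.request_space_hardware(repo_id=space_id, hardware=SpaceHardware.CPU_BASIC)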