added more custom commands in bot comms
app.py
CHANGED
@@ -12,6 +12,35 @@ space_id = "sandz7"
 # Authenticate with hf api
 api = HfApi()
 
+# switch hardware function
+def space_hardware_config(instance_size: str="gpu",
+                          instance_type: str="1xL4",
+                          vcpus: int=8,
+                          memory: int=30):
+    """
+    This will manually select what hardware we'll use in the space.
+    """
+
+    # Get Space
+    space = api.get_repo(space_id)
+
+    # Hardware Configuration
+    space.config["compute"] = {
+        "instance_type": instance_type,
+        "instance_size": instance_size,
+        "disk_size": 50,
+        "vcpus": vcpus, # number of virtual CPU's
+        "memory": memory # amount of memory in gb
+    }
+
+    # Save updated space config
+    api.push_to_hub(space)
+
+    return "Hardware configuration successfull. Check the cuda command."
+
+# Automatically place to the standard config we need for loki
+space_hardware_config()
+
 TOKEN = os.getenv('HF_AUTH_TOKEN')
 login(token=TOKEN,
       add_to_git_credential=False)
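Note on the API used above: `HfApi` in `huggingface_hub` has no `get_repo` method that returns a Space with a mutable `config`, and `push_to_hub` is not an `HfApi` method, so this function will raise at runtime. The documented way to switch Space hardware is `request_space_hardware`; a minimal sketch, where the full `user/space` repo id and the hardware flavor string are assumptions:

    from huggingface_hub import HfApi

    api = HfApi()
    # repo_id must be the full "user/space" id; "sandz7/loki" is a placeholder here.
    # Flavor names follow the Hub's conventions ("cpu-basic", "t4-small", ...);
    # check the Spaces docs for the exact L4 flavor.
    api.request_space_hardware(repo_id="sandz7/loki", hardware="t4-small")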
@@ -34,31 +63,6 @@ terminators = [
     llama_tokenizer.convert_tokens_to_ids("<|eot_id|>")
 ]
 
-# switch hardware function
-def space_hardware_config(instance_size: str,
-                          instance_type: str,
-                          vcpus: int,
-                          memory: int):
-    """
-    This will manually select what hardware we'll use in the space.
-    """
-
-    # Get Space
-    space = api.get_repo(space_id)
-
-    # Hardware Configuration
-    space.config["compute"] = {
-        "instance_type": instance_type,
-        "instance_size": instance_size,
-        "disk_size": 50,
-        "vcpus": vcpus, # number of virtual CPU's
-        "memory": memory # amount of memory in gb
-    }
-
-    # Save updated space config
-    api.push_to_hub(space)
-
-    return "Hardware configuration successfull. Check the cuda command."
 
 # The output
 def output_list(output: list):
@@ -79,19 +83,24 @@ def gpt_generation(input: str,
     Passes the llama output and all input,
     returns the stream, so we can yield it in final generation.
     """
+    if llama_output is not None:
+        base_prompt = '''Here is the users question:\n\n {llama_input}\n\n
+        Llama3 LLM gave the user this response:\n\n {llama_output}\n
+        Answer the users question with the help of Llama3, if Llama3 response wasn't accurate,
+        than ignore it's output and give your's alone.'''
 
-
-
-
-
+        prompt = base_prompt.format(llama_input=input, llama_output=llama_output)
+    else:
+        base_prompt = '''Here is the users question:\n\n {llama_input}\n\n
+        Respond in a thorough and complete way.'''
 
-
+        prompt = base_prompt.format(llama_input=input)
 
     # Setup the client
     client = OpenAI(api_key=API_KEY)
 
     stream = client.chat.completions.create(
-        model=
+        model=mode,
         messages=[{"role": "system", "content": "You are a helpful assistant called 'Loki'."},
                   {"role": "user", "content": prompt}],
         stream=True,
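For reference, the object returned by `client.chat.completions.create(stream=True)` is an iterator of chunks in the OpenAI v1 streaming schema, which is how `bot_comms` below drains it; a minimal consumer, shown here as a sketch:

    # Each chunk carries an optional text delta; delta.content is None for
    # role/terminator chunks and must be skipped.
    for chunk in stream:
        piece = chunk.choices[0].delta.content
        if piece is not None:
            print(piece, end="", flush=True)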
@@ -200,6 +209,7 @@ def check_cuda():
     return "No GPU is being used right now."
 
 first_time = True
+llm_mode = ""
 
 def bot_comms(input_text: str,
               history: list,
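Since `llm_mode` (added above) lives at module level, one value is shared by every user of the Space. If a per-session mode were wanted instead, Gradio's documented primitive for that is `gr.State`; a hedged sketch:

    # One mode value per user session instead of one per process; wiring it
    # through the chat handler's inputs/outputs is left out of this sketch.
    mode_state = gr.State("")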
@@ -209,11 +219,30 @@ def bot_comms(input_text: str,
     The connection between gradio and the LLM's
     """
     global first_time
+    global llm_mode
+
+    if input_text == "mode":
+        if llm_mode == "":
+            return "The mode is currently at Loki Default mode"
+        else:
+            return f"The current mode: {llm_mode}"
 
     if input_text == "check cuda":
         return check_cuda()
 
     if input_text == "switch to llama":
+        llm_mode = input_text
+        return "Got it! Llama is now activate for your questions only 🦙"
+
+    if input_text == "switch to gpt-4o":
+        llm_mode = input_text
+        return "Understood! GPT-4o is now hearing your responses only 👾"
+
+    if input_text == "switch to gpt-3.5-turbo":
+        llm_mode = input_text
+        return "Done. GPT-3.5-turbo is ready for your questions! 🚀"
+
+    if llm_mode == "switch to llama":
         streamer = loki_generation(input_text=input_text,
                                    history=history,
                                    temperature=temperature,
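The command ladder above gains a branch per mode; a table-driven variant keeps the replies and the `llm_mode` update in one place. A sketch under the same semantics (`MODE_REPLIES` and `handle_mode_switch` are hypothetical helpers, not part of the commit):

    MODE_REPLIES = {
        "switch to llama": "Got it! Llama is now activate for your questions only 🦙",
        "switch to gpt-4o": "Understood! GPT-4o is now hearing your responses only 👾",
        "switch to gpt-3.5-turbo": "Done. GPT-3.5-turbo is ready for your questions! 🚀",
    }

    def handle_mode_switch(input_text: str):
        global llm_mode
        if input_text in MODE_REPLIES:
            llm_mode = input_text       # remember the active backend
            return MODE_REPLIES[input_text]
        return None                     # not a mode command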
@@ -223,12 +252,12 @@ def bot_comms(input_text: str,
         for text in streamer:
             outputs.append(text)
             yield "".join(outputs)
-
-    if
-        space_hardware_config(instance_size="
-                              instance_type="
-                              vcpus=
-                              memory=
+
+    if llm_mode == "switch to gpt-4o":
+        space_hardware_config(instance_size="cpu",
+                              instance_type="basic",
+                              vcpus=2,
+                              memory=16)
         stream = gpt_generation(input=input_text,
                                 llama_output="",
                                 mode="gpt-4o")
@@ -240,6 +269,32 @@ def bot_comms(input_text: str,
                 outputs.append(text)
                 yield "".join(outputs)
 
+    if llm_mode == "switch to gpt-3.5-turbo":
+        space_hardware_config(instance_size="cpu",
+                              instance_type="basic",
+                              vcpus=2,
+                              memory=16)
+        stream = gpt_generation(input=input_text,
+                                llama_output="",
+                                mode="gpt-3.5-turbo")
+        outputs = []
+        print("gpt-3.5-turbo is about to answer.")
+        for chunk in stream:
+            if chunk.choices[0].delta.content is not None:
+                text = chunk.choices[0].delta.content
+                outputs.append(text)
+                yield "".join(outputs)
+
+    if llm_mode is None:
+        stream = loki_generation(input_text=input_text,
+                                 history=history,
+                                 temperature=temperature,
+                                 max_new_tokens=max_new_tokens)
+        outputs = []
+        print("Loki is activate to answer")
+        for text in stream:
+            outputs.append(text)
+            yield "".join(outputs)
 
 chatbot=gr.Chatbot(height=600, label="Loki AI")
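One behavioral note on the default branch: `llm_mode` is initialized to `""` in the `first_time` hunk above, so `if llm_mode is None:` can never be true and the Loki default path is unreachable after this change. A guard that matches the initialization would be:

    # Default (Loki) branch keyed to the "" initial value rather than None.
    if llm_mode == "":
        stream = loki_generation(input_text=input_text,
                                 history=history,
                                 temperature=temperature,
                                 max_new_tokens=max_new_tokens)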