added context manager decorator on llama_generation
app.py CHANGED
@@ -4,6 +4,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStream
 from huggingface_hub import login
 import os
 import threading
+import contextlib
 import spaces
 from openai import OpenAI
 # import multiprocessing as mp
@@ -78,11 +79,9 @@ def gpt_generation(input: str,
 
     return stream
 
-# Global lock variable
-lock = threading.Lock()
-
 # Place just input pass and return generation output
 @spaces.GPU(duration=120)
+@contextlib.contextmanager
 def llama_generation(input_text: str,
                      history: list,
                      temperature: float,
@@ -115,16 +114,15 @@ def llama_generation(input_text: str,
     generate_kwargs["do_sample"] = False
 
     # Use a lock object to synchronize access to the llama_model
-
+    lock = threading.Lock()
 
-
-    # # place the function as target and place the kwargs next as the kwargs
-    def generation_llama(lock=lock):
+    def generate_llama():
         with lock:
-            # Generate response using
+            # Generate the response using the llama_model
             response = llama_model.generate(**generate_kwargs)
             return response
 
+
     # start the thread and wait for it to finish
     thread = threading.Thread(target=generation_llama)
     thread.start()