sandz7 committed
Commit 0679bd1 · 1 Parent(s): 705763e

locked the thread before generation on llama

Files changed (1)
app.py +12 -7
app.py CHANGED
@@ -3,8 +3,7 @@ import gradio as gr
 from transformers import AutoTokenizer, AutoModelForCausalLM, TextIteratorStreamer
 from huggingface_hub import login
 import os
-from threading import Thread
-from openai import OpenAI
+import threading
 import spaces
 # import multiprocessing as mp
 import sys
@@ -111,13 +110,19 @@ def llama_generation(input_text: str,
     if temperature == 0:
         generate_kwargs["do_sample"] = False
 
+    # Use a lock object to synchronize access to the llama_model
+    lock = threading.Lock()
+
     # # Place the generation in a thread so we can access it.
     # # place the function as target and place the kwargs next as the kwargs
-    # thread = Thread(target=llama_model.generate, kwargs=generate_kwargs)
-    # thread.start()
-
-    # Multiprocessing to avoid pickle errors
-    thread = Thread(target=llama_model.generate, kwargs=generate_kwargs)
+    def generation_llama():
+        with lock:
+            # Generate the response using Llama3
+            response = llama_model.generate(**generate_kwargs)
+        return response
+
+    # Start the thread and wait for it to finish
+    thread = threading.Thread(target=generation_llama)
     thread.start()
     thread.join()
     return streamer
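
Note on the locking pattern: because lock = threading.Lock() is created inside llama_generation, every call builds its own lock, so two concurrent requests can still reach llama_model.generate at the same time. A minimal sketch of the shared-lock variant, reusing the llama_model and generate_kwargs names from app.py; the module-level placement and the generation_lock name are assumptions for illustration, not part of this commit:

    import threading

    # Module-level lock, created once and shared by every request
    # (hypothetical name, not part of this commit)
    generation_lock = threading.Lock()

    def generation_llama(generate_kwargs):
        # Only one thread may run generate() at a time
        with generation_lock:
            return llama_model.generate(**generate_kwargs)

    # Pass the per-request kwargs in explicitly when starting the thread
    thread = threading.Thread(target=generation_llama, args=(generate_kwargs,))

Also worth noting: since the thread is joined before streamer is returned, the whole response has already been buffered by then; TextIteratorStreamer is normally iterated while the generation thread is still running, which is what enables token-by-token streaming.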