suayptalha committed · verified
Commit a5ec87b · 1 Parent(s): 64322ab

Update app.py

Files changed (1):
  1. app.py +5 -35
app.py CHANGED
@@ -32,21 +32,6 @@ def respond(
     temperature: float,
     top_p: float,
 ):
-    """
-    Generate a chat response using streaming with TextIteratorStreamer.
-
-    Args:
-        message: User's current message.
-        history: List of (user, assistant) tuples from previous turns.
-        system_message: Initial system prompt guiding the assistant.
-        max_tokens: Maximum number of tokens to generate.
-        temperature: Sampling temperature.
-        top_p: Nucleus sampling probability.
-
-    Yields:
-        The growing response text as new tokens are generated.
-    """
-    # Assemble messages
     messages = [{"role": "system", "content": system_message}]
     for user_msg, bot_msg in history:
         if user_msg:
@@ -55,33 +40,18 @@ def respond(
             messages.append({"role": "assistant", "content": bot_msg})
     messages.append({"role": "user", "content": message})
 
-    # Prepare prompt and tokenize
-    prompt = tokenizer.apply_chat_template(
-        messages, tokenize=False, add_generation_prompt=True
-    )
+    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
     inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
 
-    # Set up streamer for real-time output
-    streamer = TextIteratorStreamer(
-        tokenizer, skip_prompt=True, skip_special_tokens=True
-    )
-    generate_kwargs = dict(
+    outputs = model.generate(
         **inputs,
-        streamer=streamer,
         max_new_tokens=max_tokens,
         temperature=temperature,
         top_p=top_p,
-        do_sample=True,
+        do_sample=True
     )
-    # Start generation in a separate thread
-    thread = threading.Thread(target=model.generate, kwargs=generate_kwargs)
-    thread.start()
-
-    # Stream tokens back to user
-    response = ""
-    for new_text in streamer:
-        response += new_text
-        yield response
+    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    yield response
 
 # Initialize Gradio chat interface
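
For reference, here is how respond() reads after this commit. This is a sketch assembled from the two hunks above, not the verbatim file: the parameter names before temperature and the two history-loop lines hidden between the hunks are assumptions, and model / tokenizer are module-level globals defined earlier in app.py, outside the diff context.

# Sketch of respond() as of commit a5ec87b, reconstructed from the diff above.
# Assumptions: the parameter names before `temperature` (inferred from the body)
# and the two lines between the hunks; `model` and `tokenizer` are globals
# defined earlier in app.py.
def respond(
    message,         # assumed name: the user's current message
    history,         # assumed name: list of (user, assistant) tuples
    system_message,  # assumed name: system prompt string
    max_tokens,      # assumed name: cap on newly generated tokens
    temperature: float,
    top_p: float,
):
    messages = [{"role": "system", "content": system_message}]
    for user_msg, bot_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})  # assumed (between hunks)
        if bot_msg:                                                 # assumed (between hunks)
            messages.append({"role": "assistant", "content": bot_msg})
    messages.append({"role": "user", "content": message})

    prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    # Generation now blocks until complete instead of streaming token by token.
    outputs = model.generate(
        **inputs,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True
    )
    # outputs[0] holds the prompt tokens followed by the completion, so the
    # decoded text includes the rendered prompt; slicing off the first
    # inputs["input_ids"].shape[-1] tokens would keep only the new text.
    response = tokenizer.decode(outputs[0], skip_special_tokens=True)
    yield response

Compared with the removed TextIteratorStreamer version, which ran model.generate in a background thread and yielded the growing reply token by token, this version yields once after generation finishes, so the chat UI shows the reply all at once.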
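
The "# Initialize Gradio chat interface" section itself sits below the diff context. For a respond() with this signature and tuple-style history, a typical wiring might look like the following; every label and default value here is an assumption, not taken from the commit.

import gradio as gr

# Hypothetical interface setup for the generator above; Gradio renders each
# value the function yields as the assistant's reply.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(value="You are a helpful assistant.", label="System message"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=2.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(minimum=0.1, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
    ],
)

if __name__ == "__main__":
    demo.launch()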