prithivMLmods committed on
Commit f4bb0af · verified · 1 Parent(s): 952d9f0

Update app.py

Files changed (1)
1. app.py +30 -14
app.py CHANGED
@@ -60,10 +60,33 @@ def progress_bar_html(label: str) -> str:
    </style>
    '''

+# Helper function for the chat interface
+def apply_chat_template_for_text(conversation, add_generation_prompt=True):
+    """
+    Concatenates a conversation (list of dict with keys "role" and "content")
+    into a single string prompt. If add_generation_prompt is True, appends "assistant:".
+    """
+    prompt = ""
+    for msg in conversation:
+        prompt += f"{msg['role']}: {msg['content']}\n"
+    if add_generation_prompt:
+        prompt += "assistant:"
+    return prompt
+
+def clean_chat_history(chat_history):
+    """
+    Filter out any chat entries whose "content" is not a string.
+    """
+    cleaned = []
+    for msg in chat_history:
+        if isinstance(msg, dict) and isinstance(msg.get("content"), str):
+            cleaned.append(msg)
+    return cleaned
+
# ---------------------------
# 1. Chat Interface Tab
# ---------------------------
-# Uses a text-only model: FastThink-0.5B-Tiny
+# Uses a text-only model: DeepHermes-3-Llama-3-3B-Preview-abliterated

model_id_text = "prithivMLmods/DeepHermes-3-Llama-3-3B-Preview-abliterated"
tokenizer = AutoTokenizer.from_pretrained(model_id_text)
@@ -74,16 +97,6 @@ model = AutoModelForCausalLM.from_pretrained(
)
model.eval()

-def clean_chat_history(chat_history):
-    """
-    Filter out any chat entries whose "content" is not a string.
-    """
-    cleaned = []
-    for msg in chat_history:
-        if isinstance(msg, dict) and isinstance(msg.get("content"), str):
-            cleaned.append(msg)
-    return cleaned
-
@spaces.GPU
def chat_generate(input_text: str, chat_history: list, max_new_tokens: int, temperature: float, top_p: float, top_k: int, repetition_penalty: float):
    """
@@ -92,7 +105,10 @@ def chat_generate(input_text: str, chat_history: list, max_new_tokens: int, temperature: float, top_p: float, top_k: int, repetition_penalty: float):
    # Prepare conversation by cleaning history and appending the new user message.
    conversation = clean_chat_history(chat_history)
    conversation.append({"role": "user", "content": input_text})
-    input_ids = tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt")
+
+    # Instead of tokenizer.apply_chat_template, we use our helper to generate a prompt.
+    prompt_text = apply_chat_template_for_text(conversation, add_generation_prompt=True)
+    input_ids = tokenizer(prompt_text, return_tensors="pt").input_ids
    if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
        input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]
    input_ids = input_ids.to(model.device)
@@ -115,7 +131,7 @@ def chat_generate(input_text: str, chat_history: list, max_new_tokens: int, temperature: float, top_p: float, top_k: int, repetition_penalty: float):
    for new_text in streamer:
        outputs.append(new_text)
    final_response = "".join(outputs)
-    # Append assistant reply to chat history.
+    # Append assistant reply to conversation.
    updated_history = conversation + [{"role": "assistant", "content": final_response}]
    return final_response, updated_history

@@ -144,7 +160,7 @@ def generate_qwen_ocr(input_text: str, image):
        {"role": "system", "content": [{"type": "text", "text": "You are a helpful assistant."}]},
        {"role": "user", "content": [{"type": "text", "text": input_text}, {"type": "image", "image": image}]}
    ]
-    # Apply chat template.
+    # Use the processor's chat template.
    prompt_full = processor.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
    inputs = processor(text=[prompt_full], images=[image], return_tensors="pt", padding=True).to("cuda")
    streamer = TextIteratorStreamer(processor, skip_prompt=True, skip_special_tokens=True)
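
The added apply_chat_template_for_text helper flattens the message list into a plain "role: content" transcript instead of rendering the tokenizer's built-in chat template. A minimal sketch of what it produces, using a hypothetical two-message conversation:

# Sketch only: the conversation values are hypothetical, not taken from app.py.
conversation = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Describe this app."},
]
print(apply_chat_template_for_text(conversation, add_generation_prompt=True))
# system: You are a helpful assistant.
# user: Describe this app.
# assistant:

For comparison, the removed call, tokenizer.apply_chat_template(conversation, add_generation_prompt=True, return_tensors="pt"), renders the model's own trained template and tokenizes in one step.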
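
clean_chat_history, re-added above chat_generate so both helpers sit together near the top of the file, keeps only dict entries whose "content" value is a string. A small sketch with hypothetical history entries, such as the file tuples some chat UIs place in history:

# Sketch only: the history entries below are hypothetical.
history = [
    {"role": "user", "content": "hello"},
    {"role": "user", "content": ("photo.png",)},  # non-string content: dropped
    ("user", "hello"),                            # non-dict entry: dropped
]
print(clean_chat_history(history))
# [{'role': 'user', 'content': 'hello'}]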
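
Taken together, the new chat path builds the flattened prompt, tokenizes it, truncates to the most recent MAX_INPUT_TOKEN_LENGTH tokens, and streams the reply. A condensed sketch, assuming the tokenizer, model, MAX_INPUT_TOKEN_LENGTH, and max_new_tokens defined in app.py, plus the background-thread pattern that TextIteratorStreamer is normally paired with (the threading detail is an assumption; that part of app.py is not shown in this diff):

from threading import Thread
from transformers import TextIteratorStreamer

# Condensed sketch of chat_generate's prompt-to-stream flow under the
# assumptions stated above; not a verbatim copy of app.py.
prompt_text = apply_chat_template_for_text(conversation, add_generation_prompt=True)
input_ids = tokenizer(prompt_text, return_tensors="pt").input_ids
if input_ids.shape[1] > MAX_INPUT_TOKEN_LENGTH:
    input_ids = input_ids[:, -MAX_INPUT_TOKEN_LENGTH:]  # keep only the newest tokens
input_ids = input_ids.to(model.device)

streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
Thread(target=model.generate,
       kwargs=dict(input_ids=input_ids, streamer=streamer, max_new_tokens=max_new_tokens)).start()
final_response = "".join(streamer)  # equivalent to the loop that joins new_text chunks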