CamiloVega committed on
Commit 89bb689 · verified · 1 Parent(s): f3affeb

Update app.py

Files changed (1)
  1. app.py +4 -28
app.py CHANGED
@@ -94,12 +94,9 @@ def generate_response(user_input, chat_history):
         conversation_history = ""
         if chat_history:
             for message in chat_history:
-                # Remove any [INST] tags from the history
-                user_msg = message[0].replace("[INST]", "").replace("[/INST]", "").strip()
-                assistant_msg = message[1].replace("[INST]", "").replace("[/INST]", "").strip()
-                conversation_history += f"[INST] {user_msg} [/INST] {assistant_msg} "
+                conversation_history += f"{message[0]} {message[1]} "
 
-        prompt = f"<s>[INST] {system_message}\n\n{conversation_history}[INST] {user_input} [/INST]"
+        prompt = f"[INST] {system_message}\n\n{conversation_history}{user_input}"
 
         logger.info("Generating model response...")
         outputs = model_gen(
@@ -107,38 +104,16 @@ def generate_response(user_input, chat_history):
             max_new_tokens=256,
             return_full_text=False,
             pad_token_id=tokenizer.eos_token_id,
-            do_sample=True,
-            temperature=0.7,
-            top_p=0.9,
-            repetition_penalty=1.1
         )
         logger.info("Model response generated successfully")
 
-        # Clean up the response by removing any [INST] tags and trimming
         assistant_response = outputs[0]['generated_text'].strip()
-        assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
-
-        # If the response is too short, try to generate a more detailed one
-        if len(assistant_response.split()) < 10:
-            prompt += "\nPlease provide a more detailed answer with context and explanation."
-            outputs = model_gen(
-                prompt,
-                max_new_tokens=256,
-                return_full_text=False,
-                pad_token_id=tokenizer.eos_token_id,
-                do_sample=True,
-                temperature=0.7,
-                top_p=0.9,
-                repetition_penalty=1.1
-            )
-            assistant_response = outputs[0]['generated_text'].strip()
-            assistant_response = assistant_response.replace("[INST]", "").replace("[/INST]", "").strip()
 
         # Calculate water consumption for output
         output_water_consumption = calculate_water_consumption(assistant_response, False)
         total_water_consumption += output_water_consumption
 
-        # Update chat history with the cleaned messages
+        # Update chat history with the new formatted messages
        chat_history.append([user_input, assistant_response])
 
         # Prepare water consumption message
@@ -163,6 +138,7 @@ def generate_response(user_input, chat_history):
         error_message = f"An error occurred: {str(e)}"
         chat_history.append([user_input, error_message])
         return chat_history, show_water
+
 
 # Constants for water consumption calculation
 WATER_PER_TOKEN = {
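
For context, a minimal, self-contained sketch of the prompt flow the updated code uses. This is an illustration only: the model name, system message, and sample history below are placeholder assumptions, not values from the repository, and app.py builds its own model_gen pipeline and tokenizer elsewhere.

# Standalone sketch (assumed setup; only the prompt-building and the
# model_gen call mirror the diff above).
from transformers import AutoTokenizer, pipeline

model_name = "mistralai/Mistral-7B-Instruct-v0.2"  # hypothetical model choice
tokenizer = AutoTokenizer.from_pretrained(model_name)
model_gen = pipeline("text-generation", model=model_name, tokenizer=tokenizer)

system_message = "You are a helpful assistant."        # placeholder
chat_history = [["Hello", "Hi! How can I help you?"]]  # [user, assistant] pairs
user_input = "Tell me about water use in AI."          # placeholder question

# Flatten the history by plain concatenation, with no per-turn [INST] tags
# and no tag stripping afterwards.
conversation_history = ""
for message in chat_history:
    conversation_history += f"{message[0]} {message[1]} "

# Single [INST] prefix for the whole prompt.
prompt = f"[INST] {system_message}\n\n{conversation_history}{user_input}"

# Generation with library defaults for decoding (no sampling arguments).
outputs = model_gen(
    prompt,
    max_new_tokens=256,
    return_full_text=False,
    pad_token_id=tokenizer.eos_token_id,
)
assistant_response = outputs[0]["generated_text"].strip()
print(assistant_response)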