Update app.py
app.py
CHANGED
@@ -161,8 +161,8 @@ def initialize_qa_chain(llm_model, temperature):
         return "Please process documents first.", None

     try:
-        # Enable quantization for
-        quantization_config = BitsAndBytesConfig(load_in_4bit=True)
+        # Enable 4-bit quantization for all models to reduce memory usage
+        quantization_config = BitsAndBytesConfig(load_in_4bit=True)
         llm = HuggingFaceEndpoint(
             repo_id=LLM_MODELS[llm_model],
             task="text-generation",
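A note on this hunk: BitsAndBytesConfig is a transformers load-time option, so it only takes effect when model weights are loaded into local memory; a hosted HuggingFaceEndpoint runs generation server-side and is unlikely to honor it. A minimal sketch of where 4-bit quantization does apply, assuming local loading with transformers and the bitsandbytes package (the repo id is illustrative, not the app's model list):

from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

# Load-time 4-bit quantization: weights are quantized as they are loaded
# onto the GPU, cutting memory use roughly 4x versus fp16.
quantization_config = BitsAndBytesConfig(load_in_4bit=True)

model_id = "mistralai/Mistral-7B-Instruct-v0.2"  # illustrative repo id
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    quantization_config=quantization_config,
    device_map="auto",  # place layers on available devices
)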
@@ -170,7 +170,7 @@ def initialize_qa_chain(llm_model, temperature):
             max_new_tokens=512,
             huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
             timeout=30,
-
+            model_kwargs={"quantization_config": quantization_config}
         )
         # Dynamically set k based on vector store size
         collection = vector_store._collection
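The "dynamically set k" step that follows the endpoint setup clamps the number of retrieved chunks to what the store actually holds, so a small upload never asks Chroma for more documents than exist. A sketch of that logic under the same variable names (Chroma's count() returns the number of stored chunks; the cap of 4 is an assumption):

# Clamp retrieval depth to the collection size.
doc_count = collection.count()    # chunks currently in the Chroma collection
k = max(1, min(4, doc_count))     # never request more chunks than exist
retriever = vector_store.as_retriever(search_kwargs={"k": k})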
@@ -186,9 +186,9 @@ def initialize_qa_chain(llm_model, temperature):
     except requests.exceptions.HTTPError as e:
         logger.error(f"HTTP error initializing QA chain for {llm_model}: {str(e)}")
         if "503" in str(e):
-            return f"Error: Hugging Face API temporarily unavailable for {llm_model}. Try '
+            return f"Error: Hugging Face API temporarily unavailable for {llm_model}. Try 'High Accuracy (Mixtral-8x7B)' or wait and retry.", None
         elif "403" in str(e):
-            return f"Error: Access denied for {llm_model}.
+            return f"Error: Access denied for {llm_model}. Check your HF token permissions or upgrade to a Pro account for larger models.", None
         return f"Error initializing QA chain: {str(e)}.", None
     except Exception as e:
         logger.error(f"Error initializing QA chain for {llm_model}: {str(e)}")
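The substring checks above ('"503" in str(e)') will also fire on any error text that happens to contain those digits. requests attaches the failed response to the exception, so a more precise version reads the status code directly; a sketch as a hypothetical helper (http_error_reply is not part of the app):

import requests

def http_error_reply(e: requests.exceptions.HTTPError, llm_model: str) -> str:
    """Map an HTTPError to a user-facing message using the real status code."""
    status = e.response.status_code if e.response is not None else None
    if status == 503:
        return (f"Error: Hugging Face API temporarily unavailable for {llm_model}. "
                "Try 'High Accuracy (Mixtral-8x7B)' or wait and retry.")
    if status == 403:
        return (f"Error: Access denied for {llm_model}. Check your HF token "
                "permissions or upgrade to a Pro account for larger models.")
    return f"Error initializing QA chain: {e}."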
@@ -218,9 +218,9 @@ def answer_question(question, llm_model, embedding_model, temperature, chunk_siz
     except requests.exceptions.HTTPError as e:
         logger.error(f"HTTP error answering question: {str(e)}")
         if "503" in str(e):
-            return f"Error: Hugging Face API temporarily unavailable for {llm_model}. Try '
+            return f"Error: Hugging Face API temporarily unavailable for {llm_model}. Try 'High Accuracy (Mixtral-8x7B)' or wait and retry.", chat_history
         elif "403" in str(e):
-            return f"Error: Access denied for {llm_model}.
+            return f"Error: Access denied for {llm_model}. Check your HF token permissions or upgrade to a Pro account for larger models.", chat_history
         return f"Error answering question: {str(e)}", chat_history
     except Exception as e:
         logger.error(f"Error answering question: {str(e)}")
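A 503 from the Inference API is usually transient (the model is cold-loading), so answer_question could retry briefly before surfacing the error to the chat. A sketch of that pattern with a hypothetical call_with_retry helper; the delays are arbitrary, and the chain invocation in the usage line assumes the chain object exposes invoke():

import time
import requests

def call_with_retry(call, retries=3, base_delay=2.0):
    """Retry a callable on transient 503s with exponential backoff."""
    for attempt in range(retries):
        try:
            return call()
        except requests.exceptions.HTTPError as e:
            status = e.response.status_code if e.response is not None else None
            if status != 503 or attempt == retries - 1:
                raise                              # not transient, or out of retries
            time.sleep(base_delay * 2 ** attempt)  # wait 2s, 4s, 8s, ...

# usage (hypothetical): call_with_retry(lambda: qa_chain.invoke({"question": question}))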
@@ -301,6 +301,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="DocTalk: Document Q&A Chatbot") as
             inputs=[llm_model, temperature],
             outputs=[status, chat_display]
         )
+        question里的
         question.submit(
             fn=answer_question,
             inputs=[question, llm_model, embedding_model, temperature, chunk_size, chunk_overlap],
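For context on this last hunk: question.submit is the Gradio event that fires when the user presses Enter in the textbox, wired to the same handler the app uses for answering. A self-contained sketch of that wiring (component names mirror the diff; the handler body is a placeholder, not the app's QA logic):

import gradio as gr

def answer_question(question, chat_history):
    # Placeholder: the real app routes the question through the QA chain.
    chat_history = (chat_history or []) + [(question, "(answer)")]
    return "", chat_history

with gr.Blocks() as demo:
    chat_display = gr.Chatbot()
    question = gr.Textbox(label="Ask a question")
    # Pressing Enter submits the question and refreshes the chat display.
    question.submit(fn=answer_question,
                    inputs=[question, chat_display],
                    outputs=[question, chat_display])

demo.launch()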