Spaces:
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -16,7 +16,6 @@ import chromadb
|
|
16 |
import tempfile
|
17 |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
18 |
import requests
|
19 |
-
from transformers import BitsAndBytesConfig
|
20 |
|
21 |
# Set up logging
|
22 |
logging.basicConfig(level=logging.INFO)
|
@@ -161,16 +160,13 @@ def initialize_qa_chain(llm_model, temperature):
|
|
161 |
return "Please process documents first.", None
|
162 |
|
163 |
try:
|
164 |
-
# Enable 4-bit quantization for all models to reduce memory usage
|
165 |
-
quantization_config = BitsAndBytesConfig(load_in_4bit=True)
|
166 |
llm = HuggingFaceEndpoint(
|
167 |
repo_id=LLM_MODELS[llm_model],
|
168 |
task="text-generation",
|
169 |
temperature=float(temperature),
|
170 |
max_new_tokens=512,
|
171 |
huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
|
172 |
-
timeout=30
|
173 |
-
model_kwargs={"quantization_config": quantization_config}
|
174 |
)
|
175 |
# Dynamically set k based on vector store size
|
176 |
collection = vector_store._collection
|
|
|
16 |
import tempfile
|
17 |
from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
|
18 |
import requests
|
|
|
19 |
|
20 |
# Set up logging
|
21 |
logging.basicConfig(level=logging.INFO)
|
|
|
160 |
return "Please process documents first.", None
|
161 |
|
162 |
try:
|
|
|
|
|
163 |
llm = HuggingFaceEndpoint(
|
164 |
repo_id=LLM_MODELS[llm_model],
|
165 |
task="text-generation",
|
166 |
temperature=float(temperature),
|
167 |
max_new_tokens=512,
|
168 |
huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
|
169 |
+
timeout=30
|
|
|
170 |
)
|
171 |
# Dynamically set k based on vector store size
|
172 |
collection = vector_store._collection
|