logasanjeev committed on
Commit
8b40c0d
·
verified ·
1 Parent(s): 34f6f25

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1 -5
app.py CHANGED
@@ -16,7 +16,6 @@ import chromadb
16
  import tempfile
17
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
18
  import requests
19
- from transformers import BitsAndBytesConfig
20
 
21
  # Set up logging
22
  logging.basicConfig(level=logging.INFO)
@@ -161,16 +160,13 @@ def initialize_qa_chain(llm_model, temperature):
161
  return "Please process documents first.", None
162
 
163
  try:
164
- # Enable 4-bit quantization for all models to reduce memory usage
165
- quantization_config = BitsAndBytesConfig(load_in_4bit=True)
166
  llm = HuggingFaceEndpoint(
167
  repo_id=LLM_MODELS[llm_model],
168
  task="text-generation",
169
  temperature=float(temperature),
170
  max_new_tokens=512,
171
  huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
172
- timeout=30,
173
- model_kwargs={"quantization_config": quantization_config}
174
  )
175
  # Dynamically set k based on vector store size
176
  collection = vector_store._collection
 
16
  import tempfile
17
  from tenacity import retry, stop_after_attempt, wait_exponential, retry_if_exception_type
18
  import requests
 
19
 
20
  # Set up logging
21
  logging.basicConfig(level=logging.INFO)
 
160
  return "Please process documents first.", None
161
 
162
  try:
 
 
163
  llm = HuggingFaceEndpoint(
164
  repo_id=LLM_MODELS[llm_model],
165
  task="text-generation",
166
  temperature=float(temperature),
167
  max_new_tokens=512,
168
  huggingfacehub_api_token=os.environ["HUGGINGFACEHUB_API_TOKEN"],
169
+ timeout=30
 
170
  )
171
  # Dynamically set k based on vector store size
172
  collection = vector_store._collection