Update rag_engine.py
rag_engine.py CHANGED (+36 -11)
@@ -16,12 +16,6 @@ import gc
 # Force model to CPU for stability
 os.environ["CUDA_VISIBLE_DEVICES"] = ""
 
-# GCS Paths
-metadata_file_gcs = "metadata/metadata.jsonl"
-embeddings_file_gcs = "processed/embeddings/all_embeddings.npy"
-faiss_index_file_gcs = "processed/indices/faiss_index.faiss"
-text_chunks_file_gcs = "processed/chunks/text_chunks.txt"
-
 # Local Paths
 local_embeddings_file = "all_embeddings.npy"
 local_faiss_index_file = "faiss_index.faiss"
@@ -32,9 +26,16 @@ local_metadata_file = "metadata.jsonl"
 def setup_gcp_client():
     try:
         credentials = setup_gcp_auth()
+
+        # Get bucket name from secrets - required
+        try:
+            bucket_name_gcs = st.secrets["bucket_name_gcs"]
+        except KeyError:
+            print("❌ Error: GCS bucket name not found in secrets")
+            return None
+
         storage_client = storage.Client(credentials=credentials)
-
-        bucket = storage_client.bucket(bucket_name)
+        bucket = storage_client.bucket(bucket_name_gcs)
         print("✅ GCP client initialized successfully")
         return bucket
     except Exception as e:
@@ -64,10 +65,17 @@ def load_model():
     # Force model to CPU
     device = torch.device("cpu")
 
+    # Get embedding model path from secrets
+    try:
+        embedding_model_path = st.secrets["embedding_model_path"]
+    except KeyError:
+        print("❌ Error: Embedding model path not found in secrets")
+        return None, None
+
     # Load tokenizer and model
-    tokenizer = AutoTokenizer.from_pretrained(
+    tokenizer = AutoTokenizer.from_pretrained(embedding_model_path)
     model = AutoModel.from_pretrained(
-
+        embedding_model_path,
         torch_dtype=torch.float16
     )
 
@@ -121,6 +129,16 @@ def load_data_files():
         print("Failed to initialize required services")
         return None, None, None
 
+    # Get GCS paths from secrets - required
+    try:
+        metadata_file_gcs = st.secrets["metadata_file_gcs"]
+        embeddings_file_gcs = st.secrets["embeddings_file_gcs"]
+        faiss_index_file_gcs = st.secrets["faiss_index_file_gcs"]
+        text_chunks_file_gcs = st.secrets["text_chunks_file_gcs"]
+    except KeyError as e:
+        print(f"❌ Error: Required GCS path not found in secrets: {e}")
+        return None, None, None
+
     # Download necessary files
     success = True
     success &= download_file_from_gcs(bucket, faiss_index_file_gcs, local_faiss_index_file)
@@ -318,9 +336,16 @@ def answer_with_llm(query, context=None, word_limit=100):
     {query}
     """
 
+    # Get LLM model from secrets
+    try:
+        llm_model = st.secrets["llm_model"]
+    except KeyError:
+        print("❌ Error: LLM model not found in secrets")
+        return "I apologize, but I'm unable to answer at the moment."
+
     # Call OpenAI API
     response = openai.chat.completions.create(
-        model=
+        model=llm_model,
         messages=[
             {"role": "system", "content": system_message},
             {"role": "user", "content": user_message}