Update app.py
app.py CHANGED @@ -1,554 +1,555 @@
[Previous version of app.py (554 lines) shown as removed. In this diff view the old file's content beyond its import-and-setup section, which matched the new version below, was rendered as blank lines and stray fragments; the complete replacement file follows.]
from fastapi import FastAPI, HTTPException, Query
from pydantic import BaseModel
import os
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.document_loaders import CSVLoader
from langchain_openai import ChatOpenAI
from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain_google_genai import ChatGoogleGenerativeAI
from dotenv import load_dotenv
from fastapi.responses import PlainTextResponse
from fastapi.middleware.cors import CORSMiddleware
import asyncio
import json
import re

# Load environment variables
load_dotenv()
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
key = os.getenv("GOOGLE_API_KEY")

# Define paths
DB_FAISS_PATH = "bgi/db_faiss"
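
# Note: each os.environ[...] assignment above raises a TypeError if the
# corresponding key is missing from the .env file, because os.getenv()
# returns None. A minimal fail-fast sketch (hypothetical, not part of this
# commit) that could run right after load_dotenv():
#
#     for var in ("GOOGLE_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY"):
#         if os.getenv(var) is None:
#             raise RuntimeError(f"Missing required environment variable: {var}")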
# Initialize FastAPI app
app = FastAPI()
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],  # Add the React app's URL
    allow_credentials=True,
    allow_methods=["*"],  # Allow all HTTP methods
    allow_headers=["*"],  # Allow all headers
)

# Initialize variables
embeddings = None
db = None

# Load or create FAISS vector store
@app.on_event("startup")
def load_vector_store():
    global embeddings, db
    if os.path.exists(DB_FAISS_PATH):
        print("Loading existing FAISS vector store.")
        embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en', model_kwargs={'device': 'cpu'})
        db = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)
        print("Vector store loaded.")
    else:
        print("Creating new FAISS vector store.")
        loader = CSVLoader(file_path="Final_Research_Dataset_2.csv", encoding="utf-8", csv_args={'delimiter': ','})
        data = loader.load()
        embeddings = HuggingFaceEmbeddings(model_name='BAAI/bge-small-en', model_kwargs={'device': 'cpu'})
        db = FAISS.from_documents(data, embeddings)
        db.save_local(DB_FAISS_PATH)

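# Note: @app.on_event("startup") still works but is deprecated in recent
# FastAPI releases in favor of lifespan handlers. A rough equivalent sketch
# (assuming the same loading logic as above) would be:
#
#     from contextlib import asynccontextmanager
#
#     @asynccontextmanager
#     async def lifespan(app: FastAPI):
#         load_vector_store()  # reuse the loader above
#         yield                # app serves requests between startup and shutdown
#
#     app = FastAPI(lifespan=lifespan)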
# Define request and response models
from typing import List, Optional

class FilterCriteria(BaseModel):
    impactFactor: float
    firstDecisionTime: int
    publisher: Optional[str]
    llmModel: str

class QueryRequest(BaseModel):
    abstract: str
    criteria: FilterCriteria

class Journal(BaseModel):
    id: int
    Name: str
    JIF: float
    Category: str
    Keywords: str
    Publisher: str
    Decision_Time: int

# Define the QueryResponse model with a list of journals
class QueryResponse(BaseModel):
    result: List[Journal]

@app.get("/", response_class=PlainTextResponse)
def read_root():
    return "Welcome to the Journal Recommender API!"
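
# Given the models above, a /query request carries an abstract plus a
# FilterCriteria object. An illustrative client call (assumes a local server
# on port 8000 and the `requests` package; all values are made up):
#
#     import requests
#
#     payload = {
#         "abstract": "We study transformer-based methods for journal recommendation...",
#         "criteria": {
#             "impactFactor": 3.0,
#             "firstDecisionTime": 30,
#             "publisher": "Elsevier",
#             "llmModel": "groq",
#         },
#     }
#     resp = requests.post("http://127.0.0.1:8000/query", json=payload)
#     print(resp.json())  # {"result": [{"id": 1, "Name": "...", "JIF": ..., ...}]}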
# List the available model choices
@app.get("/models")
def get_models():
    return {"available_models": ["openai", "groq", "mixtral", "gemini-pro", "faiss"]}

def fix_incomplete_json(raw_response):
    """
    Fixes incomplete JSON by adding missing braces or brackets.
    Returns the parsed JSON object, or None if the response cannot be fixed.
    """
    # Ensure the response ends with a closing bracket if it's a list
    if raw_response.endswith("},"):
        raw_response = raw_response[:-1]  # Remove the last comma
    if raw_response.count("{") > raw_response.count("}"):
        raw_response += "}"
    if raw_response.count("[") > raw_response.count("]"):
        raw_response += "]"

    # Try to load the fixed response
    try:
        json_response = json.loads(raw_response)
        return json_response
    except json.JSONDecodeError as e:
        print(f"Error fixing JSON: {e}")
        return None
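
# Illustrative check of the helper above: a response truncated after a
# trailing comma is repaired by stripping the comma and closing the still-open
# bracket.
#
#     >>> fix_incomplete_json('[{"Journal Name": "IEEE Access", "JIF": 3.4},')
#     [{'Journal Name': 'IEEE Access', 'JIF': 3.4}]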

# Query endpoint
@app.post("/query", response_model=QueryResponse)
async def query(request: QueryRequest):
    global db
    if not db:
        raise HTTPException(status_code=500, detail="Vector store not loaded.")

    query_text = request.abstract
    model_choice = request.criteria.llmModel
    impact_factor = request.criteria.impactFactor
    preferred_publisher = request.criteria.publisher
    # Perform the query
    docs = db.similarity_search(query_text, k=5)
    context = "\n".join([doc.page_content for doc in docs])

    messages = [
        {
            "role": "system",
            "content": (
                "Give a strict comma-separated list of exactly 15 keywords from the following text. "
                "Do not include any bullet points, introductory text, or ending text. "
                "No introductory or ending text, strictly. "  # Reinforcement; can be removed if results deteriorate
                "Do not say anything like 'Here are the keywords.' "
                "Only return the keywords, strictly comma-separated, without any additional words."
            ),
        },
        {"role": "user", "content": query_text},
    ]
    llm = ChatGroq(model="llama3-8b-8192", temperature=0)
    ai_msg = llm.invoke(messages)
    keywords = ai_msg.content.split("keywords extracted from the text:\n")[-1].strip()
    print("Keywords:", keywords)
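    # The split above is a guard against the model ignoring its instructions
    # and prefixing something like "Here are the 15 keywords extracted from
    # the text:"; with a compliant response the split leaves the string
    # unchanged. Illustratively:
    #
    #     >>> raw = "Here are the 15 keywords extracted from the text:\nfaiss, embeddings, retrieval"
    #     >>> raw.split("keywords extracted from the text:\n")[-1].strip()
    #     'faiss, embeddings, retrieval'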
    if model_choice == "openai":
        retriever = db.as_retriever()

        # Set up system prompt
        system_prompt = (
            f"You are a specialized journal recommender that compares all journals in the database against the given research-paper keywords and returns results based on JIF and publisher. "
            f"From the provided context, recommend all journals that are suitable for a research paper with the keywords: {keywords}. "
            f"Ensure that you include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, and only journals from the publishers in this list: {preferred_publisher}. Report each JIF exactly as it appears in the context database. "
            f"Make sure to include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
            f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
            f"Ensure no introductory or ending text is included. Give at most 30 results. "
            "Context: {context}"
        )

        prompt = ChatPromptTemplate.from_messages(
            [("system", system_prompt), ("user", "{input}")]
        )

        async def create_chain():
            client = ChatOpenAI(model="gpt-4o")
            return create_stuff_documents_chain(client, prompt)

        # Create the question-answer chain using async function
        question_answer_chain = await create_chain()
        rag_chain = create_retrieval_chain(retriever, question_answer_chain)

        # Invoke the RAG chain
        answer = rag_chain.invoke(
            {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
        )

        # Inspect the result structure
        result = []
        raw_response = answer['answer']
        cleaned_response = raw_response.strip('```json\n').strip('```').strip()

        # Parse the cleaned JSON response
        try:
            json_response = json.loads(cleaned_response)

            # Process the JSON data and create Journal objects
            for i, journal in enumerate(json_response):
                try:
                    journal_name = journal.get('Journal Name')
                    publisher = journal.get('Publisher')
                    jif = float(journal.get('JIF', 0))  # Ensure valid float
                    decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available

                    # Only include if JIF is greater than the minimum threshold
                    if jif > impact_factor:
                        result.append(
                            Journal(
                                id=i + 1,
                                Name=journal_name,
                                Publisher=publisher,
                                JIF=jif,
                                Category="",  # Set to empty if not available
                                Keywords=keywords,  # Use provided keywords
                                Decision_Time=decision_time,
                            )
                        )
                except Exception as e:
                    print(f"Error processing journal data: {e}")

        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response: {e}")
            result = []

        # Return the result wrapped in a QueryResponse
        return QueryResponse(result=result)
    elif model_choice == "groq":
        retriever = db.as_retriever()

        # Set up system prompt
        system_prompt = (
            f"You are a specialized journal recommender that compares all journals in the database against the given research-paper keywords and returns results based on JIF and publisher. "
            f"From the provided context, recommend all journals that are suitable for a research paper with the keywords: {keywords}. "
            f"Ensure that you include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, and only journals from the publishers in this list: {preferred_publisher}. Report each JIF exactly as it appears in the context database. "
            f"Make sure to include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
            f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
            f"Ensure no introductory or ending text is included. Don't give more than 10 results. "
            "Context: {context}"
        )

        prompt = ChatPromptTemplate.from_messages(
            [("system", system_prompt), ("user", "{input}")]
        )

        # Create the question-answer chain
        async def create_chain():
            client = ChatGroq(model="llama-3.2-3b-preview", temperature=0)
            return create_stuff_documents_chain(client, prompt)

        # Create the question-answer chain using async function
        question_answer_chain = await create_chain()
        rag_chain = create_retrieval_chain(retriever, question_answer_chain)

        # Invoke the RAG chain
        answer = rag_chain.invoke(
            {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
        )

        # Inspect the result structure
        result = []
        raw_response = answer['answer']
        cleaned_response = raw_response.strip('```json\n').strip('```').strip()

        # Parse the cleaned JSON response
        try:
            print("Cleaned Response:", cleaned_response)  # For debugging
            json_response = json.loads(cleaned_response)

            # Process the JSON data and create Journal objects
            for i, journal in enumerate(json_response["journals"]):  # Accessing the 'journals' key
                print("Journal entry:", journal)  # For debugging
                try:
                    if isinstance(journal, dict):  # Ensure journal is a dictionary
                        journal_name = journal.get('Journal Name')
                        publisher = journal.get('Publisher')
                        jif = float(journal.get('JIF', 0))  # Ensure valid float
                        decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available

                        # Only include if JIF is greater than the minimum threshold
                        if jif > impact_factor:
                            result.append(
                                Journal(
                                    id=i + 1,
                                    Name=journal_name,
                                    Publisher=publisher,
                                    JIF=jif,
                                    Category="",  # Set to empty if not available
                                    Keywords=keywords,  # Use provided keywords
                                    Decision_Time=decision_time,
                                )
                            )
                    else:
                        print(f"Skipping invalid journal entry: {journal}")
                except Exception as e:
                    print(f"Error processing journal data: {e}")

        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response: {e}")
            result = []

        # Return the result wrapped in a QueryResponse
        return QueryResponse(result=result)

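    # Note the shape mismatch across branches: the groq branch above indexes
    # json_response["journals"], while the openai and mixtral branches iterate
    # a bare top-level list, so each branch silently depends on its model
    # emitting one exact shape. If that proves brittle, a tolerant accessor is
    # one option (a sketch, not in this commit):
    #
    #     journals = (json_response.get("journals", [])
    #                 if isinstance(json_response, dict) else json_response)
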
    elif model_choice == "mixtral":
        retriever = db.as_retriever()

        # Set up system prompt
        system_prompt = (
            f"You are a specialized journal recommender that compares all journals in the database against the given research-paper keywords and returns results based on JIF and publisher. "
            f"From the provided context, recommend all journals that are suitable for a research paper with the keywords: {keywords}. "
            f"Ensure that you include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, and only journals from the publishers in this list: {preferred_publisher}. Report each JIF exactly as it appears in the context database. "
            f"Make sure to include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
            f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
            f"Ensure no introductory or ending text is included. Don't give more than 10 results. "
            "Context: {context}"
        )

        prompt = ChatPromptTemplate.from_messages(
            [("system", system_prompt), ("user", "{input}")]
        )

        # Create the question-answer chain
        async def create_chain():
            client = ChatGroq(model="mixtral-8x7b-32768", temperature=0)
            return create_stuff_documents_chain(client, prompt)

        # Create the question-answer chain using async function
        question_answer_chain = await create_chain()
        rag_chain = create_retrieval_chain(retriever, question_answer_chain)

        # Invoke the RAG chain
        answer = rag_chain.invoke(
            {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
        )

        # Inspect the result structure
        result = []
        raw_response = answer['answer']
        cleaned_response = raw_response.strip('```json\n').strip('```').strip()

        # Parse the cleaned JSON response
        try:
            print("Cleaned Response:", cleaned_response)  # For debugging
            json_response = json.loads(cleaned_response)

            # Process the JSON data and create Journal objects
            for i, journal in enumerate(json_response):  # Iterate directly over the list
                print("Journal entry:", journal)  # For debugging
                try:
                    if isinstance(journal, dict):  # Ensure journal is a dictionary
                        journal_name = journal.get('Journal Name')
                        publisher = journal.get('Publisher')
                        jif = float(journal.get('JIF', 0))  # Ensure valid float
                        decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available

                        # Only include if JIF is greater than the minimum threshold
                        if jif > impact_factor:
                            result.append(
                                Journal(
                                    id=i + 1,
                                    Name=journal_name,
                                    Publisher=publisher,
                                    JIF=jif,
                                    Category="",  # Set to empty if not available
                                    Keywords=keywords,  # Use provided keywords
                                    Decision_Time=decision_time,
                                )
                            )
                    else:
                        print(f"Skipping invalid journal entry: {journal}")
                except Exception as e:
                    print(f"Error processing journal data: {e}")

        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response: {e}")
            result = []

        # Return the result wrapped in a QueryResponse
        return QueryResponse(result=result)

    elif model_choice == "gemini-pro":
        print("Using Gemini-Pro model")
        retriever = db.as_retriever()

        # Set up system prompt
        system_prompt = (
            f"You are a specialized journal recommender that compares all journals in the database against the given research-paper keywords and returns results based on JIF and publisher. "
            f"From the provided context, recommend all journals that are suitable for a research paper with the keywords: {keywords}. "
            f"Ensure that you include **every** journal with a Journal Impact Factor (JIF) strictly greater than {impact_factor}, and only journals from the publishers in this list: {preferred_publisher}. Report each JIF exactly as it appears in the context database. "
            f"Make sure to include both exact matches and related journals, and prioritize including **all relevant high-JIF journals without repetition**. "
            f"Present the results in JSON format with the following fields: Journal Name, Publisher, JIF, Decision Time. "
            f"Ensure no introductory or ending text is included. "
            "Context: {context}"
        )

        prompt = ChatPromptTemplate.from_messages(
            [("system", system_prompt), ("user", "{input}")]
        )

        async def create_chain():
            client = ChatGoogleGenerativeAI(
                model="gemini-pro",
                google_api_key=key,
                convert_system_message_to_human=True,
            )
            return create_stuff_documents_chain(client, prompt)

        # Create the question-answer chain using async function
        question_answer_chain = await create_chain()
        rag_chain = create_retrieval_chain(retriever, question_answer_chain)

        # Invoke the RAG chain
        answer = rag_chain.invoke(
            {"input": f"Keywords: {keywords}, Minimum JIF: {impact_factor}, Publisher list: {preferred_publisher}"}
        )

        # Inspect the result structure
        result = []
        raw_response = answer['answer']
        cleaned_response = raw_response.strip('```json\n').strip('```').strip()

        # Parse the cleaned JSON response
        try:
            json_response = json.loads(cleaned_response)

            # Process the JSON data and create Journal objects
            for i, journal in enumerate(json_response):
                try:
                    journal_name = journal.get('Journal Name')
                    publisher = journal.get('Publisher')
                    jif = float(journal.get('JIF', 0))  # Ensure valid float
                    decision_time = journal.get('Decision Time', 0)  # Default to 0 if not available

                    # Only include if JIF is greater than the minimum threshold
                    if jif > impact_factor:
                        result.append(
                            Journal(
                                id=i + 1,
                                Name=journal_name,
                                Publisher=publisher,
                                JIF=jif,
                                Category="",  # Set to empty if not available
                                Keywords=keywords,  # Use provided keywords
                                Decision_Time=decision_time,
                            )
                        )
                except Exception as e:
                    print(f"Error processing journal data: {e}")

        except json.JSONDecodeError as e:
            print(f"Error parsing JSON response: {e}")
            result = []

        # Return the result wrapped in a QueryResponse
        return QueryResponse(result=result)
    elif model_choice == "faiss":
        embeddings = HuggingFaceEmbeddings(
            model_name="BAAI/bge-small-en", model_kwargs={"device": "cpu"}
        )
        jif = impact_factor  # Minimum JIF value for filtering
        publisher = preferred_publisher  # Preferred publisher list or "no preference"

        # Load the FAISS index from local storage
        db1 = FAISS.load_local(DB_FAISS_PATH, embeddings, allow_dangerous_deserialization=True)

        # Embed the query
        query_embedding = embeddings.embed_query(keywords)

        # Perform similarity search with FAISS (retrieve top 20 results)
        results = db1.similarity_search_by_vector(query_embedding, k=20)

        # Prepare the context for processing results
        context = "\n\n".join(doc.page_content for doc in results)

        # Apply filters for JIF and publisher
        min_jif = jif
        valid_publishers = publisher if publisher != ["no preference"] else None  # Note: currently unused; only JIF is filtered below

        # Split the output based on each entry starting with 'Name: '
        entries = re.split(r"\n(?=Name:)", context.strip())

        # Initialize an empty list to hold the Journal models
        journal_list = []

        # Process each entry
        for entry in entries:
            # Use regex to capture different fields
            name = re.search(r"Name: (.+)", entry)
            jif_match = re.search(r"JIF: (.+)", entry)
            category = re.search(r"Category: (.+)", entry)
            keywords_match = re.search(r"Keywords: (.+)", entry)
            publisher_match = re.search(r"Publisher: (.+)", entry)
            first_decision_match = re.search(r"Decsion Time: (.+)", entry)  # Field name spelled as in the dataset

            if jif_match:
                # Extract values from regex matches
                name_value = name.group(1).strip()
                jif_value = float(jif_match.group(1).strip())
                category_value = category.group(1).strip()
                keywords_value = keywords_match.group(1).strip()
                publisher_value = publisher_match.group(1).strip()
                decision_time = first_decision_match.group(1).strip()
                # Filter based on JIF (publisher preference is not applied here)
                if jif_value >= min_jif:
                    # Create the Journal model instance
                    journal = Journal(
                        id=len(journal_list) + 1,  # Incrementing ID for each journal
                        Name=name_value,
                        JIF=jif_value,
                        Category=category_value,
                        Keywords=keywords_value,
                        Publisher=publisher_value,
                        Decision_Time=decision_time,
                    )

                    # Add the journal to the list
                    journal_list.append(journal)

        # Return the list of journals as a response
        return {"result": [journal.dict() for journal in journal_list]}
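        # The regex parsing above assumes each retrieved document serializes one
        # CSV row as "Field: value" lines (CSVLoader's rendering), with
        # "Decsion Time" matched as spelled, presumably mirroring the dataset's
        # column header. An illustrative record (made-up values):
        #
        #     Name: Journal of Example Research
        #     JIF: 4.2
        #     Category: Computer Science
        #     Keywords: machine learning, information retrieval
        #     Publisher: Springer
        #     Decsion Time: 45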
    else:
        raise HTTPException(status_code=400, detail="Invalid model choice.")

# Run the app with Uvicorn
# Command: uvicorn app:app --reload