test / database /query_processor.py
Quintino Fernandes
Different query(Original SQL one)
e8db2ab
raw
history blame
2.53 kB
import datetime
from typing import List, Dict, Any, Optional
import numpy as np
from models.LexRank import degree_centrality_scores
class QueryProcessor:
    """Run a semantic article search for a query and summarize the results.

    The processor wires together four collaborators supplied by the caller:
    an embedding model, a summarization model, an NLP model (entity
    extraction and sentence splitting) and a database service that performs
    the actual search.
    """

    # Format expected for the ``start_date`` / ``end_date`` string arguments.
    DATE_FORMAT = "%Y-%m-%d"

    # Number of top-ranked sentences handed to the summarization model.
    MAX_KEY_SENTENCES = 10

    def __init__(self, embedding_model, summarization_model, nlp_model, db_service):
        """Store the collaborators used by :meth:`process`.

        Args:
            embedding_model: Object exposing ``encode(...)``; called with the
                query string and with a list of sentences.
            summarization_model: Object exposing ``summarize(text)``.
            nlp_model: Callable returning an object with an ``ents``
                iterable (each item has ``.text``); also exposes
                ``tokenize_sentences(text)``.
            db_service: Object exposing an awaitable
                ``semantic_search(...)`` accepting the keyword arguments
                ``query_embedding``, ``start_date``, ``end_date``, ``topic``
                and ``entities``.
        """
        self.embedding_model = embedding_model
        self.summarization_model = summarization_model
        self.nlp_model = nlp_model
        self.db_service = db_service

    @classmethod
    def _parse_date(cls, value: Optional[str]) -> Optional["datetime.datetime"]:
        """Convert a ``YYYY-MM-DD`` string to a datetime; falsy input gives None."""
        if not value:
            return None
        # The file imports the *module* ``datetime``, so ``strptime`` has to be
        # reached through the ``datetime.datetime`` class.
        return datetime.datetime.strptime(value, cls.DATE_FORMAT)

    async def process(
        self,
        query: str,
        topic: Optional[str] = None,
        start_date: Optional[str] = None,
        end_date: Optional[str] = None
    ) -> Dict[str, Any]:
        """Search for articles matching ``query`` and summarize their content.

        Args:
            query: Free-text user query.
            topic: Optional topic filter, forwarded unchanged to the search.
            start_date: Optional lower date bound as ``YYYY-MM-DD``.
            end_date: Optional upper date bound as ``YYYY-MM-DD``.

        Returns:
            ``{"error": ...}`` when the search returns nothing, otherwise
            ``{"summary": ..., "articles": ...}`` where ``articles`` is the
            search result as returned by the database service.

        Raises:
            ValueError: If a supplied date string does not match
                ``DATE_FORMAT``.
        """
        # Convert string dates to datetime objects
        start_dt = self._parse_date(start_date)
        end_dt = self._parse_date(end_date)

        # Get query embedding
        query_embedding = self.embedding_model.encode(query).tolist()

        # Get entities from the query (lower-cased entity texts)
        doc = self.nlp_model(query)
        entities = [ent.text.lower() for ent in doc.ents]

        # Semantic search with entities
        articles = await self.db_service.semantic_search(
            query_embedding=query_embedding,
            start_date=start_dt,
            end_date=end_dt,
            topic=topic,
            entities=entities,
        )

        if not articles:
            return {"error": "No articles found matching the criteria"}

        # Split every article body into sentences.
        sentences = []
        for article in articles:
            sentences.extend(self.nlp_model.tokenize_sentences(article["content"]))

        if not sentences:
            return {
                "summary": "No content available for summarization",
                "articles": articles
            }

        # Rank sentences by centrality over the pairwise inner-product matrix
        # of their embeddings, then summarize only the highest-ranked ones.
        embeddings = self.embedding_model.encode(sentences)
        similarity_matrix = np.inner(embeddings, embeddings)
        centrality_scores = degree_centrality_scores(similarity_matrix, threshold=None)
        # Negating the scores makes argsort yield the highest scores first.
        top_indices = np.argsort(-centrality_scores)[:self.MAX_KEY_SENTENCES]
        key_sentences = [sentences[idx].strip() for idx in top_indices]
        summary = self.summarization_model.summarize(' '.join(key_sentences))

        return {
            "summary": summary,
            "articles": articles
        }