"""FastAPI service combining spaCy keyword extraction, MongoDB lookup and QA.

A prompt posted to ``/process/`` is tagged with spaCy, its nouns and proper
nouns are matched against product names in MongoDB, and a question-answering
pipeline is then run over the matched product descriptions plus a static
company description.
"""

import os
import re
import subprocess
import sys
from typing import Any, Dict, List

import spacy
from fastapi import FastAPI
from pydantic import BaseModel
from pymongo import MongoClient
from spacy.util import is_package
from transformers import pipeline

# FastAPI app setup
app = FastAPI()

# ==========================
# MongoDB Connection Setup
# ==========================
# SECURITY(review): the fallback URI below embeds a username and password in
# source control. It is kept only so existing deployments keep working. Rotate
# the password, supply the URI through the MONGODB_URI environment variable,
# and then delete this fallback.
_DEFAULT_MONGODB_URI = "mongodb+srv://clician:7iA2Qd6Uc89a52fd@hutterdev.h88gv.mongodb.net/?retryWrites=true&w=majority&appName=Hutterdev"
connection_string = os.environ.get("MONGODB_URI", _DEFAULT_MONGODB_URI)
client = MongoClient(connection_string)
db = client["test"]  # Replace with your database name
products_collection = db["products"]  # Replace with your collection name

# ==========================
# Transformers Pipeline Setup
# ==========================
# Load the Question-Answering pipeline once at import time so every request
# reuses the same model instance.
qa_pipeline = pipeline(
    "question-answering",
    model="bert-large-uncased-whole-word-masking-finetuned-squad",
)

# ==========================
# Static Context Message
# ==========================
context_msg = (
    "Hutter Products GmbH provides a wide array of services to help businesses create high-quality, sustainable products. "
    "Their offerings include comprehensive product design, ensuring items are both visually appealing and functional, and product consulting, "
    "which provides expert advice on features, materials, and design elements. They also offer sustainability consulting to integrate eco-friendly practices, "
    "such as using recycled materials and Ocean Bound Plastic. Additionally, they manage customized production to ensure products meet the highest standards "
    "and offer product animation services, creating realistic rendered images and animations to enhance online engagement. These services collectively enable "
    "businesses to develop products that are sustainable, market-responsive, and aligned with their brand identity."
)

# ==========================
# spaCy Model Setup
# ==========================
# Prefer the model directory bundled with the deployment; if it is missing,
# fall back to an installed ``en_core_web_sm`` package. If neither can be
# loaded, the OSError from the fallback propagates and the app fails to start.
spacy_model_path = "/home/user/app/en_core_web_sm-3.8.0"
try:
    nlp = spacy.load(spacy_model_path)
except OSError:
    nlp = spacy.load("en_core_web_sm")

# Part-of-speech tags that count as keywords.
_KEYWORD_POS = ("NOUN", "PROPN")


# ==========================
# Pydantic Models
# ==========================
class PromptRequest(BaseModel):
    """Request body for ``/process/``: the raw user prompt."""

    input_text: str


class CombinedResponse(BaseModel):
    """Response body for ``/process/``: keywords, QA result and products."""

    ner: Dict[str, Any]
    qa: Dict[str, Any]
    products_matched: List[Dict[str, Any]]


# ==========================
# Helper Functions
# ==========================
def extract_keywords(text: str) -> List[str]:
    """Return the distinct nouns and proper nouns found in *text*.

    Keywords keep their original casing and are returned in order of first
    appearance, so the result is deterministic for a given input.
    """
    doc = nlp(text)
    nouns = (token.text for token in doc if token.pos_ in _KEYWORD_POS)
    # dict.fromkeys de-duplicates while preserving first-seen order.
    return list(dict.fromkeys(nouns))


def search_products_by_keywords(keywords: List[str]) -> List[Dict[str, Any]]:
    """Return products whose name contains any of *keywords* (case-insensitive).

    Each keyword is matched literally: regex metacharacters are escaped so a
    token such as ``C++`` cannot produce an invalid or unintended pattern.
    An empty keyword list yields an empty result without querying MongoDB,
    because ``$or`` requires a non-empty array.
    """
    # Drop blank keywords: an empty pattern would match every product.
    usable = [keyword for keyword in keywords if keyword and keyword.strip()]
    if not usable:
        return []

    name_filters = [
        {"name": {"$regex": re.escape(keyword), "$options": "i"}}
        for keyword in usable
    ]
    cursor = products_collection.find({"$or": name_filters})
    return [
        {
            "id": str(product.get("_id", "")),
            "name": product.get("name", ""),
            "description": product.get("description", ""),
            "skuNumber": product.get("skuNumber", ""),
            "baseModel": product.get("baseModel", ""),
        }
        for product in cursor
    ]


def get_combined_context(products: List[Dict]) -> str:
    """Return the non-empty product descriptions followed by the static context.

    Parts are separated by single spaces. When no product has a description
    the result is just the static context message.
    """
    descriptions = [p["description"] for p in products if p.get("description")]
    return " ".join([*descriptions, context_msg])


# ==========================
# FastAPI Endpoints
# ==========================
@app.get("/")
async def root():
    """Return a static welcome message."""
    return {"message": "Welcome to the NER and QA API!"}


@app.post("/process/", response_model=CombinedResponse)
async def process_prompt(request: PromptRequest):
    """Extract keywords from the prompt, match products and answer the prompt.

    NOTE(review): the MongoDB query and model inference below are blocking
    calls inside an ``async def`` handler; consider declaring the handler
    with plain ``def`` or offloading the work - confirm against deployment.
    """
    input_text = request.input_text

    # Step 1: Extract keywords (nouns and proper nouns) using spaCy
    keywords = extract_keywords(input_text)
    ner_response = {"extracted_keywords": keywords}

    # Step 2: Search MongoDB for matching products
    products = search_products_by_keywords(keywords)

    # Step 3: Generate Combined Context
    combined_context = get_combined_context(products)

    # Step 4: Use Q&A Model
    if combined_context.strip():
        qa_input = {"question": input_text, "context": combined_context}
        qa_output = qa_pipeline(qa_input)
        qa_response = {
            "question": input_text,
            "answer": qa_output["answer"],
            "score": qa_output["score"],
        }
    else:
        # Defensive branch: only reached if the combined context is blank.
        qa_response = {
            "question": input_text,
            "answer": "No relevant context available.",
            "score": 0.0,
        }

    # Step 5: Return Combined Response
    return {
        "ner": ner_response,
        "qa": qa_response,
        "products_matched": products,
    }