import os

# HF_HOME must be set BEFORE `transformers` is imported: the library resolves
# its cache location at import time, so assigning it afterwards has no effect.
os.environ["HF_HOME"] = "/app/cache"
os.makedirs("/app/cache", exist_ok=True)

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

# Load the classifier once at startup; it is reused across requests.
# NOTE(review): this MRPC checkpoint is a paraphrase-detection model — its
# per-line score is a rough proxy for "question-ness"; confirm it is the
# intended ranking signal.
question_extractor = pipeline(
    "text-classification",
    model="textattack/bert-base-uncased-MRPC",
)

app = FastAPI()


class OCRText(BaseModel):
    # Raw OCR output; lines are separated by "\n".
    text: str


@app.post("/extract_question")
def extract_question(data: OCRText):
    """Rank OCR'd lines with the classifier and return the best ones joined.

    The top 3 highest-scoring lines longer than 10 characters are joined
    with spaces into a single extracted-question string.

    Returns:
        dict: {"extracted_question": str} — empty string when no usable lines.
    """
    # Drop blank/whitespace-only lines before scoring; they carry no signal
    # and should not be sent through the model.
    lines = [line for line in data.text.split("\n") if line.strip()]
    if not lines:
        return {"extracted_question": ""}

    # Score every line in ONE batched pipeline call instead of one model
    # invocation per line (much faster, same results).
    results = question_extractor(lines)
    scored = zip(lines, (r["score"] for r in results))
    ranked_lines = [
        line
        for line, _ in sorted(scored, key=lambda pair: pair[1], reverse=True)
    ]

    # Keep the original selection rule: take the top 3 ranked lines, then
    # discard any that are too short to be a meaningful sentence.
    top_sentences = [line for line in ranked_lines[:3] if len(line) > 10]
    return {"extracted_question": " ".join(top_sentences)}