Spaces:

CwAnkit07
/

Question-textractor

Sleeping

File size: 704 Bytes

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

# Load the classifier from the Hugging Face Hub by model id rather than from a
# local checkpoint. This executes at import time, so a single pipeline object
# is created and reused by every request handled in this module.
# NOTE(review): the model id suggests an MRPC (paraphrase) fine-tune rather
# than a dedicated question detector — confirm that its scores are a
# meaningful signal for ranking candidate question lines.
question_extractor = pipeline("text-classification", model="textattack/bert-base-uncased-MRPC")

# FastAPI application object; the route below is registered on it.
app = FastAPI()

# Request body accepted by the /extract_question endpoint.
# (Documented with comments rather than a docstring so the generated request
# schema is left exactly as it was.)
class OCRText(BaseModel):
    # Multi-line text; the endpoint splits it into lines and scores each one.
    # Presumably OCR output, judging by the class name — confirm with callers.
    text: str

def _top_scored_lines(lines, score, limit=3, min_length=10):
    """Return up to *limit* candidate lines, highest score first.

    Lines are stripped of surrounding whitespace, and any line whose stripped
    length is not greater than *min_length* is discarded BEFORE ranking, so
    blank or very short lines can neither consume one of the *limit* slots
    nor be passed to the (expensive) scoring function.

    Args:
        lines: Iterable of raw text lines.
        score: Callable mapping a line to a sortable score.
        limit: Maximum number of lines to return.
        min_length: Lines must be strictly longer than this to qualify.

    Returns:
        A list of at most *limit* stripped lines ordered by descending score;
        ties keep their original relative order (``sorted`` is stable).
    """
    stripped = (line.strip() for line in lines)
    candidates = [line for line in stripped if len(line) > min_length]
    ranked = sorted(candidates, key=score, reverse=True)
    return ranked[:limit]


@app.post("/extract_question")
def extract_question(data: OCRText):
    """Pick the highest-scoring lines of the submitted text as the question.

    The text is split into lines, each sufficiently long line is scored with
    the module-level ``question_extractor`` pipeline, and the three best
    lines are joined with single spaces.

    Args:
        data: Request body carrying the text to analyse.

    Returns:
        A dict with the single key ``"extracted_question"``; its value is an
        empty string when no line is long enough to be considered.
    """

    def classifier_score(line):
        # NOTE(review): this is the score of the first prediction returned by
        # the pipeline, regardless of which label it carries — confirm that
        # this is the intended ranking signal.
        return question_extractor(line)[0]["score"]

    # splitlines() also handles "\r\n" endings, so no stray "\r" characters
    # leak into the candidates or the response.
    top_sentences = _top_scored_lines(data.text.splitlines(), classifier_score)

    return {"extracted_question": " ".join(top_sentences)}