Spaces:
Sleeping
Sleeping
from fastapi import FastAPI | |
from pydantic import BaseModel | |
from transformers import pipeline | |
# Load the model from the Hugging Face Hub instead of a local copy
# Text-classification pipeline used to score individual lines of text.
# The model weights are resolved by name, not read from a local path.
question_extractor = pipeline(
    task="text-classification",
    model="textattack/bert-base-uncased-MRPC",
)

# ASGI application object.
app = FastAPI()
class OCRText(BaseModel):
    """Input model with a single string field, ``text``.

    ``extract_question`` reads ``text`` and splits it into lines.
    """

    # Raw text to analyse; presumably OCR output, going by the class name.
    text: str
def extract_question(data: OCRText):
    """Select the best-scoring lines of ``data.text`` and join them.

    The text is split into lines, lines of 10 characters or fewer are
    discarded, and the remaining lines are ranked by the ``score`` of the
    first result that ``question_extractor`` returns for each line. The
    three highest-ranked lines are joined with single spaces.

    Args:
        data: Payload whose ``text`` attribute holds the text to analyse.

    Returns:
        A dict ``{"extracted_question": <str>}``. The string is empty when
        no line is longer than 10 characters.
    """
    min_length = 10  # lines must be strictly longer than this to qualify
    top_n = 3  # number of lines kept in the result

    # Filter *before* ranking: otherwise blank or very short lines can take
    # one of the top slots and be thrown away afterwards, leaving fewer than
    # ``top_n`` lines (or none). It also avoids running the model on lines
    # that can never be part of the answer.
    # ``splitlines`` is used so that CRLF input does not leave a stray "\r"
    # at the end of every line.
    candidates = [
        line for line in data.text.splitlines() if len(line) > min_length
    ]

    # NOTE(review): the score is taken from the first result regardless of
    # its label — confirm that this is the intended ranking signal.
    def _score(line):
        return question_extractor(line)[0]["score"]

    # ``sorted`` is stable, so equally scored lines keep their input order.
    ranked = sorted(candidates, key=_score, reverse=True)

    return {"extracted_question": " ".join(ranked[:top_n])}