import os

# Step 1: Set the correct cache directory (only HF_HOME).
# This must be set before transformers is imported so the Hugging Face
# libraries resolve their cache paths under /app/cache.
os.environ["HF_HOME"] = "/app/cache"

# Step 2: Ensure the cache directory exists
os.makedirs("/app/cache", exist_ok=True)

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

# Step 3: Load the model from Hugging Face
question_extractor = pipeline("text-classification", model="textattack/bert-base-uncased-MRPC")
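
# Note on the pipeline output (for the lookup used in the endpoint below):
# a text-classification pipeline returns a list of dicts such as
# [{"label": ..., "score": ...}], so result[0]["score"] reads the top score.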
app = FastAPI()


class OCRText(BaseModel):
    text: str


@app.post("/extract_question")
def extract_question(data: OCRText):
    text = data.text
    lines = text.split("\n")

    # Use the AI model to rank lines by classification score, highest first
    ranked_lines = sorted(lines, key=lambda line: question_extractor(line)[0]["score"], reverse=True)

    # Keep the top three ranked lines that are long enough to be meaningful
    top_sentences = [line for line in ranked_lines[:3] if len(line) > 10]
    question_text = " ".join(top_sentences)

    return {"extracted_question": question_text}
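

# --- Usage sketch (not part of the original app) ---
# A minimal way to exercise the endpoint locally is FastAPI's TestClient;
# the sample OCR text below is made up, and the payload key "text" matches
# the OCRText model above.
#
#   from fastapi.testclient import TestClient
#
#   client = TestClient(app)
#   response = client.post(
#       "/extract_question",
#       json={"text": "Chapter 3 Review\nWhat is the capital of France?\nAnswer in one word."},
#   )
#   print(response.json())  # e.g. {"extracted_question": "..."}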