import os

# ✅ Step 1: Set the Hugging Face cache directory (HF_HOME) before importing
# transformers, so the library picks it up when it initializes
os.environ["HF_HOME"] = "/app/cache"

# ✅ Step 2: Ensure the cache directory exists
os.makedirs("/app/cache", exist_ok=True)

from fastapi import FastAPI
from pydantic import BaseModel
from transformers import pipeline

# ✅ Step 3: Load the model from Hugging Face
question_extractor = pipeline("text-classification", model="textattack/bert-base-uncased-MRPC")

app = FastAPI()

class OCRText(BaseModel):
    text: str

@app.post("/extract_question")
def extract_question(data: OCRText):
    # Drop empty lines so the classifier only scores real text
    lines = [line.strip() for line in data.text.split("\n") if line.strip()]

    # Use the model's confidence score to rank the lines most likely to be part of the question
    ranked_lines = sorted(lines, key=lambda line: question_extractor(line)[0]['score'], reverse=True)
    top_sentences = [line for line in ranked_lines[:3] if len(line) > 10]

    question_text = " ".join(top_sentences)

    return {"extracted_question": question_text}