CwAnkit07 committed on
Commit
61db3f5
·
verified ·
1 Parent(s): a1f847c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -1,8 +1,16 @@
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from transformers import pipeline
4
 
5
- # ✅ Use Hugging Face Hub Instead of Local Model
 
 
 
 
 
 
 
6
  question_extractor = pipeline("text-classification", model="textattack/bert-base-uncased-MRPC")
7
 
8
  app = FastAPI()
@@ -14,11 +22,11 @@ class OCRText(BaseModel):
14
  def extract_question(data: OCRText):
15
  text = data.text
16
  lines = text.split("\n")
17
-
 
18
  ranked_lines = sorted(lines, key=lambda line: question_extractor(line)[0]['score'], reverse=True)
19
  top_sentences = [line for line in ranked_lines[:3] if len(line) > 10]
20
-
21
  question_text = " ".join(top_sentences)
22
-
23
- return {"extracted_question": question_text}
24
 
 
 
1
+ import os
2
  from fastapi import FastAPI
3
  from pydantic import BaseModel
4
  from transformers import pipeline
5
 
6
+ # ✅ Step 1: Set a Custom Cache Directory (Writable)
7
+ os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
8
+ os.environ["HF_HOME"] = "/app/cache"
9
+
10
+ # ✅ Step 2: Ensure Cache Directory Exists
11
+ os.makedirs("/app/cache", exist_ok=True)
12
+
13
+ # ✅ Step 3: Load Model from Hugging Face
14
  question_extractor = pipeline("text-classification", model="textattack/bert-base-uncased-MRPC")
15
 
16
  app = FastAPI()
 
22
  def extract_question(data: OCRText):
23
  text = data.text
24
  lines = text.split("\n")
25
+
26
+ # Use AI Model to Identify Question Parts
27
  ranked_lines = sorted(lines, key=lambda line: question_extractor(line)[0]['score'], reverse=True)
28
  top_sentences = [line for line in ranked_lines[:3] if len(line) > 10]
29
+
30
  question_text = " ".join(top_sentences)
 
 
31
 
32
+ return {"extracted_question": question_text}