CwAnkit07 committed on
Commit
61db3f5
·
verified ·
1 Parent(s): a1f847c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +13 -5
app.py CHANGED
@@ -1,8 +1,16 @@
 
1
  from fastapi import FastAPI
2
  from pydantic import BaseModel
3
  from transformers import pipeline
4
 
5
- # ✅ Use Hugging Face Hub Instead of Local Model
 
 
 
 
 
 
 
6
  question_extractor = pipeline("text-classification", model="textattack/bert-base-uncased-MRPC")
7
 
8
  app = FastAPI()
@@ -14,11 +22,11 @@ class OCRText(BaseModel):
14
  def extract_question(data: OCRText):
15
  text = data.text
16
  lines = text.split("\n")
17
-
 
18
  ranked_lines = sorted(lines, key=lambda line: question_extractor(line)[0]['score'], reverse=True)
19
  top_sentences = [line for line in ranked_lines[:3] if len(line) > 10]
20
-
21
  question_text = " ".join(top_sentences)
22
-
23
- return {"extracted_question": question_text}
24
 
 
 
1
+ import os
2
  from fastapi import FastAPI
3
  from pydantic import BaseModel
4
  from transformers import pipeline
5
 
6
+ # ✅ Step 1: Set a Custom Cache Directory (Writable)
7
+ os.environ["TRANSFORMERS_CACHE"] = "/app/cache"
8
+ os.environ["HF_HOME"] = "/app/cache"
9
+
10
+ # ✅ Step 2: Ensure Cache Directory Exists
11
+ os.makedirs("/app/cache", exist_ok=True)
12
+
13
+ # ✅ Step 3: Load Model from Hugging Face
14
  question_extractor = pipeline("text-classification", model="textattack/bert-base-uncased-MRPC")
15
 
16
  app = FastAPI()
 
22
  def extract_question(data: OCRText):
23
  text = data.text
24
  lines = text.split("\n")
25
+
26
+ # Use AI Model to Identify Question Parts
27
  ranked_lines = sorted(lines, key=lambda line: question_extractor(line)[0]['score'], reverse=True)
28
  top_sentences = [line for line in ranked_lines[:3] if len(line) > 10]
29
+
30
  question_text = " ".join(top_sentences)
 
 
31
 
32
+ return {"extracted_question": question_text}