IAMTFRMZA committed on
Commit
de022f5
·
verified ·
1 Parent(s): 660d51d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +22 -50
app.py CHANGED
@@ -6,7 +6,6 @@ import json
6
  import requests
7
  from PIL import Image
8
  from openai import OpenAI
9
- import easyocr
10
  from io import BytesIO
11
 
12
  # ------------------ App Configuration ------------------
@@ -23,7 +22,15 @@ if not OPENAI_API_KEY or not ASSISTANT_ID:
23
  st.stop()
24
 
25
  client = OpenAI(api_key=OPENAI_API_KEY)
26
- reader = easyocr.Reader(['en'], gpu=False)
 
 
 
 
 
 
 
 
27
 
28
  # ------------------ Session State Initialization ------------------
29
  if "messages" not in st.session_state:
@@ -46,46 +53,6 @@ if st.sidebar.button("🔄 Clear Chat"):
46
 
47
  show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
48
 
49
- # ------------------ OCR + GPT Summary & FAQ Generator ------------------
50
- def generate_summary_and_faq_from_image_easyocr(image_url):
51
- try:
52
- response = requests.get(image_url, stream=True)
53
- image = Image.open(BytesIO(response.content)).convert("RGB")
54
-
55
- result = reader.readtext(np.array(image), detail=0)
56
- extracted_text = "\n".join(result)
57
-
58
- if not extracted_text.strip():
59
- return "No readable text found in image.", []
60
-
61
- prompt = f"""
62
- You are a pathology assistant. Given this OCR-extracted text from a pathology textbook page, do the following:
63
- 1. Provide a concise summary of the main point (1-2 sentences).
64
- 2. Provide two FAQs with brief answers.
65
-
66
- Text:
67
- {extracted_text[:3000]}
68
-
69
- Return only JSON:
70
- {{
71
- "summary": "...",
72
- "faqs": [
73
- {{"question": "...", "answer": "..."}},
74
- {{"question": "...", "answer": "..."}}
75
- ]
76
- }}
77
- """
78
- response = client.chat.completions.create(
79
- model="gpt-3.5-turbo",
80
- messages=[{"role": "user", "content": prompt}],
81
- temperature=0.3
82
- )
83
- result = json.loads(response.choices[0].message.content)
84
- return result.get("summary", "No summary generated."), result.get("faqs", [])
85
-
86
- except Exception as e:
87
- return f"Error generating summary: {e}", []
88
-
89
  # ------------------ Layout ------------------
90
  left, center, right = st.columns([1, 2, 1])
91
 
@@ -162,16 +129,21 @@ with center:
162
  except Exception as e:
163
  st.error(f"❌ Error: {str(e)}")
164
 
165
- # ------------------ Right Column: OCR-Based Summary + FAQ ------------------
166
  with right:
167
- st.subheader("📌 Summary & FAQ (via EasyOCR)")
 
 
 
168
 
169
  if st.session_state.image_url:
170
- with st.spinner("🔍 Extracting text and generating summary..."):
171
- summary_text, faq_list = generate_summary_and_faq_from_image_easyocr(st.session_state.image_url)
172
- else:
173
- summary_text = "No image selected."
174
- faq_list = []
 
 
175
 
176
  st.markdown(summary_text)
177
 
@@ -180,4 +152,4 @@ with right:
180
  for faq in faq_list:
181
  st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
182
  else:
183
- st.info("No FAQs available or generated from this page.")
 
6
  import requests
7
  from PIL import Image
8
  from openai import OpenAI
 
9
  from io import BytesIO
10
 
11
  # ------------------ App Configuration ------------------
 
22
  st.stop()
23
 
24
  client = OpenAI(api_key=OPENAI_API_KEY)
25
+
26
+ # ------------------ Load Structured JSON ------------------
27
+ STRUCTURED_JSON_PATH = "/mnt/data/51940670-Manual-of-Surgical-Pathology-Third-Edition_1_structured_output (1).json"
28
+ try:
29
+ with open(STRUCTURED_JSON_PATH, "r") as f:
30
+ structured_data = json.load(f)
31
+ except Exception as e:
32
+ st.error(f"❌ Failed to load structured summary file: {e}")
33
+ st.stop()
34
 
35
  # ------------------ Session State Initialization ------------------
36
  if "messages" not in st.session_state:
 
53
 
54
  show_image = st.sidebar.checkbox("📖 Show Document Image", value=True)
55
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  # ------------------ Layout ------------------
57
  left, center, right = st.columns([1, 2, 1])
58
 
 
129
  except Exception as e:
130
  st.error(f"❌ Error: {str(e)}")
131
 
132
+ # ------------------ Right Column: Structured Summary + FAQ ------------------
133
  with right:
134
+ st.subheader("📌 Summary & FAQ (from Structured Data)")
135
+
136
+ summary_text = "No image selected or page not found."
137
+ faq_list = []
138
 
139
  if st.session_state.image_url:
140
+ match = re.search(r'/(\d{3})\.png', st.session_state.image_url)
141
+ if match:
142
+ page_number = match.group(1)
143
+ page_entry = next((entry for entry in structured_data if entry.get("page_number") == page_number), None)
144
+ if page_entry:
145
+ summary_text = page_entry.get("summary", "No summary available.")
146
+ faq_list = page_entry.get("faqs", [])
147
 
148
  st.markdown(summary_text)
149
 
 
152
  for faq in faq_list:
153
  st.markdown(f"**Q:** {faq.get('question', '')}\n\n**A:** {faq.get('answer', '')}")
154
  else:
155
+ st.info("No FAQs available for this page.")