mominah committed on
Commit
fbfcb17
·
verified ·
1 Parent(s): 10c7a75

Create check.py

Browse files
Files changed (1) hide show
  1. check.py +258 -0
check.py ADDED
@@ -0,0 +1,258 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import tempfile
3
+ import json
4
+ import numpy as np
5
+ import cv2
6
+ from PIL import Image
7
+ from pdf2image import convert_from_bytes
8
+ from fastapi import FastAPI, UploadFile, File, HTTPException
9
+ from fastapi.responses import JSONResponse, StreamingResponse
10
+ import uvicorn
11
+
12
# Get API key from environment. The check runs at import time, so the module
# refuses to load at all when the key is missing.
GENAI_API_KEY = os.getenv("GENAI_API_KEY")
if not GENAI_API_KEY:
    # RuntimeError is still an Exception subclass, so existing handlers keep working.
    raise RuntimeError("GENAI_API_KEY not set in environment")

# Import the Google GenAI client libraries.
from google import genai
from google.genai import types

# Initialize the GenAI client with the API key.
client = genai.Client(api_key=GENAI_API_KEY)

app = FastAPI(title="Student Result Card API (Paper K only)")

# Use system temporary directory to store the results file.
TEMP_FOLDER = tempfile.gettempdir()
RESULT_FILE = os.path.join(TEMP_FOLDER, "result_cards.json")
29
+
30
+
31
+ ##############################################################
32
+ # Preprocessing & Extraction Functions
33
+ ##############################################################
34
+
35
def extract_json_from_output(output_str: str):
    """
    Extract the JSON object embedded in a string that may contain extra text.

    The candidate span runs from the first '{' to the last '}' of
    ``output_str``; everything outside that span (for example Markdown code
    fences around a model reply) is ignored.

    Args:
        output_str: Raw text to search. Non-string values, including None,
            are treated as "no JSON found" rather than raising.

    Returns:
        The decoded JSON value, or None when no '{...}' span exists or the
        span is not valid JSON.
    """
    # A missing model reply can reach this function as None; report it the
    # same way as text without any JSON instead of failing on ``.find``.
    if not isinstance(output_str, str):
        print("No JSON block found in the output.")
        return None

    start = output_str.find('{')
    end = output_str.rfind('}')
    # ``end < start`` also covers ``end == -1`` and a '}' that only appears
    # before the first '{'.
    if start == -1 or end < start:
        print("No JSON block found in the output.")
        return None

    try:
        return json.loads(output_str[start:end + 1])
    except json.JSONDecodeError as e:
        print("Error decoding JSON:", e)
        return None
50
+
51
+
52
def parse_all_answers(image_input: Image.Image) -> str:
    """
    Extracts answers from an image of a 15-question answer sheet.

    Sends a fixed prompt plus the image to the "gemini-2.0-flash" model through
    the module-level ``client`` and asks for a JSON object with an "Answers"
    mapping keyed "1" to "15".

    Args:
        image_input: Image of the answer sheet.

    Returns:
        The raw text of the model response (expected to contain JSON), or an
        empty string when the response carries no text.
    """
    # NOTE(review): the prompt text is kept flush-left exactly as it appears in
    # the reviewed source; any original indentation inside the literal was not
    # visible there.
    output_format = """
Answer in the following JSON format. Do not write anything else:
{
"Answers": {
"1": "<option or text>",
"2": "<option or text>",
"3": "<option or text>",
"4": "<option or text>",
"5": "<option or text>",
"6": "<option or text>",
"7": "<option or text>",
"8": "<option or text>",
"9": "<option or text>",
"10": "<option or text>",
"11": "<free-text answer>",
"12": "<free-text answer>",
"13": "<free-text answer>",
"14": "<free-text answer>",
"15": "<free-text answer>"
}
}
"""
    prompt = f"""
You are an assistant that extracts answers from an image.
The image is a screenshot of an answer sheet containing 15 questions.
For questions 1 to 10, the answers are multiple-choice selections.
For questions 11 to 15, the answers are free-text responses.
Extract the answer for each question (1 to 15) and provide the result in JSON using the format below:
{output_format}
"""
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt, image_input]
    )
    # ``response.text`` may be None when the reply has no text part; return ""
    # so the declared ``str`` return type holds for callers.
    return response.text or ""
92
+
93
+
94
def parse_info(image_input: Image.Image) -> str:
    """
    Extracts candidate information including name, number, country, level and paper from an image.

    Sends a fixed prompt plus the image to the "gemini-2.0-flash" model through
    the module-level ``client`` and asks for a JSON object with a
    "Candidate Info" mapping.

    Args:
        image_input: Image containing the candidate details.

    Returns:
        The raw text of the model response (expected to contain JSON), or an
        empty string when the response carries no text.
    """
    # NOTE(review): the prompt text is kept flush-left exactly as it appears in
    # the reviewed source; any original indentation inside the literal was not
    # visible there.
    output_format = """
Answer in the following JSON format. Do not write anything else:
{
"Candidate Info": {
"Name": "<name>",
"Number": "<number>",
"Country": "<country>",
"Level": "<level>",
"Paper": "<paper>"
}
}
"""
    prompt = f"""
You are an assistant that extracts candidate information from an image.
The image contains candidate details including name, candidate number, country, level and paper.
Extract the information accurately and provide the result in JSON using the following format:
{output_format}
"""
    response = client.models.generate_content(
        model="gemini-2.0-flash",
        contents=[prompt, image_input]
    )
    # ``response.text`` may be None when the reply has no text part; return ""
    # so the declared ``str`` return type holds for callers.
    return response.text or ""
122
+
123
+
124
def _answer_text(value) -> str:
    """Return an answer as stripped text; None (missing answer) becomes ""."""
    if value is None:
        return ""
    # Parsed JSON may hold numbers or booleans, so coerce before stripping.
    return str(value).strip()


def calculate_result(student_answers: dict, correct_answers: dict) -> dict:
    """
    Compares student's answers with the correct answers and calculates the score.

    Both arguments are expected to carry a top-level "Answers" mapping keyed by
    the question numbers "1" to "15". Answers are compared as stripped text.

    Args:
        student_answers: Parsed answers of one student.
        correct_answers: Parsed answer key.

    Returns:
        A dict with "Total Marks", "Total Questions", "Percentage" and
        "Detailed Results" (per question: the student's answer, the key's
        answer and "Correct"/"Incorrect").
    """
    student_all = (student_answers or {}).get("Answers", {})
    correct_all = (correct_answers or {}).get("Answers", {})
    # Treat a malformed "Answers" value (e.g. a list) as "no answers given".
    if not isinstance(student_all, dict):
        student_all = {}
    if not isinstance(correct_all, dict):
        correct_all = {}

    total_questions = 15
    marks = 0
    detailed = {}

    for q in map(str, range(1, total_questions + 1)):
        stud_ans = _answer_text(student_all.get(q))
        corr_ans = _answer_text(correct_all.get(q))
        # A question with no key answer cannot be graded; without this guard a
        # blank student answer would "match" a blank key and earn a mark.
        is_correct = bool(corr_ans) and stud_ans == corr_ans
        if is_correct:
            marks += 1
        detailed[q] = {
            "Student": stud_ans,
            "Correct": corr_ans,
            "Result": "Correct" if is_correct else "Incorrect",
        }

    percentage = (marks / total_questions) * 100
    return {
        "Total Marks": marks,
        "Total Questions": total_questions,
        "Percentage": percentage,
        "Detailed Results": detailed
    }
151
+
152
+
153
def load_answer_key(pdf_bytes: bytes) -> "dict | None":
    """
    Converts a PDF (as bytes) to images, takes the last page, and parses the answers.

    Args:
        pdf_bytes: Raw bytes of the answer-key PDF.

    Returns:
        The parsed JSON answer key, or None when the PDF yields no page images
        or the extracted text contains no valid JSON.
    """
    images = convert_from_bytes(pdf_bytes)
    # Guard the ``[-1]`` lookup: an empty page list would raise IndexError.
    if not images:
        print("Answer key PDF produced no page images.")
        return None
    answer_key_response = parse_all_answers(images[-1])
    return extract_json_from_output(answer_key_response)
162
+
163
+
164
+ ##############################################################
165
+ # FastAPI Endpoints
166
+ ##############################################################
167
+
168
def _crop_candidate_region(page: Image.Image) -> Image.Image:
    """Return the full-width band of the page that is sent to ``parse_info``."""
    w, h = page.size
    top, bottom = int(h * 0.10), int(h * 0.75)
    # The band starts at 10% of the page height and ends at row ``h - bottom``
    # (about 25%). The ``+ 1`` keeps that last row inside the crop, because
    # PIL's lower bound is exclusive.
    lower = min(h, h - bottom + 1)
    return page.crop((0, top, w, lower))


def _save_results(payload: dict) -> None:
    """Write ``payload`` to RESULT_FILE via a temp file and an atomic rename."""
    # Readers of RESULT_FILE never see a half-written file, and overlapping
    # writers cannot interleave their output.
    fd, tmp_path = tempfile.mkstemp(dir=os.path.dirname(RESULT_FILE), suffix=".json")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(payload, f, indent=2)
        os.replace(tmp_path, RESULT_FILE)
    except Exception:
        # Do not leave an orphaned temp file behind on failure.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
        raise


def _grade_submission(student_pdf_bytes: bytes, paper_k_bytes: bytes) -> list:
    """Grade every page of the student PDF against the Paper K key and save the results."""
    # Load the Paper K answer key
    answer_key_k = load_answer_key(paper_k_bytes)
    if answer_key_k is None:
        raise RuntimeError("Failed to parse Paper K answer key.")

    # Convert the student answer PDF to images (each page = one student)
    student_images = convert_from_bytes(student_pdf_bytes)
    all_results = []

    for idx, page in enumerate(student_images, start=1):
        # Extract candidate info from the cropped header band
        cand_img = _crop_candidate_region(page)
        cand_info = extract_json_from_output(parse_info(cand_img)) or {}

        # Extract student answers from the whole page
        stud_answers = extract_json_from_output(parse_all_answers(page)) or {}

        # Calculate result against Paper K key
        result = calculate_result(stud_answers, answer_key_k)

        all_results.append({
            "Student Index": idx,
            "Candidate Info": cand_info.get("Candidate Info", {}),
            "Student Answers": stud_answers,
            "Correct Answer Key": answer_key_k,
            "Result": result
        })

    # Write out JSON file
    _save_results({"results": all_results})
    return all_results


@app.post("/process")
async def process_pdfs(
    original_pdf: UploadFile = File(..., description="PDF with all student answer sheets (one page per student)"),
    paper_k_pdf: UploadFile = File(..., description="Answer key PDF for Paper K")
):
    """
    Grade a PDF of student answer sheets against the Paper K answer key.

    Each page of ``original_pdf`` is treated as one student. The graded results
    are written to RESULT_FILE and also returned as ``{"results": [...]}``.

    Raises:
        HTTPException: status 500 with the error text when any step fails.
    """
    # Local import keeps this block self-contained; fastapi is already a
    # dependency of this module.
    from fastapi.concurrency import run_in_threadpool

    try:
        # Read file bytes
        student_pdf_bytes = await original_pdf.read()
        paper_k_bytes = await paper_k_pdf.read()

        # PDF rendering and the model calls are blocking; run them in a worker
        # thread so the event loop stays free for other requests.
        all_results = await run_in_threadpool(
            _grade_submission, student_pdf_bytes, paper_k_bytes
        )
        return JSONResponse(content={"results": all_results})

    except Exception as e:
        raise HTTPException(status_code=500, detail=str(e)) from e
228
+
229
+
230
@app.get("/download")
async def download_results():
    """
    Returns the result JSON file stored in the temporary folder.

    The file is sent as an attachment named "result_cards.json".

    Raises:
        HTTPException: status 404 when no result file exists yet.
    """
    # Local import keeps this block self-contained; fastapi.responses is
    # already used by this module.
    from fastapi.responses import FileResponse

    if not os.path.isfile(RESULT_FILE):
        raise HTTPException(status_code=404, detail="Result file not found. Please run /process first.")
    # FileResponse opens and closes the file itself, so no handle is left open
    # the way a bare ``open()`` passed to StreamingResponse would be.
    return FileResponse(
        RESULT_FILE,
        media_type="application/json",
        filename="result_cards.json",
    )
242
+
243
+
244
@app.get("/")
async def root():
    """Return a welcome message and a short description of how to use the API."""
    usage_text = (
        "POST two PDFs to /process: (1) original answer sheet PDF, "
        "(2) Paper K answer-key PDF. "
        "Then GET /download to retrieve the graded results."
    )
    welcome = "Welcome to the Student Result Card API (Paper K only)."
    return dict(message=welcome, usage=usage_text)
255
+
256
+
257
if __name__ == "__main__":
    # With reload=True uvicorn needs the app as an import string
    # ("module:attribute"). Derive the module name from this file rather than
    # hard-coding "main", which only resolves when the script is saved as main.py.
    module_name = os.path.splitext(os.path.basename(__file__))[0]
    uvicorn.run(f"{module_name}:app", host="0.0.0.0", port=8000, reload=True)