import os

import openai
from openpyxl import load_workbook, Workbook

from src.transcribe_image import transcribe_image
from src.assess_text import assess_essay_with_gpt

# 1-based spreadsheet layout read and written by process_essays.
_COL_STUDENT_ID = 1
_COL_NUM_PAGES = 2
_COL_TEXT = 3
_COL_MARK = 4
_COL_REASON = 5
_HEADER_ROW = 1
_FIRST_DATA_ROW = 2


def _find_first_empty_row(sheet):
    """Return the first data row whose student ID cell is empty.

    Falls back to ``sheet.max_row + 1`` when every data row has a student ID,
    so that ``range(_FIRST_DATA_ROW, result)`` covers all populated rows.
    """
    for row in range(_FIRST_DATA_ROW, sheet.max_row + 1):
        if sheet.cell(row=row, column=_COL_STUDENT_ID).value is None:
            return row
    return sheet.max_row + 1


def _validate_grades(sheet, rows):
    """Raise ValueError if any row has a mark without a reason or vice versa."""
    for row in rows:
        mark = sheet.cell(row=row, column=_COL_MARK).value
        reason = sheet.cell(row=row, column=_COL_REASON).value
        if (mark is None) != (reason is None):
            student_id = sheet.cell(row=row, column=_COL_STUDENT_ID).value
            raise ValueError(f"Mark or reason missing for student {student_id}.")


def _transcribe_pages(images, start, num_pages, student_id):
    """Transcribe ``num_pages`` images beginning at ``images[start]``.

    Returns a ``(text, next_index)`` tuple: the page transcriptions joined by
    newlines (stripped), and the index of the first image not consumed.

    Raises:
        TypeError: if ``num_pages`` is not usable as a page count
            (for example an empty cell).
        IndexError: if fewer than ``num_pages`` images remain.
    """
    try:
        offsets = range(num_pages)
    except TypeError as err:
        raise TypeError(
            f"Invalid page count {num_pages!r} for student {student_id}."
        ) from err

    end = start + len(offsets)
    if end > len(images):
        raise IndexError(
            f"Not enough images for student {student_id}: "
            f"need {len(offsets)} from index {start}, "
            f"but only {len(images)} images were supplied."
        )

    pages = [transcribe_image(images[start + offset]) for offset in offsets]
    return "\n".join(pages).strip(), end


def process_essays(images, question, guidelines, workbook):
    """Transcribe and grade the essays listed in ``workbook``'s active sheet.

    The sheet is read as: column 1 student ID, column 2 number of pages,
    column 3 transcribed text, column 4 mark, column 5 reason; row 1 holds
    headers. Processing stops at the first row with an empty student ID.

    Rows that lack a transcription are transcribed from ``images``; rows that
    lack both mark and reason are graded with ``assess_essay_with_gpt``.
    Already graded rows, and each newly graded row, are passed along as
    examples for subsequent assessments. The input workbook is not modified.

    Args:
        images: Sequence of image inputs for ``transcribe_image``. Images are
            consumed in row order and ONLY for rows without a transcription,
            so rows that already have text must not have pages in this list.
        question: Essay question text forwarded to the assessor.
        guidelines: Assessment guidelines text forwarded to the assessor.
        workbook: openpyxl workbook whose active sheet holds the essays.

    Returns:
        A new openpyxl ``Workbook`` with headers, IDs, page counts,
        transcriptions, marks and reasons filled in.

    Raises:
        ValueError: if a row has exactly one of mark / reason filled in.
        TypeError: if a row needing transcription has an invalid page count.
        IndexError: if ``images`` runs out before all pages are transcribed.
    """
    sheet = workbook.active

    # Results go into a fresh workbook so the source data stays untouched.
    new_workbook = Workbook()
    new_sheet = new_workbook.active

    for col in range(1, sheet.max_column + 1):
        header = sheet.cell(row=_HEADER_ROW, column=col).value
        new_sheet.cell(row=_HEADER_ROW, column=col).value = header

    rows = range(_FIRST_DATA_ROW, _find_first_empty_row(sheet))

    # Fail fast on inconsistent grading data before spending any
    # transcription or assessment calls.
    _validate_grades(sheet, rows)

    # First pass: copy IDs and page counts, transcribing where text is missing.
    img_index = 0
    for row in rows:
        student_id = sheet.cell(row=row, column=_COL_STUDENT_ID).value
        num_pages = sheet.cell(row=row, column=_COL_NUM_PAGES).value
        transcribed_text = sheet.cell(row=row, column=_COL_TEXT).value

        new_sheet.cell(row=row, column=_COL_STUDENT_ID).value = student_id
        new_sheet.cell(row=row, column=_COL_NUM_PAGES).value = num_pages

        if transcribed_text is None:
            print(f"Transcribing essay for student {student_id}...")
            transcribed_text, img_index = _transcribe_pages(
                images, img_index, num_pages, student_id
            )
        new_sheet.cell(row=row, column=_COL_TEXT).value = transcribed_text

    # Collect every pre-graded essay as an example. The text comes from the
    # new sheet so that rows transcribed in this run are not stored as None.
    examples = []
    for row in rows:
        mark = sheet.cell(row=row, column=_COL_MARK).value
        if mark is None:
            continue
        examples.append(
            {
                "essay": new_sheet.cell(row=row, column=_COL_TEXT).value,
                "mark": mark,
                "reason": sheet.cell(row=row, column=_COL_REASON).value,
            }
        )

    # Second pass: grade rows that have neither mark nor reason.
    for row in rows:
        mark = sheet.cell(row=row, column=_COL_MARK).value
        reason = sheet.cell(row=row, column=_COL_REASON).value

        if mark is None and reason is None:
            student_id = sheet.cell(row=row, column=_COL_STUDENT_ID).value
            essay = new_sheet.cell(row=row, column=_COL_TEXT).value
            print(f"Assessing essay for student {student_id}...")
            assessment = assess_essay_with_gpt(essay, question, guidelines, examples)
            mark = assessment['mark']
            reason = assessment['reason']
            # Newly graded essays also serve as examples for later rows.
            examples.append({"essay": essay, "mark": mark, "reason": reason})

        new_sheet.cell(row=row, column=_COL_MARK).value = mark
        new_sheet.cell(row=row, column=_COL_REASON).value = reason

    return new_workbook


def main():
    """Run the transcription and assessment pipeline on the files in ``data/``."""
    folder_path = "data/images"  # Replace with actual folder path
    question_file = "data/question.txt"  # Replace with actual file path
    guidelines_file = "data/assessment_guidelines.txt"  # Replace with actual file path
    excel_file = "data/essays.xlsx"

    # Pages are ordered by modification time; sub-directories are skipped
    # because only regular files can be transcribed.
    candidates = (os.path.join(folder_path, name) for name in os.listdir(folder_path))
    images = sorted(
        (path for path in candidates if os.path.isfile(path)),
        key=os.path.getmtime,
    )

    # Explicit encoding so the prompt text is read the same on every platform.
    with open(question_file, 'r', encoding="utf-8") as file:
        question = file.read().strip()
    with open(guidelines_file, 'r', encoding="utf-8") as file:
        guidelines = file.read().strip()

    workbook = load_workbook(excel_file)
    new_workbook = process_essays(images, question, guidelines, workbook)

    # splitext keeps the output path distinct from the input even when the
    # extension is not exactly ".xlsx", so the source file is never overwritten.
    root, ext = os.path.splitext(excel_file)
    new_workbook.save(f"{root}_assessed{ext}")
    print("Assessment complete. Results saved in assessed version of the Excel file.")


if __name__ == "__main__":
    main()