AutoAssess / src /main.py
TensorFlo's picture
fixed iterate till first empty row
b035c24
raw
history blame
4.81 kB
import openai
import os
from openpyxl import load_workbook, Workbook
from src.transcribe_image import transcribe_image
from src.assess_text import assess_essay_with_gpt
def process_essays(images, question, guidelines, workbook):
    """Transcribe and grade the essays listed in a workbook's active sheet.

    Row 1 of the active sheet is treated as the header row. From row 2 on,
    each row describes one essay: column 1 = student ID, column 2 = number
    of pages, column 3 = transcription, column 4 = mark, column 5 = reason.
    Rows are processed up to, but not including, the first row whose
    student ID cell is empty.

    Args:
        images: Indexable sequence of page images (whatever
            ``transcribe_image`` accepts). Pages are consumed in order,
            ``num_pages`` at a time, only for rows that have no
            transcription; rows that already have one consume no images.
        question: Essay question forwarded to ``assess_essay_with_gpt``.
        guidelines: Assessment guidelines forwarded to
            ``assess_essay_with_gpt``.
        workbook: Workbook whose active sheet holds the essay rows. No
            values are written to it.

    Returns:
        A new ``Workbook`` whose active sheet contains the copied header
        row and, for every processed row, the student ID, page count,
        transcription, mark and reason (existing values are copied,
        missing ones are filled in).

    Raises:
        AssertionError: If a row has a mark without a reason or a reason
            without a mark.
    """
    sheet = workbook.active

    # Results are written to a fresh workbook.
    new_workbook = Workbook()
    new_sheet = new_workbook.active

    # Copy the header row.
    for col in range(1, sheet.max_column + 1):
        new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value

    # Data ends at the first empty student ID; if there is none, it ends
    # after the last row of the sheet.
    first_empty_row = sheet.max_row + 1
    for row in range(2, sheet.max_row + 1):
        if sheet.cell(row=row, column=1).value is None:
            first_empty_row = row
            break

    # Every pass walks exactly the populated rows. The terminating empty row
    # must be excluded: it has no page count, so trying to transcribe it
    # would fail on range(None).
    data_rows = range(2, first_empty_row)

    # First pass: transcribe essays whose text is missing.
    img_index = 0
    for row in data_rows:
        student_id = sheet.cell(row=row, column=1).value
        num_pages = sheet.cell(row=row, column=2).value
        transcribed_text = sheet.cell(row=row, column=3).value

        new_sheet.cell(row=row, column=1).value = student_id
        new_sheet.cell(row=row, column=2).value = num_pages

        if transcribed_text is None:
            print(f"Transcribing essay for student {student_id}...")
            pages = []
            for _ in range(num_pages):
                pages.append(transcribe_image(images[img_index]))
                img_index += 1
            transcribed_text = "\n".join(pages).strip()

        # Either the fresh transcription or the one already in the sheet.
        new_sheet.cell(row=row, column=3).value = transcribed_text

    # Collect the essays that are already graded as examples for the prompt.
    examples = []
    for row in data_rows:
        student_id = sheet.cell(row=row, column=1).value
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is not None or reason is not None:
            assert mark is not None and reason is not None, f"Mark or reason missing for student {student_id}."
            # Take the text from the new sheet so that a graded essay which
            # was only just transcribed is not passed along as None.
            essay = new_sheet.cell(row=row, column=3).value
            examples.append({"essay": essay, "mark": mark, "reason": reason})

    # Second pass: grade the essays that have neither mark nor reason.
    for row in data_rows:
        student_id = sheet.cell(row=row, column=1).value
        transcribed_text = new_sheet.cell(row=row, column=3).value
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is None and reason is None:
            print(f"Assessing essay for student {student_id}...")
            assessment = assess_essay_with_gpt(transcribed_text, question, guidelines, examples)
            new_sheet.cell(row=row, column=4).value = assessment['mark']
            new_sheet.cell(row=row, column=5).value = assessment['reason']
            # Each new assessment becomes an example for the following ones.
            examples.append({"essay": transcribed_text, "mark": assessment['mark'], "reason": assessment['reason']})
        else:
            # Already graded: carry the existing mark and reason over.
            new_sheet.cell(row=row, column=4).value = mark
            new_sheet.cell(row=row, column=5).value = reason

    return new_workbook
if __name__ == "__main__":
    # Input locations; replace with the actual paths.
    folder_path = "data/images"
    question_file = "data/question.txt"
    guidelines_file = "data/assessment_guidelines.txt"
    excel_file = "data/essays.xlsx"

    # Page images, ordered by modification time (oldest first). Only regular
    # files are kept so that sub-directories inside the image folder are
    # never handed to the transcriber as pages.
    candidates = [os.path.join(folder_path, name) for name in os.listdir(folder_path)]
    images = sorted((path for path in candidates if os.path.isfile(path)), key=os.path.getmtime)

    with open(question_file, 'r') as file:
        question = file.read().strip()
    with open(guidelines_file, 'r') as file:
        guidelines = file.read().strip()

    workbook = load_workbook(excel_file)
    new_workbook = process_essays(
        images,
        question,
        guidelines,
        workbook
    )

    # Build "<name>_assessed<ext>" from the real extension. A plain
    # str.replace(".xlsx", ...) would return the input path unchanged for any
    # other extension, and the save would then overwrite the input workbook.
    root, ext = os.path.splitext(excel_file)
    new_workbook.save(f"{root}_assessed{ext}")
    print("Assessment complete. Results saved in assessed version of the Excel file.")