File size: 4,398 Bytes
37b9a66
 
 
 
 
 
 
 
03c245b
37b9a66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03c245b
 
37b9a66
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
03c245b
37b9a66
 
03c245b
 
 
 
 
 
37b9a66
03c245b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import openai
import os
from openpyxl import load_workbook, Workbook

from src.transcribe_image import transcribe_image
from src.assess_text import assess_essay_with_gpt


def process_essays(images, question, guidelines, workbook):
    """Transcribe and grade the essays listed in a workbook's active sheet.

    Row 1 of the active sheet is treated as headers; every following row is
    one essay, with columns 1-5 read as: student ID, number of pages,
    transcribed text, mark, reason. Values are only read from the input
    sheet; all results are written to a newly created workbook. Headers are
    copied for every column, but only columns 1-5 of the data rows are
    carried over.

    Processing runs in three steps:

    1. Rows without transcribed text are transcribed by calling
       ``transcribe_image`` on the next ``num_pages`` entries of ``images``.
       Rows that already have text do not consume any images.
    2. Rows that already have a mark and a reason are collected as graded
       examples.
    3. Rows with neither a mark nor a reason are assessed with
       ``assess_essay_with_gpt``. Each new assessment is appended to the
       examples, so it is passed along when later rows are assessed.

    Args:
        images: Indexable sequence whose items are handed, one per page and
            in order, to ``transcribe_image``.
        question: Passed unchanged to ``assess_essay_with_gpt``.
        guidelines: Passed unchanged to ``assess_essay_with_gpt``.
        workbook: Workbook whose ``active`` sheet holds the essay table.

    Returns:
        A new workbook whose active sheet contains the headers plus, for
        every data row, the student ID, page count, transcription, mark and
        reason.

    Raises:
        AssertionError: If a row has a mark without a reason or a reason
            without a mark.
    """
    sheet = workbook.active

    # Results go into a fresh workbook rather than into the input one.
    new_workbook = Workbook()
    new_sheet = new_workbook.active

    # Copy headers
    for col in range(1, sheet.max_column + 1):
        new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value

    data_rows = range(2, sheet.max_row + 1)
    img_index = 0

    # First pass: transcribe missing texts
    for row in data_rows:
        student_id = sheet.cell(row=row, column=1).value
        num_pages = sheet.cell(row=row, column=2).value
        transcribed_text = sheet.cell(row=row, column=3).value

        # Copy student ID and number of pages
        new_sheet.cell(row=row, column=1).value = student_id
        new_sheet.cell(row=row, column=2).value = num_pages

        if transcribed_text is None:
            print(f"Transcribing essay for student {student_id}...")
            pages = []
            for _ in range(num_pages):
                pages.append(transcribe_image(images[img_index]))
                img_index += 1
            transcribed_text = "\n".join(pages).strip()

        # Either the fresh transcription or the one that was already there
        new_sheet.cell(row=row, column=3).value = transcribed_text

    # Collect the rows that are already graded as examples for the prompt
    examples = []
    for row in data_rows:
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is None and reason is None:
            continue
        if mark is None or reason is None:
            student_id = sheet.cell(row=row, column=1).value
            # Raised explicitly instead of via `assert` so the check still
            # runs under `python -O`; the exception type is kept for callers.
            raise AssertionError(f"Mark or reason missing for student {student_id}.")

        # Read the text from the new sheet: a graded row may have been
        # transcribed only in the first pass, in which case the input sheet
        # still holds None for it.
        essay_text = new_sheet.cell(row=row, column=3).value
        examples.append({"essay": essay_text, "mark": mark, "reason": reason})

    # Second pass: grade the rows that have no mark/reason yet
    for row in data_rows:
        student_id = sheet.cell(row=row, column=1).value
        transcribed_text = new_sheet.cell(row=row, column=3).value
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is None and reason is None:
            print(f"Assessing essay for student {student_id}...")
            assessment = assess_essay_with_gpt(transcribed_text, question, guidelines, examples)
            mark = assessment['mark']
            reason = assessment['reason']
            # Add the assessed essay as an example for subsequent assessments
            examples.append({"essay": transcribed_text, "mark": mark, "reason": reason})

        # Either the new assessment or the mark/reason that already existed
        new_sheet.cell(row=row, column=4).value = mark
        new_sheet.cell(row=row, column=5).value = reason

    return new_workbook


if __name__ == "__main__":
    # Input locations; point these at the real data before running.
    folder_path = "data/images"
    question_file = "data/question.txt"
    guidelines_file = "data/assessment_guidelines.txt"
    excel_file = "data/essays.xlsx"

    # Every entry of the image folder, ordered by modification time
    # (oldest first).
    images = [os.path.join(folder_path, entry) for entry in os.listdir(folder_path)]
    images.sort(key=os.path.getmtime)

    # Question first, then guidelines; both are stripped of surrounding
    # whitespace.
    loaded_texts = []
    for text_path in (question_file, guidelines_file):
        with open(text_path, 'r') as handle:
            loaded_texts.append(handle.read().strip())
    question, guidelines = loaded_texts

    workbook = load_workbook(excel_file)

    new_workbook = process_essays(images, question, guidelines, workbook)

    # Results are written next to the input, with an "_assessed" suffix.
    output_file = excel_file.replace(".xlsx", "_assessed.xlsx")
    new_workbook.save(output_file)
    print("Assessment complete. Results saved in assessed version of the Excel file.")