# Extracted from a hosted file view (Spaces status: Sleeping; file size: 4,356 bytes; revision 37b9a66).
import openai
import os
from openpyxl import load_workbook, Workbook
from src.transcribe_image import transcribe_image
from src.assess_text import assess_essay_with_gpt
# OpenAI API key setup
# The key is read from the OPENAI_API_KEY environment variable so that no
# secret lives in the source file. If the variable is unset this assigns None.
# NOTE(review): the literal key that used to be assigned here was exposed in
# source and should be treated as compromised and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")
def _read_text(path):
    """Return the stripped contents of the text file at ``path``."""
    with open(path, "r", encoding="utf-8") as handle:
        return handle.read().strip()


def _assessed_path(excel_file):
    """Return the output path: ``excel_file`` minus its extension plus ``_assessed.xlsx``."""
    # splitext only touches the final extension, so ".xlsx" appearing in a
    # directory name is left alone, and an input that does not end in ".xlsx"
    # can never map back onto itself and be overwritten.
    root, _ext = os.path.splitext(excel_file)
    return f"{root}_assessed.xlsx"


def process_essays(folder_path, question_file, guidelines_file, excel_file):
    """Transcribe and grade the essays listed in an Excel sheet.

    The active sheet of ``excel_file`` is read with row 1 as headers and one
    essay per following row: column 1 = student ID, column 2 = number of page
    images, column 3 = transcription, column 4 = mark, column 5 = reason.
    Rows in which all five columns are empty are skipped.

    Results are written to a new workbook in two stages:

    1. Rows without a transcription consume the next ``num_pages`` images from
       ``folder_path`` (ordered by modification time) and are transcribed with
       ``transcribe_image``. The workbook is then saved to
       ``data/transcribed_essays.xlsx``.
    2. Rows that already have a mark and reason become examples. Rows with
       neither are assessed with ``assess_essay_with_gpt``; each new
       assessment is appended to the examples used for later rows. The
       workbook is then saved next to ``excel_file`` with an ``_assessed``
       suffix.

    Args:
        folder_path: Directory holding the page images.
        question_file: Text file containing the essay question.
        guidelines_file: Text file containing the assessment guidelines.
        excel_file: Path of the input workbook.

    Raises:
        AssertionError: If a row has a mark without a reason, or vice versa.
        IndexError: If the image folder holds fewer images than the rows
            needing transcription require.
    """
    # Load question and guidelines
    question = _read_text(question_file)
    guidelines = _read_text(guidelines_file)

    # Load the Excel sheet
    sheet = load_workbook(excel_file).active

    # Create a new workbook to save results
    new_workbook = Workbook()
    new_sheet = new_workbook.active

    # Copy headers
    for col in range(1, sheet.max_column + 1):
        new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value

    # max_row can count rows that hold no values; skip rows that are blank in
    # every column this function reads so they are neither transcribed nor graded.
    data_rows = [
        row
        for row in range(2, sheet.max_row + 1)
        if any(sheet.cell(row=row, column=col).value is not None for col in range(1, 6))
    ]

    # Sort images in folder (oldest modification time first)
    images = sorted(
        (os.path.join(folder_path, name) for name in os.listdir(folder_path)),
        key=os.path.getmtime,
    )
    img_index = 0

    # First Pass: Transcribe missing texts
    for row in data_rows:
        student_id = sheet.cell(row=row, column=1).value
        num_pages = sheet.cell(row=row, column=2).value
        transcribed_text = sheet.cell(row=row, column=3).value

        # Copy student ID and number of pages
        new_sheet.cell(row=row, column=1).value = student_id
        new_sheet.cell(row=row, column=2).value = num_pages

        # Transcribe if text is missing.
        # NOTE(review): rows that already have a transcription do not advance
        # img_index, so the folder presumably holds images only for the
        # untranscribed essays -- confirm against how the folder is populated.
        if transcribed_text is None:
            print(f"Transcribing essay for student {student_id}...")
            page_texts = []
            for _ in range(num_pages):
                if img_index >= len(images):
                    raise IndexError(
                        f"Ran out of images in {folder_path!r} while transcribing "
                        f"student {student_id}: {len(images)} image(s) available."
                    )
                page_texts.append(transcribe_image(images[img_index]))
                img_index += 1
            transcribed_text = "\n".join(page_texts).strip()

        new_sheet.cell(row=row, column=3).value = transcribed_text

    # Save current state with transcriptions
    new_workbook.save("data/transcribed_essays.xlsx")
    print("All transcriptions completed. Saved as 'transcribed_essays.xlsx'.")

    # Collect graded examples. This runs to completion before any grading so
    # that a half-filled row is reported before the assessor is called.
    examples = []
    for row in data_rows:
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value
        if mark is None and reason is None:
            continue
        if mark is None or reason is None:
            student_id = sheet.cell(row=row, column=1).value
            # Raised explicitly (not via `assert`) so the check survives `python -O`.
            raise AssertionError(f"Mark or reason missing for student {student_id}.")
        # Take the essay from the new sheet: a graded row whose transcription
        # was only produced in the first pass has no text in the input sheet.
        essay = new_sheet.cell(row=row, column=3).value
        examples.append({"essay": essay, "mark": mark, "reason": reason})

    # Second Pass: Grade missing grades/reasons
    for row in data_rows:
        student_id = sheet.cell(row=row, column=1).value
        transcribed_text = new_sheet.cell(row=row, column=3).value
        mark = sheet.cell(row=row, column=4).value
        reason = sheet.cell(row=row, column=5).value

        if mark is None and reason is None:
            print(f"Assessing essay for student {student_id}...")
            assessment = assess_essay_with_gpt(transcribed_text, question, guidelines, examples)
            mark = assessment['mark']
            reason = assessment['reason']
            # Add the assessed essay as an example for subsequent assessments
            examples.append({"essay": transcribed_text, "mark": mark, "reason": reason})

        # Write either the existing or the freshly assessed mark and reason
        new_sheet.cell(row=row, column=4).value = mark
        new_sheet.cell(row=row, column=5).value = reason

    # Save the new Excel file with assessments filled in
    new_workbook.save(_assessed_path(excel_file))
    print("Assessment complete. Results saved in assessed version of the Excel file.")
# Replace with actual file paths.
# Guarded so the pipeline runs only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    process_essays(
        folder_path="data/images",
        question_file="data/question.txt",
        guidelines_file="data/assessment_guidelines.txt",
        excel_file="data/essays.xlsx",
    )