Spaces:

TensorFlo
/

AutoAssess

Sleeping

App Files Community

TensorFlo committed on Nov 7, 2024

Commit

03c245b

1 Parent(s): 2f974c6

minor changes

Browse files

Files changed (3) hide show

app.py +54 -24
src/main.py +28 -25
src/transcribe_image.py +8 -2

app.py CHANGED Viewed

@@ -1,43 +1,73 @@
 import streamlit as st
-import pandas as pd
-from src.main import process_image  # Assume process_image is a function in main.py
-from src.assess_text import assess_essay_with_gpt
-from src.transcribe_image import transcribe_image
 from PIL import Image
 st.title("AutoAssess: Student Essay Transcription and Assessment")
 # Upload folder of images
-uploaded_files = st.file_uploader("Upload a folder of student essays (images)", type=['jpg', 'jpeg', 'png'], accept_multiple_files=True)
 # Text inputs for question and criteria
-essay_question = st.text_input("Enter the essay question:")
-grading_criteria = st.text_area("Enter grading criteria or relevant marking information:")
 # Upload Excel file with student IDs and page count
-student_info_file = st.file_uploader("Upload Excel file with student IDs and page count", type=["xlsx"])
 if st.button("Process Essays"):
-    if not uploaded_files or not essay_question or not grading_criteria or not student_info_file:
         st.warning("Please upload all required files and enter necessary information.")
     else:
         # Process student info file
-        student_df = pd.read_excel(student_info_file)
-        st.write("Student Information:")
-        st.write(student_df)
-        results = []
-        for uploaded_file in uploaded_files:
-            image = Image.open(uploaded_file)
-            # Use your backend function to process each image
-            transcription = process_image(image, essay_question, grading_criteria)
-            results.append({"filename": uploaded_file.name, "transcription": transcription})
-        for result in results:
-            st.write(f"**File:** {result['filename']}")
-            st.write(result['transcription'])
         # Optional: Save results to the output folder
-        output_file = "output/results.csv"
-        pd.DataFrame(results).to_csv(output_file)
-        st.success(f"All essays processed. Results saved to {output_file}")

 import streamlit as st
+from src.main import process_essays
 from PIL import Image
+import os
+from openpyxl import load_workbook, Workbook
+from io import BytesIO
+import openai
+def save_workbook_to_bytes(wb):
+    # Save the workbook into a BytesIO object (in memory, not on disk)
+    byte_io = BytesIO()
+    wb.save(byte_io)
+    byte_io.seek(0)  # Go to the beginning of the BytesIO buffer
+    return byte_io.getvalue()
+openai_api_key = os.getenv("OPENAI_API_KEY")
+# Set the OpenAI API key
+openai.api_key = openai_api_key
 st.title("AutoAssess: Student Essay Transcription and Assessment")
+st.title("AutoAssess")
+st.write("If you see this, the basic app is loading correctly!")
 # Upload folder of images
+# uploaded_files = sorted(st.file_uploader("Upload a folder of student essays (images)", type=['jpg', 'jpeg', 'png'], accept_multiple_files=True))
+# replace uploaded files with files loading from directory
+image_dir = "data/images"
+uploaded_files = []
+for file in os.listdir(image_dir):
+    with open(image_dir + '/' + file, "rb") as image_file:
+        uploaded_files.append(image_file.read())
 # Text inputs for question and criteria
+# essay_question = st.text_input("Enter the essay question:")
+# grading_criteria = st.text_area("Enter grading criteria or relevant marking information:")
+essay_question = "What is beauty?"
+grading_criteria = "1. Introduction\n2. Body\n3. Conclusion\n4. Grammar\n5. Spelling\n6. Punctuation\n7. Originality\n8. Creativity"
 # Upload Excel file with student IDs and page count
+# student_info_file = st.file_uploader("Upload Excel file with student IDs and page count", type=["xlsx"])
+excel_file = "data/essays.xlsx"
 if st.button("Process Essays"):
+    if not uploaded_files or not essay_question or not grading_criteria or not excel_file:
         st.warning("Please upload all required files and enter necessary information.")
     else:
         # Process student info file
+        workbook = load_workbook(excel_file)
+        new_workbook = process_essays(uploaded_files,essay_question,grading_criteria,workbook)
         # Optional: Save results to the output folder
+        output_file = "output/results.xlsx"
+        new_workbook.save(output_file)
+        st.success(f"All essays processed. Results saved to {output_file}")
+        # Convert the workbook to bytes
+        excel_file = save_workbook_to_bytes(new_workbook)
+        # Display the download button
+        st.download_button(
+            label="Download the Excel file",
+            data=excel_file,
+            file_name="results.xlsx",
+            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
+)

src/main.py CHANGED Viewed

@@ -5,19 +5,8 @@ from openpyxl import load_workbook, Workbook
 from src.transcribe_image import transcribe_image
 from src.assess_text import assess_essay_with_gpt
-# OpenAI API key setup
-openai.api_key = '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'
-def process_essays(folder_path, question_file, guidelines_file, excel_file):
-    # Load question and guidelines
-    with open(question_file, 'r') as file:
-        question = file.read().strip()
-    with open(guidelines_file, 'r') as file:
-        guidelines = file.read().strip()
-    # Load the Excel sheet
-    workbook = load_workbook(excel_file)
     sheet = workbook.active
     # Create a new workbook to save results
@@ -28,8 +17,6 @@ def process_essays(folder_path, question_file, guidelines_file, excel_file):
     for col in range(1, sheet.max_column + 1):
         new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value
-    # Sort images in folder
-    images = sorted([os.path.join(folder_path, img) for img in os.listdir(folder_path)], key=os.path.getmtime)
     img_index = 0
     # First Pass: Transcribe missing texts
@@ -55,8 +42,8 @@ def process_essays(folder_path, question_file, guidelines_file, excel_file):
             new_sheet.cell(row=row, column=3).value = transcribed_text
     # Save current state with transcriptions
-    new_workbook.save("data/transcribed_essays.xlsx")
-    print("All transcriptions completed. Saved as 'transcribed_essays.xlsx'.")
     # Collect graded examples and initialize list
     examples = []
@@ -91,15 +78,31 @@ def process_essays(folder_path, question_file, guidelines_file, excel_file):
             new_sheet.cell(row=row, column=5).value = reason
     # Save the new Excel file with assessments filled in
-    new_workbook.save(excel_file.replace(".xlsx", "_assessed.xlsx"))
-    print("Assessment complete. Results saved in assessed version of the Excel file.")
-# Replace with actual file paths
-process_essays(
-    folder_path="data/images",
-    question_file="data/question.txt",
-    guidelines_file="data/assessment_guidelines.txt",
-    excel_file="data/essays.xlsx"
-)

 from src.transcribe_image import transcribe_image
 from src.assess_text import assess_essay_with_gpt
+def process_essays(images, question, guidelines, workbook):
     sheet = workbook.active
     # Create a new workbook to save results
     for col in range(1, sheet.max_column + 1):
         new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value
     img_index = 0
     # First Pass: Transcribe missing texts
             new_sheet.cell(row=row, column=3).value = transcribed_text
     # Save current state with transcriptions
+    # new_workbook.save("data/transcribed_essays.xlsx")
+    # print("All transcriptions completed. Saved as 'transcribed_essays.xlsx'.")
     # Collect graded examples and initialize list
     examples = []
             new_sheet.cell(row=row, column=5).value = reason
     # Save the new Excel file with assessments filled in
+    return new_workbook
+if __name__ == "__main__":
+    folder_path = "data/images"  # Replace with actual folder path
+    question_file = "data/question.txt"  # Replace with actual file path
+    guidelines_file = "data/assessment_guidelines.txt"  # Replace with actual file path
+    excel_file = "data/essays.xlsx"
+    # Load
+    images = sorted([os.path.join(folder_path, img) for img in os.listdir(folder_path)], key=os.path.getmtime)
+    with open(question_file, 'r') as file:
+        question = file.read().strip()
+    with open(guidelines_file, 'r') as file:
+        guidelines = file.read().strip()
+    workbook = load_workbook(excel_file)
+    new_workbook = process_essays(
+        images,
+        question,
+        guidelines,
+        workbook
+    )
+    new_workbook.save(excel_file.replace(".xlsx", "_assessed.xlsx"))
+    print("Assessment complete. Results saved in assessed version of the Excel file.")

src/transcribe_image.py CHANGED Viewed

@@ -7,14 +7,20 @@ def encode_image(image_path):
     assert os.path.exists(image_path), "The image file does not exist."
     with open(image_path, "rb") as image_file:
         return base64.b64encode(image_file.read()).decode('utf-8')
-def transcribe_image(image_path):
     """Transcribe handwritten text from an image using OCR."""
     # Initialize the OpenAI client
     client = OpenAI()
     # Encoding the image
-    base64_image = encode_image(image_path)
     # Preparing the API call

     assert os.path.exists(image_path), "The image file does not exist."
     with open(image_path, "rb") as image_file:
         return base64.b64encode(image_file.read()).decode('utf-8')
+def encode_image_from_uploaded_file(image):
+    # Convert image to bytes
+    assert image is not None, "No image uploaded."
+    image_bytes = image.read()
+    return base64.b64encode(image_bytes).decode('utf-8')
+def transcribe_image(image_file):
     """Transcribe handwritten text from an image using OCR."""
     # Initialize the OpenAI client
     client = OpenAI()
     # Encoding the image
+    base64_image = encode_image_from_uploaded_file(image_file)
     # Preparing the API call