TensorFlo committed on
Commit
03c245b
·
1 Parent(s): 2f974c6

minor changes

Browse files
Files changed (3) hide show
  1. app.py +54 -24
  2. src/main.py +28 -25
  3. src/transcribe_image.py +8 -2
app.py CHANGED
@@ -1,43 +1,73 @@
1
  import streamlit as st
2
- import pandas as pd
3
- from src.main import process_image # Assume process_image is a function in main.py
4
- from src.assess_text import assess_essay_with_gpt
5
- from src.transcribe_image import transcribe_image
6
  from PIL import Image
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
  st.title("AutoAssess: Student Essay Transcription and Assessment")
9
 
 
 
 
10
  # Upload folder of images
11
- uploaded_files = st.file_uploader("Upload a folder of student essays (images)", type=['jpg', 'jpeg', 'png'], accept_multiple_files=True)
 
 
 
 
 
 
 
12
 
13
  # Text inputs for question and criteria
14
- essay_question = st.text_input("Enter the essay question:")
15
- grading_criteria = st.text_area("Enter grading criteria or relevant marking information:")
 
 
 
16
 
17
  # Upload Excel file with student IDs and page count
18
- student_info_file = st.file_uploader("Upload Excel file with student IDs and page count", type=["xlsx"])
 
19
 
20
  if st.button("Process Essays"):
21
- if not uploaded_files or not essay_question or not grading_criteria or not student_info_file:
22
  st.warning("Please upload all required files and enter necessary information.")
23
  else:
24
  # Process student info file
25
- student_df = pd.read_excel(student_info_file)
26
- st.write("Student Information:")
27
- st.write(student_df)
28
 
29
- results = []
30
- for uploaded_file in uploaded_files:
31
- image = Image.open(uploaded_file)
32
- # Use your backend function to process each image
33
- transcription = process_image(image, essay_question, grading_criteria)
34
- results.append({"filename": uploaded_file.name, "transcription": transcription})
35
 
36
- for result in results:
37
- st.write(f"**File:** {result['filename']}")
38
- st.write(result['transcription'])
39
 
40
  # Optional: Save results to the output folder
41
- output_file = "output/results.csv"
42
- pd.DataFrame(results).to_csv(output_file)
43
- st.success(f"All essays processed. Results saved to {output_file}")
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from src.main import process_essays
 
 
 
3
  from PIL import Image
4
+ import os
5
+ from openpyxl import load_workbook, Workbook
6
+ from io import BytesIO
7
+ import openai
8
+
9
+ def save_workbook_to_bytes(wb):
10
+ # Save the workbook into a BytesIO object (in memory, not on disk)
11
+ byte_io = BytesIO()
12
+ wb.save(byte_io)
13
+ byte_io.seek(0) # Go to the beginning of the BytesIO buffer
14
+ return byte_io.getvalue()
15
+
16
+
17
+ openai_api_key = os.getenv("OPENAI_API_KEY")
18
+
19
+ # Set the OpenAI API key
20
+ openai.api_key = openai_api_key
21
 
22
  st.title("AutoAssess: Student Essay Transcription and Assessment")
23
 
24
+ st.title("AutoAssess")
25
+ st.write("If you see this, the basic app is loading correctly!")
26
+
27
  # Upload folder of images
28
+ # uploaded_files = sorted(st.file_uploader("Upload a folder of student essays (images)", type=['jpg', 'jpeg', 'png'], accept_multiple_files=True))
29
+
30
+ # replace uploaded files with files loading from directory
31
+ image_dir = "data/images"
32
+ uploaded_files = []
33
+ for file in os.listdir(image_dir):
34
+ with open(image_dir + '/' + file, "rb") as image_file:
35
+ uploaded_files.append(image_file.read())
36
 
37
  # Text inputs for question and criteria
38
+ # essay_question = st.text_input("Enter the essay question:")
39
+ # grading_criteria = st.text_area("Enter grading criteria or relevant marking information:")
40
+
41
+ essay_question = "What is beauty?"
42
+ grading_criteria = "1. Introduction\n2. Body\n3. Conclusion\n4. Grammar\n5. Spelling\n6. Punctuation\n7. Originality\n8. Creativity"
43
 
44
  # Upload Excel file with student IDs and page count
45
+ # student_info_file = st.file_uploader("Upload Excel file with student IDs and page count", type=["xlsx"])
46
+ excel_file = "data/essays.xlsx"
47
 
48
  if st.button("Process Essays"):
49
+ if not uploaded_files or not essay_question or not grading_criteria or not excel_file:
50
  st.warning("Please upload all required files and enter necessary information.")
51
  else:
52
  # Process student info file
53
+ workbook = load_workbook(excel_file)
54
+
 
55
 
56
+ new_workbook = process_essays(uploaded_files,essay_question,grading_criteria,workbook)
 
 
 
 
 
57
 
 
 
 
58
 
59
  # Optional: Save results to the output folder
60
+ output_file = "output/results.xlsx"
61
+ new_workbook.save(output_file)
62
+ st.success(f"All essays processed. Results saved to {output_file}")
63
+
64
+ # Convert the workbook to bytes
65
+ excel_file = save_workbook_to_bytes(new_workbook)
66
+
67
+ # Display the download button
68
+ st.download_button(
69
+ label="Download the Excel file",
70
+ data=excel_file,
71
+ file_name="results.xlsx",
72
+ mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"
73
+ )
src/main.py CHANGED
@@ -5,19 +5,8 @@ from openpyxl import load_workbook, Workbook
5
  from src.transcribe_image import transcribe_image
6
  from src.assess_text import assess_essay_with_gpt
7
 
8
- # OpenAI API key setup
9
- openai.api_key = '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'
10
 
11
- def process_essays(folder_path, question_file, guidelines_file, excel_file):
12
- # Load question and guidelines
13
- with open(question_file, 'r') as file:
14
- question = file.read().strip()
15
-
16
- with open(guidelines_file, 'r') as file:
17
- guidelines = file.read().strip()
18
-
19
- # Load the Excel sheet
20
- workbook = load_workbook(excel_file)
21
  sheet = workbook.active
22
 
23
  # Create a new workbook to save results
@@ -28,8 +17,6 @@ def process_essays(folder_path, question_file, guidelines_file, excel_file):
28
  for col in range(1, sheet.max_column + 1):
29
  new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value
30
 
31
- # Sort images in folder
32
- images = sorted([os.path.join(folder_path, img) for img in os.listdir(folder_path)], key=os.path.getmtime)
33
  img_index = 0
34
 
35
  # First Pass: Transcribe missing texts
@@ -55,8 +42,8 @@ def process_essays(folder_path, question_file, guidelines_file, excel_file):
55
  new_sheet.cell(row=row, column=3).value = transcribed_text
56
 
57
  # Save current state with transcriptions
58
- new_workbook.save("data/transcribed_essays.xlsx")
59
- print("All transcriptions completed. Saved as 'transcribed_essays.xlsx'.")
60
 
61
  # Collect graded examples and initialize list
62
  examples = []
@@ -91,15 +78,31 @@ def process_essays(folder_path, question_file, guidelines_file, excel_file):
91
  new_sheet.cell(row=row, column=5).value = reason
92
 
93
  # Save the new Excel file with assessments filled in
94
- new_workbook.save(excel_file.replace(".xlsx", "_assessed.xlsx"))
95
- print("Assessment complete. Results saved in assessed version of the Excel file.")
96
 
97
 
 
 
 
 
 
 
98
 
99
- # Replace with actual file paths
100
- process_essays(
101
- folder_path="data/images",
102
- question_file="data/question.txt",
103
- guidelines_file="data/assessment_guidelines.txt",
104
- excel_file="data/essays.xlsx"
105
- )
 
 
 
 
 
 
 
 
 
 
 
 
5
  from src.transcribe_image import transcribe_image
6
  from src.assess_text import assess_essay_with_gpt
7
 
 
 
8
 
9
+ def process_essays(images, question, guidelines, workbook):
 
 
 
 
 
 
 
 
 
10
  sheet = workbook.active
11
 
12
  # Create a new workbook to save results
 
17
  for col in range(1, sheet.max_column + 1):
18
  new_sheet.cell(row=1, column=col).value = sheet.cell(row=1, column=col).value
19
 
 
 
20
  img_index = 0
21
 
22
  # First Pass: Transcribe missing texts
 
42
  new_sheet.cell(row=row, column=3).value = transcribed_text
43
 
44
  # Save current state with transcriptions
45
+ # new_workbook.save("data/transcribed_essays.xlsx")
46
+ # print("All transcriptions completed. Saved as 'transcribed_essays.xlsx'.")
47
 
48
  # Collect graded examples and initialize list
49
  examples = []
 
78
  new_sheet.cell(row=row, column=5).value = reason
79
 
80
  # Save the new Excel file with assessments filled in
81
+ return new_workbook
 
82
 
83
 
84
+ if __name__ == "__main__":
85
+
86
+ folder_path = "data/images" # Replace with actual folder path
87
+ question_file = "data/question.txt" # Replace with actual file path
88
+ guidelines_file = "data/assessment_guidelines.txt" # Replace with actual file path
89
+ excel_file = "data/essays.xlsx"
90
 
91
+ # Load
92
+ images = sorted([os.path.join(folder_path, img) for img in os.listdir(folder_path)], key=os.path.getmtime)
93
+ with open(question_file, 'r') as file:
94
+ question = file.read().strip()
95
+ with open(guidelines_file, 'r') as file:
96
+ guidelines = file.read().strip()
97
+ workbook = load_workbook(excel_file)
98
+
99
+
100
+ new_workbook = process_essays(
101
+ images,
102
+ question,
103
+ guidelines,
104
+ workbook
105
+ )
106
+
107
+ new_workbook.save(excel_file.replace(".xlsx", "_assessed.xlsx"))
108
+ print("Assessment complete. Results saved in assessed version of the Excel file.")
src/transcribe_image.py CHANGED
@@ -7,14 +7,20 @@ def encode_image(image_path):
7
  assert os.path.exists(image_path), "The image file does not exist."
8
  with open(image_path, "rb") as image_file:
9
  return base64.b64encode(image_file.read()).decode('utf-8')
 
 
 
 
 
 
10
 
11
- def transcribe_image(image_path):
12
  """Transcribe handwritten text from an image using OCR."""
13
  # Initialize the OpenAI client
14
  client = OpenAI()
15
 
16
  # Encoding the image
17
- base64_image = encode_image(image_path)
18
 
19
 
20
  # Preparing the API call
 
7
  assert os.path.exists(image_path), "The image file does not exist."
8
  with open(image_path, "rb") as image_file:
9
  return base64.b64encode(image_file.read()).decode('utf-8')
10
+
11
+ def encode_image_from_uploaded_file(image):
12
+ # Convert image to bytes
13
+ assert image is not None, "No image uploaded."
14
+ image_bytes = image.read()
15
+ return base64.b64encode(image_bytes).decode('utf-8')
16
 
17
+ def transcribe_image(image_file):
18
  """Transcribe handwritten text from an image using OCR."""
19
  # Initialize the OpenAI client
20
  client = OpenAI()
21
 
22
  # Encoding the image
23
+ base64_image = encode_image_from_uploaded_file(image_file)
24
 
25
 
26
  # Preparing the API call