abubasith86 committed on
Commit
e807a06
·
verified ·
1 Parent(s): f4fbdf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -35
app.py CHANGED
@@ -9,11 +9,9 @@ st.title("📚 JSONL Dataset Editor")
9
  TMP_DIR = "temp"
10
  TMP_FILE = os.path.join(TMP_DIR, "session_dataset.jsonl")
11
 
12
- # --- Helper: ensure tmp dir exists ---
13
  os.makedirs(TMP_DIR, exist_ok=True)
14
 
15
 
16
- # --- Helper: get all unique fields from records ---
17
  def get_all_fields(data):
18
  all_keys = set()
19
  for record in data:
@@ -21,15 +19,25 @@ def get_all_fields(data):
21
  return sorted(all_keys)
22
 
23
 
 
 
 
 
 
 
 
 
24
  # --- Load session data from temp file if exists ---
25
  if "data" not in st.session_state:
26
  if os.path.exists(TMP_FILE):
27
  with open(TMP_FILE, "r", encoding="utf-8") as f:
28
  st.session_state.data = [json.loads(line) for line in f]
29
  st.session_state.all_fields = get_all_fields(st.session_state.data)
 
30
  else:
31
  st.session_state.data = []
32
  st.session_state.all_fields = []
 
33
 
34
  # --- Upload JSONL File ---
35
  uploaded_file = st.file_uploader("Upload a JSONL file", type=["jsonl"])
@@ -38,8 +46,8 @@ if uploaded_file:
38
  content = uploaded_file.read().decode("utf-8")
39
  st.session_state.data = [json.loads(line) for line in content.strip().splitlines()]
40
  st.session_state.all_fields = get_all_fields(st.session_state.data)
 
41
 
42
- # Save to temp
43
  with open(TMP_FILE, "w", encoding="utf-8") as f:
44
  for item in st.session_state.data:
45
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
@@ -48,7 +56,7 @@ if uploaded_file:
48
  f"Loaded {len(st.session_state.data)} records with fields: {st.session_state.all_fields}"
49
  )
50
 
51
- # If still no data, use safe fallback fields
52
  if not st.session_state.data and not st.session_state.all_fields:
53
  st.session_state.all_fields = ["context", "question", "answer"]
54
 
@@ -58,12 +66,10 @@ st.markdown("### ✏️ Edit Records")
58
  df = pd.DataFrame(st.session_state.data)
59
  df = df.reindex(columns=st.session_state.all_fields)
60
 
61
- # Fix: Convert likely text fields to string to avoid StreamlitAPIException
62
  for field in st.session_state.all_fields:
63
  if field.lower() in ["context", "answer", "question"]:
64
  df[field] = df[field].astype(str)
65
 
66
- # Auto-set long fields like "context", "answer" as textareas
67
  column_configs = {
68
  field: (
69
  st.column_config.TextColumn(label=field, width="large")
@@ -73,49 +79,38 @@ column_configs = {
73
  for field in st.session_state.all_fields
74
  }
75
 
76
- # --- Use st.data_editor for editable table ---
77
- # Track previous data
78
- if "prev_data" not in st.session_state:
79
- st.session_state.prev_data = []
80
-
81
- # Show editable table
82
  edited_df = st.data_editor(
83
  df,
84
  use_container_width=True,
85
  num_rows="dynamic",
86
  column_config=column_configs,
87
- key="editable_table",
88
  )
89
 
90
- # Save only if changed
91
  new_data = edited_df.fillna("").to_dict(orient="records")
92
-
93
- if new_data != st.session_state.get("prev_data", []):
94
  st.session_state.data = new_data
95
- st.session_state.prev_data = new_data
96
 
97
  with open(TMP_FILE, "w", encoding="utf-8") as f:
98
- for item in new_data:
99
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
100
 
101
- st.experimental_rerun()
102
-
103
-
104
 
105
  # --- Add New Entry ---
106
  st.markdown("### ➕ Add New Entry")
107
 
108
- # Show form with current fields
109
  with st.form("new_entry_form"):
110
  new_record = {}
111
  for field in st.session_state.all_fields:
112
  new_record[field] = st.text_area(f"{field}", key=f"input_{field}")
113
-
114
  submitted = st.form_submit_button("Add Entry")
115
  if submitted:
116
  st.session_state.data.append(new_record)
 
117
 
118
- # Save to temp
119
  with open(TMP_FILE, "w", encoding="utf-8") as f:
120
  for item in st.session_state.data:
121
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
@@ -123,7 +118,7 @@ with st.form("new_entry_form"):
123
  st.success("✅ New entry added!")
124
  st.rerun()
125
 
126
- # Option to add a new field
127
  with st.expander("➕ Add New Field"):
128
  new_field = st.text_input("New field name", key="new_field_name")
129
  if st.button("Add Field"):
@@ -132,11 +127,9 @@ with st.expander("➕ Add New Field"):
132
  st.success(f"✅ Field '{new_field}' added!")
133
  st.rerun()
134
 
135
-
136
  # --- Export JSONL ---
137
  st.markdown("### 📤 Export Dataset")
138
 
139
- # Let user define a custom export path
140
  export_path = st.text_input(
141
  "Custom save path (e.g., ./exports/my_dataset.jsonl)",
142
  value="./exports/exported_dataset.jsonl",
@@ -144,20 +137,15 @@ export_path = st.text_input(
144
 
145
  col1, col2 = st.columns(2)
146
 
147
- # --- Export Button ---
148
  with col1:
149
  if st.button("πŸ“ Export JSONL"):
150
- if not os.path.exists(os.path.dirname(export_path)):
151
- os.makedirs(os.path.dirname(export_path))
152
-
153
- # Write to custom path
154
  with open(export_path, "w", encoding="utf-8") as f_out:
155
  for row in st.session_state.data:
156
  f_out.write(json.dumps(row, ensure_ascii=False) + "\n")
157
 
158
  st.success(f"✅ Dataset saved to {export_path}")
159
 
160
- # Load content for download
161
  with open(export_path, "r", encoding="utf-8") as f_download:
162
  exported_content = f_download.read()
163
 
@@ -168,14 +156,12 @@ with col1:
168
  mime="application/json",
169
  )
170
 
171
- # Reset session and temp
172
  if os.path.exists(TMP_FILE):
173
  os.remove(TMP_FILE)
174
  st.session_state.clear()
175
  st.success("🧹 Temporary session cleared. You're starting fresh!")
176
  st.rerun()
177
 
178
- # --- Download Temp Only Button ---
179
  with col2:
180
  if os.path.exists(TMP_FILE):
181
  with open(TMP_FILE, "r", encoding="utf-8") as f_tmp:
 
9
  TMP_DIR = "temp"
10
  TMP_FILE = os.path.join(TMP_DIR, "session_dataset.jsonl")
11
 
 
12
  os.makedirs(TMP_DIR, exist_ok=True)
13
 
14
 
 
15
  def get_all_fields(data):
16
  all_keys = set()
17
  for record in data:
 
19
  return sorted(all_keys)
20
 
21
 
22
+ # --- Clear session handler ---
23
+ if st.button("🧹 Clear Session"):
24
+ st.session_state.clear()
25
+ if os.path.exists(TMP_FILE):
26
+ os.remove(TMP_FILE)
27
+ st.success("Session and temp file cleared!")
28
+ st.rerun()
29
+
30
  # --- Load session data from temp file if exists ---
31
  if "data" not in st.session_state:
32
  if os.path.exists(TMP_FILE):
33
  with open(TMP_FILE, "r", encoding="utf-8") as f:
34
  st.session_state.data = [json.loads(line) for line in f]
35
  st.session_state.all_fields = get_all_fields(st.session_state.data)
36
+ st.session_state.prev_data = st.session_state.data.copy()
37
  else:
38
  st.session_state.data = []
39
  st.session_state.all_fields = []
40
+ st.session_state.prev_data = []
41
 
42
  # --- Upload JSONL File ---
43
  uploaded_file = st.file_uploader("Upload a JSONL file", type=["jsonl"])
 
46
  content = uploaded_file.read().decode("utf-8")
47
  st.session_state.data = [json.loads(line) for line in content.strip().splitlines()]
48
  st.session_state.all_fields = get_all_fields(st.session_state.data)
49
+ st.session_state.prev_data = st.session_state.data.copy()
50
 
 
51
  with open(TMP_FILE, "w", encoding="utf-8") as f:
52
  for item in st.session_state.data:
53
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
 
56
  f"Loaded {len(st.session_state.data)} records with fields: {st.session_state.all_fields}"
57
  )
58
 
59
+ # Fallback default fields
60
  if not st.session_state.data and not st.session_state.all_fields:
61
  st.session_state.all_fields = ["context", "question", "answer"]
62
 
 
66
  df = pd.DataFrame(st.session_state.data)
67
  df = df.reindex(columns=st.session_state.all_fields)
68
 
 
69
  for field in st.session_state.all_fields:
70
  if field.lower() in ["context", "answer", "question"]:
71
  df[field] = df[field].astype(str)
72
 
 
73
  column_configs = {
74
  field: (
75
  st.column_config.TextColumn(label=field, width="large")
 
79
  for field in st.session_state.all_fields
80
  }
81
 
 
 
 
 
 
 
82
  edited_df = st.data_editor(
83
  df,
84
  use_container_width=True,
85
  num_rows="dynamic",
86
  column_config=column_configs,
87
+ key="editable_table"
88
  )
89
 
90
+ # --- Auto-save if any changes ---
91
  new_data = edited_df.fillna("").to_dict(orient="records")
92
+ if new_data != st.session_state.prev_data:
 
93
  st.session_state.data = new_data
94
+ st.session_state.prev_data = new_data.copy()
95
 
96
  with open(TMP_FILE, "w", encoding="utf-8") as f:
97
+ for item in st.session_state.data:
98
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
99
 
100
+ st.toast("✅ Changes auto-saved!", icon="💾")
 
 
101
 
102
  # --- Add New Entry ---
103
  st.markdown("### ➕ Add New Entry")
104
 
 
105
  with st.form("new_entry_form"):
106
  new_record = {}
107
  for field in st.session_state.all_fields:
108
  new_record[field] = st.text_area(f"{field}", key=f"input_{field}")
 
109
  submitted = st.form_submit_button("Add Entry")
110
  if submitted:
111
  st.session_state.data.append(new_record)
112
+ st.session_state.prev_data = st.session_state.data.copy()
113
 
 
114
  with open(TMP_FILE, "w", encoding="utf-8") as f:
115
  for item in st.session_state.data:
116
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
 
118
  st.success("✅ New entry added!")
119
  st.rerun()
120
 
121
+ # --- Add New Field ---
122
  with st.expander("➕ Add New Field"):
123
  new_field = st.text_input("New field name", key="new_field_name")
124
  if st.button("Add Field"):
 
127
  st.success(f"✅ Field '{new_field}' added!")
128
  st.rerun()
129
 
 
130
  # --- Export JSONL ---
131
  st.markdown("### 📤 Export Dataset")
132
 
 
133
  export_path = st.text_input(
134
  "Custom save path (e.g., ./exports/my_dataset.jsonl)",
135
  value="./exports/exported_dataset.jsonl",
 
137
 
138
  col1, col2 = st.columns(2)
139
 
 
140
  with col1:
141
  if st.button("πŸ“ Export JSONL"):
142
+ os.makedirs(os.path.dirname(export_path), exist_ok=True)
 
 
 
143
  with open(export_path, "w", encoding="utf-8") as f_out:
144
  for row in st.session_state.data:
145
  f_out.write(json.dumps(row, ensure_ascii=False) + "\n")
146
 
147
  st.success(f"✅ Dataset saved to {export_path}")
148
 
 
149
  with open(export_path, "r", encoding="utf-8") as f_download:
150
  exported_content = f_download.read()
151
 
 
156
  mime="application/json",
157
  )
158
 
 
159
  if os.path.exists(TMP_FILE):
160
  os.remove(TMP_FILE)
161
  st.session_state.clear()
162
  st.success("🧹 Temporary session cleared. You're starting fresh!")
163
  st.rerun()
164
 
 
165
  with col2:
166
  if os.path.exists(TMP_FILE):
167
  with open(TMP_FILE, "r", encoding="utf-8") as f_tmp: