abubasith86 committed on
Commit
c973974
·
verified ·
1 Parent(s): e807a06

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +59 -57
app.py CHANGED
@@ -9,54 +9,46 @@ st.title("📚 JSONL Dataset Editor")
9
  TMP_DIR = "temp"
10
  TMP_FILE = os.path.join(TMP_DIR, "session_dataset.jsonl")
11
 
 
12
  os.makedirs(TMP_DIR, exist_ok=True)
13
 
 
 
 
14
 
15
- def get_all_fields(data):
16
- all_keys = set()
17
- for record in data:
18
- all_keys.update(record.keys())
19
- return sorted(all_keys)
20
-
21
-
22
- # --- Clear session handler ---
23
- if st.button("🧹 Clear Session"):
24
- st.session_state.clear()
25
- if os.path.exists(TMP_FILE):
26
- os.remove(TMP_FILE)
27
- st.success("Session and temp file cleared!")
28
- st.rerun()
29
-
30
- # --- Load session data from temp file if exists ---
31
  if "data" not in st.session_state:
32
  if os.path.exists(TMP_FILE):
33
  with open(TMP_FILE, "r", encoding="utf-8") as f:
34
  st.session_state.data = [json.loads(line) for line in f]
35
- st.session_state.all_fields = get_all_fields(st.session_state.data)
36
- st.session_state.prev_data = st.session_state.data.copy()
37
  else:
38
  st.session_state.data = []
39
- st.session_state.all_fields = []
40
- st.session_state.prev_data = []
 
 
 
 
 
 
 
 
41
 
42
  # --- Upload JSONL File ---
43
  uploaded_file = st.file_uploader("Upload a JSONL file", type=["jsonl"])
44
-
45
  if uploaded_file:
46
  content = uploaded_file.read().decode("utf-8")
47
  st.session_state.data = [json.loads(line) for line in content.strip().splitlines()]
48
- st.session_state.all_fields = get_all_fields(st.session_state.data)
49
  st.session_state.prev_data = st.session_state.data.copy()
50
 
51
  with open(TMP_FILE, "w", encoding="utf-8") as f:
52
  for item in st.session_state.data:
53
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
54
 
55
- st.success(
56
- f"Loaded {len(st.session_state.data)} records with fields: {st.session_state.all_fields}"
57
- )
58
 
59
- # Fallback default fields
60
  if not st.session_state.data and not st.session_state.all_fields:
61
  st.session_state.all_fields = ["context", "question", "answer"]
62
 
@@ -67,41 +59,40 @@ df = pd.DataFrame(st.session_state.data)
67
  df = df.reindex(columns=st.session_state.all_fields)
68
 
69
  for field in st.session_state.all_fields:
70
- if field.lower() in ["context", "answer", "question"]:
71
  df[field] = df[field].astype(str)
72
 
73
  column_configs = {
74
  field: (
75
  st.column_config.TextColumn(label=field, width="large")
76
- if field.lower() in ["context", "answer", "question"]
77
  else None
78
  )
79
  for field in st.session_state.all_fields
80
  }
81
 
82
- edited_df = st.data_editor(
83
- df,
84
- use_container_width=True,
85
- num_rows="dynamic",
86
- column_config=column_configs,
87
- key="editable_table"
88
- )
 
89
 
90
- # --- Auto-save if any changes ---
91
- new_data = edited_df.fillna("").to_dict(orient="records")
92
- if new_data != st.session_state.prev_data:
93
- st.session_state.data = new_data
94
- st.session_state.prev_data = new_data.copy()
95
 
96
- with open(TMP_FILE, "w", encoding="utf-8") as f:
97
- for item in st.session_state.data:
98
- f.write(json.dumps(item, ensure_ascii=False) + "\n")
99
 
100
- st.toast("✅ Changes auto-saved!", icon="💾")
101
 
102
  # --- Add New Entry ---
103
  st.markdown("### ➕ Add New Entry")
104
-
105
  with st.form("new_entry_form"):
106
  new_record = {}
107
  for field in st.session_state.all_fields:
@@ -110,6 +101,7 @@ with st.form("new_entry_form"):
110
  if submitted:
111
  st.session_state.data.append(new_record)
112
  st.session_state.prev_data = st.session_state.data.copy()
 
113
 
114
  with open(TMP_FILE, "w", encoding="utf-8") as f:
115
  for item in st.session_state.data:
@@ -127,46 +119,47 @@ with st.expander("�� Add New Field"):
127
  st.success(f"✅ Field '{new_field}' added!")
128
  st.rerun()
129
 
130
- # --- Export JSONL ---
131
  st.markdown("### 📤 Export Dataset")
132
-
133
  export_path = st.text_input(
134
  "Custom save path (e.g., ./exports/my_dataset.jsonl)",
135
  value="./exports/exported_dataset.jsonl",
136
  )
137
 
138
- col1, col2 = st.columns(2)
139
 
 
140
  with col1:
141
  if st.button("📁 Export JSONL"):
142
  os.makedirs(os.path.dirname(export_path), exist_ok=True)
143
- with open(export_path, "w", encoding="utf-8") as f_out:
144
  for row in st.session_state.data:
145
- f_out.write(json.dumps(row, ensure_ascii=False) + "\n")
146
 
147
  st.success(f"✅ Dataset saved to {export_path}")
148
 
149
- with open(export_path, "r", encoding="utf-8") as f_download:
150
- exported_content = f_download.read()
151
 
152
  st.download_button(
153
  "⬇️ Download JSONL",
154
- exported_content,
155
  file_name=os.path.basename(export_path),
156
  mime="application/json",
157
  )
158
 
 
159
  if os.path.exists(TMP_FILE):
160
  os.remove(TMP_FILE)
161
  st.session_state.clear()
162
- st.success("🧹 Temporary session cleared. You're starting fresh!")
163
  st.rerun()
164
 
 
165
  with col2:
166
  if os.path.exists(TMP_FILE):
167
- with open(TMP_FILE, "r", encoding="utf-8") as f_tmp:
168
- tmp_content = f_tmp.read()
169
-
170
  st.download_button(
171
  "⬇️ Download Temp File",
172
  tmp_content,
@@ -175,3 +168,12 @@ with col2:
175
  )
176
  else:
177
  st.warning("⚠️ No temp file found to download.")
 
 
 
 
 
 
 
 
 
 
9
  TMP_DIR = "temp"
10
  TMP_FILE = os.path.join(TMP_DIR, "session_dataset.jsonl")
11
 
12
+ # --- Setup temp directory ---
13
  os.makedirs(TMP_DIR, exist_ok=True)
14
 
15
+ # --- Reset update flag on rerun ---
16
+ if st.session_state.get("updated"):
17
+ st.session_state.updated = False
18
 
19
+ # --- Load session data ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  if "data" not in st.session_state:
21
  if os.path.exists(TMP_FILE):
22
  with open(TMP_FILE, "r", encoding="utf-8") as f:
23
  st.session_state.data = [json.loads(line) for line in f]
 
 
24
  else:
25
  st.session_state.data = []
26
+
27
+ # Load all unique fields
28
+ def get_all_fields(data):
29
+ all_keys = set()
30
+ for record in data:
31
+ all_keys.update(record.keys())
32
+ return sorted(all_keys)
33
+
34
+ st.session_state.all_fields = get_all_fields(st.session_state.data)
35
+ st.session_state.prev_data = st.session_state.data.copy()
36
 
37
  # --- Upload JSONL File ---
38
  uploaded_file = st.file_uploader("Upload a JSONL file", type=["jsonl"])
 
39
  if uploaded_file:
40
  content = uploaded_file.read().decode("utf-8")
41
  st.session_state.data = [json.loads(line) for line in content.strip().splitlines()]
42
+ st.session_state.all_fields = sorted(set().union(*(record.keys() for record in st.session_state.data)))
43
  st.session_state.prev_data = st.session_state.data.copy()
44
 
45
  with open(TMP_FILE, "w", encoding="utf-8") as f:
46
  for item in st.session_state.data:
47
  f.write(json.dumps(item, ensure_ascii=False) + "\n")
48
 
49
+ st.success(f"Loaded {len(st.session_state.data)} records.")
 
 
50
 
51
+ # --- Safe fallback fields if no data yet ---
52
  if not st.session_state.data and not st.session_state.all_fields:
53
  st.session_state.all_fields = ["context", "question", "answer"]
54
 
 
59
  df = df.reindex(columns=st.session_state.all_fields)
60
 
61
  for field in st.session_state.all_fields:
62
+ if field.lower() in ["context", "question", "answer"]:
63
  df[field] = df[field].astype(str)
64
 
65
  column_configs = {
66
  field: (
67
  st.column_config.TextColumn(label=field, width="large")
68
+ if field.lower() in ["context", "question", "answer"]
69
  else None
70
  )
71
  for field in st.session_state.all_fields
72
  }
73
 
74
+ if not st.session_state.get("updated"):
75
+ edited_df = st.data_editor(
76
+ df,
77
+ use_container_width=True,
78
+ num_rows="dynamic",
79
+ column_config=column_configs,
80
+ key="editable_table",
81
+ )
82
 
83
+ new_data = edited_df.fillna("").to_dict(orient="records")
84
+ if new_data != st.session_state.prev_data:
85
+ st.session_state.data = new_data
86
+ st.session_state.prev_data = new_data.copy()
 
87
 
88
+ with open(TMP_FILE, "w", encoding="utf-8") as f:
89
+ for item in st.session_state.data:
90
+ f.write(json.dumps(item, ensure_ascii=False) + "\n")
91
 
92
+ st.toast("✅ Changes auto-saved!", icon="💾")
93
 
94
  # --- Add New Entry ---
95
  st.markdown("### ➕ Add New Entry")
 
96
  with st.form("new_entry_form"):
97
  new_record = {}
98
  for field in st.session_state.all_fields:
 
101
  if submitted:
102
  st.session_state.data.append(new_record)
103
  st.session_state.prev_data = st.session_state.data.copy()
104
+ st.session_state.updated = True
105
 
106
  with open(TMP_FILE, "w", encoding="utf-8") as f:
107
  for item in st.session_state.data:
 
119
  st.success(f"✅ Field '{new_field}' added!")
120
  st.rerun()
121
 
122
+ # --- Export Section ---
123
  st.markdown("### 📤 Export Dataset")
 
124
  export_path = st.text_input(
125
  "Custom save path (e.g., ./exports/my_dataset.jsonl)",
126
  value="./exports/exported_dataset.jsonl",
127
  )
128
 
129
+ col1, col2, col3 = st.columns(3)
130
 
131
+ # Export to path and download
132
  with col1:
133
  if st.button("📁 Export JSONL"):
134
  os.makedirs(os.path.dirname(export_path), exist_ok=True)
135
+ with open(export_path, "w", encoding="utf-8") as f:
136
  for row in st.session_state.data:
137
+ f.write(json.dumps(row, ensure_ascii=False) + "\n")
138
 
139
  st.success(f"✅ Dataset saved to {export_path}")
140
 
141
+ with open(export_path, "r", encoding="utf-8") as f:
142
+ content = f.read()
143
 
144
  st.download_button(
145
  "⬇️ Download JSONL",
146
+ content,
147
  file_name=os.path.basename(export_path),
148
  mime="application/json",
149
  )
150
 
151
+ # Clear session + temp
152
  if os.path.exists(TMP_FILE):
153
  os.remove(TMP_FILE)
154
  st.session_state.clear()
155
+ st.success("🧹 Temporary session cleared.")
156
  st.rerun()
157
 
158
+ # Temp file download
159
  with col2:
160
  if os.path.exists(TMP_FILE):
161
+ with open(TMP_FILE, "r", encoding="utf-8") as f:
162
+ tmp_content = f.read()
 
163
  st.download_button(
164
  "⬇️ Download Temp File",
165
  tmp_content,
 
168
  )
169
  else:
170
  st.warning("⚠️ No temp file found to download.")
171
+
172
+ # Clear session button
173
+ with col3:
174
+ if st.button("🗑️ Clear Session"):
175
+ if os.path.exists(TMP_FILE):
176
+ os.remove(TMP_FILE)
177
+ st.session_state.clear()
178
+ st.success("🧹 Session and temp cleared!")
179
+ st.rerun()