mohitkumarrajbadi committed on
Commit
e37cfd0
·
1 Parent(s): 2bdd84f

New Improvement in Pages

Browse files
.gitattributes CHANGED
@@ -32,4 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
- *tfevents* filter=lfs diff=lfs merge=lfs -text
 
32
  *.xz filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ *.env
__pycache__/utils.cpython-311.pyc ADDED
Binary file (24.6 kB). View file
 
app.py CHANGED
@@ -2,17 +2,36 @@ import streamlit as st
2
 
3
  st.set_page_config(page_title="Gemma LLM Fine-Tuning UI", layout="wide")
4
 
 
5
  st.title("Gemma LLM Fine-Tuning Suite 🚀")
6
  st.markdown("""
7
- ### 🔥 **Multi-page AI Model Trainer**
8
  - **Chat**: Interact with the model.
9
  - **Fine-tuning**: Train on `train_data.csv` or upload new datasets.
10
  - **Conversion**: Export models to TorchScript and ONNX.
11
  - **Dataset Management**: View and add to your training data.
12
  """)
13
 
14
- # st.sidebar.title("Navigation")
15
- # st.sidebar.page_link("pages/Chat.py", label="🔹 Chat")
16
- # st.sidebar.page_link("pages/Finetune.py", label="🔹 Fine-tuning")
17
- # st.sidebar.page_link("pages/Conversion.py", label="🔹 Model Conversion")
18
- # st.sidebar.page_link("pages/Dataset_Management.py", label="🔹 Dataset Management")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
 
3
  st.set_page_config(page_title="Gemma LLM Fine-Tuning UI", layout="wide")
4
 
5
+ # Main Page Title and Description
6
  st.title("Gemma LLM Fine-Tuning Suite 🚀")
7
  st.markdown("""
8
+ ### 🔥 Multi-page AI Model Trainer
9
  - **Chat**: Interact with the model.
10
  - **Fine-tuning**: Train on `train_data.csv` or upload new datasets.
11
  - **Conversion**: Export models to TorchScript and ONNX.
12
  - **Dataset Management**: View and add to your training data.
13
  """)
14
 
15
+ # Sidebar Navigation with Custom Labels
16
+ st.sidebar.title("Navigation")
17
+ nav_options = [
18
+ "🔹 Chat",
19
+ "🔹 Fine-tuning",
20
+ "🔹 Conversion",
21
+ "🔹 Dataset Management"
22
+ ]
23
+ selected_page = st.sidebar.radio("Go to", nav_options)
24
+
25
+ # Page Content based on Navigation Selection
26
+ if selected_page == "🔹 Chat":
27
+ st.header("Chat with Gemma")
28
+ st.write("Interact with the model in a conversational interface. Coming soon!")
29
+ elif selected_page == "🔹 Fine-tuning":
30
+ st.header("Fine-tuning Gemma")
31
+ st.write("Fine-tune your Gemma model using your dataset. Coming soon!")
32
+ elif selected_page == "🔹 Conversion":
33
+ st.header("Model Conversion")
34
+ st.write("Convert your model to various formats. Coming soon!")
35
+ elif selected_page == "🔹 Dataset Management":
36
+ st.header("Dataset Management")
37
+ st.write("Manage your training datasets. Coming soon!")
pages/Conversion.py CHANGED
@@ -1,5 +1,13 @@
1
  import streamlit as st
2
- from utils import load_model, convert_to_torchscript, convert_to_onnx, get_hf_token
 
 
 
 
 
 
 
 
3
 
4
  st.title("🔧 Model Conversion")
5
 
@@ -10,15 +18,34 @@ hf_token = get_hf_token()
10
  model_path = "fine_tuned_model.pt"
11
  tokenizer, model = load_model("google/gemma-3-1b-it", hf_token, model_path)
12
 
13
- conversion_option = st.selectbox("Select Conversion Format", ["TorchScript", "ONNX"])
 
 
 
 
14
 
15
  if st.button("Convert Model"):
16
  if conversion_option == "TorchScript":
17
  with st.spinner("Converting to TorchScript..."):
18
  ts_model = convert_to_torchscript(model)
19
  st.success("Model converted to TorchScript!")
20
-
21
  elif conversion_option == "ONNX":
22
  with st.spinner("Converting to ONNX..."):
23
  onnx_path = convert_to_onnx(model)
24
- st.success("Model converted to ONNX!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
+ from utils import (
3
+ load_model,
4
+ convert_to_torchscript,
5
+ convert_to_onnx,
6
+ convert_to_gguf,
7
+ convert_to_tf_saved_model,
8
+ convert_to_pytorch,
9
+ get_hf_token
10
+ )
11
 
12
  st.title("🔧 Model Conversion")
13
 
 
18
  model_path = "fine_tuned_model.pt"
19
  tokenizer, model = load_model("google/gemma-3-1b-it", hf_token, model_path)
20
 
21
+ # Select conversion format
22
+ conversion_option = st.selectbox(
23
+ "Select Conversion Format",
24
+ ["TorchScript", "ONNX", "GGUF", "TensorFlow SavedModel", "PyTorch"]
25
+ )
26
 
27
  if st.button("Convert Model"):
28
  if conversion_option == "TorchScript":
29
  with st.spinner("Converting to TorchScript..."):
30
  ts_model = convert_to_torchscript(model)
31
  st.success("Model converted to TorchScript!")
32
+
33
  elif conversion_option == "ONNX":
34
  with st.spinner("Converting to ONNX..."):
35
  onnx_path = convert_to_onnx(model)
36
+ st.success(f"Model converted to ONNX! Saved at: {onnx_path}")
37
+
38
+ elif conversion_option == "GGUF":
39
+ with st.spinner("Converting to GGUF..."):
40
+ gguf_path = convert_to_gguf(model)
41
+ st.success(f"Model converted to GGUF! Saved at: {gguf_path}")
42
+
43
+ elif conversion_option == "TensorFlow SavedModel":
44
+ with st.spinner("Converting to TensorFlow SavedModel..."):
45
+ tf_path = convert_to_tf_saved_model(model)
46
+ st.success(f"Model converted to TensorFlow SavedModel! Saved at: {tf_path}")
47
+
48
+ elif conversion_option == "PyTorch":
49
+ with st.spinner("Converting to PyTorch..."):
50
+ pytorch_path = convert_to_pytorch(model)
51
+ st.success(f"Model saved in PyTorch format! Saved at: {pytorch_path}")
pages/Dataset_Management.py CHANGED
@@ -98,7 +98,7 @@ tabs = st.tabs([
98
  with tabs[0]:
99
  st.subheader("📋 Current Dataset Preview")
100
  if not df.empty:
101
- st.dataframe(df.head(10))
102
  st.markdown("#### 🔎 Basic Statistics")
103
  st.write(df.describe(include="all"))
104
  else:
 
98
  with tabs[0]:
99
  st.subheader("📋 Current Dataset Preview")
100
  if not df.empty:
101
+ st.dataframe(df)
102
  st.markdown("#### 🔎 Basic Statistics")
103
  st.write(df.describe(include="all"))
104
  else:
pages/Finetune.py CHANGED
@@ -59,19 +59,13 @@ elif finetune_option == "Refinetune existing model":
59
  # -------------------------------
60
  # Dataset Selection
61
  # -------------------------------
62
-
63
  st.subheader("📚 Dataset Selection")
64
-
65
- # Dataset source selection
66
  dataset_option = st.radio("Choose dataset:", ["Upload New Dataset", "Use Existing Dataset (`train_data.csv`)"])
67
-
68
- dataset_path = "train_data.csv"
69
 
70
  if dataset_option == "Upload New Dataset":
71
  uploaded_file = st.file_uploader("📤 Upload Dataset (CSV or JSON)", type=["csv", "json"])
72
-
73
  if uploaded_file is not None:
74
- # Handle CSV or JSON upload
75
  if uploaded_file.name.endswith(".csv"):
76
  new_data = pd.read_csv(uploaded_file)
77
  elif uploaded_file.name.endswith(".json"):
@@ -81,14 +75,12 @@ if dataset_option == "Upload New Dataset":
81
  st.error("❌ Unsupported file format. Please upload CSV or JSON.")
82
  st.stop()
83
 
84
- # Append or create new dataset
85
  if os.path.exists(dataset_path):
86
  new_data.to_csv(dataset_path, mode='a', index=False, header=False)
87
  st.success(f"✅ Data appended to `{dataset_path}`!")
88
  else:
89
  new_data.to_csv(dataset_path, index=False)
90
  st.success(f"✅ Dataset saved as `{dataset_path}`!")
91
-
92
  elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
93
  if os.path.exists(dataset_path):
94
  st.success("✅ Using existing `train_data.csv` for fine-tuning.")
@@ -99,69 +91,71 @@ elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
99
  # -------------------------------
100
  # Hyperparameters Configuration
101
  # -------------------------------
 
102
  learning_rate = st.number_input("📊 Learning Rate", value=1e-4, format="%.5f")
103
  batch_size = st.number_input("🛠️ Batch Size", value=16, step=1)
104
  epochs = st.number_input("⏱️ Epochs", value=3, step=1)
105
 
 
106
  # -------------------------------
107
- # Fine-tuning Execution
108
  # -------------------------------
109
  if st.button("🚀 Start Fine-tuning"):
110
- st.info(f"Fine-tuning process initiated...")
111
-
112
- # Retrieve Hugging Face Token
113
  hf_token = get_hf_token()
114
 
115
  # Model loading logic
116
  if finetune_option == "Refinetune existing model" and saved_model_path:
117
- # Load the base model first
118
  tokenizer, model = load_model("google/gemma-3-1b-it", hf_token)
119
-
120
- # Load the saved model checkpoint for re-finetuning
121
  model = load_finetuned_model(model, saved_model_path)
122
-
123
  if model:
124
  st.success(f"✅ Loaded saved model: `{saved_model_path}` for refinement!")
125
  else:
126
  st.error("❌ Failed to load the saved model. Aborting.")
127
  st.stop()
128
-
129
  else:
130
- # Fine-tune from scratch (load base model)
131
  if not selected_model:
132
  st.error("❌ Please select a model to fine-tune.")
133
  st.stop()
134
-
135
  tokenizer, model = load_model(selected_model, hf_token)
136
-
137
  if model:
138
  st.success(f"✅ Base model loaded: `{selected_model}`")
139
  else:
140
  st.error("❌ Failed to load the base model. Aborting.")
141
  st.stop()
142
 
143
- # Simulate fine-tuning loop
144
- progress_bar = st.progress(0)
145
- training_placeholder = st.empty()
146
-
147
- for epoch, losses, accs in simulate_training(epochs):
148
- fig = plot_training_metrics(epoch, losses, accs)
149
- training_placeholder.pyplot(fig)
150
- progress_bar.progress(epoch / epochs)
151
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  # Save fine-tuned model with timestamp
153
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
154
- new_model_name = f"models/fine_tuned_model_{selected_model.replace('/', '_')}_{timestamp}.pt"
 
155
 
156
- # Save the fine-tuned model
157
  saved_model_path = save_model(model, new_model_name)
158
-
159
  if saved_model_path:
160
  st.success(f"✅ Fine-tuning completed! Model saved as `{saved_model_path}`")
161
-
162
- # Load the fine-tuned model for immediate inference
163
  model = load_finetuned_model(model, saved_model_path)
164
-
165
  if model:
166
  st.success("🛠️ Fine-tuned model loaded and ready for inference!")
167
  else:
 
59
  # -------------------------------
60
  # Dataset Selection
61
  # -------------------------------
 
62
  st.subheader("📚 Dataset Selection")
 
 
63
  dataset_option = st.radio("Choose dataset:", ["Upload New Dataset", "Use Existing Dataset (`train_data.csv`)"])
64
+ dataset_path = "datasets/train_data.csv"
 
65
 
66
  if dataset_option == "Upload New Dataset":
67
  uploaded_file = st.file_uploader("📤 Upload Dataset (CSV or JSON)", type=["csv", "json"])
 
68
  if uploaded_file is not None:
 
69
  if uploaded_file.name.endswith(".csv"):
70
  new_data = pd.read_csv(uploaded_file)
71
  elif uploaded_file.name.endswith(".json"):
 
75
  st.error("❌ Unsupported file format. Please upload CSV or JSON.")
76
  st.stop()
77
 
 
78
  if os.path.exists(dataset_path):
79
  new_data.to_csv(dataset_path, mode='a', index=False, header=False)
80
  st.success(f"✅ Data appended to `{dataset_path}`!")
81
  else:
82
  new_data.to_csv(dataset_path, index=False)
83
  st.success(f"✅ Dataset saved as `{dataset_path}`!")
 
84
  elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
85
  if os.path.exists(dataset_path):
86
  st.success("✅ Using existing `train_data.csv` for fine-tuning.")
 
91
  # -------------------------------
92
  # Hyperparameters Configuration
93
  # -------------------------------
94
+ st.subheader("🔧 Hyperparameter Configuration")
95
  learning_rate = st.number_input("📊 Learning Rate", value=1e-4, format="%.5f")
96
  batch_size = st.number_input("🛠️ Batch Size", value=16, step=1)
97
  epochs = st.number_input("⏱️ Epochs", value=3, step=1)
98
 
99
+
100
  # -------------------------------
101
+ # Fine-tuning Execution with Real-Time Visualization
102
  # -------------------------------
103
  if st.button("🚀 Start Fine-tuning"):
104
+ st.info("Fine-tuning process initiated...")
 
 
105
  hf_token = get_hf_token()
106
 
107
  # Model loading logic
108
  if finetune_option == "Refinetune existing model" and saved_model_path:
 
109
  tokenizer, model = load_model("google/gemma-3-1b-it", hf_token)
 
 
110
  model = load_finetuned_model(model, saved_model_path)
 
111
  if model:
112
  st.success(f"✅ Loaded saved model: `{saved_model_path}` for refinement!")
113
  else:
114
  st.error("❌ Failed to load the saved model. Aborting.")
115
  st.stop()
 
116
  else:
 
117
  if not selected_model:
118
  st.error("❌ Please select a model to fine-tune.")
119
  st.stop()
 
120
  tokenizer, model = load_model(selected_model, hf_token)
 
121
  if model:
122
  st.success(f"✅ Base model loaded: `{selected_model}`")
123
  else:
124
  st.error("❌ Failed to load the base model. Aborting.")
125
  st.stop()
126
 
127
+ # Create placeholders for training progress
128
+ loss_chart = st.line_chart() # Loss curve
129
+ acc_chart = st.line_chart() # Accuracy curve
130
+ progress_text = st.empty()
131
+
132
+ # Simulate training loop with real-time visualization
133
+ losses_over_epochs = []
134
+ accuracies_over_epochs = []
135
+
136
+ for epoch, losses, accs in simulate_training(epochs, learning_rate, batch_size):
137
+ # Update training text
138
+ progress_text.text(f"Epoch {epoch}/{epochs} in progress...")
139
+
140
+ # Assume simulate_training returns overall average loss and accuracy per epoch
141
+ losses_over_epochs.append(losses) # e.g., average loss of the epoch
142
+ accuracies_over_epochs.append(accs) # e.g., average accuracy of the epoch
143
+
144
+ # Update real-time charts
145
+ loss_chart.add_rows(pd.DataFrame({"Loss": [losses]}))
146
+ acc_chart.add_rows(pd.DataFrame({"Accuracy": [accs]}))
147
+
148
+ progress_text.text("Fine-tuning completed!")
149
+
150
  # Save fine-tuned model with timestamp
151
  timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
152
+ model_identifier = selected_model if selected_model else os.path.basename(saved_model_path)
153
+ new_model_name = f"models/fine_tuned_model_{model_identifier.replace('/', '_')}_{timestamp}.pt"
154
 
 
155
  saved_model_path = save_model(model, new_model_name)
 
156
  if saved_model_path:
157
  st.success(f"✅ Fine-tuning completed! Model saved as `{saved_model_path}`")
 
 
158
  model = load_finetuned_model(model, saved_model_path)
 
159
  if model:
160
  st.success("🛠️ Fine-tuned model loaded and ready for inference!")
161
  else:
requirements.txt CHANGED
@@ -9,4 +9,6 @@ FuzzyTM>=0.4.0
9
  requests>=2.28.0
10
  xlsxwriter>=3.0.1
11
  python-dotenv>=0.19.0
12
- scipy>=1.7.3
 
 
 
9
  requests>=2.28.0
10
  xlsxwriter>=3.0.1
11
  python-dotenv>=0.19.0
12
+ scipy>=1.7.3
13
+ seaborn>=0.13.2
14
+ llama-cpp-python>=0.3.8
utils.py CHANGED
@@ -11,7 +11,7 @@ import os
11
  import asyncio
12
  from dotenv import load_dotenv
13
  from scipy.stats import skew, kurtosis, zscore
14
-
15
  # -------------------------------
16
  # Environment and Token Management
17
  # -------------------------------
@@ -192,6 +192,33 @@ def convert_to_onnx(model, output_path="model.onnx"):
192
  st.error(f"❌ ONNX conversion failed: {e}")
193
  return None
194
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
 
196
  # -------------------------------
197
  # Model Inference and Training
@@ -355,7 +382,7 @@ def compute_dataset_score(df):
355
  if df.empty:
356
  return 0.0
357
 
358
- total_cells = np.product(df.shape)
359
  missing_cells = df.isnull().sum().sum()
360
  missing_ratio = missing_cells / total_cells
361
 
 
11
  import asyncio
12
  from dotenv import load_dotenv
13
  from scipy.stats import skew, kurtosis, zscore
14
+ import llama_cpp
15
  # -------------------------------
16
  # Environment and Token Management
17
  # -------------------------------
 
192
  st.error(f"❌ ONNX conversion failed: {e}")
193
  return None
194
 
195
+ # Convert to GGUF (for Llama.cpp)
196
+ def convert_to_gguf(model, output_path="model.gguf"):
197
+ llama_cpp.export_gguf(model, output_path)
198
+ return output_path
199
+
200
+ # Convert to TensorFlow SavedModel
201
+ def convert_to_tf_saved_model(model, output_path="model_tf"):
202
+ tf_model = tf.Module()
203
+
204
+ # Export the PyTorch model to TensorFlow using ONNX as intermediary
205
+ dummy_input = torch.randn(1, 3, 224, 224)
206
+ torch.onnx.export(model, dummy_input, "temp_model.onnx")
207
+
208
+ # Load ONNX model into TensorFlow
209
+ import onnx
210
+ from onnx_tf.backend import prepare
211
+
212
+ onnx_model = onnx.load("temp_model.onnx")
213
+ tf_rep = prepare(onnx_model)
214
+ tf_rep.export_graph(output_path)
215
+
216
+ return output_path
217
+
218
+ # Convert to PyTorch format
219
+ def convert_to_pytorch(model, output_path="model.pth"):
220
+ torch.save(model.state_dict(), output_path)
221
+ return output_path
222
 
223
  # -------------------------------
224
  # Model Inference and Training
 
382
  if df.empty:
383
  return 0.0
384
 
385
+ total_cells = np.prod(df.shape)
386
  missing_cells = df.isnull().sum().sum()
387
  missing_ratio = missing_cells / total_cells
388