Spaces:
Build error
Build error
Commit
·
e37cfd0
1
Parent(s):
2bdd84f
New Improvement in Pages
Browse files
- .gitattributes +1 -1
- .gitignore +1 -0
- __pycache__/utils.cpython-311.pyc +0 -0
- app.py +25 -6
- pages/Conversion.py +31 -4
- pages/Dataset_Management.py +1 -1
- pages/Finetune.py +30 -36
- requirements.txt +3 -1
- utils.py +29 -2
.gitattributes
CHANGED
@@ -32,4 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
+
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
*.env
|
__pycache__/utils.cpython-311.pyc
ADDED
Binary file (24.6 kB). View file
|
|
app.py
CHANGED
@@ -2,17 +2,36 @@ import streamlit as st
|
|
2 |
|
3 |
st.set_page_config(page_title="Gemma LLM Fine-Tuning UI", layout="wide")
|
4 |
|
|
|
5 |
st.title("Gemma LLM Fine-Tuning Suite π")
|
6 |
st.markdown("""
|
7 |
-
### π₯
|
8 |
- **Chat**: Interact with the model.
|
9 |
- **Fine-tuning**: Train on `train_data.csv` or upload new datasets.
|
10 |
- **Conversion**: Export models to TorchScript and ONNX.
|
11 |
- **Dataset Management**: View and add to your training data.
|
12 |
""")
|
13 |
|
14 |
-
#
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2 |
|
3 |
st.set_page_config(page_title="Gemma LLM Fine-Tuning UI", layout="wide")
|
4 |
|
5 |
+
# Main Page Title and Description
|
6 |
st.title("Gemma LLM Fine-Tuning Suite π")
|
7 |
st.markdown("""
|
8 |
+
### π₯ Multi-page AI Model Trainer
|
9 |
- **Chat**: Interact with the model.
|
10 |
- **Fine-tuning**: Train on `train_data.csv` or upload new datasets.
|
11 |
- **Conversion**: Export models to TorchScript and ONNX.
|
12 |
- **Dataset Management**: View and add to your training data.
|
13 |
""")
|
14 |
|
15 |
+
# Sidebar Navigation with Custom Labels
|
16 |
+
st.sidebar.title("Navigation")
|
17 |
+
nav_options = [
|
18 |
+
"πΉ Chat",
|
19 |
+
"πΉ Fine-tuning",
|
20 |
+
"πΉ Conversion",
|
21 |
+
"πΉ Dataset Management"
|
22 |
+
]
|
23 |
+
selected_page = st.sidebar.radio("Go to", nav_options)
|
24 |
+
|
25 |
+
# Page Content based on Navigation Selection
|
26 |
+
if selected_page == "πΉ Chat":
|
27 |
+
st.header("Chat with Gemma")
|
28 |
+
st.write("Interact with the model in a conversational interface. Coming soon!")
|
29 |
+
elif selected_page == "πΉ Fine-tuning":
|
30 |
+
st.header("Fine-tuning Gemma")
|
31 |
+
st.write("Fine-tune your Gemma model using your dataset. Coming soon!")
|
32 |
+
elif selected_page == "πΉ Conversion":
|
33 |
+
st.header("Model Conversion")
|
34 |
+
st.write("Convert your model to various formats. Coming soon!")
|
35 |
+
elif selected_page == "πΉ Dataset Management":
|
36 |
+
st.header("Dataset Management")
|
37 |
+
st.write("Manage your training datasets. Coming soon!")
|
pages/Conversion.py
CHANGED
@@ -1,5 +1,13 @@
|
|
1 |
import streamlit as st
|
2 |
-
from utils import
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
st.title("π§ Model Conversion")
|
5 |
|
@@ -10,15 +18,34 @@ hf_token = get_hf_token()
|
|
10 |
model_path = "fine_tuned_model.pt"
|
11 |
tokenizer, model = load_model("google/gemma-3-1b-it", hf_token, model_path)
|
12 |
|
13 |
-
|
|
|
|
|
|
|
|
|
14 |
|
15 |
if st.button("Convert Model"):
|
16 |
if conversion_option == "TorchScript":
|
17 |
with st.spinner("Converting to TorchScript..."):
|
18 |
ts_model = convert_to_torchscript(model)
|
19 |
st.success("Model converted to TorchScript!")
|
20 |
-
|
21 |
elif conversion_option == "ONNX":
|
22 |
with st.spinner("Converting to ONNX..."):
|
23 |
onnx_path = convert_to_onnx(model)
|
24 |
-
st.success("Model converted to ONNX!")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
+
from utils import (
|
3 |
+
load_model,
|
4 |
+
convert_to_torchscript,
|
5 |
+
convert_to_onnx,
|
6 |
+
convert_to_gguf,
|
7 |
+
convert_to_tf_saved_model,
|
8 |
+
convert_to_pytorch,
|
9 |
+
get_hf_token
|
10 |
+
)
|
11 |
|
12 |
st.title("π§ Model Conversion")
|
13 |
|
|
|
18 |
model_path = "fine_tuned_model.pt"
|
19 |
tokenizer, model = load_model("google/gemma-3-1b-it", hf_token, model_path)
|
20 |
|
21 |
+
# Select conversion format
|
22 |
+
conversion_option = st.selectbox(
|
23 |
+
"Select Conversion Format",
|
24 |
+
["TorchScript", "ONNX", "GGUF", "TensorFlow SavedModel", "PyTorch"]
|
25 |
+
)
|
26 |
|
27 |
if st.button("Convert Model"):
|
28 |
if conversion_option == "TorchScript":
|
29 |
with st.spinner("Converting to TorchScript..."):
|
30 |
ts_model = convert_to_torchscript(model)
|
31 |
st.success("Model converted to TorchScript!")
|
32 |
+
|
33 |
elif conversion_option == "ONNX":
|
34 |
with st.spinner("Converting to ONNX..."):
|
35 |
onnx_path = convert_to_onnx(model)
|
36 |
+
st.success(f"Model converted to ONNX! Saved at: {onnx_path}")
|
37 |
+
|
38 |
+
elif conversion_option == "GGUF":
|
39 |
+
with st.spinner("Converting to GGUF..."):
|
40 |
+
gguf_path = convert_to_gguf(model)
|
41 |
+
st.success(f"Model converted to GGUF! Saved at: {gguf_path}")
|
42 |
+
|
43 |
+
elif conversion_option == "TensorFlow SavedModel":
|
44 |
+
with st.spinner("Converting to TensorFlow SavedModel..."):
|
45 |
+
tf_path = convert_to_tf_saved_model(model)
|
46 |
+
st.success(f"Model converted to TensorFlow SavedModel! Saved at: {tf_path}")
|
47 |
+
|
48 |
+
elif conversion_option == "PyTorch":
|
49 |
+
with st.spinner("Converting to PyTorch..."):
|
50 |
+
pytorch_path = convert_to_pytorch(model)
|
51 |
+
st.success(f"Model saved in PyTorch format! Saved at: {pytorch_path}")
|
pages/Dataset_Management.py
CHANGED
@@ -98,7 +98,7 @@ tabs = st.tabs([
|
|
98 |
with tabs[0]:
|
99 |
st.subheader("π Current Dataset Preview")
|
100 |
if not df.empty:
|
101 |
-
st.dataframe(df
|
102 |
st.markdown("#### π Basic Statistics")
|
103 |
st.write(df.describe(include="all"))
|
104 |
else:
|
|
|
98 |
with tabs[0]:
|
99 |
st.subheader("π Current Dataset Preview")
|
100 |
if not df.empty:
|
101 |
+
st.dataframe(df)
|
102 |
st.markdown("#### π Basic Statistics")
|
103 |
st.write(df.describe(include="all"))
|
104 |
else:
|
pages/Finetune.py
CHANGED
@@ -59,19 +59,13 @@ elif finetune_option == "Refinetune existing model":
|
|
59 |
# -------------------------------
|
60 |
# Dataset Selection
|
61 |
# -------------------------------
|
62 |
-
|
63 |
st.subheader("π Dataset Selection")
|
64 |
-
|
65 |
-
# Dataset source selection
|
66 |
dataset_option = st.radio("Choose dataset:", ["Upload New Dataset", "Use Existing Dataset (`train_data.csv`)"])
|
67 |
-
|
68 |
-
dataset_path = "train_data.csv"
|
69 |
|
70 |
if dataset_option == "Upload New Dataset":
|
71 |
uploaded_file = st.file_uploader("π€ Upload Dataset (CSV or JSON)", type=["csv", "json"])
|
72 |
-
|
73 |
if uploaded_file is not None:
|
74 |
-
# Handle CSV or JSON upload
|
75 |
if uploaded_file.name.endswith(".csv"):
|
76 |
new_data = pd.read_csv(uploaded_file)
|
77 |
elif uploaded_file.name.endswith(".json"):
|
@@ -81,14 +75,12 @@ if dataset_option == "Upload New Dataset":
|
|
81 |
st.error("β Unsupported file format. Please upload CSV or JSON.")
|
82 |
st.stop()
|
83 |
|
84 |
-
# Append or create new dataset
|
85 |
if os.path.exists(dataset_path):
|
86 |
new_data.to_csv(dataset_path, mode='a', index=False, header=False)
|
87 |
st.success(f"✅ Data appended to `{dataset_path}`!")
|
88 |
else:
|
89 |
new_data.to_csv(dataset_path, index=False)
|
90 |
st.success(f"✅ Dataset saved as `{dataset_path}`!")
|
91 |
-
|
92 |
elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
|
93 |
if os.path.exists(dataset_path):
|
94 |
st.success("✅ Using existing `train_data.csv` for fine-tuning.")
|
@@ -99,69 +91,71 @@ elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
|
|
99 |
# -------------------------------
|
100 |
# Hyperparameters Configuration
|
101 |
# -------------------------------
|
|
|
102 |
learning_rate = st.number_input("π Learning Rate", value=1e-4, format="%.5f")
|
103 |
batch_size = st.number_input("π οΈ Batch Size", value=16, step=1)
|
104 |
epochs = st.number_input("β±οΈ Epochs", value=3, step=1)
|
105 |
|
|
|
106 |
# -------------------------------
|
107 |
-
# Fine-tuning Execution
|
108 |
# -------------------------------
|
109 |
if st.button("π Start Fine-tuning"):
|
110 |
-
st.info(
|
111 |
-
|
112 |
-
# Retrieve Hugging Face Token
|
113 |
hf_token = get_hf_token()
|
114 |
|
115 |
# Model loading logic
|
116 |
if finetune_option == "Refinetune existing model" and saved_model_path:
|
117 |
-
# Load the base model first
|
118 |
tokenizer, model = load_model("google/gemma-3-1b-it", hf_token)
|
119 |
-
|
120 |
-
# Load the saved model checkpoint for re-finetuning
|
121 |
model = load_finetuned_model(model, saved_model_path)
|
122 |
-
|
123 |
if model:
|
124 |
st.success(f"✅ Loaded saved model: `{saved_model_path}` for refinement!")
|
125 |
else:
|
126 |
st.error("β Failed to load the saved model. Aborting.")
|
127 |
st.stop()
|
128 |
-
|
129 |
else:
|
130 |
-
# Fine-tune from scratch (load base model)
|
131 |
if not selected_model:
|
132 |
st.error("β Please select a model to fine-tune.")
|
133 |
st.stop()
|
134 |
-
|
135 |
tokenizer, model = load_model(selected_model, hf_token)
|
136 |
-
|
137 |
if model:
|
138 |
st.success(f"✅ Base model loaded: `{selected_model}`")
|
139 |
else:
|
140 |
st.error("β Failed to load the base model. Aborting.")
|
141 |
st.stop()
|
142 |
|
143 |
-
#
|
144 |
-
|
145 |
-
|
146 |
-
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
|
151 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
152 |
# Save fine-tuned model with timestamp
|
153 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
154 |
-
|
|
|
155 |
|
156 |
-
# Save the fine-tuned model
|
157 |
saved_model_path = save_model(model, new_model_name)
|
158 |
-
|
159 |
if saved_model_path:
|
160 |
st.success(f"✅ Fine-tuning completed! Model saved as `{saved_model_path}`")
|
161 |
-
|
162 |
-
# Load the fine-tuned model for immediate inference
|
163 |
model = load_finetuned_model(model, saved_model_path)
|
164 |
-
|
165 |
if model:
|
166 |
st.success("π οΈ Fine-tuned model loaded and ready for inference!")
|
167 |
else:
|
|
|
59 |
# -------------------------------
|
60 |
# Dataset Selection
|
61 |
# -------------------------------
|
|
|
62 |
st.subheader("π Dataset Selection")
|
|
|
|
|
63 |
dataset_option = st.radio("Choose dataset:", ["Upload New Dataset", "Use Existing Dataset (`train_data.csv`)"])
|
64 |
+
dataset_path = "datasets/train_data.csv"
|
|
|
65 |
|
66 |
if dataset_option == "Upload New Dataset":
|
67 |
uploaded_file = st.file_uploader("π€ Upload Dataset (CSV or JSON)", type=["csv", "json"])
|
|
|
68 |
if uploaded_file is not None:
|
|
|
69 |
if uploaded_file.name.endswith(".csv"):
|
70 |
new_data = pd.read_csv(uploaded_file)
|
71 |
elif uploaded_file.name.endswith(".json"):
|
|
|
75 |
st.error("β Unsupported file format. Please upload CSV or JSON.")
|
76 |
st.stop()
|
77 |
|
|
|
78 |
if os.path.exists(dataset_path):
|
79 |
new_data.to_csv(dataset_path, mode='a', index=False, header=False)
|
80 |
st.success(f"✅ Data appended to `{dataset_path}`!")
|
81 |
else:
|
82 |
new_data.to_csv(dataset_path, index=False)
|
83 |
st.success(f"✅ Dataset saved as `{dataset_path}`!")
|
|
|
84 |
elif dataset_option == "Use Existing Dataset (`train_data.csv`)":
|
85 |
if os.path.exists(dataset_path):
|
86 |
st.success("✅ Using existing `train_data.csv` for fine-tuning.")
|
|
|
91 |
# -------------------------------
|
92 |
# Hyperparameters Configuration
|
93 |
# -------------------------------
|
94 |
+
st.subheader("π§ Hyperparameter Configuration")
|
95 |
learning_rate = st.number_input("π Learning Rate", value=1e-4, format="%.5f")
|
96 |
batch_size = st.number_input("π οΈ Batch Size", value=16, step=1)
|
97 |
epochs = st.number_input("β±οΈ Epochs", value=3, step=1)
|
98 |
|
99 |
+
|
100 |
# -------------------------------
|
101 |
+
# Fine-tuning Execution with Real-Time Visualization
|
102 |
# -------------------------------
|
103 |
if st.button("π Start Fine-tuning"):
|
104 |
+
st.info("Fine-tuning process initiated...")
|
|
|
|
|
105 |
hf_token = get_hf_token()
|
106 |
|
107 |
# Model loading logic
|
108 |
if finetune_option == "Refinetune existing model" and saved_model_path:
|
|
|
109 |
tokenizer, model = load_model("google/gemma-3-1b-it", hf_token)
|
|
|
|
|
110 |
model = load_finetuned_model(model, saved_model_path)
|
|
|
111 |
if model:
|
112 |
st.success(f"✅ Loaded saved model: `{saved_model_path}` for refinement!")
|
113 |
else:
|
114 |
st.error("β Failed to load the saved model. Aborting.")
|
115 |
st.stop()
|
|
|
116 |
else:
|
|
|
117 |
if not selected_model:
|
118 |
st.error("β Please select a model to fine-tune.")
|
119 |
st.stop()
|
|
|
120 |
tokenizer, model = load_model(selected_model, hf_token)
|
|
|
121 |
if model:
|
122 |
st.success(f"✅ Base model loaded: `{selected_model}`")
|
123 |
else:
|
124 |
st.error("β Failed to load the base model. Aborting.")
|
125 |
st.stop()
|
126 |
|
127 |
+
# Create placeholders for training progress
|
128 |
+
loss_chart = st.line_chart() # Loss curve
|
129 |
+
acc_chart = st.line_chart() # Accuracy curve
|
130 |
+
progress_text = st.empty()
|
131 |
+
|
132 |
+
# Simulate training loop with real-time visualization
|
133 |
+
losses_over_epochs = []
|
134 |
+
accuracies_over_epochs = []
|
135 |
+
|
136 |
+
for epoch, losses, accs in simulate_training(epochs, learning_rate, batch_size):
|
137 |
+
# Update training text
|
138 |
+
progress_text.text(f"Epoch {epoch}/{epochs} in progress...")
|
139 |
+
|
140 |
+
# Assume simulate_training returns overall average loss and accuracy per epoch
|
141 |
+
losses_over_epochs.append(losses) # e.g., average loss of the epoch
|
142 |
+
accuracies_over_epochs.append(accs) # e.g., average accuracy of the epoch
|
143 |
+
|
144 |
+
# Update real-time charts
|
145 |
+
loss_chart.add_rows(pd.DataFrame({"Loss": [losses]}))
|
146 |
+
acc_chart.add_rows(pd.DataFrame({"Accuracy": [accs]}))
|
147 |
+
|
148 |
+
progress_text.text("Fine-tuning completed!")
|
149 |
+
|
150 |
# Save fine-tuned model with timestamp
|
151 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
152 |
+
model_identifier = selected_model if selected_model else os.path.basename(saved_model_path)
|
153 |
+
new_model_name = f"models/fine_tuned_model_{model_identifier.replace('/', '_')}_{timestamp}.pt"
|
154 |
|
|
|
155 |
saved_model_path = save_model(model, new_model_name)
|
|
|
156 |
if saved_model_path:
|
157 |
st.success(f"✅ Fine-tuning completed! Model saved as `{saved_model_path}`")
|
|
|
|
|
158 |
model = load_finetuned_model(model, saved_model_path)
|
|
|
159 |
if model:
|
160 |
st.success("π οΈ Fine-tuned model loaded and ready for inference!")
|
161 |
else:
|
requirements.txt
CHANGED
@@ -9,4 +9,6 @@ FuzzyTM>=0.4.0
|
|
9 |
requests>=2.28.0
|
10 |
xlsxwriter>=3.0.1
|
11 |
python-dotenv>=0.19.0
|
12 |
-
scipy>=1.7.3
|
|
|
|
|
|
9 |
requests>=2.28.0
|
10 |
xlsxwriter>=3.0.1
|
11 |
python-dotenv>=0.19.0
|
12 |
+
scipy>=1.7.3
|
13 |
+
seaborn>=0.13.2
|
14 |
+
llama-cpp-python>=0.3.8
|
utils.py
CHANGED
@@ -11,7 +11,7 @@ import os
|
|
11 |
import asyncio
|
12 |
from dotenv import load_dotenv
|
13 |
from scipy.stats import skew, kurtosis, zscore
|
14 |
-
|
15 |
# -------------------------------
|
16 |
# Environment and Token Management
|
17 |
# -------------------------------
|
@@ -192,6 +192,33 @@ def convert_to_onnx(model, output_path="model.onnx"):
|
|
192 |
st.error(f"β ONNX conversion failed: {e}")
|
193 |
return None
|
194 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
195 |
|
196 |
# -------------------------------
|
197 |
# Model Inference and Training
|
@@ -355,7 +382,7 @@ def compute_dataset_score(df):
|
|
355 |
if df.empty:
|
356 |
return 0.0
|
357 |
|
358 |
-
total_cells = np.
|
359 |
missing_cells = df.isnull().sum().sum()
|
360 |
missing_ratio = missing_cells / total_cells
|
361 |
|
|
|
11 |
import asyncio
|
12 |
from dotenv import load_dotenv
|
13 |
from scipy.stats import skew, kurtosis, zscore
|
14 |
+
import llama_cpp
|
15 |
# -------------------------------
|
16 |
# Environment and Token Management
|
17 |
# -------------------------------
|
|
|
192 |
st.error(f"β ONNX conversion failed: {e}")
|
193 |
return None
|
194 |
|
195 |
+
# Convert to GGUF (for Llama.cpp)
|
196 |
+
def convert_to_gguf(model, output_path="model.gguf"):
|
197 |
+
llama_cpp.export_gguf(model, output_path)
|
198 |
+
return output_path
|
199 |
+
|
200 |
+
# Convert to TensorFlow SavedModel
|
201 |
+
def convert_to_tf_saved_model(model, output_path="model_tf"):
|
202 |
+
tf_model = tf.Module()
|
203 |
+
|
204 |
+
# Export the PyTorch model to TensorFlow using ONNX as intermediary
|
205 |
+
dummy_input = torch.randn(1, 3, 224, 224)
|
206 |
+
torch.onnx.export(model, dummy_input, "temp_model.onnx")
|
207 |
+
|
208 |
+
# Load ONNX model into TensorFlow
|
209 |
+
import onnx
|
210 |
+
from onnx_tf.backend import prepare
|
211 |
+
|
212 |
+
onnx_model = onnx.load("temp_model.onnx")
|
213 |
+
tf_rep = prepare(onnx_model)
|
214 |
+
tf_rep.export_graph(output_path)
|
215 |
+
|
216 |
+
return output_path
|
217 |
+
|
218 |
+
# Convert to PyTorch format
|
219 |
+
def convert_to_pytorch(model, output_path="model.pth"):
|
220 |
+
torch.save(model.state_dict(), output_path)
|
221 |
+
return output_path
|
222 |
|
223 |
# -------------------------------
|
224 |
# Model Inference and Training
|
|
|
382 |
if df.empty:
|
383 |
return 0.0
|
384 |
|
385 |
+
total_cells = np.prod(df.shape)
|
386 |
missing_cells = df.isnull().sum().sum()
|
387 |
missing_ratio = missing_cells / total_cells
|
388 |
|