hillol7 committed on
Commit
3f8cb61
·
verified ·
1 Parent(s): 5b6044b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +49 -22
app.py CHANGED
@@ -1,6 +1,9 @@
1
  import gradio as gr
2
  import pickle
3
  import pandas as pd
 
 
 
4
  from sklearn.preprocessing import StandardScaler
5
  from sklearn.cluster import KMeans
6
 
@@ -9,32 +12,56 @@ with open('kmeans_model.pkl', 'rb') as file:
9
  with open('scaler.pkl', 'rb') as file:
10
  scaler = pickle.load(file)
11
 
12
- def predict_spending_score(annual_income, family_size, work_experience):
13
- df = pd.DataFrame({
14
- 'Annual Income ($)': [annual_income],
15
- 'Family Size': [family_size],
16
- 'Work Experience': [work_experience]
17
- })
18
-
19
- df['Family_Income_Product'] = df['Family Size'] * df['Annual Income ($)']
20
- df['Family_Income_Ratio'] = df['Family Size'] / (df['Annual Income ($)'] + 1e-5)
21
-
22
- features = df[['Annual Income ($)', 'Family Size', 'Family_Income_Product', 'Family_Income_Ratio']]
23
- features_scaled = scaler.transform(features)
24
-
25
-
26
- cluster = kmeans.predict(features_scaled)
27
- return f'Cluster: {int(cluster[0])}'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
  iface = gr.Interface(
30
- fn=predict_spending_score,
31
- inputs=[
32
- gr.Number(label="Annual Income ($)", value=50000),
33
- gr.Number(label="Family Size", value=2),
34
- gr.Number(label="Work Experience (years)", value=5)
 
35
  ],
36
- outputs="text",
37
  live=True
38
  )
39
 
 
40
  iface.launch()
 
1
  import gradio as gr
2
  import pickle
3
  import pandas as pd
4
+ import numpy as np
5
+ import matplotlib.pyplot as plt
6
+ import seaborn as sns
7
  from sklearn.preprocessing import StandardScaler
8
  from sklearn.cluster import KMeans
9
 
 
12
  with open('scaler.pkl', 'rb') as file:
13
  scaler = pickle.load(file)
14
 
# Customer records that every table and chart below is computed from.
dataset_path = 'Customers.csv'
data = pd.read_csv(dataset_path)

# Two interaction features built from family size and income. The small
# constant keeps the ratio's denominator non-zero when an income is 0.
data['Family_Income_Product'] = data['Family Size'] * data['Annual Income ($)']
data['Family_Income_Ratio'] = data['Family Size'] / (data['Annual Income ($)'] + 1e-5)

# NOTE(review): presumably this column order matches what the pickled scaler
# and k-means model were fitted on -- confirm against the training code.
features = data[[
    'Annual Income ($)',
    'Family Size',
    'Family_Income_Product',
    'Family_Income_Ratio',
]]
features_scaled = scaler.transform(features)

# Label every customer row with the cluster predicted by the loaded model.
clusters = kmeans.predict(features_scaled)
data['Cluster'] = clusters
26
+
def plot_cluster_distribution():
    """Draw a bar chart of customers per cluster and save it as a PNG.

    Reads the module-level ``data`` frame, which must contain a ``Cluster``
    column.

    Returns:
        str: Path of the written image. This is
        ``/kaggle/working/cluster_distribution.png`` when that directory
        exists; otherwise the file is written to the system temporary
        directory.
    """
    import os
    import tempfile

    # The Kaggle working directory only exists inside Kaggle notebooks; fall
    # back to the temp directory so saving does not fail elsewhere.
    out_dir = '/kaggle/working'
    if not os.path.isdir(out_dir):
        out_dir = tempfile.gettempdir()
    out_path = os.path.join(out_dir, 'cluster_distribution.png')

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.countplot(data=data, x='Cluster', palette='viridis')
        plt.title('Customer Distribution Across Clusters')
        plt.xlabel('Cluster')
        plt.ylabel('Number of Customers')
        plt.tight_layout()
        plt.savefig(out_path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    return out_path
36
+
def plot_spending_score_distribution():
    """Draw a box plot of spending score per cluster and save it as a PNG.

    Reads the module-level ``data`` frame, which must contain ``Cluster`` and
    ``Spending Score (1-100)`` columns.

    Returns:
        str: Path of the written image. This is
        ``/kaggle/working/spending_score_distribution.png`` when that
        directory exists; otherwise the file is written to the system
        temporary directory.
    """
    import os
    import tempfile

    # The Kaggle working directory only exists inside Kaggle notebooks; fall
    # back to the temp directory so saving does not fail elsewhere.
    out_dir = '/kaggle/working'
    if not os.path.isdir(out_dir):
        out_dir = tempfile.gettempdir()
    out_path = os.path.join(out_dir, 'spending_score_distribution.png')

    fig = plt.figure(figsize=(10, 6))
    try:
        sns.boxplot(data=data, x='Cluster', y='Spending Score (1-100)', palette='viridis')
        plt.title('Spending Score Distribution Across Clusters')
        plt.xlabel('Cluster')
        plt.ylabel('Spending Score (1-100)')
        plt.tight_layout()
        plt.savefig(out_path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
    return out_path
46
+
def cluster_summary():
    """Summarise each cluster's income, family size and spending score.

    Groups the module-level ``data`` frame by ``Cluster`` and computes the
    mean and standard deviation of the three tracked columns.

    Returns:
        dict: ``DataFrame.to_dict()`` of the aggregated frame after
        ``reset_index()``.
    """
    tracked_columns = (
        'Annual Income ($)',
        'Family Size',
        'Spending Score (1-100)',
    )
    aggregations = {column: ['mean', 'std'] for column in tracked_columns}
    per_cluster = data.groupby('Cluster').agg(aggregations)
    # NOTE(review): aggregating with lists of statistics yields two-level
    # column labels, so the dict keys are presumably (column, statistic)
    # tuples -- confirm that consumers expect this shape.
    return per_cluster.reset_index().to_dict()
54
 
def _render_dashboard():
    """Produce one value per output component, in the order they are declared.

    Returns:
        tuple: ``(summary_frame, cluster_plot_path, spending_plot_path)``.
    """
    summary = pd.DataFrame(cluster_summary())
    # Collapse (column, statistic) label pairs into single header strings so
    # the table shows plain headers such as "Family Size mean".
    summary.columns = [
        ' '.join(str(part) for part in label if part)
        if isinstance(label, tuple) else str(label)
        for label in summary.columns
    ]
    return summary, plot_cluster_distribution(), plot_spending_score_distribution()


# The interface shows three components, so its function must return three
# values; the wrapper above gathers the table and both charts.
iface = gr.Interface(
    fn=_render_dashboard,
    inputs=[],
    outputs=[
        gr.Dataframe(label="Cluster Summary"),
        gr.Image(label="Customer Distribution Across Clusters"),
        gr.Image(label="Spending Score Distribution Across Clusters"),
    ],
    live=True,
)

# Launch the interface
iface.launch()