Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -1,152 +1,152 @@
|
|
1 |
-
import os
|
2 |
-
import shutil
|
3 |
-
import pandas as pd
|
4 |
-
import matplotlib.pyplot as plt
|
5 |
-
import seaborn as sns
|
6 |
-
import plotly.express as px
|
7 |
-
import gradio as gr
|
8 |
-
|
9 |
-
# Custom stylesheet handed to gr.Blocks: caps the app width and centres
# the page heading.
css = '''
.gradio-container{max-width: 900px !important}
h1{text-align:center}
'''
|
13 |
-
|
14 |
-
def _safe_filename(label):
    """Return *label* as text that is safe to embed in a file name.

    Column labels come straight from the uploaded CSV header, so path
    separators, characters that common file systems reject and control
    characters are replaced with ``_``. Ordinary labels come back unchanged.
    """
    unsafe = '\\/:*?"<>|'
    return ''.join('_' if ch in unsafe or ch < ' ' else ch for ch in str(label))


def _save_figure(fig, path, plots):
    """Write *fig* to *path*, record *path* in *plots*, then close *fig*.

    The figure is closed even when saving fails, so one bad plot cannot
    leave Matplotlib figures open in the long-running app process.
    """
    try:
        fig.savefig(path)
        plots.append(path)
    finally:
        plt.close(fig)


def create_visualizations(data):
    """Render a fixed set of charts for *data* and return their file paths.

    Args:
        data: pandas DataFrame to visualise. It is not modified.

    Returns:
        List of paths under ``./figures`` in creation order: histograms,
        box plots, scatter matrix, correlation heatmap, bar charts, line
        chart, Plotly scatter / pie / heatmap (``.html``) and violin plots.
        Charts whose prerequisites are missing are skipped, so the list is
        empty when the frame has nothing plottable.
    """
    plots = []

    # Start from an empty figures directory.
    # NOTE(review): the directory is shared and wiped on every call, so
    # overlapping requests can delete each other's files - consider a
    # per-request temporary directory.
    figures_dir = "./figures"
    shutil.rmtree(figures_dir, ignore_errors=True)
    os.makedirs(figures_dir, exist_ok=True)

    numeric_cols = data.select_dtypes(include=['number']).columns
    categorical_cols = data.select_dtypes(include=['object']).columns
    has_numeric_pair = len(numeric_cols) > 1
    # Shared by the Matplotlib and the Plotly heatmaps.
    corr = data[numeric_cols].corr() if has_numeric_pair else None

    # Histograms for numeric columns
    for col in numeric_cols:
        fig, ax = plt.subplots()
        sns.histplot(data[col], kde=True, ax=ax)
        ax.set_title(f'Histogram of {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Frequency')
        hist_path = os.path.join(figures_dir, f'histogram_{_safe_filename(col)}.png')
        _save_figure(fig, hist_path, plots)

    # Box plots for numeric columns
    for col in numeric_cols:
        fig, ax = plt.subplots()
        sns.boxplot(x=data[col], ax=ax)
        ax.set_title(f'Box Plot of {col}')
        box_path = os.path.join(figures_dir, f'boxplot_{_safe_filename(col)}.png')
        _save_figure(fig, box_path, plots)

    # Scatter plot matrix
    if has_numeric_pair:
        # pairplot builds its own figure, so none is created beforehand and
        # the title goes on the figure rather than on a single subplot.
        grid = sns.pairplot(data[numeric_cols])
        grid.figure.suptitle('Scatter Plot Matrix')
        grid.figure.subplots_adjust(top=0.95)  # leave room for the title
        scatter_matrix_path = os.path.join(figures_dir, 'scatter_matrix.png')
        _save_figure(grid.figure, scatter_matrix_path, plots)

    # Correlation heatmap
    if has_numeric_pair:
        fig, ax = plt.subplots()
        sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Heatmap')
        heatmap_path = os.path.join(figures_dir, 'correlation_heatmap.png')
        _save_figure(fig, heatmap_path, plots)

    # Bar charts for categorical columns
    for col in categorical_cols:
        fig, ax = plt.subplots()
        data[col].value_counts().plot(kind='bar', ax=ax)
        ax.set_title(f'Bar Chart of {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Count')
        bar_path = os.path.join(figures_dir, f'bar_chart_{_safe_filename(col)}.png')
        _save_figure(fig, bar_path, plots)

    # Line chart (if a 'date' column is present)
    if 'date' in data.columns:
        series_cols = [col for col in numeric_cols if col != 'date']
        if series_cols:
            # Index a copy by the parsed dates so the caller's frame is left
            # untouched; unparseable dates become NaT and are dropped.
            dates = pd.to_datetime(data['date'], errors='coerce')
            dated = data[series_cols].set_index(dates)
            dated = dated[dated.index.notna()]
            if not dated.empty:
                fig, ax = plt.subplots()
                dated.plot(ax=ax)
                ax.set_title('Line Chart of Date Series')
                line_chart_path = os.path.join(figures_dir, 'line_chart.png')
                _save_figure(fig, line_chart_path, plots)

    # NOTE(review): the Plotly charts below are written as standalone .html
    # files and returned next to the PNGs - confirm the consuming component
    # can render HTML files.

    # Scatter plot using Plotly (first two numeric columns)
    if has_numeric_pair:
        fig = px.scatter(data, x=numeric_cols[0], y=numeric_cols[1], title='Scatter Plot')
        scatter_plot_path = os.path.join(figures_dir, 'scatter_plot.html')
        fig.write_html(scatter_plot_path)
        plots.append(scatter_plot_path)

    # Pie chart for the first categorical column only
    if not categorical_cols.empty:
        fig = px.pie(data, names=categorical_cols[0], title=f'Pie Chart of {categorical_cols[0]}')
        pie_chart_path = os.path.join(figures_dir, 'pie_chart.html')
        fig.write_html(pie_chart_path)
        plots.append(pie_chart_path)

    # Interactive heatmap of the correlation matrix
    if has_numeric_pair:
        fig = px.imshow(corr, text_auto=True, title='Heatmap of Numeric Variables')
        heatmap_plot_path = os.path.join(figures_dir, 'heatmap_plot.html')
        fig.write_html(heatmap_plot_path)
        plots.append(heatmap_plot_path)

    # Violin plots for numeric columns
    for col in numeric_cols:
        fig, ax = plt.subplots()
        sns.violinplot(x=data[col], ax=ax)
        ax.set_title(f'Violin Plot of {col}')
        violin_path = os.path.join(figures_dir, f'violin_plot_{_safe_filename(col)}.png')
        _save_figure(fig, violin_path, plots)

    return plots
|
124 |
-
|
125 |
-
def analyze_data(file_input):
    """Load the uploaded CSV and return the paths of its visualizations.

    Args:
        file_input: the uploaded file as delivered by the upload widget -
            either an object exposing the path as ``.name`` or a plain path
            string - or ``None`` when nothing has been uploaded.

    Returns:
        The list produced by ``create_visualizations``; an empty list when
        no file was supplied.
    """
    if file_input is None:
        # Button pressed without an upload: nothing to show, nothing to raise.
        return []
    # Fall back to the value itself when it is already a path string.
    csv_path = getattr(file_input, "name", file_input)
    data = pd.read_csv(csv_path)
    return create_visualizations(data)
|
128 |
-
# Example CSV offered below the upload widget.
example_file_path = "./example/🤗example.csv"

# Page layout: heading, upload widget, trigger button and the result gallery.
with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
    gr.Markdown("# DATA BOARD📊\nUpload a `.csv` file to generate various visualizations and interactive plots.")

    # Only CSV is parsed downstream, so restrict the picker to .csv files.
    file_input = gr.File(label="Upload your `.csv` file", file_types=[".csv"])
    submit = gr.Button("Generate Dashboards")

    # Display images and interactive plots in a gallery
    gallery = gr.Gallery(label="Visualizations")

    # Clickable example wired to the same function as the button, with its
    # output cached.
    examples = gr.Examples(
        examples=[[example_file_path]],
        inputs=file_input,
        outputs=gallery,
        fn=analyze_data,  # Provide the processing function
        cache_examples=True,  # Enable caching
    )

    submit.click(analyze_data, file_input, gallery)

if __name__ == "__main__":
    demo.launch()
|
|
|
1 |
+
import os
|
2 |
+
import shutil
|
3 |
+
import pandas as pd
|
4 |
+
import matplotlib.pyplot as plt
|
5 |
+
import seaborn as sns
|
6 |
+
import plotly.express as px
|
7 |
+
import gradio as gr
|
8 |
+
|
9 |
+
# Custom stylesheet handed to gr.Blocks: caps the app width and centres
# the page heading.
css = '''
.gradio-container{max-width: 900px !important}
h1{text-align:center}
'''
|
13 |
+
|
14 |
+
def _safe_filename(label):
    """Return *label* as text that is safe to embed in a file name.

    Column labels come straight from the uploaded CSV header, so path
    separators, characters that common file systems reject and control
    characters are replaced with ``_``. Ordinary labels come back unchanged.
    """
    unsafe = '\\/:*?"<>|'
    return ''.join('_' if ch in unsafe or ch < ' ' else ch for ch in str(label))


def _save_figure(fig, path, plots):
    """Write *fig* to *path*, record *path* in *plots*, then close *fig*.

    The figure is closed even when saving fails, so one bad plot cannot
    leave Matplotlib figures open in the long-running app process.
    """
    try:
        fig.savefig(path)
        plots.append(path)
    finally:
        plt.close(fig)


def create_visualizations(data):
    """Render a fixed set of charts for *data* and return their file paths.

    Args:
        data: pandas DataFrame to visualise. It is not modified.

    Returns:
        List of paths under ``./figures`` in creation order: histograms,
        box plots, scatter matrix, correlation heatmap, bar charts, line
        chart, Plotly scatter / pie / heatmap (``.html``) and violin plots.
        Charts whose prerequisites are missing are skipped, so the list is
        empty when the frame has nothing plottable.
    """
    plots = []

    # Start from an empty figures directory.
    # NOTE(review): the directory is shared and wiped on every call, so
    # overlapping requests can delete each other's files - consider a
    # per-request temporary directory.
    figures_dir = "./figures"
    shutil.rmtree(figures_dir, ignore_errors=True)
    os.makedirs(figures_dir, exist_ok=True)

    numeric_cols = data.select_dtypes(include=['number']).columns
    categorical_cols = data.select_dtypes(include=['object']).columns
    has_numeric_pair = len(numeric_cols) > 1
    # Shared by the Matplotlib and the Plotly heatmaps.
    corr = data[numeric_cols].corr() if has_numeric_pair else None

    # Histograms for numeric columns
    for col in numeric_cols:
        fig, ax = plt.subplots()
        sns.histplot(data[col], kde=True, ax=ax)
        ax.set_title(f'Histogram of {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Frequency')
        hist_path = os.path.join(figures_dir, f'histogram_{_safe_filename(col)}.png')
        _save_figure(fig, hist_path, plots)

    # Box plots for numeric columns
    for col in numeric_cols:
        fig, ax = plt.subplots()
        sns.boxplot(x=data[col], ax=ax)
        ax.set_title(f'Box Plot of {col}')
        box_path = os.path.join(figures_dir, f'boxplot_{_safe_filename(col)}.png')
        _save_figure(fig, box_path, plots)

    # Scatter plot matrix
    if has_numeric_pair:
        # pairplot builds its own figure, so none is created beforehand and
        # the title goes on the figure rather than on a single subplot.
        grid = sns.pairplot(data[numeric_cols])
        grid.figure.suptitle('Scatter Plot Matrix')
        grid.figure.subplots_adjust(top=0.95)  # leave room for the title
        scatter_matrix_path = os.path.join(figures_dir, 'scatter_matrix.png')
        _save_figure(grid.figure, scatter_matrix_path, plots)

    # Correlation heatmap
    if has_numeric_pair:
        fig, ax = plt.subplots()
        sns.heatmap(corr, annot=True, cmap='coolwarm', ax=ax)
        ax.set_title('Correlation Heatmap')
        heatmap_path = os.path.join(figures_dir, 'correlation_heatmap.png')
        _save_figure(fig, heatmap_path, plots)

    # Bar charts for categorical columns
    for col in categorical_cols:
        fig, ax = plt.subplots()
        data[col].value_counts().plot(kind='bar', ax=ax)
        ax.set_title(f'Bar Chart of {col}')
        ax.set_xlabel(col)
        ax.set_ylabel('Count')
        bar_path = os.path.join(figures_dir, f'bar_chart_{_safe_filename(col)}.png')
        _save_figure(fig, bar_path, plots)

    # Line chart (if a 'date' column is present)
    if 'date' in data.columns:
        series_cols = [col for col in numeric_cols if col != 'date']
        if series_cols:
            # Index a copy by the parsed dates so the caller's frame is left
            # untouched; unparseable dates become NaT and are dropped.
            dates = pd.to_datetime(data['date'], errors='coerce')
            dated = data[series_cols].set_index(dates)
            dated = dated[dated.index.notna()]
            if not dated.empty:
                fig, ax = plt.subplots()
                dated.plot(ax=ax)
                ax.set_title('Line Chart of Date Series')
                line_chart_path = os.path.join(figures_dir, 'line_chart.png')
                _save_figure(fig, line_chart_path, plots)

    # NOTE(review): the Plotly charts below are written as standalone .html
    # files and returned next to the PNGs - confirm the consuming component
    # can render HTML files.

    # Scatter plot using Plotly (first two numeric columns)
    if has_numeric_pair:
        fig = px.scatter(data, x=numeric_cols[0], y=numeric_cols[1], title='Scatter Plot')
        scatter_plot_path = os.path.join(figures_dir, 'scatter_plot.html')
        fig.write_html(scatter_plot_path)
        plots.append(scatter_plot_path)

    # Pie chart for the first categorical column only
    if not categorical_cols.empty:
        fig = px.pie(data, names=categorical_cols[0], title=f'Pie Chart of {categorical_cols[0]}')
        pie_chart_path = os.path.join(figures_dir, 'pie_chart.html')
        fig.write_html(pie_chart_path)
        plots.append(pie_chart_path)

    # Interactive heatmap of the correlation matrix
    if has_numeric_pair:
        fig = px.imshow(corr, text_auto=True, title='Heatmap of Numeric Variables')
        heatmap_plot_path = os.path.join(figures_dir, 'heatmap_plot.html')
        fig.write_html(heatmap_plot_path)
        plots.append(heatmap_plot_path)

    # Violin plots for numeric columns
    for col in numeric_cols:
        fig, ax = plt.subplots()
        sns.violinplot(x=data[col], ax=ax)
        ax.set_title(f'Violin Plot of {col}')
        violin_path = os.path.join(figures_dir, f'violin_plot_{_safe_filename(col)}.png')
        _save_figure(fig, violin_path, plots)

    return plots
|
124 |
+
|
125 |
+
def analyze_data(file_input):
    """Load the uploaded CSV and return the paths of its visualizations.

    Args:
        file_input: the uploaded file as delivered by the upload widget -
            either an object exposing the path as ``.name`` or a plain path
            string - or ``None`` when nothing has been uploaded.

    Returns:
        The list produced by ``create_visualizations``; an empty list when
        no file was supplied.
    """
    if file_input is None:
        # Button pressed without an upload: nothing to show, nothing to raise.
        return []
    # Fall back to the value itself when it is already a path string.
    csv_path = getattr(file_input, "name", file_input)
    data = pd.read_csv(csv_path)
    return create_visualizations(data)
|
128 |
+
# Example CSV offered below the upload widget.
example_file_path = "./example/🤗example.csv"

# Page layout: heading, upload widget, trigger button and the result gallery.
with gr.Blocks(css=css, theme="bethecloud/storj_theme") as demo:
    gr.Markdown("# DATA BOARD📊\nUpload a `.csv` file to generate various visualizations and interactive plots.")

    # Only CSV is parsed downstream, so restrict the picker to .csv files.
    file_input = gr.File(label="Upload your `.csv` file", file_types=[".csv"])
    submit = gr.Button("Generate Dashboards")

    # Display images and interactive plots in a gallery
    gallery = gr.Gallery(label="Visualizations")

    # Clickable example wired to the same function as the button, with its
    # output cached.
    examples = gr.Examples(
        examples=[[example_file_path]],
        inputs=file_input,
        outputs=gallery,
        fn=analyze_data,  # Provide the processing function
        cache_examples=True,  # Enable caching
    )

    submit.click(analyze_data, file_input, gallery)

if __name__ == "__main__":
    # Launch with Gradio's server-side rendering switched off.
    demo.launch(ssr_mode=False)
|