# Spaces: Sleeping -- appears to be page-header text captured with this listing; not part of the program.
import html
import os
import pickle

import gradio as gr
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib.colors import LinearSegmentedColormap
from matplotlib.figure import Figure
from sklearn.linear_model import LogisticRegression
# Global plotting defaults applied to every chart the app renders.
# The seaborn context is set after the matplotlib style so its sizing
# overrides land on top of the 'ggplot' style, as in the original order.
plt.style.use('ggplot')
sns.set_context("talk")
plt.rcParams['figure.figsize'] = (12, 8)
# Function to generate synthetic meeting data
def generate_meeting_data(n_meetings=500, seed=42, email_threshold=65):
    """Generate synthetic meeting data with an "email-ability" label.

    Each row is one randomly generated meeting. A raw score is built as a
    weighted sum of the meeting features plus Gaussian noise, min-max scaled
    to 0-100, rounded to one decimal, and thresholded into a binary label.

    Args:
        n_meetings: Number of meetings (rows) to generate.
        seed: Seed for the private random generator. The default (42)
            reproduces the same values as seeding NumPy's global generator
            with 42. Pass ``None`` for a non-reproducible data set.
        email_threshold: A meeting is labelled ``could_be_email = 1`` when
            its scaled ``email_score`` is strictly greater than this value.

    Returns:
        pandas.DataFrame with the columns ``meeting_id``,
        ``duration_minutes``, ``n_participants``, ``presenter_talk_percent``,
        ``questions_asked``, ``actionable_items``, ``silence_percent``,
        ``topic_changes``, ``slides_count``, ``meeting_type``,
        ``email_score`` and ``could_be_email``.
    """
    # A private legacy RandomState yields the same stream as
    # ``np.random.seed(seed)`` followed by the module-level functions, but
    # does not reseed NumPy's global generator for the rest of the process.
    rng = np.random.RandomState(seed)

    # NOTE: the order of the draws below determines the generated values;
    # reordering them changes the data set for a given seed.
    data = {
        'meeting_id': range(1, n_meetings + 1),
        'duration_minutes': rng.choice(
            [15, 30, 45, 60, 90, 120],
            size=n_meetings,
            p=[0.1, 0.25, 0.2, 0.3, 0.1, 0.05],
        ),
        'n_participants': rng.randint(2, 15, size=n_meetings),
        'presenter_talk_percent': rng.uniform(30, 95, size=n_meetings),
        'questions_asked': rng.randint(0, 12, size=n_meetings),
        'actionable_items': rng.randint(0, 8, size=n_meetings),
        'silence_percent': rng.uniform(0, 40, size=n_meetings),
        'topic_changes': rng.randint(1, 10, size=n_meetings),
        'slides_count': rng.randint(0, 40, size=n_meetings),
    }

    # Add meeting types
    meeting_topics = [
        "Weekly Status Update", "Quarterly Planning", "Project Kickoff",
        "Brainstorming Session", "Customer Feedback Review", "Budget Review",
        "Team Building", "Product Demo", "Strategic Alignment", "Post-Mortem",
        "OKR Review", "All-Hands", "Happy Hour Planning",
    ]
    data['meeting_type'] = rng.choice(meeting_topics, size=n_meetings)

    df = pd.DataFrame(data)

    # Raw "email score": positive weights push towards "could be an email",
    # negative weights towards "needs a meeting".
    raw_score = (
        # Longer meetings get lower scores (less email-able)
        -0.2 * df['duration_minutes'] +
        # More participants = less email-able
        -0.5 * df['n_participants'] +
        # If one person does all the talking, could be an email
        0.3 * df['presenter_talk_percent'] +
        # Each question asked lowers the score
        -3 * df['questions_asked'] +
        # Each action item lowers the score
        -5 * df['actionable_items'] +
        # Lots of silence = waste of time
        0.5 * df['silence_percent'] +
        # Lots of topic changes = less email-able
        -2 * df['topic_changes'] +
        # Many slides = information dump, could be emailed
        0.2 * df['slides_count'] +
        # Random noise
        rng.normal(0, 15, size=n_meetings)
    )

    # Normalize to a 0-100 scale. When every score is identical (e.g. a
    # single meeting) the range is zero and min-max scaling would divide by
    # zero, so those rows are scored 0 instead of NaN.
    lowest = raw_score.min()
    span = raw_score.max() - lowest
    if span > 0:
        scaled = (raw_score - lowest) / span * 100
    else:
        scaled = raw_score * 0.0
    df['email_score'] = scaled.round(1)

    # Binary label: could have been an email (1) or not (0)
    df['could_be_email'] = (df['email_score'] > email_threshold).astype(int)
    return df
# Function to train the model
def train_model(df):
    """Fit a logistic-regression classifier for the ``could_be_email`` label.

    Args:
        df: DataFrame containing the eight numeric feature columns listed
            below and a binary ``could_be_email`` column.

    Returns:
        Tuple ``(model, features)``: the fitted ``LogisticRegression`` and
        the list of feature column names, in the order used for training.
    """
    # Select features
    features = [
        'duration_minutes', 'n_participants', 'presenter_talk_percent',
        'questions_asked', 'actionable_items', 'silence_percent',
        'topic_changes', 'slides_count',
    ]
    X = df[features]
    y = df['could_be_email']

    # The features are fed in unscaled (minutes, counts and percentages on
    # different ranges), so the solver can need more than the default 100
    # iterations to converge; allow it a larger budget.
    model = LogisticRegression(random_state=42, max_iter=1000)
    model.fit(X, y)
    return model, features
def _draw_email_gauge(ax, probability, is_email):
    """Draw the semicircular email-ability gauge and verdict text on *ax*."""
    radius = 1.0

    # Outline of the gauge: the upper half of a circle.
    outline = np.linspace(0, np.pi, 100)
    ax.plot(radius * np.cos(outline), radius * np.sin(outline), color='gray', linewidth=3)

    # 0 % sits at the left end (angle pi, next to the "Meeting" label) and
    # 100 % at the right end (angle 0, next to the "Email" label), so the
    # needle moves towards the label that matches the prediction.
    needle_theta = np.pi * (1 - probability / 100)
    ax.plot(
        [0, radius * np.cos(needle_theta)],
        [0, radius * np.sin(needle_theta)],
        color='black', linewidth=4,
    )

    # Fill colour runs green -> yellow -> red as the probability rises.
    stops = [(0.0, 0.7, 0.0), (1.0, 1.0, 0.0), (1.0, 0.0, 0.0)]
    cmap = LinearSegmentedColormap.from_list('email_cmap', stops, N=100)
    gauge_color = cmap(probability / 100)

    # Shade the part of the gauge swept from the "Meeting" end to the needle.
    swept = np.linspace(np.pi, needle_theta, 100)
    ax.fill_between(
        radius * np.cos(swept), 0, radius * np.sin(swept),
        color=gauge_color, alpha=0.7,
    )

    # Probability text and end labels
    ax.text(0, -0.2, f"{probability:.1f}% Email-able", ha='center', fontsize=24, fontweight='bold')
    ax.text(-1, 0.1, "Meeting", fontsize=16)
    ax.text(1, 0.1, "Email", fontsize=16)

    # Decision text
    if is_email:
        decision_text = "VERDICT: This could have been an email!"
    else:
        decision_text = "VERDICT: This meeting seems necessary."
    ax.text(0, -0.4, decision_text, ha='center', fontsize=20,
            fontweight='bold', color='red' if is_email else 'green')

    # Set axis limits and hide the axes
    ax.set_xlim(-1.2, 1.2)
    ax.set_ylim(-0.5, 1.2)
    ax.axis('off')
    ax.set_title("Meeting Email-ability Meter", fontsize=18)


def _draw_time_impact(ax, wasted_minutes):
    """Draw the per-meeting and yearly (52 occurrences) wasted-time bars on *ax*."""
    labels = ['This Meeting', 'Annual Impact\n(if weekly)']
    annual_minutes = wasted_minutes * 52  # Weekly for a year
    ax.bar(labels, [wasted_minutes, annual_minutes], color=['#ff9999', '#ff5555'])

    # Value labels on top of the bars
    ax.text(0, wasted_minutes + 5, f"{wasted_minutes:.0f} person-minutes", ha='center', fontsize=14)
    hours = annual_minutes / 60
    days = hours / 8
    ax.text(1, annual_minutes + 5, f"{hours:.0f} hours\n({days:.1f} workdays)", ha='center', fontsize=14)

    ax.set_title("Time Impact Analysis", fontsize=18)
    ax.set_ylabel("Wasted Time (person-minutes)", fontsize=14)


# Function to predict whether a meeting could be an email
def predict_meeting(
    duration, participants, presenter_talk, questions,
    action_items, silence, topic_changes, slides
):
    """Score one meeting with the module-level ``model`` and chart the result.

    Args:
        duration: Meeting length in minutes.
        participants: Number of attendees.
        presenter_talk: Share of time the presenter talks, in percent.
        questions: Number of questions asked.
        action_items: Number of actionable items.
        silence: Share of silence, in percent.
        topic_changes: Number of topic changes.
        slides: Number of slides.

    Returns:
        Tuple ``(fig, probability, is_email, wasted_minutes, wasted_workdays)``:
        the matplotlib figure (gauge + time-impact bars), the predicted
        probability of class 1 as a percentage (``float``), the predicted
        label as ``bool``, the wasted person-minutes, and the same figure
        expressed in 8-hour workdays.
    """
    # Single-row frame with the same column names the model was trained on.
    input_data = pd.DataFrame({
        'duration_minutes': [duration],
        'n_participants': [participants],
        'presenter_talk_percent': [presenter_talk],
        'questions_asked': [questions],
        'actionable_items': [action_items],
        'silence_percent': [silence],
        'topic_changes': [topic_changes],
        'slides_count': [slides],
    })

    # Cast the NumPy scalars to builtins so downstream consumers (UI
    # components, string formatting) receive plain Python values.
    probability = float(model.predict_proba(input_data)[0][1] * 100)
    is_email = bool(model.predict(input_data)[0])

    # An email-able meeting wastes all of its person-minutes; a necessary
    # one is still charged 20 % of them.
    person_minutes = duration * participants
    wasted_minutes = person_minutes if is_email else person_minutes * 0.2
    wasted_workdays = wasted_minutes / (8 * 60)  # assuming 8-hour workday

    # Build the figure without pyplot: pyplot keeps every figure it creates
    # alive in a global registry until it is closed, and its "current
    # figure" state is shared between concurrent requests.
    fig = Figure(figsize=(16, 8))
    gauge_ax, impact_ax = fig.subplots(1, 2)
    _draw_email_gauge(gauge_ax, probability, is_email)
    _draw_time_impact(impact_ax, wasted_minutes)
    fig.tight_layout()

    return fig, probability, is_email, wasted_minutes, wasted_workdays
# Create a personalized report
def generate_report(
    meeting_type, duration, participants, presenter_talk, questions,
    action_items, silence, topic_changes, slides, is_email, probability,
    wasted_minutes, wasted_workdays
):
    """Render the analysis of one meeting as an HTML fragment.

    Args:
        meeting_type: Meeting type label; HTML-escaped before insertion.
        duration: Meeting length in minutes.
        participants: Number of attendees.
        presenter_talk, questions, action_items, silence, topic_changes,
        slides: Accepted so the signature mirrors the input form; they are
            not used in the report.
        is_email: Truthy when the meeting was classified as email-able.
        probability: Email-ability score in percent.
        wasted_minutes: Wasted time in person-minutes.
        wasted_workdays: Wasted time in workdays.

    Returns:
        The report as an HTML string, or an empty string when any of
        ``probability``, ``wasted_minutes`` or ``wasted_workdays`` is
        ``None`` (no analysis result is available to report on).
    """
    # Without numeric results the format specs below would raise TypeError.
    if probability is None or wasted_minutes is None or wasted_workdays is None:
        return ""

    if is_email:
        title = "📧 THIS MEETING COULD HAVE BEEN AN EMAIL 📧"
        color = "red"
    else:
        title = "✅ This meeting appears to be necessary"
        color = "green"

    # The label is user-supplied text placed inside markup: escape it.
    safe_type = html.escape(str(meeting_type))

    summary = f"""
<div style="font-family: Arial, sans-serif; padding: 20px; max-width: 800px; margin: 0 auto;">
  <h1 style="color: {color}; text-align: center;">{title}</h1>
  <div style="background-color: #f5f5f5; border-radius: 10px; padding: 20px; margin-top: 20px;">
    <h2>Meeting Analysis</h2>
    <p><strong>Meeting Type:</strong> {safe_type}</p>
    <p><strong>Duration:</strong> {duration} minutes</p>
    <p><strong>Participants:</strong> {participants} people</p>
    <p><strong>Email-ability Score:</strong> <span style="font-size: 1.2em; font-weight: bold;">{probability:.1f}%</span></p>
  </div>
  <div style="background-color: #fff3f3; border-radius: 10px; padding: 20px; margin-top: 20px;">
    <h2>Economic Impact</h2>
    <p><strong>Time Wasted in This Meeting:</strong> {wasted_minutes:.0f} person-minutes</p>
    <p><strong>Equivalent Workdays:</strong> {wasted_workdays:.2f} days</p>
    <p><strong>Annual Impact (if held weekly):</strong> {wasted_workdays * 52:.1f} workdays</p>
    <p><strong>Estimated Annual Cost:</strong> ${wasted_minutes * 52 * 0.5:.0f}</p>
  </div>
"""

    # Recommendations depend on the verdict
    recommendations_open = """
  <div style="background-color: #f0f8ff; border-radius: 10px; padding: 20px; margin-top: 20px;">
    <h2>Recommendations</h2>
"""
    if is_email:
        recommendations = """
    <ul>
      <li>Convert this meeting to an async email or Slack thread</li>
      <li>If a meeting is necessary, reduce the participant count by 50%</li>
      <li>Consider recording a 5-minute video update instead</li>
      <li>Create a shared document for status updates</li>
    </ul>
"""
    else:
        recommendations = """
    <ul>
      <li>This meeting seems justified, but consider reducing duration</li>
      <li>Send an agenda in advance to increase focus</li>
      <li>Use a timer to keep discussions on track</li>
      <li>End with clear action items and owners</li>
    </ul>
"""

    # Closes the recommendations box, adds the footer, closes the wrapper.
    footer = """
  </div>
  <div style="text-align: center; font-style: italic; margin-top: 30px; color: #666;">
    <p>Analysis generated by the Meeting-That-Could-Have-Been-An-Email Detector</p>
    <p>Results are for entertainment purposes. Actual productivity may vary.</p>
  </div>
</div>
"""
    return "".join([summary, recommendations_open, recommendations, footer])
# Generate dataset and train model when the app starts.
# `model` is read as a module-level global by predict_meeting, so this must
# run before the first prediction is requested.
print("Generating synthetic data and training model...")
df = generate_meeting_data()
model, features = train_model(df)
# Static Markdown shown above and below the form. Kept flush-left at module
# level so the text contains no leading indentation.
_INTRO_MARKDOWN = """
# 📧 The Meeting-That-Could-Have-Been-An-Email Detector

Have you ever sat through a meeting thinking "this could have been an email"?
Now you can scientifically prove it! Enter your meeting details below to analyze
whether your meeting is necessary or could be replaced with an email.

*Note: This is a humor project using synthetic data. Results are meant to be entertaining, not prescriptive.*
"""

_ABOUT_MARKDOWN = """
## How It Works

This tool uses a machine learning model trained on synthetic data representing hundreds of meetings.
The model analyzes meeting characteristics to determine whether the meeting could be replaced with asynchronous communication.

Key factors that make a meeting "email-able":

- High presenter talk percentage (one-way information flow)
- Few questions from participants
- Few actionable outcomes
- Many participants relative to the decisions being made

## About This Project

This is a humor project that pokes fun at corporate meeting culture. While the analysis uses real data science techniques,
the underlying data is synthetic. The tool is meant to be entertaining while making us think about how we use our time at work.

Created as a data science portfolio project to demonstrate data visualization, interactive web apps, and a bit of workplace humor.
"""

# Create Gradio interface
with gr.Blocks(title="Meeting Email Detector") as demo:
    gr.Markdown(_INTRO_MARKDOWN)

    # Input form: two columns of controls
    with gr.Row():
        with gr.Column():
            meeting_type = gr.Dropdown(
                choices=[
                    "Weekly Status Update", "Quarterly Planning", "Project Kickoff",
                    "Brainstorming Session", "Customer Feedback Review", "Budget Review",
                    "Team Building", "Product Demo", "Strategic Alignment", "Post-Mortem",
                    "OKR Review", "All-Hands", "Happy Hour Planning",
                ],
                label="Meeting Type",
                value="Weekly Status Update",
            )
            duration = gr.Slider(
                minimum=15, maximum=120, value=60, step=15,
                label="Duration (minutes)",
            )
            participants = gr.Slider(
                minimum=2, maximum=20, value=6, step=1,
                label="Number of Participants",
            )
            presenter_talk = gr.Slider(
                minimum=10, maximum=100, value=70, step=5,
                label="Presenter Talk Percentage (%)",
            )
            questions = gr.Slider(
                minimum=0, maximum=15, value=4, step=1,
                label="Expected Questions from Audience",
            )
        with gr.Column():
            action_items = gr.Slider(
                minimum=0, maximum=10, value=3, step=1,
                label="Actionable Items Expected",
            )
            silence = gr.Slider(
                minimum=0, maximum=50, value=15, step=5,
                label="Expected Silence/Awkward Pauses (%)",
            )
            topic_changes = gr.Slider(
                minimum=1, maximum=15, value=4, step=1,
                label="Number of Distinct Topics",
            )
            slides = gr.Slider(
                minimum=0, maximum=50, value=10, step=1,
                label="Number of Slides/Visual Aids",
            )

    analyze_btn = gr.Button("Analyze This Meeting", variant="primary")

    # Results: chart on the left, headline numbers on the right
    with gr.Row():
        with gr.Column():
            result_plot = gr.Plot(label="Analysis Results")
        with gr.Column():
            with gr.Row():
                email_score = gr.Number(label="Email-ability Score (%)")
                is_email = gr.Checkbox(label="Could Be An Email?")
            with gr.Row():
                wasted_time = gr.Number(label="Time Wasted (person-minutes)")
                wasted_days = gr.Number(label="Equivalent Workdays")

    report_html = gr.HTML(label="Detailed Report")

    # Step 1 fills the chart and the four result fields; predict_meeting
    # returns exactly five values, one per output component. Step 2 reads
    # those fields back (plus the form inputs) to build the HTML report.
    analyze_btn.click(
        fn=predict_meeting,
        inputs=[
            duration, participants, presenter_talk, questions,
            action_items, silence, topic_changes, slides,
        ],
        outputs=[result_plot, email_score, is_email, wasted_time, wasted_days],
    ).then(
        fn=generate_report,
        inputs=[
            meeting_type, duration, participants, presenter_talk, questions,
            action_items, silence, topic_changes, slides, is_email, email_score,
            wasted_time, wasted_days,
        ],
        outputs=report_html,
    )

    gr.Markdown(_ABOUT_MARKDOWN)
# Launch the app only when this file is executed as a script.
if __name__ == "__main__":
    demo.launch()