import pandas as pd import plotly.graph_objects as go import streamlit as st from PIL import Image from joypy import joyplot import seaborn as sns import matplotlib.pyplot as plt # Function to load dataset def load_dataset(): file_path = "Data/mvtec_meta_features_dataset.csv" try: complete_df = pd.read_csv(file_path) # Show available column names for debugging print("Available columns:", complete_df.columns) # Verify column presence required_columns = ["category", "set_type", "anomaly_status"] for col in required_columns: if col not in complete_df.columns: raise KeyError(f"Missing required column: {col}") # Define the subclasses for each category subclasses = { 'Texture-Based': ['carpet', 'wood', 'tile', 'leather', 'zipper'], 'Industrial Components': ['cable', 'transistor', 'screw', 'grid', 'metal_nut'], 'Consumer Products': ['bottle', 'capsule', 'toothbrush'], 'Edible': ['hazelnut', 'pill'] } # Add a new column to the DataFrame to store the subclass complete_df['subclass'] = complete_df['category'].apply( lambda x: next((key for key, value in subclasses.items() if x in value), 'Unknown') ) # Reorder columns to place 'subclass' after 'category' cols = list(complete_df.columns) cols.insert(cols.index('category') + 1, cols.pop(cols.index('subclass'))) complete_df = complete_df[cols] return complete_df except Exception as e: st.error(f"Error loading dataset: {e}") return None # Function to generate dataset statistics def dataset_statistics(): df = load_dataset() if df is not None: print("Loaded dataset preview:\n", df.head()) # Debugging step # Aggregate counts for each category and condition train_normal = df[(df['set_type'] == 'train') & (df['anomaly_status'] == 'normal')].groupby('category').size() test_normal = df[(df['set_type'] == 'test') & (df['anomaly_status'] == 'normal')].groupby('category').size() test_anomalous = df[(df['set_type'] == 'test') & (df['anomaly_status'] == 'anomalous')].groupby('category').size() # Combine into a single DataFrame final_summary = pd.DataFrame({ 'Train Normal Images': train_normal, 'Test Normal Images': test_normal, 'Test Anomalous Images': test_anomalous }).fillna(0).reset_index() return final_summary return None # Function to generate the bar chart def dataset_distribution_chart(df): fig = go.Figure() fig.add_trace(go.Bar( x=df['category'], y=df['Train Normal Images'], name='Train Normal Images', marker_color='blue' )) fig.add_trace(go.Bar( x=df['category'], y=df['Test Normal Images'], name='Test Normal Images', marker_color='red' )) fig.add_trace(go.Bar( x=df['category'], y=df['Test Anomalous Images'], name='Test Anomalous Images', marker_color='green' )) # Update layout fig.update_layout( title="Distribution of Normal and Anomalous Images per Category", xaxis_title="Categories", yaxis_title="Number of Images", barmode='stack', legend_title="Image Types" ) # Display chart in Streamlit st.plotly_chart(fig, use_container_width=True) # Function to display the complete dataframe with expander def display_dataframe(): df = load_dataset() if df is not None: with st.expander("Show Complete DataFrame"): st.dataframe(df) def plot_bgr_pixel_densities(df, pixel_columns=['num_pixels_b', 'num_pixels_g', 'num_pixels_r']): """ Generate JoyPy density plots for pixel counts of BGR channels for a given category. Parameters: df (pd.DataFrame): Filtered DataFrame for a single category. pixel_columns (list): List of column names for BGR pixel counts. Returns: None """ if df.empty: st.warning("⚠️ No data available for the selected category.") return # Plot JoyPy density plot fig, axes = joyplot( data=df, by="category", # Group by category column=pixel_columns, color=['blue', 'green', 'red'], # Colors for BGR channels alpha=0.5, fade=True, legend=True, linewidth=1.0, overlap=3, figsize=(8, 6) # Adjust the figure size here ) # Add title and labels plt.title(f'Density Plots for {df["category"].unique()[0]}', fontsize=14) plt.xlabel('Number of Pixels Density', fontsize=12) plt.ylabel('Categories', fontsize=12) # Show the plot in Streamlit st.pyplot(fig) def plot_pair_plots(complete_df): """ Generate and display pair plots for each category in the dataset. Parameters: complete_df (pd.DataFrame): The input DataFrame containing image features and categories. Returns: None """ # Define the features to be included in the pairplot features = ['num_pixels_b', 'num_pixels_g', 'num_pixels_r', 'perceived_brightness'] # Create a separate pairplot for each category for category in complete_df['category'].unique(): # Filter data for current category category_df = complete_df[complete_df['category'] == category] # Check if the filtered DataFrame is not empty if not category_df.empty: # Create PairGrid with hue and palette g = sns.PairGrid(category_df, vars=features, hue='anomaly_status', palette={'normal': 'blue', 'anomalous': 'red'}) # Map the plots to the grid g.map_upper(sns.scatterplot, alpha=0.6) g.map_diag(sns.histplot, kde=True) g.map_lower(sns.scatterplot, alpha=0.6) # Add legend g.add_legend() # Customize the plot g.figure.suptitle(f'Feature Relationships for {category.title()}', y=1.02, fontsize=14) # Improve label readability for i in range(len(g.axes)): for j in range(len(g.axes)): if g.axes[i][j] is not None: g.axes[i][j].set_xlabel(g.axes[i][j].get_xlabel().replace('_', ' ').title()) g.axes[i][j].set_ylabel(g.axes[i][j].get_ylabel().replace('_', ' ').title()) # Adjust legend position to the right without overlapping the plots g._legend.set_bbox_to_anchor((1.05, 0.5)) g._legend.set_loc('center left') plt.tight_layout() st.pyplot(g.figure)