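"""Streamlit page for comparing test scenario results across multiple environments.

CSV exports are uploaded per environment, preprocessed, and combined so that
scenario counts and PASSED/FAILED totals can be compared side by side.
"""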
import pandas as pd
import streamlit as st
import numpy as np
from pre import preprocess_uploaded_file
from difflib import SequenceMatcher
import time


def similar(a, b, threshold=0.9):
    """Return True when the two strings are more than `threshold` similar."""
    return SequenceMatcher(None, a, b).ratio() > threshold
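# Quick illustration of the ratio-based matching above (scenario names are made up
# for the example, ratios are approximate):
#   similar("Checkout flow", "Checkout flows")   # ratio ~0.96 -> True
#   similar("Login test", "Search test")         # ratio ~0.48 -> False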


def perform_multi_env_analysis(uploaded_dataframes):
    # Concatenate all dataframes into a single dataframe
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    # Get unique environments and functional areas
    unique_environments = combined_data['Environment'].unique()
    unique_areas = np.append(combined_data['Functional area'].unique(), "All")

    # Select environments to display
    selected_environments = st.multiselect("Select environments to display", unique_environments, default=unique_environments)

    # Initialize session state for selected functional areas if it doesn't exist
    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]

    # Select functional areas to display, using session state
    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        default=st.session_state.selected_functional_areas,
        key="functional_areas_multiselect"
    )

    # Add a button to confirm the selection
    if st.button("Confirm Functional Area Selection"):
        # Update session state with the new selection
        st.session_state.selected_functional_areas = selected_functional_areas
        st.success("Functional area selection updated!")
        time.sleep(0.5)  # Add a small delay for better user experience
        st.rerun()  # Rerun the app to reflect the changes
if "All" in selected_functional_areas: | |
selected_functional_areas = combined_data['Functional area'].unique() | |
# Filter data based on selected environments and functional areas | |
filtered_data = combined_data[ | |
(combined_data['Environment'].isin(selected_environments)) & | |
(combined_data['Functional area'].isin(selected_functional_areas)) | |
] | |
# Group data by Environment, Functional area, Scenario name, and Status | |
grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario name', 'Status']).size().unstack(fill_value=0) | |
# Ensure 'PASSED' and 'FAILED' columns exist | |
if 'PASSED' not in grouped_data.columns: | |
grouped_data['PASSED'] = 0 | |
if 'FAILED' not in grouped_data.columns: | |
grouped_data['FAILED'] = 0 | |
# Calculate total scenarios | |
grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED'] | |
# Reset index to make Environment, Functional area, and Scenario name as columns | |
grouped_data = grouped_data.reset_index() | |
# Reorder columns | |
grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']] | |
    # Display the grouped data
    st.write("### Scenario Counts by Environment and Functional Area")
    # st.dataframe(grouped_data.style.highlight_max(axis=0, subset=['Total', 'PASSED', 'FAILED']))

    # Display summary statistics
    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum'
    }).reset_index()

    # Add column names as the first row
    summary_with_headers = pd.concat([pd.DataFrame([summary.columns], columns=summary.columns), summary], ignore_index=True)

    # Display the DataFrame
    st.dataframe(summary_with_headers)
    # Build the set of scenario names per selected environment
    scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario name']) for env in selected_environments}

    # Debug: Print the number of scenarios in each environment
    for env, scenarios in scenarios_by_env.items():
        st.write(f"Number of scenarios in {env}: {len(scenarios)}")

    missing_scenarios = []
    mismatched_scenarios = []
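    # The two lists above are never populated in this snippet. The sketch below is one
    # plausible way to fill them in using scenarios_by_env and similar(); it is an
    # assumption about the intended logic, not the original author's implementation.
    for env, scenarios in scenarios_by_env.items():
        other_envs = [e for e in selected_environments if e != env]
        for scenario in scenarios:
            for other_env in other_envs:
                if scenario in scenarios_by_env[other_env]:
                    continue
                # Look for a close-but-not-identical name in the other environment
                close_match = next(
                    (s for s in scenarios_by_env[other_env] if similar(scenario, s)),
                    None
                )
                if close_match is not None:
                    mismatched_scenarios.append({
                        'Scenario name': scenario,
                        'Environment': env,
                        'Closest match': close_match,
                        'Missing from': other_env
                    })
                else:
                    missing_scenarios.append({
                        'Scenario name': scenario,
                        'Environment': env,
                        'Missing from': other_env
                    })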
    # New section for efficient inconsistency analysis
    st.write("### Inconsistent Scenario Count Analysis by Functional Area")
    if len(selected_environments) > 1:
        # Group data by Environment and Functional area, count scenarios
        scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario name'].nunique().unstack(fill_value=0)

        # Calculate the difference between max and min counts for each functional area
        count_diff = scenario_counts.max() - scenario_counts.min()

        # Sort functional areas by count difference, descending
        inconsistent_areas = count_diff.sort_values(ascending=False)

        st.write("Functional areas with inconsistent scenario counts across environments:")
        for area, diff in inconsistent_areas.items():
            if diff > 0:
                st.write(f"- {area}: Difference of {diff} scenarios")
                st.write(scenario_counts[area])
                st.write("\n")

        # Option to show detailed breakdown
        if st.checkbox("Show detailed scenario count breakdown"):
            st.write(scenario_counts)
    else:
        st.write("Please select at least two environments for comparison.")
    # Debug: Print the number of missing and mismatched scenarios
    st.write(f"Number of truly missing scenarios: {len(missing_scenarios)}")
    st.write(f"Number of scenarios with name differences: {len(mismatched_scenarios)}")

    if missing_scenarios:
        st.write("### Truly Missing Scenarios")
        missing_df = pd.DataFrame(missing_scenarios)
        st.dataframe(missing_df)
    else:
        st.write("No truly missing scenarios found across environments.")

    if mismatched_scenarios:
        st.write("### Scenarios with Name Differences")
        mismatched_df = pd.DataFrame(mismatched_scenarios)
        st.dataframe(mismatched_df)
    else:
        st.write("No scenarios with name differences found across environments.")


def multi_env_compare_main():
    st.title("Multi-Environment Comparison")

    # Get the number of environments from the user
    num_environments = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)

    # Initialize list to store uploaded dataframes
    uploaded_dataframes = []

    # Loop through the number of environments and create file uploaders
    for i in range(num_environments):
        uploaded_files = st.file_uploader(f"Upload CSV files for Environment {i + 1}", type="csv", accept_multiple_files=True)

        # file_uploader may return None before anything is uploaded on some Streamlit
        # versions, so guard before iterating
        for uploaded_file in uploaded_files or []:
            # Preprocess the uploaded CSV file
            data = preprocess_uploaded_file(uploaded_file)
            # Append the dataframe to the list
            uploaded_dataframes.append(data)

    # Check if any files were uploaded
    if uploaded_dataframes:
        # Perform analysis for uploaded data
        perform_multi_env_analysis(uploaded_dataframes)
    else:
        st.write("Please upload at least one CSV file.")


if __name__ == "__main__":
    multi_env_compare_main()
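# To try the page locally (assuming this module is saved as, e.g., multi_env_compare.py
# alongside the pre.py helper that provides preprocess_uploaded_file):
#   streamlit run multi_env_compare.py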