# Multi-environment test-result comparison (Streamlit app).
# (Removed extraction artifacts: hosting-page chrome, commit hashes, and a
# dumped line-number gutter that were not part of the module source.)
import pandas as pd
import streamlit as st
import numpy as np
from pre import preprocess_uploaded_file
from difflib import SequenceMatcher
import time
def similar(a, b, threshold=0.9):
    """Return True when the similarity ratio between *a* and *b* exceeds *threshold*.

    Uses difflib's SequenceMatcher ratio (0.0 = no match, 1.0 = identical);
    the comparison is strict (> threshold, not >=).
    """
    ratio = SequenceMatcher(None, a, b).ratio()
    return ratio > threshold
def perform_multi_env_analysis(uploaded_dataframes):
    """Render a multi-environment comparison dashboard for the uploaded runs.

    Parameters
    ----------
    uploaded_dataframes : list of pandas.DataFrame
        Preprocessed per-file frames; each must contain the columns
        'Environment', 'Functional area', 'Scenario name' and 'Status'.

    Side effects: writes widgets, tables and text to the Streamlit page and
    may trigger a rerun when the functional-area selection is confirmed.
    """
    # Merge every uploaded file into one frame for cross-environment analysis.
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    unique_environments = combined_data['Environment'].unique()
    # "All" is a pseudo-entry that expands to every functional area below.
    unique_areas = np.append(combined_data['Functional area'].unique(), "All")

    selected_environments = st.multiselect(
        "Select environments to display",
        unique_environments,
        default=unique_environments,
    )

    # Persist the functional-area choice across Streamlit reruns.
    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]
    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        default=st.session_state.selected_functional_areas,
        key="functional_areas_multiselect"
    )
    if st.button("Confirm Functional Area Selection"):
        st.session_state.selected_functional_areas = selected_functional_areas
        st.success("Functional area selection updated!")
        time.sleep(0.5)  # brief pause so the success toast is visible before rerun
        st.rerun()  # rerun the app to reflect the changes

    if "All" in selected_functional_areas:
        selected_functional_areas = combined_data['Functional area'].unique()

    # Restrict to the environments / functional areas chosen above.
    filtered_data = combined_data[
        (combined_data['Environment'].isin(selected_environments)) &
        (combined_data['Functional area'].isin(selected_functional_areas))
    ]

    grouped_data = _summarize_by_scenario(filtered_data)

    st.write("### Scenario Counts by Environment and Functional Area")

    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum'
    }).reset_index()
    # Display convention: repeat the column names as a literal first row.
    summary_with_headers = pd.concat(
        [pd.DataFrame([summary.columns], columns=summary.columns), summary],
        ignore_index=True,
    )
    st.dataframe(summary_with_headers)

    # Scenario-name sets per selected environment, used for cross-env diffing.
    scenarios_by_env = {
        env: set(grouped_data[grouped_data['Environment'] == env]['Scenario name'])
        for env in selected_environments
    }
    # Debug: print the number of scenarios in each environment.
    for env, scenarios in scenarios_by_env.items():
        st.write(f"Number of scenarios in {env}: {len(scenarios)}")

    # BUGFIX: these lists were previously initialized but never populated,
    # so the "missing"/"name differences" sections below always read empty.
    missing_scenarios, mismatched_scenarios = _compare_scenarios_across_envs(
        scenarios_by_env
    )

    st.write("### Inconsistent Scenario Count Analysis by Functional Area")
    if len(selected_environments) > 1:
        # Distinct scenario count per (environment, functional area) cell.
        scenario_counts = (
            filtered_data
            .groupby(['Environment', 'Functional area'])['Scenario name']
            .nunique()
            .unstack(fill_value=0)
        )
        # Spread between the best- and worst-covered environment per area.
        count_diff = scenario_counts.max() - scenario_counts.min()
        inconsistent_areas = count_diff.sort_values(ascending=False)
        st.write("Functional areas with inconsistent scenario counts across environments:")
        for area, diff in inconsistent_areas.items():
            if diff > 0:
                st.write(f"- {area}: Difference of {diff} scenarios")
                st.write(scenario_counts[area])
                st.write("\n")
        if st.checkbox("Show detailed scenario count breakdown"):
            st.write(scenario_counts)
    else:
        st.write("Please select at least two environments for comparison.")

    st.write(f"Number of truly missing scenarios: {len(missing_scenarios)}")
    st.write(f"Number of scenarios with name differences: {len(mismatched_scenarios)}")
    if missing_scenarios:
        st.write("### Truly Missing Scenarios")
        st.dataframe(pd.DataFrame(missing_scenarios))
    else:
        st.write("No truly missing scenarios found across environments.")
    if mismatched_scenarios:
        st.write("### Scenarios with Name Differences")
        st.dataframe(pd.DataFrame(mismatched_scenarios))
    else:
        st.write("No scenarios with name differences found across environments.")


def _summarize_by_scenario(filtered_data):
    """Per-scenario PASSED/FAILED/Total counts, one row per scenario.

    Returns a DataFrame with columns
    ['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED'].
    """
    grouped = (
        filtered_data
        .groupby(['Environment', 'Functional area', 'Scenario name', 'Status'])
        .size()
        .unstack(fill_value=0)
    )
    # Guarantee both status columns exist even if one never occurs in the data.
    for status in ('PASSED', 'FAILED'):
        if status not in grouped.columns:
            grouped[status] = 0
    grouped['Total'] = grouped['PASSED'] + grouped['FAILED']
    grouped = grouped.reset_index()
    return grouped[['Environment', 'Functional area', 'Scenario name',
                    'Total', 'PASSED', 'FAILED']]


def _compare_scenarios_across_envs(scenarios_by_env):
    """Classify scenarios absent from some environment as missing or renamed.

    For each scenario present in one environment but absent from another:
    if a similarly-named scenario (per ``similar``) exists in the other
    environment it is reported as a name difference, otherwise as truly
    missing. Returns ``(missing, mismatched)`` — two lists of dicts suitable
    for ``pd.DataFrame``.
    """
    missing = []
    mismatched = []
    for env, scenarios in scenarios_by_env.items():
        for other_env, other_scenarios in scenarios_by_env.items():
            if other_env == env:
                continue
            for scenario in sorted(scenarios - other_scenarios):
                close = next(
                    (s for s in other_scenarios if similar(scenario, s)), None
                )
                if close is None:
                    missing.append({
                        'Scenario name': scenario,
                        'Present in': env,
                        'Missing from': other_env,
                    })
                else:
                    mismatched.append({
                        'Scenario name': scenario,
                        'Environment': env,
                        'Compared environment': other_env,
                        'Closest match': close,
                    })
    return missing, mismatched
def multi_env_compare_main():
    """Streamlit entry point: collect per-environment CSV uploads and analyze them.

    Prompts for the number of environments, shows one multi-file CSV uploader
    per environment, preprocesses each upload, and hands the resulting frames
    to ``perform_multi_env_analysis``.
    """
    st.title("Multi-Environment Comparison")

    num_environments = st.number_input(
        "Enter the number of environments", min_value=1, value=1, step=1
    )

    uploaded_dataframes = []
    # int() guards against number_input returning a float in some configurations.
    for i in range(int(num_environments)):
        uploaded_files = st.file_uploader(
            f"Upload CSV files for Environment {i + 1}",
            type="csv",
            accept_multiple_files=True,
        )
        # file_uploader may return None before anything is uploaded — guard it.
        for uploaded_file in uploaded_files or []:
            # Preprocess each uploaded CSV into a normalized dataframe.
            uploaded_dataframes.append(preprocess_uploaded_file(uploaded_file))

    if uploaded_dataframes:
        perform_multi_env_analysis(uploaded_dataframes)
    else:
        st.write("Please upload at least one CSV file.")
# Allow running this module directly (e.g. `streamlit run <file>`).
if __name__ == "__main__":
    multi_env_compare_main()