File size: 7,068 Bytes
62b007e
 
 
 
 
5f944ac
62b007e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5f944ac
 
62b007e
 
5f944ac
 
 
 
 
 
 
62b007e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
import pandas as pd
import streamlit as st
import numpy as np
from pre import preprocess_uploaded_file
from difflib import SequenceMatcher
import time

def similar(a, b, threshold=0.9):
    """Return True when *a* and *b* are more similar than *threshold*.

    Similarity is the ``ratio()`` of a ``difflib.SequenceMatcher`` built
    without a junk filter. The comparison is strict, so a ratio exactly
    equal to *threshold* yields False.
    """
    matcher = SequenceMatcher(None, a, b)
    score = matcher.ratio()
    return score > threshold

def _find_scenario_discrepancies(scenarios_by_env):
    """Classify scenarios that are absent from an environment.

    Args:
        scenarios_by_env: Mapping of environment -> set of scenario names.

    Returns:
        A ``(missing, mismatched)`` tuple of lists of dict records. A scenario
        absent from an environment is reported as *mismatched* when that
        environment holds a name that ``similar`` considers close to it, and
        as *missing* otherwise.
    """
    missing = []
    mismatched = []
    if len(scenarios_by_env) < 2:
        # Nothing to compare against with fewer than two environments.
        return missing, mismatched

    every_scenario = set().union(*scenarios_by_env.values())
    shared_by_all = set.intersection(*scenarios_by_env.values())

    for env, present in scenarios_by_env.items():
        # A renamed scenario can only correspond to a name that is not shared
        # by every environment, so fuzzy matching is restricted to those
        # names; this keeps the number of SequenceMatcher runs small when the
        # environments mostly agree.
        rename_candidates = sorted(present - shared_by_all, key=str)

        for scenario in sorted(every_scenario - present, key=str):
            found_in = ', '.join(
                str(other)
                for other, names in scenarios_by_env.items()
                if scenario in names
            )
            close_name = next(
                (
                    candidate
                    for candidate in rename_candidates
                    if similar(str(scenario), str(candidate))
                ),
                None,
            )
            if close_name is None:
                missing.append({
                    'Scenario name': scenario,
                    'Missing from': env,
                    'Present in': found_in,
                })
            else:
                mismatched.append({
                    'Scenario name': scenario,
                    'Present in': found_in,
                    'Environment with similar name': env,
                    'Similar name': close_name,
                })

    return missing, mismatched


def perform_multi_env_analysis(uploaded_dataframes):
    """Render the cross-environment scenario comparison in Streamlit.

    The frames are concatenated and then read through their 'Environment',
    'Functional area', 'Scenario name' and 'Status' columns. The page shows
    environment / functional-area filters, per-environment PASSED/FAILED
    totals, per-functional-area scenario-count differences, and the scenarios
    that are missing from (or differently named in) some environments.

    Args:
        uploaded_dataframes: Iterable of pandas DataFrames to combine.

    Returns:
        None. All output is written to the Streamlit page.
    """
    # Concatenate all dataframes into a single dataframe
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    # Get unique environments and functional areas ("All" is a pseudo-option)
    unique_environments = combined_data['Environment'].unique()
    unique_areas = np.append(combined_data['Functional area'].unique(), "All")

    # Select environments to display
    selected_environments = st.multiselect("Select environments to display", unique_environments, default=unique_environments)

    # Initialize session state for selected functional areas if it doesn't exist
    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]

    # Streamlit raises when a default value is not among the options, which
    # happens when the remembered selection refers to an area that the newly
    # uploaded data no longer contains. Keep only still-valid areas and fall
    # back to "All" when none of a non-empty remembered selection survives.
    remembered_areas = list(st.session_state.selected_functional_areas)
    available_areas = set(unique_areas.tolist())
    default_areas = [area for area in remembered_areas if area in available_areas]
    if remembered_areas and not default_areas:
        default_areas = ["All"]

    # Select functional areas to display, using session state
    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        default=default_areas,
        key="functional_areas_multiselect"
    )

    # Add a button to confirm the selection
    if st.button("Confirm Functional Area Selection"):
        # Update session state with the new selection
        st.session_state.selected_functional_areas = selected_functional_areas
        st.success("Functional area selection updated!")
        time.sleep(0.5)  # Leave the success message visible briefly before rerunning
        st.rerun()  # Rerun the app to reflect the changes

    # "All" expands to every functional area present in the data
    if "All" in selected_functional_areas:
        selected_functional_areas = combined_data['Functional area'].unique()

    # Filter data based on selected environments and functional areas
    filtered_data = combined_data[
        (combined_data['Environment'].isin(selected_environments)) &
        (combined_data['Functional area'].isin(selected_functional_areas))
    ]

    # Count rows per (Environment, Functional area, Scenario name), one column per Status
    grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario name', 'Status']).size().unstack(fill_value=0)

    # Ensure 'PASSED' and 'FAILED' columns exist
    if 'PASSED' not in grouped_data.columns:
        grouped_data['PASSED'] = 0
    if 'FAILED' not in grouped_data.columns:
        grouped_data['FAILED'] = 0

    # Total counts PASSED and FAILED only; any other status is not included
    grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']

    # Reset index to make Environment, Functional area, and Scenario name as columns
    grouped_data = grouped_data.reset_index()

    # Reorder columns
    grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']]

    # NOTE(review): only the heading is rendered here; the detailed table
    # under it is currently not displayed.
    st.write("### Scenario Counts by Environment and Functional Area")

    # Display summary statistics
    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum'
    }).reset_index()

    # Add column names as the first row
    summary_with_headers = pd.concat([pd.DataFrame([summary.columns], columns=summary.columns), summary], ignore_index=True)

    # Display the DataFrame
    st.dataframe(summary_with_headers)

    # Scenario names seen in each selected environment (after filtering)
    scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario name']) for env in selected_environments}

    # Debug: Print the number of scenarios in each environment
    for env, scenarios in scenarios_by_env.items():
        st.write(f"Number of scenarios in {env}: {len(scenarios)}")

    # Work out which scenarios are absent from, or renamed in, an environment
    # so the report below reflects the data instead of always being empty.
    missing_scenarios, mismatched_scenarios = _find_scenario_discrepancies(scenarios_by_env)

    # New section for efficient inconsistency analysis
    st.write("### Inconsistent Scenario Count Analysis by Functional Area")

    if len(selected_environments) > 1:
        # Distinct scenario names per environment (rows) and functional area (columns)
        scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario name'].nunique().unstack(fill_value=0)

        # Calculate the difference between max and min counts for each functional area
        count_diff = scenario_counts.max() - scenario_counts.min()

        # Sort functional areas by count difference, descending
        inconsistent_areas = count_diff.sort_values(ascending=False)

        st.write("Functional areas with inconsistent scenario counts across environments:")
        for area, diff in inconsistent_areas.items():
            if diff > 0:
                st.write(f"- {area}: Difference of {diff} scenarios")
                st.write(scenario_counts[area])
                st.write("\n")

        # Option to show detailed breakdown
        if st.checkbox("Show detailed scenario count breakdown"):
            st.write(scenario_counts)

    else:
        st.write("Please select at least two environments for comparison.")

    # Debug: Print the number of missing and mismatched scenarios
    st.write(f"Number of truly missing scenarios: {len(missing_scenarios)}")
    st.write(f"Number of scenarios with name differences: {len(mismatched_scenarios)}")

    if missing_scenarios:
        st.write("### Truly Missing Scenarios")
        missing_df = pd.DataFrame(missing_scenarios)
        st.dataframe(missing_df)
    else:
        st.write("No truly missing scenarios found across environments.")

    if mismatched_scenarios:
        st.write("### Scenarios with Name Differences")
        mismatched_df = pd.DataFrame(mismatched_scenarios)
        st.dataframe(mismatched_df)
    else:
        st.write("No scenarios with name differences found across environments.")

def multi_env_compare_main():
    """Render the multi-environment comparison page.

    Asks for the number of environments, shows one multi-file CSV uploader
    per environment, runs ``preprocess_uploaded_file`` on every uploaded file
    and passes the collected results to ``perform_multi_env_analysis``. When
    nothing has been uploaded, a prompt is written instead.
    """
    st.title("Multi-Environment Comparison")

    # How many per-environment uploaders to show
    environment_count = st.number_input(
        "Enter the number of environments", min_value=1, value=1, step=1
    )

    # One uploader per environment; every uploaded file is preprocessed and
    # collected in upload order.
    frames = []
    for env_number in range(1, environment_count + 1):
        files = st.file_uploader(
            f"Upload CSV files for Environment {env_number}",
            type="csv",
            accept_multiple_files=True,
        )
        frames.extend(preprocess_uploaded_file(item) for item in files)

    # Nothing uploaded yet: prompt and stop.
    if not frames:
        st.write("Please upload at least one CSV file.")
        return

    perform_multi_env_analysis(frames)

# Script entry point: render the comparison page when this module is run
# as the main script rather than imported.
if __name__ == "__main__":
    multi_env_compare_main()