# batch-run-csv-analyser / multi_env_compare.py
# Source: Hugging Face Space, commit 5f944ac ("added delay for session state", author: BananaSauce)
import pandas as pd
import streamlit as st
import numpy as np
from pre import preprocess_uploaded_file
from difflib import SequenceMatcher
import time
def similar(a, b, threshold=0.9):
    """Return True when the difflib similarity ratio of *a* and *b* is strictly above *threshold*."""
    ratio = SequenceMatcher(None, a, b).ratio()
    return ratio > threshold
def perform_multi_env_analysis(uploaded_dataframes):
    """Render a Streamlit comparison of scenario results across environments.

    Combines the per-environment dataframes, lets the user filter by
    environment and functional area, then shows per-scenario PASSED/FAILED
    counts, per-environment summary totals, and an analysis of functional
    areas whose distinct-scenario counts differ between environments.

    Parameters
    ----------
    uploaded_dataframes : iterable of pandas.DataFrame
        Preprocessed run data. Assumed to contain at least the columns
        'Environment', 'Functional area', 'Scenario name' and 'Status'
        (with 'PASSED'/'FAILED' values) — TODO confirm against
        pre.preprocess_uploaded_file.
    """
    # Concatenate all dataframes into a single dataframe
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)
    # Get unique environments and functional areas ("All" is a synthetic
    # sentinel appended so the user can select everything at once)
    unique_environments = combined_data['Environment'].unique()
    unique_areas = np.append(combined_data['Functional area'].unique(), "All")
    # Select environments to display (all selected by default)
    selected_environments = st.multiselect("Select environments to display", unique_environments, default=unique_environments)
    # Initialize session state for selected functional areas if it doesn't exist,
    # so the selection survives Streamlit reruns
    if 'selected_functional_areas' not in st.session_state:
        st.session_state.selected_functional_areas = ["All"]
    # Select functional areas to display, using session state as the default
    selected_functional_areas = st.multiselect(
        "Select functional areas",
        unique_areas,
        default=st.session_state.selected_functional_areas,
        key="functional_areas_multiselect"
    )
    # Add a button to confirm the selection; confirming persists the choice
    # into session state and forces a rerun so the new default takes effect
    if st.button("Confirm Functional Area Selection"):
        # Update session state with the new selection
        st.session_state.selected_functional_areas = selected_functional_areas
        st.success("Functional area selection updated!")
        time.sleep(0.5)  # Add a small delay so the success message is visible before rerun
        st.rerun()  # Rerun the app to reflect the changes
    # Expand the "All" sentinel into the full set of functional areas
    if "All" in selected_functional_areas:
        selected_functional_areas = combined_data['Functional area'].unique()
    # Filter data based on selected environments and functional areas
    filtered_data = combined_data[
        (combined_data['Environment'].isin(selected_environments)) &
        (combined_data['Functional area'].isin(selected_functional_areas))
    ]
    # Group data by Environment, Functional area, Scenario name, and Status;
    # unstack pivots Status values into columns of occurrence counts
    grouped_data = filtered_data.groupby(['Environment', 'Functional area', 'Scenario name', 'Status']).size().unstack(fill_value=0)
    # Ensure 'PASSED' and 'FAILED' columns exist even when no row had that status
    if 'PASSED' not in grouped_data.columns:
        grouped_data['PASSED'] = 0
    if 'FAILED' not in grouped_data.columns:
        grouped_data['FAILED'] = 0
    # Calculate total scenarios.
    # NOTE(review): Total only sums PASSED + FAILED; any other Status value
    # (if one can occur) is excluded from the total — confirm intended.
    grouped_data['Total'] = grouped_data['PASSED'] + grouped_data['FAILED']
    # Reset index to make Environment, Functional area, and Scenario name as columns
    grouped_data = grouped_data.reset_index()
    # Reorder columns.
    # NOTE(review): this selection also drops any extra status columns
    # produced by the unstack above.
    grouped_data = grouped_data[['Environment', 'Functional area', 'Scenario name', 'Total', 'PASSED', 'FAILED']]
    # Display the grouped data (the styled dataframe below is currently disabled)
    st.write("### Scenario Counts by Environment and Functional Area")
    # st.dataframe(grouped_data.style.highlight_max(axis=0, subset=['Total', 'PASSED', 'FAILED']))
    # Display summary statistics: per-environment sums of the count columns
    st.write("### Summary Statistics")
    summary = grouped_data.groupby('Environment').agg({
        'Total': 'sum',
        'PASSED': 'sum',
        'FAILED': 'sum'
    }).reset_index()
    # Add column names as the first row.
    # NOTE(review): this duplicates the header inside the data — st.dataframe
    # already renders column headers, so the first row repeats them; confirm
    # this is deliberate (e.g. for copy/paste) rather than a leftover.
    summary_with_headers = pd.concat([pd.DataFrame([summary.columns], columns=summary.columns), summary], ignore_index=True)
    # Display the DataFrame
    st.dataframe(summary_with_headers)
    # Define scenarios_by_env here: environment -> set of its scenario names
    scenarios_by_env = {env: set(grouped_data[grouped_data['Environment'] == env]['Scenario name']) for env in selected_environments}
    # Debug: Print the number of scenarios in each environment
    for env, scenarios in scenarios_by_env.items():
        st.write(f"Number of scenarios in {env}: {len(scenarios)}")
    # NOTE(review): these two lists are never appended to anywhere in this
    # function, so the counts and tables below always report zero/empty —
    # the population logic appears to be missing or was removed.
    missing_scenarios = []
    mismatched_scenarios = []
    # New section for efficient inconsistency analysis
    st.write("### Inconsistent Scenario Count Analysis by Functional Area")
    if len(selected_environments) > 1:
        # Group data by Environment and Functional area, count distinct scenarios
        scenario_counts = filtered_data.groupby(['Environment', 'Functional area'])['Scenario name'].nunique().unstack(fill_value=0)
        # Calculate the difference between max and min counts for each functional area
        count_diff = scenario_counts.max() - scenario_counts.min()
        # Sort functional areas by count difference, descending
        inconsistent_areas = count_diff.sort_values(ascending=False)
        st.write("Functional areas with inconsistent scenario counts across environments:")
        for area, diff in inconsistent_areas.items():
            if diff > 0:
                st.write(f"- {area}: Difference of {diff} scenarios")
                st.write(scenario_counts[area])
                st.write("\n")
        # Option to show detailed breakdown
        if st.checkbox("Show detailed scenario count breakdown"):
            st.write(scenario_counts)
    else:
        st.write("Please select at least two environments for comparison.")
    # Debug: Print the number of missing and mismatched scenarios
    # (always 0 as written — see NOTE(review) above)
    st.write(f"Number of truly missing scenarios: {len(missing_scenarios)}")
    st.write(f"Number of scenarios with name differences: {len(mismatched_scenarios)}")
    if missing_scenarios:
        st.write("### Truly Missing Scenarios")
        missing_df = pd.DataFrame(missing_scenarios)
        st.dataframe(missing_df)
    else:
        st.write("No truly missing scenarios found across environments.")
    if mismatched_scenarios:
        st.write("### Scenarios with Name Differences")
        mismatched_df = pd.DataFrame(mismatched_scenarios)
        st.dataframe(mismatched_df)
    else:
        st.write("No scenarios with name differences found across environments.")
def multi_env_compare_main():
    """Page entry point: collect CSV uploads per environment and run the comparison.

    Asks how many environments to compare, renders one multi-file uploader
    per environment, preprocesses every uploaded CSV, and hands the resulting
    dataframes to perform_multi_env_analysis (or prompts for input if none
    were uploaded).
    """
    st.title("Multi-Environment Comparison")
    # Get the number of environments from the user
    env_total = st.number_input("Enter the number of environments", min_value=1, value=1, step=1)
    # Collect one preprocessed dataframe per uploaded CSV, across all environments
    frames = []
    for index in range(env_total):
        csv_files = st.file_uploader(
            f"Upload CSV files for Environment {index + 1}",
            type="csv",
            accept_multiple_files=True,
        )
        for csv_file in csv_files:
            # Preprocess the uploaded CSV and keep the resulting dataframe
            frames.append(preprocess_uploaded_file(csv_file))
    # Run the analysis only when at least one file was uploaded
    if frames:
        perform_multi_env_analysis(frames)
    else:
        st.write("Please upload at least one CSV file.")
# Allow running this page standalone (e.g. `streamlit run multi_env_compare.py`).
if __name__ == "__main__":
    multi_env_compare_main()