# NOTE(review): removed scraped web-page residue (file listing, commit hashes,
# line-number gutter) that preceded the actual source; it was not Python code.
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
from pre import preprocess_uploaded_file
# Define the function to perform analysis
def perform_analysis(uploaded_dataframes):
    """Render a Streamlit view analyzing test-scenario results.

    Concatenates the preprocessed dataframes from every uploaded
    environment, then lets the user drill into FAILED or PASSED
    scenarios by functional area: headline counts, a per-environment
    (or per-area) detail table, and a bar chart of failed scenarios
    per functional area.

    Args:
        uploaded_dataframes: iterable of pandas DataFrames, one per
            uploaded results file. Expected columns include 'Status',
            'Functional area', 'Scenario Name', 'Time spent',
            'Time spent(m:s)', 'Start datetime', 'End datetime',
            'Environment', 'Error Message' and optionally 'Failed Step'
            -- TODO confirm against preprocess_uploaded_file.

    Returns:
        None. All output is rendered via Streamlit.
    """
    # Merge every environment's results into one frame for global filtering.
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    # Split by outcome; status values are upper-case in the source data.
    failed_scenarios = combined_data[combined_data['Status'] == 'FAILED']
    passed_scenarios = combined_data[combined_data['Status'] == 'PASSED']

    # Headline counts.
    st.markdown(f"Failing scenarios Count: {len(failed_scenarios)}")
    st.markdown(f"Passing scenarios Count: {len(passed_scenarios)}")

    # Let the user pick which outcome to inspect; the radio offers
    # exactly these two options, so no other value can occur.
    selected_status = st.radio("Select a status", ['Failed', 'Passed'])
    if selected_status == 'Failed':
        unique_areas = np.append(failed_scenarios['Functional area'].unique(), "All")
        selected_scenarios = failed_scenarios
    else:  # 'Passed'
        unique_areas = np.append(passed_scenarios['Functional area'].unique(), "All")
        selected_scenarios = passed_scenarios

    st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")

    # "All" is a sentinel entry that disables area filtering.
    selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
    if not selected_functional_areas:
        # Guard clause: nothing selected, nothing to show.
        st.error("Please select at least one functional area.")
        return

    if "All" in selected_functional_areas:
        filtered_scenarios = selected_scenarios
    else:
        filtered_scenarios = selected_scenarios[
            selected_scenarios['Functional area'].isin(selected_functional_areas)
        ]

    st.write(f"Number of filtered scenarios: {len(filtered_scenarios)}")

    # Per-area timing summary. NOTE(review): this is computed but its
    # table rendering was disabled by a previous change; kept so the
    # display can be re-enabled without re-deriving the logic.
    average_time_spent_seconds = (
        filtered_scenarios.groupby('Functional area')['Time spent'].mean().reset_index()
    )
    # Render seconds as MM:SS. NOTE: %M:%S wraps for durations >= 1 hour.
    average_time_spent_seconds['Time spent'] = pd.to_datetime(
        average_time_spent_seconds['Time spent'], unit='s'
    ).dt.strftime('%M:%S')
    start_datetime_group = (
        filtered_scenarios.groupby('Functional area')['Start datetime'].min().reset_index()
    )
    end_datetime_group = (
        filtered_scenarios.groupby('Functional area')['End datetime'].max().reset_index()
    )
    # Wall-clock span per area: last end minus first start.
    total_time_spent_seconds = (
        end_datetime_group['End datetime'] - start_datetime_group['Start datetime']
    ).dt.total_seconds()
    total_time_spent_seconds = pd.to_datetime(
        total_time_spent_seconds, unit='s'
    ).dt.strftime('%M:%S')
    average_time_spent_seconds = average_time_spent_seconds.merge(
        start_datetime_group, on='Functional area'
    )
    average_time_spent_seconds = average_time_spent_seconds.merge(
        end_datetime_group, on='Functional area'
    )
    average_time_spent_seconds['Total Time Spent'] = total_time_spent_seconds
    average_time_spent_seconds = average_time_spent_seconds.sort_values(by='Start datetime')

    # Build the detail table. Failed scenarios are grouped by environment,
    # passed scenarios by functional area.
    if selected_status == 'Failed':
        failed_columns = ['Functional area', 'Scenario Name', 'Error Message',
                          'Time spent(m:s)', 'Start datetime']
        # 'Failed Step' is optional in the preprocessed data.
        if 'Failed Step' in filtered_scenarios.columns:
            failed_columns.insert(3, 'Failed Step')
        grouped_filtered_scenarios = (
            filtered_scenarios.groupby('Environment')[failed_columns]
            .apply(lambda x: x.reset_index(drop=True))
        )
    else:  # 'Passed'
        grouped_filtered_scenarios = (
            filtered_scenarios.groupby('Functional area')[['Scenario Name', 'Time spent(m:s)']]
            .apply(lambda x: x.reset_index(drop=True))
        )

    grouped_filtered_scenarios.reset_index(inplace=True)
    # groupby/apply can leave a synthetic 'level_1' index column; drop it.
    if 'level_1' in grouped_filtered_scenarios.columns:
        grouped_filtered_scenarios.drop(columns=['level_1'], inplace=True)
    # 1-based row numbering reads better in the UI.
    grouped_filtered_scenarios.index = grouped_filtered_scenarios.index + 1
    st.dataframe(grouped_filtered_scenarios)

    if selected_status == 'Passed':
        return

    # Bar graph: number of failed scenarios per functional area.
    st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
    error_counts = grouped_filtered_scenarios['Functional area'].value_counts()
    if error_counts.empty:
        st.info(f"No '{selected_status}' scenarios found to display in the graph.")
        return

    plt.figure(figsize=(12, 10))
    bars = plt.bar(error_counts.index, error_counts.values)
    plt.xlabel('Functional Area')
    plt.ylabel('Number of Failures')
    plt.title(f"Number of '{selected_status}' scenarios by Functional Area")
    plt.xticks(rotation=45, ha='right', fontsize=10)
    # Integer y-axis with one extra unit of headroom, tick interval 1.
    y_max = max(error_counts.values) + 1
    plt.ylim(0, y_max)
    plt.yticks(range(0, y_max, 1), fontsize=10)
    # Annotate each bar with its count.
    for bar in bars:
        height = bar.get_height()
        plt.text(bar.get_x() + bar.get_width() / 2, height, str(int(height)),
                 ha='center', va='bottom')
    plt.tight_layout()
    st.pyplot(plt)
def multiple_main():
    """Streamlit entry point: upload XLSX result files and analyze them.

    Accepts any number of XLSX exports (typically one per environment),
    runs each through preprocess_uploaded_file, and hands the resulting
    dataframes to perform_analysis. Shows a prompt if nothing was uploaded.

    Returns:
        None. All output is rendered via Streamlit.
    """
    uploaded_files = st.file_uploader(
        "Upload multiple XLSX files from different environments",
        type=["xlsx"],
        accept_multiple_files=True,
    )
    # Preprocess each uploaded file into a dataframe.
    uploaded_dataframes = [preprocess_uploaded_file(f) for f in uploaded_files]

    if uploaded_dataframes:
        perform_analysis(uploaded_dataframes)
    else:
        st.write("Please upload at least one file.")