Spaces:

BananaSauce
/

batch-run-csv-analyser

Sleeping

File size: 6,938 Bytes

a8d73d1
a95b240
30590ff
9ca7e46
588dd53
30590ff
 
a95b240
30590ff
 
3ff5801
30590ff
a8d73d1
3ff5801
 
 
 
 
 
a8d73d1
 
 
30590ff
a8d73d1
30590ff
a8d73d1
 
 
 
30590ff
a8d73d1
 
 
 
 
3ff5801
 
a8d73d1
3ff5801
 
a8d73d1
d2ed71e
 
 
 
 
 
 
 
 
 
 
30590ff
a8d73d1
 
30590ff
a8d73d1
30590ff
 
a8d73d1
30590ff
a8d73d1
30590ff
 
 
 
 
 
 
a8d73d1
30590ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a8d73d1
30590ff
 
 
 
 
 
 
 
d2ed71e
 
 
 
30590ff
 
 
 
 
 
d2ed71e
 
 
30590ff
 
 
 
 
 
d2ed71e
 
 
30590ff
 
 
 
 
3ff5801
30590ff
 
3ff5801
 
30590ff
 
 
 
 
3ff5801
30590ff
 
3ff5801
30590ff

import pandas as pd 
import streamlit as st
import plotly.graph_objects as go
from pre import preprocess_uploaded_file

def convert_df(df):
    """Serialise *df* to CSV text (index column omitted) and return it as UTF-8 bytes."""
    csv_text = df.to_csv(index=False)
    return csv_text.encode('utf-8')

# Columns each preprocessed file must provide (after error-column
# normalisation) for the comparison below to work.
_REQUIRED_COLUMNS = (
    'Functional area',
    'Scenario Name',
    'Status',
    'Start datetime',
    'Time spent',
    'Error Message',
)


def _normalise_error_columns(df):
    """Return *df* with every 'Error message...' column renamed to 'Error Message...'."""
    renames = {
        col: col.replace('Error message', 'Error Message')
        for col in df.columns
        if isinstance(col, str) and col.startswith('Error message')
    }
    return df.rename(columns=renames) if renames else df


def _format_mm_ss(seconds):
    """Format a Series of durations given in seconds as 'MM:SS' strings.

    Minutes are total minutes and are not wrapped at 60, so 3725 seconds
    becomes '62:05' rather than '02:05'. Missing values stay missing.
    """
    # NOTE(review): assumes 'Time spent' is numeric seconds, as implied by the
    # previous to_datetime(..., unit='s') call -- confirm against the preprocessor.
    numeric = pd.to_numeric(seconds)

    def _fmt(value):
        minutes, secs = divmod(int(value), 60)
        return f"{minutes:02d}:{secs:02d}"

    return numeric.map(_fmt, na_action='ignore')


def _render_summary_chart(status_counts):
    """Draw a bar chart with one bar per entry of the *status_counts* mapping."""
    status_df = pd.DataFrame.from_dict(status_counts, orient='index', columns=['Count'])

    fig = go.Figure(data=[
        go.Bar(
            x=status_df.index,
            y=status_df['Count'],
            text=status_df['Count'],
            textposition='outside',
            textfont=dict(size=14),
            marker_color=['#1f77b4', '#ff7f0e', '#2ca02c'],  # One colour per bar
            width=0.6
        )
    ])

    # Leave 10% headroom for the outside labels; clamp to at least 1 so the
    # axis does not collapse to [0, 0] when every count is zero.
    y_top = max(status_df['Count'].max(), 1) * 1.1

    fig.update_layout(
        yaxis=dict(title='Count', range=[0, y_top]),
        xaxis_title="Status",
        hoverlabel=dict(bgcolor="white", font_size=16),
        margin=dict(l=20, r=20, t=40, b=20),
        uniformtext_minsize=8,
        uniformtext_mode='hide'
    )

    # Outline the bars so that very short bars remain visible.
    fig.update_traces(marker_line_width=1, marker_line_color="black", selector=dict(type="bar"))
    fig.update_traces(hovertemplate="<b>%{x}</b><br>Count: %{y}<extra></extra>")

    st.plotly_chart(fig, use_container_width=True)


def _render_scenario_tab(scenarios, columns, count_label, button_label, file_name):
    """Show the row count, the selected columns and a CSV download button for one tab."""
    st.write(f"{count_label}: {len(scenarios)}")
    view = scenarios[columns]
    st.dataframe(view)
    st.download_button(button_label, data=convert_df(view), file_name=file_name, mime='text/csv')


def double_main(uploaded_file1, uploaded_file2):
    """Compare two uploaded result files and render the differences in Streamlit.

    Both uploads are preprocessed, ordered by their earliest 'Start datetime',
    and inner-joined on 'Functional area' and 'Scenario Name'. The page then
    shows a bar chart and three tabs: scenarios that failed in both files,
    scenarios that went PASSED -> FAILED, and scenarios that went
    FAILED -> PASSED, each with a CSV download button.

    Args:
        uploaded_file1: First uploaded file, or None.
        uploaded_file2: Second uploaded file, or None.

    Returns:
        None. A warning is shown if either upload is missing, and an error is
        shown if a preprocessed file lacks a required column.
    """
    if uploaded_file1 is None or uploaded_file2 is None:
        st.warning("Please upload both files for comparison.")
        return

    # Preprocess the uploaded files (CSV or XLSX). Error-column casing is
    # normalised before merging: if the two files disagree on casing, the merge
    # adds no suffixes and the '_old'/'_new' lookups below would fail.
    with st.spinner("Processing the first file..."):
        data_1 = _normalise_error_columns(preprocess_uploaded_file(uploaded_file1))

    with st.spinner("Processing the second file..."):
        data_2 = _normalise_error_columns(preprocess_uploaded_file(uploaded_file2))

    # Report missing columns instead of crashing with a KeyError traceback.
    for label, frame in (("first", data_1), ("second", data_2)):
        missing = [col for col in _REQUIRED_COLUMNS if col not in frame.columns]
        if missing:
            st.error(f"The {label} file is missing required column(s): {', '.join(missing)}")
            return

    # Determine which file is older and newer
    if data_1['Start datetime'].min() < data_2['Start datetime'].min():
        older_df, newer_df = data_1, data_2
    else:
        older_df, newer_df = data_2, data_1

    # Display start datetime of each file
    older_datetime = older_df['Start datetime'].min()
    newer_datetime = newer_df['Start datetime'].min()
    st.write(f"The older file started on {older_datetime}")
    st.write(f"The newer file started on {newer_datetime}")

    # Convert time columns to MM:SS. assign() works on copies, so the frames
    # returned by the preprocessor are not modified.
    older_df = older_df.assign(**{'Time spent': _format_mm_ss(older_df['Time spent'])})
    newer_df = newer_df.assign(**{'Time spent': _format_mm_ss(newer_df['Time spent'])})

    # Merge on functional area + scenario name; only scenarios present in both
    # files are compared (inner join).
    merged_df = pd.merge(older_df, newer_df, on=['Functional area', 'Scenario Name'], suffixes=('_old', '_new'))

    # Filter scenarios by status transition
    old_failed = merged_df['Status_old'] == 'FAILED'
    old_passed = merged_df['Status_old'] == 'PASSED'
    new_failed = merged_df['Status_new'] == 'FAILED'
    new_passed = merged_df['Status_new'] == 'PASSED'

    fail_to_fail_scenarios = merged_df[old_failed & new_failed]
    pass_to_fail_scenarios = merged_df[old_passed & new_failed]
    fail_to_pass_scenarios = merged_df[old_failed & new_passed]

    # Display summary chart
    st.subheader("Summary of Scenario Status Changes")
    _render_summary_chart({
        'Consistent Failures': len(fail_to_fail_scenarios),
        'New Failures': len(pass_to_fail_scenarios),
        'New Passes': len(fail_to_pass_scenarios)
    })

    # Use tabs to display data
    tab1, tab2, tab3 = st.tabs(["Consistent Failures", "New Failures", "New Passes"])
    has_step_old = 'Failed Step_old' in merged_df.columns
    has_step_new = 'Failed Step_new' in merged_df.columns

    with tab1:
        # Failed-step columns are shown only when both sides provide them.
        if has_step_old and has_step_new:
            columns_to_display1 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Failed Step_old', 'Error Message_new', 'Failed Step_new']
        else:
            columns_to_display1 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Error Message_new']
        _render_scenario_tab(
            fail_to_fail_scenarios,
            columns_to_display1,
            "Failing scenarios Count",
            "Download Consistent Failures as CSV",
            'consistent_failures.csv',
        )

    with tab2:
        if has_step_new:
            columns_to_display2 = ['Functional area', 'Scenario Name', 'Error Message_new', 'Failed Step_new', 'Time spent_old', 'Time spent_new']
        else:
            columns_to_display2 = ['Functional area', 'Scenario Name', 'Error Message_new', 'Time spent_old', 'Time spent_new']
        _render_scenario_tab(
            pass_to_fail_scenarios,
            columns_to_display2,
            "Failing scenarios Count",
            "Download New Failures as CSV",
            'new_failures.csv',
        )

    with tab3:
        if has_step_old:
            columns_to_display3 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Failed Step_old', 'Time spent_old', 'Time spent_new']
        else:
            columns_to_display3 = ['Functional area', 'Scenario Name', 'Error Message_old', 'Time spent_old', 'Time spent_new']
        _render_scenario_tab(
            fail_to_pass_scenarios,
            columns_to_display3,
            "Passing scenarios Count",
            "Download New Passes as CSV",
            'new_passes.csv',
        )

def main():
    """Render the page: title, intro text and two uploaders; run the comparison once both files are uploaded."""
    st.title("File Comparison Tool")

    st.markdown("""
    This tool compares two files and highlights the differences in the scenarios.
    Please upload the older and newer files below.
    """)

    left_col, right_col = st.columns(2)

    with left_col:
        first_upload = st.file_uploader("Upload the older file", type=['csv', 'xlsx'], key='uploader1')

    with right_col:
        second_upload = st.file_uploader("Upload the newer file", type=['csv', 'xlsx'], key='uploader2')

    # Nothing to compare until both uploaders hold a file.
    if first_upload is None or second_upload is None:
        return

    with st.spinner('Processing...'):
        double_main(first_upload, second_upload)
    st.success('Comparison Complete!')

# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()