batch-run-csv-analyser / multiple.py
BananaSauce's picture
added auto loader that doesn't work
295a9df
raw
history blame
27.6 kB
import pandas as pd
import streamlit as st
import matplotlib.pyplot as plt
import numpy as np
from pre import preprocess_uploaded_file
from jira_integration import (
render_jira_login,
get_current_sprint,
get_regression_board,
get_sprint_issues,
calculate_points,
create_regression_task,
generate_task_content,
calculate_story_points,
get_project_metadata,
get_field_dependencies,
get_dependent_field_value,
get_boards,
get_functional_area_values
)
from datetime import datetime, timedelta
import plotly.express as px
import plotly.graph_objects as go
import os
from dotenv import load_dotenv
import json
import logging
# When this file is executed as a script, inject a stylesheet that tightens the
# padding and font sizes of the metric widgets so their text is not clipped
# with an ellipsis.
if __name__ == "__main__":
    _metric_css = """
<style>
[data-testid="metric-container"] {
padding: 0.25rem 0.5rem !important;
min-width: 80px !important;
overflow: visible !important;
}
[data-testid="metric-container"] div {
white-space: nowrap !important;
text-overflow: clip !important;
}
[data-testid="metric-value"] {
font-size: 0.8rem !important;
}
[data-testid="metric-label"] {
font-size: 0.6rem !important;
}
</style>
"""
    st.markdown(_metric_css, unsafe_allow_html=True)
# Read variables from a .env file (if present) before looking up configuration.
load_dotenv()
JIRA_SERVER = os.environ.get("JIRA_SERVER")

# Seed st.session_state with a default for each key used by this page.
# Keys that already hold a value are left untouched.
_SESSION_DEFAULTS = {
    'filtered_scenarios_df': None,
    'task_content': None,
    'total_story_points': 0,
    'completed_points': 0,
    'current_page': "analysis",
    'task_df': None,
    'task_environment': None,
    'last_task_key': None,
    'last_task_url': None,
    'show_success': False,
}
for _state_key, _state_default in _SESSION_DEFAULTS.items():
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _state_default

# Logger for this module, registered under the name "multiple".
logger = logging.getLogger("multiple")
# Function to capture button clicks with manual callback
def handle_task_button_click(summary, description, formatted_env, filtered_df):
    """Create a Jira regression task and render the outcome on the page.

    Calls ``create_regression_task`` for project ``"RS"``. When a task is
    returned, its key and browse URL are stored in ``st.session_state``
    (``last_task_key``, ``last_task_url``, ``show_success``), ``task_content``
    is cleared and a success panel with a link to the task is rendered.
    Otherwise an error message is shown.

    Args:
        summary: Task summary forwarded to ``create_regression_task``.
        description: Task description forwarded to ``create_regression_task``.
        formatted_env: Forwarded as the ``environment`` argument.
        filtered_df: DataFrame forwarded as ``filtered_scenarios_df``.

    Returns:
        True when a task was created, False when creation failed or raised.
        Exceptions are caught, logged and displayed rather than propagated.
    """
    import html
    import traceback

    logger.info("=== Task button clicked - Starting callback function ===")
    try:
        logger.info("Summary: %s", summary)
        logger.info("Description length: %s", len(description))
        logger.info("Environment: %s", formatted_env)
        logger.info("DataFrame shape: %s", filtered_df.shape)

        # The spinner only needs to cover the remote call itself.
        with st.spinner("Creating task in Jira..."):
            logger.info("About to call create_regression_task function")
            task = create_regression_task(
                project_key="RS",
                summary=summary,
                description=description,
                environment=formatted_env,
                filtered_scenarios_df=filtered_df,
            )
        logger.info("create_regression_task returned: %s", task)

        if not task:
            logger.error("Task creation failed (returned None)")
            st.error("❌ Task creation failed. Please check the error messages and try again.")
            return False

        logger.info("Task created successfully: %s", task.key)

        # Build the browse URL once. Tolerate an unset JIRA_SERVER and a
        # trailing slash so the link never becomes "None/browse/..." or
        # "...//browse/...".
        base_url = (JIRA_SERVER or "").rstrip("/")
        task_url = f"{base_url}/browse/{task.key}"

        # Store task information in session state
        st.session_state.last_task_key = task.key
        st.session_state.last_task_url = task_url
        st.session_state.show_success = True

        # The panel is rendered with unsafe_allow_html, so escape every
        # interpolated value before it is placed in the markup.
        safe_key = html.escape(str(task.key))
        safe_url = html.escape(task_url)

        st.success("✅ Task created successfully!")
        st.markdown(
            f"""
<div style='padding: 10px; border-radius: 5px; border: 1px solid #90EE90; margin: 10px 0;'>
<h3 style='margin: 0; color: #90EE90;'>Task Details</h3>
<p style='margin: 10px 0;'>Task Key: {safe_key}</p>
<a href='{safe_url}' target='_blank'
style='background-color: #90EE90; color: black; padding: 5px 10px;
border-radius: 3px; text-decoration: none; display: inline-block;'>
View Task in Jira
</a>
</div>
""",
            unsafe_allow_html=True
        )

        # Clear task content
        st.session_state.task_content = None

        # Offer to start over.
        # NOTE(review): this button is only rendered in the run triggered by
        # the caller's own button click, so its click branch may never be
        # reached on the following rerun - confirm the intended flow.
        if st.button("Create Another Task", key="create_another"):
            st.session_state.task_content = None
            st.session_state.last_task_key = None
            st.session_state.last_task_url = None
            st.session_state.show_success = False
            st.rerun()

        logger.info("Task creation process completed successfully")
        return True
    except Exception as e:
        # logger.exception already records the full traceback, so it is not
        # logged a second time; it is still shown on the page below.
        logger.exception("Error in handle_task_button_click: %s", e)
        st.error(f"❌ Error creating task: {str(e)}")
        st.error(traceback.format_exc())
        return False
    finally:
        logger.info("=== Ending handle_task_button_click function ===")
def _render_status_bar_chart(scenarios, selected_status):
    """Draw a bar chart of the number of scenarios per functional area."""
    st.write(f"### Bar graph showing number of '{selected_status}' scenarios in each functional area:")
    area_counts = scenarios['Functional area'].value_counts()

    # Only create the graph if there is something to display
    if area_counts.empty:
        st.info(f"No '{selected_status}' scenarios found to display in the graph.")
        return

    # Use an explicit Figure and close it afterwards, so a new figure is not
    # left open on every rerun of the script.
    fig, ax = plt.subplots(figsize=(12, 10))
    try:
        bars = ax.bar(area_counts.index, area_counts.values)
        ax.set_xlabel('Functional Area')
        ax.set_ylabel('Number of Failures')
        ax.set_title(f"Number of '{selected_status}' scenarios by Functional Area")
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right', fontsize=10)

        # One y tick per unit, with one unit of headroom above the tallest bar
        y_max = int(area_counts.max()) + 1
        ax.set_ylim(0, y_max)
        ax.set_yticks(range(y_max))
        plt.setp(ax.get_yticklabels(), fontsize=10)

        # Write each count just above its bar
        for bar in bars:
            height = bar.get_height()
            ax.text(
                bar.get_x() + bar.get_width() / 2,
                height,
                str(int(height)),
                ha='center',
                va='bottom'
            )

        fig.tight_layout()
        st.pyplot(fig)
    finally:
        plt.close(fig)


# Define the function to perform analysis
def perform_analysis(uploaded_dataframes):
    """Render the pass/fail analysis for the uploaded result tables.

    Concatenates the given DataFrames, shows the FAILED and PASSED counts,
    lets the user choose a status and a set of functional areas, and displays
    the matching scenarios as a table. For the 'Failed' status a bar chart of
    failures per functional area is drawn, and a "Log Jira Task" button is
    enabled when a Jira client is present in the session state and exactly
    one functional area (not "All") is selected.

    Args:
        uploaded_dataframes: Iterable of DataFrames to concatenate. The code
            reads the columns 'Status', 'Functional area', 'Environment',
            'Scenario Name' and 'Time spent(m:s)', plus 'Error Message',
            'Start datetime' and (optionally) 'Failed Step' for failures.

    Returns:
        None. All output is rendered through Streamlit.
    """
    import html

    # Concatenate all dataframes into a single dataframe
    combined_data = pd.concat(uploaded_dataframes, ignore_index=True)

    failed_scenarios = combined_data[combined_data['Status'] == 'FAILED']
    passed_scenarios = combined_data[combined_data['Status'] == 'PASSED']

    st.markdown(f"Failing scenarios Count: {len(failed_scenarios)}")
    st.markdown(f"Passing scenarios Count: {len(passed_scenarios)}")

    # Use radio buttons for selecting status
    selected_status = st.radio("Select a status", ['Failed', 'Passed'])
    scenarios_by_status = {'Failed': failed_scenarios, 'Passed': passed_scenarios}
    selected_scenarios = scenarios_by_status.get(selected_status)
    if selected_scenarios is None:
        return

    unique_areas = np.append(selected_scenarios['Functional area'].unique(), "All")
    st.markdown(f"### Scenarios with status '{selected_status}' grouped by functional area:")

    # Select a range of functional areas to filter scenarios
    selected_functional_areas = st.multiselect("Select functional areas", unique_areas, ["All"])
    if not selected_functional_areas:
        st.error("Please select at least one functional area.")
        return

    if "All" in selected_functional_areas:
        filtered_scenarios = selected_scenarios
    else:
        area_mask = selected_scenarios['Functional area'].isin(selected_functional_areas)
        filtered_scenarios = selected_scenarios[area_mask]

    # Display count of filtered scenarios
    st.write(f"Number of filtered scenarios: {len(filtered_scenarios)}")

    # Columns of the failure table, in display order. The same list is the set
    # of columns required before a Jira task can be logged.
    failed_columns = [
        'Environment',
        'Functional area',
        'Scenario Name',
        'Error Message',
        'Failed Step',
        'Time spent(m:s)',
        'Start datetime'
    ]
    if selected_status == 'Failed':
        columns_to_keep = list(failed_columns)
        # 'Failed Step' is optional in the input data
        if 'Failed Step' not in filtered_scenarios.columns:
            columns_to_keep.remove('Failed Step')
    else:
        columns_to_keep = [
            'Environment',
            'Functional area',
            'Scenario Name',
            'Time spent(m:s)'
        ]

    grouped_filtered_scenarios = filtered_scenarios[columns_to_keep].copy()
    # Reset the index to start from 1
    grouped_filtered_scenarios.index = range(1, len(grouped_filtered_scenarios) + 1)
    st.dataframe(grouped_filtered_scenarios)

    # Task creation section: the button is always shown (with a tooltip) and
    # only enabled when every condition is met.
    can_create_task = bool(
        st.session_state.get('jira_client')
        and selected_status == 'Failed'
        and len(selected_functional_areas) == 1
        and "All" not in selected_functional_areas
    )

    _, centre_col, _ = st.columns([1, 2, 1])
    with centre_col:
        # .get() keeps this working for sessions where the success keys were
        # never initialised.
        if st.session_state.get('show_success') and st.session_state.get('last_task_key'):
            # The panel is rendered with unsafe_allow_html, so escape the
            # stored values before placing them in the markup.
            safe_key = html.escape(str(st.session_state.get('last_task_key')))
            safe_url = html.escape(str(st.session_state.get('last_task_url')))
            st.success("✅ Task created successfully!")
            st.markdown(
                f"""
<div style='padding: 10px; border-radius: 5px; border: 1px solid #90EE90; margin: 10px 0;'>
<h3 style='margin: 0; color: #90EE90;'>Task Details</h3>
<p style='margin: 10px 0;'>Task Key: {safe_key}</p>
<a href='{safe_url}' target='_blank'
style='background-color: #90EE90; color: black; padding: 5px 10px;
border-radius: 3px; text-decoration: none; display: inline-block;'>
View Task in Jira
</a>
</div>
""",
                unsafe_allow_html=True
            )
            if st.button("Create Another Task", key="create_another", use_container_width=True):
                st.session_state.task_content = None
                st.session_state.last_task_key = None
                st.session_state.last_task_url = None
                st.session_state.show_success = False
                st.rerun()
        else:
            help_text = (
                "Requires: Jira login, 'Failed' status selected, "
                "and exactly one functional area (not 'All')."
            )
            log_clicked = st.button(
                "📝 Log Jira Task",
                disabled=not can_create_task,
                use_container_width=True,
                help=help_text
            )
            if log_clicked and can_create_task:
                environment = filtered_scenarios['Environment'].iloc[0]
                task_df = grouped_filtered_scenarios.copy()
                missing_columns = [name for name in failed_columns if name not in task_df.columns]
                if missing_columns:
                    st.error(f"Missing required columns: {', '.join(missing_columns)}")
                    st.error("Please ensure your data includes all required columns")
                    return
                summary, description = generate_task_content(task_df)
                if summary and description:
                    handle_task_button_click(summary, description, environment, task_df)

    # The bar chart is shown for every status except 'Passed'
    if selected_status != 'Passed':
        _render_status_bar_chart(grouped_filtered_scenarios, selected_status)
def _fetch_sprint_points():
    """Fetch story-point totals for the current sprint of the "RS" board.

    Returns:
        A dict with the keys 'sprint_name', 'total_points',
        'completed_points' and 'in_progress_points', or None when the board,
        the sprint or its issues could not be obtained.
    """
    board = get_regression_board("RS")
    if not board:
        return None
    sprint = get_current_sprint(board['id'])
    if not sprint:
        return None
    issues = get_sprint_issues(board['id'], sprint.id, board['estimation_field'])
    if not issues:
        return None
    _, total_points, completed_points, in_progress_points = calculate_points(
        issues, board['estimation_field']
    )
    return {
        'sprint_name': sprint.name,
        'total_points': total_points,
        'completed_points': completed_points,
        'in_progress_points': in_progress_points
    }


def display_story_points_stats(force_refresh=False):
    """Display story points statistics from current sprint with caching.

    The fetched figures are kept in ``st.session_state.sprint_data_cache`` and
    refetched when ``force_refresh`` is true, when nothing is cached yet, or
    when the last fetch is more than five minutes old. Nothing is rendered
    when no Jira client is present in the session state or when a required
    fetch returns nothing.

    Args:
        force_refresh: When true, always refetch and show a spinner while
            doing so; otherwise any refetch happens without a spinner.

    Returns:
        None. Output is rendered through Streamlit.
    """
    # .get() avoids an AttributeError when the key was never set
    if not st.session_state.get('jira_client'):
        return

    # Initialize cache
    if 'sprint_data_cache' not in st.session_state:
        st.session_state.sprint_data_cache = None
    if 'last_sprint_fetch' not in st.session_state:
        st.session_state.last_sprint_fetch = None

    now = datetime.now()
    cache_expiry = 300  # 5 minutes
    last_fetch = st.session_state.last_sprint_fetch
    refresh_needed = (
        force_refresh
        or st.session_state.sprint_data_cache is None
        or (last_fetch and (now - last_fetch).total_seconds() > cache_expiry)
    )

    if refresh_needed:
        # The fetch itself is identical in both modes; only an explicit
        # refresh shows a spinner.
        if force_refresh:
            with st.spinner("Fetching sprint data..."):
                fetched = _fetch_sprint_points()
        else:
            fetched = _fetch_sprint_points()
        if fetched is None:
            return
        st.session_state.sprint_data_cache = fetched
        st.session_state.last_sprint_fetch = now

    # Display cached sprint data
    sprint_data = st.session_state.sprint_data_cache
    if not sprint_data:
        return

    total_points = sprint_data['total_points']
    completed_points = sprint_data['completed_points']
    completion_ratio = completed_points / total_points if total_points > 0 else 0
    completion_pct = completion_ratio * 100

    # Use markdown with custom HTML for a compact, non-truncating display
    metrics_html = f"""
<div style="display: grid; grid-template-columns: repeat(4, 1fr); gap: 10px; text-align: center; font-size: 0.8rem;">
<div>
<div style="color: #888;">Total</div>
<div style="font-size: 1rem; font-weight: bold;">{total_points:.1f}</div>
</div>
<div>
<div style="color: #888;">Done</div>
<div style="font-size: 1rem; font-weight: bold;">{completed_points:.1f}</div>
</div>
<div>
<div style="color: #888;">In Progress</div>
<div style="font-size: 1rem; font-weight: bold;">{sprint_data['in_progress_points']:.1f}</div>
</div>
<div>
<div style="color: #888;">Complete</div>
<div style="font-size: 1rem; font-weight: bold;">{completion_pct:.1f}%</div>
</div>
</div>
"""
    st.markdown(metrics_html, unsafe_allow_html=True)

    # Clamp to [0, 1] so a completed total above the sprint total cannot push
    # the progress bar out of its accepted range.
    st.progress(float(min(max(completion_ratio, 0.0), 1.0)))
def show_task_creation_section(filtered_df, environment):
    """Display the task creation section with detailed functional area mapping information.

    When ``filtered_df`` has a non-empty "Functional area" column, the first
    unique value is shown together with the allowed parent/child values taken
    from the "RS" project metadata and the result of mapping that value.
    A "Create Task" button then generates the task content and hands it to
    ``handle_task_button_click``.

    Args:
        filtered_df: DataFrame of scenarios to log; also passed to
            ``generate_task_content`` and to the task handler.
        environment: Environment value forwarded to the task handler.

    Returns:
        None. Output is rendered through Streamlit.
    """
    if "Functional area" in filtered_df.columns and len(filtered_df) > 0:
        functional_areas = filtered_df["Functional area"].unique().tolist()
        functional_area = functional_areas[0] if functional_areas else None
        logger.debug(f"Found functional areas: {functional_areas}")

        # Get project metadata to access allowed values
        metadata = get_project_metadata("RS")
        if metadata:
            # Create expandable section for field structure
            with st.expander("Functional Area Field Structure", expanded=False):
                func_field = metadata['all_fields'].get('customfield_13100', {})
                if func_field and 'allowedValues' in func_field:
                    st.write("Available parent-child mappings:")
                    for parent_option in func_field['allowedValues']:
                        if not isinstance(parent_option, dict):
                            continue
                        parent_value = parent_option.get('value', 'Unknown')
                        st.markdown(f"**Parent: {parent_value}**")
                        if 'cascadingOptions' in parent_option:
                            child_values = [
                                child_option.get('value')
                                for child_option in parent_option['cascadingOptions']
                                if child_option.get('value')
                            ]
                            st.write("Child options:")
                            for child_value in sorted(child_values):
                                st.write(f" • {child_value}")
                        st.write("")

            # Display current functional area and mapping attempt
            st.subheader("Functional Area Mapping")
            col1, col2 = st.columns(2)
            with col1:
                st.markdown("**Input Functional Area:**")
                st.info(functional_area)
                st.markdown("**Split Parts:**")
                parts = functional_area.split(' - ')
                for i, part in enumerate(parts, 1):
                    st.write(f"{i}. {part}")
            with col2:
                # map_functional_area is not among this module's top-level
                # imports, so it is resolved here.
                # NOTE(review): presumably it lives in jira_integration - confirm.
                try:
                    from jira_integration import map_functional_area
                except ImportError:
                    map_functional_area = None

                if map_functional_area is None:
                    parent, child = None, None
                    st.warning("Functional area mapping is not available.")
                else:
                    # Try to map the functional area
                    parent, child = map_functional_area(functional_area, metadata)
                    st.markdown("**Mapped Values:**")
                    st.success(f"Parent: {parent}")
                    st.success(f"Child: {child}")

                # Show normalized form
                st.markdown("**Normalized Form:**")
                norm_area = functional_area.lower().replace(' ', '-')
                st.info(norm_area)

            # Add warning if using default mapping
            if parent == "R&I" and child == "Data Exchange" and functional_area.lower() != "data exchange":
                st.warning("""
⚠️ Using default mapping (R&I/Data Exchange). This might not be the best match.
Please check the 'Functional Area Field Structure' above for available values.
""")
    else:
        logger.warning("No functional area found in data")
        st.warning("No functional area information found in the data")

    # Create task button
    if st.button("Create Task", key="create_task_button"):
        # handle_task_button_click takes (summary, description, environment,
        # dataframe); build the first two from the data, as perform_analysis does.
        summary, description = generate_task_content(filtered_df)
        if summary and description:
            handle_task_button_click(summary, description, environment, filtered_df)
def multiple_main():
    """Render the "Multiple File Analysis" page.

    Seeds the session-state keys this page uses, then either shows the task
    creation section (when ``current_page`` is "create_task" and a task
    DataFrame is stored) or the upload/analysis view: uploaded CSV/Excel
    files are preprocessed, kept in ``st.session_state.uploaded_data`` and
    passed to ``perform_analysis``.

    Returns:
        None. Output is rendered through Streamlit.
    """
    # Every key read below, or by the helpers called from here, gets a default.
    # This function does not rely on the module-level defaults having been
    # applied to the current session.
    session_defaults = {
        'current_page': "upload",
        'task_df': None,
        'task_environment': None,
        'task_content': None,
        'selected_files': [],
        'uploaded_files': [],
        'filtered_scenarios_df': None,
        'sprint_data_initialized': False,
        'uploaded_data': None,
        'last_refresh': None,
        'last_task_key': None,
        'last_task_url': None,
        'show_success': False,
    }
    for state_key, state_default in session_defaults.items():
        if state_key not in st.session_state:
            st.session_state[state_key] = state_default

    st.title("Multiple File Analysis")

    # Check if we're in task creation mode
    if st.session_state.current_page == "create_task" and st.session_state.task_df is not None:
        # Add a back button
        if st.button("⬅️ Back to Analysis"):
            st.session_state.current_page = "analysis"
            st.rerun()
            return
        # Show task creation section
        show_task_creation_section(st.session_state.task_df, st.session_state.task_environment)
        return

    # Main analysis page
    uploaded_files = st.file_uploader(
        "Upload CSV or Excel files",
        type=['csv', 'xlsx'],
        accept_multiple_files=True
    )

    # Process uploaded files and store in session state
    if uploaded_files:
        all_data = []
        for file in uploaded_files:
            try:
                all_data.append(preprocess_uploaded_file(file))
            except Exception as e:
                # A file that cannot be processed is reported and skipped so
                # the remaining files are still analysed.
                st.error(f"Error processing {file.name}: {str(e)}")
        if all_data:
            # Store the processed data in session state
            st.session_state.uploaded_data = all_data

    # Use data from session state for analysis
    if st.session_state.uploaded_data:
        perform_analysis(st.session_state.uploaded_data)
    else:
        st.write("Please upload at least one file.")
# Script entry point: switch the page to the wide layout, then render it.
# NOTE(review): under the same guard near the top of this file, st.markdown is
# called before this st.set_page_config call. Streamlit versions that require
# set_page_config to be the first Streamlit command would reject that order -
# confirm against the version in use.
if __name__ == "__main__":
    st.set_page_config(layout="wide")
    multiple_main()