BananaSauce committed
Commit 611c02e · 1 Parent(s): f556076

updated for task

Files changed (3):
  1. jira_integration.py +364 -95
  2. multiple.py +101 -70
  3. pre.py +181 -153
jira_integration.py CHANGED
@@ -21,23 +21,19 @@ try:
         if not os.path.exists(log_dir):
             os.makedirs(log_dir)
         log_file = os.path.join(log_dir, f"jira_debug_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log")
 
         # Configure root logger with file handler
         logging.basicConfig(
             level=logging.DEBUG,
             format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-            handlers=[
-                logging.FileHandler(log_file)
-            ]
+            handlers=[logging.FileHandler(log_file)]
         )
     except (OSError, IOError):
         # If file logging fails (e.g., in Hugging Face Spaces), configure logging without file handler
         logging.basicConfig(
             level=logging.DEBUG,
             format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-            handlers=[
-                logging.NullHandler()
-            ]
+            handlers=[logging.NullHandler()]
         )
 
     logger = logging.getLogger("jira_integration")
@@ -839,76 +835,228 @@ def map_functional_area(functional_area, metadata):
     """Map a functional area to its closest Jira allowed parent and child values using structured mapping."""
     if not metadata or not functional_area:
         logger.error("No metadata or functional area provided")
-        raise ValueError("Metadata and functional area are required")
+        # Return default values instead of raising an exception
+        return "R&I", "Data Exchange"
 
     # Get the functional area field from metadata
     func_field = metadata['all_fields'].get('customfield_13100', {})
     if not func_field or 'allowedValues' not in func_field:
         logger.error("Could not find functional area field in metadata")
-        raise ValueError("Functional area field not found in metadata")
-
-    # Build a set of allowed child values for faster lookup
-    allowed_values = {}
-    for parent in func_field['allowedValues']:
-        if isinstance(parent, dict):
-            parent_value = parent.get('value')
-            if parent_value and 'children' in parent:
-                for child in parent['children']:
-                    if isinstance(child, dict) and 'value' in child:
-                        allowed_values[child['value']] = parent_value
-
-    logger.info(f"Input functional area: {functional_area}")
+        # Return default values instead of raising an exception
+        return "R&I", "Data Exchange"
+
+    # Define all allowed child values for R&I parent
+    allowed_child_values = [
+        "Data Exchange", "FIN-Cash Book", "FIN-Creditors", "FIN-Debtors", "FIN-Fixed Assets",
+        "FIN-General Ledger", "FIN-Parameters", "Services-Calendar", "Services-Call Center",
+        "Services-Communications", "Services-Decsion Services", "Services-Entity Relations",
+        "Services-Gamification", "Services-Job Manager", "Services-Measurements",
+        "Services-Multimedia", "Services-Platform", "Services-Questionnaire", "Services-Workflow",
+        "WARPSPEED-Access", "WARPSPEED-Applications", "WARPSPEED-Calendar",
+        "WARPSPEED-Entity Configuration", "WARPSPEED-Environments", "WARPSPEED-Event Types",
+        "WARPSPEED-External Systems", "WARPSPEED-Linked Programmes", "WARPSPEED-Messages",
+        "WARPSPEED-Parameters", "WARPSPEED-Preferences", "WARPSPEED-RefMaster",
+        "WARPSPEED-Relations", "WARPSPEED-Security Tokens", "WARPSPEED-Sequences",
+        "WARPSPEED-Setup - Systems", "WARPSPEED-Statuses", "WARPSPEED-System Index Search",
+        "WARPSPEED-Template Relations", "WARPSPEED-Users", "WARPSPEED-Utilites - My Profile",
+        "WARPSPEED-Utilities - Matrix Headers", "WARPSPEED-Web Objects"
+    ]
+
+    # Define ILR child values
+    ilr_child_values = [
+        "Products", "New Business", "Policy Maintenance", "Collections", "Payments",
+        "Claims", "Commission", "Month End", "Integration", "Campaigns",
+        "Claims (Non Legal)", "Claims (Legal) (Including WF)", "Claim Invoicing (Legal)",
+        "Bulk Processing", "Manual Transactions", "Entity Maintenance", "General Queries"
+    ]
+
+    # Log input functional area
+    logger.info(f"\nInput functional area: {functional_area}")
+
+    # Check if this is an ILR-related functional area
+    ilr_keywords = ["ilr", "legal", "policy", "claim", "invoice", "commission", "collection", "payment"]
+    is_ilr_related = any(keyword in functional_area.lower() for keyword in ilr_keywords)
+
+    # Also check if the environment is Legalwise, Lifewise, Scorpion, or Talksure
+    environment = metadata.get('environment', '')
+    is_ilr_environment = any(env in environment.upper() if environment else False
+                             for env in ["LEGAL_WISE_NR", "LIFE_WISE_NR", "SCORPION_NR", "TALKSURE"])
+
+    if is_ilr_related or is_ilr_environment or functional_area.startswith("ILR"):
+        logger.info(f"Detected ILR-related functional area or environment: {functional_area}, {environment}")
+
+        # Try to find the best matching ILR child value
+        best_match = None
+        best_score = 0
+
+        for child_value in ilr_child_values:
+            # Calculate similarity score
+            score = 0
+            child_lower = child_value.lower()
+            func_lower = functional_area.lower()
+
+            # Check for exact matches or contains
+            if child_lower in func_lower or func_lower in child_lower:
+                score = 100
+            else:
+                # Check for partial matches with specific keywords
+                for keyword in child_lower.split():
+                    if len(keyword) > 3 and keyword in func_lower:  # Only consider meaningful keywords
+                        score += 20
+
+            if score > best_score:
+                best_score = score
+                best_match = child_value
+
+        # If we found a good match
+        if best_match and best_score > 30:
+            logger.info(f"Mapped ILR functional area to: {best_match} with score {best_score}")
+            return "ILR", best_match
+
+        # If no good match found, return None as the child value
+        logger.info("No good match found for ILR functional area, using None")
+        return "ILR", None
+
+    # Enhanced direct mapping for common functional areas with more specific patterns
+    direct_mappings = {
+        "Financials - Creditors": "FIN-Creditors",
+        "Financials - Debtors": "FIN-Debtors",
+        "Financials - Cash Book": "FIN-Cash Book",
+        "Financials - Fixed Assets": "FIN-Fixed Assets",
+        "Financials - FA": "FIN-Fixed Assets",  # Added specific mapping for FA
+        "Financials - General Ledger": "FIN-General Ledger",
+        "Financials - Parameters": "FIN-Parameters",
+        "Services - Calendar": "Services-Calendar",
+        "Services - Call Center": "Services-Call Center",
+        "Services - Communications": "Services-Communications",
+        "Services - Entity Relations": "Services-Entity Relations",
+        "Services - Gamification": "Services-Gamification",
+        "Services - Job Manager": "Services-Job Manager",
+        "Services - Platform": "Services-Platform",
+        "Services - Workflow": "Services-Workflow",
+        "Data Exchange": "Data Exchange"
+    }
+
+    # Check for direct matches first
+    for pattern, value in direct_mappings.items():
+        if pattern in functional_area:
+            logger.info(f"Found direct mapping match: {pattern} -> {value}")
+            return "R&I", value
 
     # Split the functional area into parts
     parts = [p.strip() for p in functional_area.split(' - ')]
     logger.info(f"Split into parts: {parts}")
 
-    # Try different combinations of parts joined with '-'
-    for i in range(len(parts)):
-        for j in range(i + 1, len(parts) + 1):
-            # Try joining parts with '-'
-            test_value = '-'.join(parts[i:j])
-            # Also try without spaces
-            test_value_no_spaces = test_value.replace(' ', '')
-
-            logger.info(f"Trying combination: {test_value}")
-
-            # Check both versions (with and without spaces)
-            if test_value in allowed_values:
-                logger.info(f"Found exact match: {test_value}")
-                return allowed_values[test_value], test_value
-            elif test_value_no_spaces in allowed_values:
-                logger.info(f"Found match without spaces: {test_value_no_spaces}")
-                return allowed_values[test_value_no_spaces], test_value_no_spaces
-
-            # Try category-specific matches
-            categories = ['Services', 'FIN', 'WARPSPEED']
-            for category in categories:
-                category_value = f"{category}-{test_value}"
-                category_value_no_spaces = category_value.replace(' ', '')
-
-                if category_value in allowed_values:
-                    logger.info(f"Found category match: {category_value}")
-                    return allowed_values[category_value], category_value
-                elif category_value_no_spaces in allowed_values:
-                    logger.info(f"Found category match without spaces: {category_value_no_spaces}")
-                    return allowed_values[category_value_no_spaces], category_value_no_spaces
-
-    # If no match found, try to find a suitable default based on the first part
-    first_part = parts[0].upper()
-    if 'SERVICE' in first_part or 'SERVICES' in first_part:
-        logger.info("No exact match found, defaulting to Services-Platform")
+    # Check first part for category matching
+    first_part = parts[0].upper() if parts else ""
+
+    # Map based on first part
+    if "FINANCIALS" in first_part or "FIN" in first_part:
+        # For financial areas, check if we can be more specific based on second part
+        if len(parts) > 1:
+            second_part = parts[1].upper()
+
+            # Enhanced FA/Fixed Assets detection
+            if "FA" in second_part or "FIXED" in second_part or "ASSETS" in second_part or "ASSET" in second_part:
+                logger.info("Mapping to FIN-Fixed Assets based on FA detection")
+                return "R&I", "FIN-Fixed Assets"
+            elif "CREDITORS" in second_part or "AP" in second_part or "ACCOUNTS PAYABLE" in second_part:
+                logger.info("Mapping to FIN-Creditors based on second part")
+                return "R&I", "FIN-Creditors"
+            elif "DEBTORS" in second_part or "AR" in second_part or "ACCOUNTS RECEIVABLE" in second_part:
+                logger.info("Mapping to FIN-Debtors based on second part")
+                return "R&I", "FIN-Debtors"
+            elif "CASH" in second_part or "CASHBOOK" in second_part:
+                logger.info("Mapping to FIN-Cash Book based on second part")
+                return "R&I", "FIN-Cash Book"
+            elif "GENERAL" in second_part or "GL" in second_part or "LEDGER" in second_part:
+                logger.info("Mapping to FIN-General Ledger based on second part")
+                return "R&I", "FIN-General Ledger"
+
+        # Default financial mapping
+        logger.info("Defaulting to FIN-Parameters")
+        return "R&I", "FIN-Parameters"
+
+    elif "SERVICE" in first_part or "SERVICES" in first_part:
+        # For services areas, check if we can be more specific based on second part
+        if len(parts) > 1:
+            second_part = parts[1].upper()
+
+            # Add specific check for "Work Flow" vs "Workflow"
+            if "WORK" in second_part and ("FLOW" in second_part or "FLOWS" in second_part):
+                logger.info("Mapping to Services-Workflow based on Work Flow detection")
+                return "R&I", "Services-Workflow"
+
+            # Continue with existing logic for other service types
+            for child_value in allowed_child_values:
+                if child_value.startswith("Services-"):
+                    # Extract the part after "Services-"
+                    service_type = child_value.split("Services-")[1].upper()
+                    # Normalize by removing spaces for comparison
+                    second_part_normalized = second_part.replace(" ", "")
+                    service_type_normalized = service_type.replace(" ", "")
+
+                    if service_type_normalized in second_part_normalized or second_part_normalized in service_type_normalized:
+                        logger.info(f"Mapping to {child_value} based on second part")
+                        return "R&I", child_value
+
+        # Default services mapping
+        logger.info("Defaulting to Services-Platform")
         return "R&I", "Services-Platform"
-    elif 'FIN' in first_part:
-        logger.info("No exact match found, defaulting to FIN-Parameters")
-        return "R&I", "FIN-Parameters"
-    elif 'WARPSPEED' in first_part:
-        logger.info("No exact match found, defaulting to WARPSPEED-Parameters")
+
+    elif "WARPSPEED" in first_part:
+        # For warpspeed areas, check if we can be more specific based on second part
+        if len(parts) > 1:
+            second_part = parts[1].upper()
+            for child_value in allowed_child_values:
+                if child_value.startswith("WARPSPEED-"):
+                    # Extract the part after "WARPSPEED-"
+                    warpspeed_type = child_value.split("WARPSPEED-")[1].upper()
+                    if warpspeed_type in second_part or second_part in warpspeed_type:
+                        logger.info(f"Mapping to {child_value} based on second part")
+                        return "R&I", child_value
+
+        # Default warpspeed mapping
+        logger.info("Defaulting to WARPSPEED-Parameters")
        return "R&I", "WARPSPEED-Parameters"
 
-    # Final fallback to Data Exchange
-    logger.warning(f"No suitable match found for '{functional_area}', defaulting to Data Exchange")
+    elif "DATA" in first_part or "EXCHANGE" in first_part:
+        logger.info("Mapping to Data Exchange")
         return "R&I", "Data Exchange"
+
+    # If no category match, try fuzzy matching with all allowed values
+    best_match = None
+    best_score = 0
+
+    # Try to match the full functional area
+    for child_value in allowed_child_values:
+        # Calculate similarity score
+        score = 0
+        child_upper = child_value.upper()
+        func_upper = functional_area.upper()
+
+        # Check if child value appears in functional area
+        if child_upper.replace("-", " ") in func_upper or child_upper in func_upper:
+            score = 100  # Perfect match
+        else:
+            # Calculate partial match score
+            for part in parts:
+                part_upper = part.upper()
+                if part_upper in child_upper or child_upper in part_upper:
+                    score += 50 / len(parts)  # Partial match
+
+        if score > best_score:
+            best_score = score
+            best_match = child_value
+
+    # If we found a good match
+    if best_match and best_score > 30:  # Threshold for accepting a match
+        logger.info(f"Found fuzzy match: {best_match} with score {best_score}")
+        return "R&I", best_match
+
+    # If no good match found, return None as the child value
+    logger.warning(f"No good match found for '{functional_area}', using None")
+    return "R&I", None
 
 def get_customer_field_values(metadata):
     """Extract all available customer field values and their child options from metadata"""
@@ -937,24 +1085,100 @@ def map_customer_value(environment_value, customer_values):
 
     # Clean up environment value
     env_value = environment_value.strip()
+    logger.info(f"Mapping customer value for environment: {env_value}")
 
-    # Special case handling for specific environments
-    if any(env in env_value.lower() for env in ['legalwise', 'scorpion', 'lifewise', 'talksure']):
-        parent_value = "ILR"
-        child_value = env_value  # Use the original environment value as child
-        logger.info(f"Mapped {env_value} to ILR parent with child {child_value}")
+    # Handle Legalwise environments
+    if 'LEGAL_WISE_NR' in env_value:
+        parent_value = "MIP ILR"
+        child_value = "LEZA - LegalWise"
+        logger.info(f"Mapped Legalwise environment {env_value} to {parent_value} parent with child {child_value}")
         return parent_value, child_value
 
-    # Handle RI environments
-    if env_value.startswith('RI'):
+    # Handle Lifewise environments
+    if 'LIFE_WISE_NR' in env_value:
+        parent_value = "MIP ILR"
+        child_value = "LEZA - LifeWise"
+        logger.info(f"Mapped Lifewise environment {env_value} to {parent_value} parent with child {child_value}")
+        return parent_value, child_value
+
+    # Handle Scorpion environments
+    if 'SCORPION_NR' in env_value:
+        parent_value = "MIP ILR"
+        child_value = "LEZA - Scorpion"
+        logger.info(f"Mapped Scorpion environment {env_value} to {parent_value} parent with child {child_value}")
+        return parent_value, child_value
+
+    # Handle Talksure environments
+    if 'TALKSURE' in env_value:
+        parent_value = "MIP ILR"
+        child_value = "Talksure"
+        logger.info(f"Mapped Talksure environment {env_value} to {parent_value} parent with child {child_value}")
+        return parent_value, child_value
+
+    # Special case for 2001 fin
+    if '2001' in env_value and 'FIN' in env_value.lower():
         parent_value = "MIP Research and Innovation"
-        # Remove 'RI' prefix and clean up
-        child_value = env_value[2:].strip()
-        if child_value:
-            child_value = f"R&I {child_value}"
-        else:
-            child_value = "R&I General"
-        logger.info(f"Mapped RI environment {env_value} to {parent_value} parent with child {child_value}")
+        child_value = "R&I 2001 Fin"
+        logger.info(f"Mapped 2001 fin environment {env_value} to {parent_value} parent with child {child_value}")
+        return parent_value, child_value
+
+    # Handle R&I environments with FIN
+    if 'R&I' in env_value and 'FIN' in env_value:
+        parent_value = "MIP Research and Innovation"
+        # Extract the number between R&I and FIN
+        try:
+            number = env_value.split('R&I')[1].split('FIN')[0].strip()
+            child_value = f"R&I {number} Fin"  # Changed from FIN to Fin
+            logger.info(f"Mapped R&I FIN environment {env_value} to {parent_value} parent with child {child_value}")
+            return parent_value, child_value
+        except:
+            logger.warning(f"Could not parse R&I FIN format from {env_value}")
+
+    # Handle R&I environments without FIN
+    if 'R&I' in env_value and 'FIN' not in env_value:
+        parent_value = "MIP Research and Innovation"
+        # Extract the number after R&I
+        try:
+            number = env_value.split('R&I')[1].strip()
+            child_value = f"R&I {number}"
+            logger.info(f"Mapped R&I environment {env_value} to {parent_value} parent with child {child_value}")
+            return parent_value, child_value
+        except:
+            logger.warning(f"Could not parse R&I format from {env_value}")
+
+    # Handle Task Manager environments
+    if 'Task Manager' in env_value:
+        parent_value = "MIP Research and Innovation"
+        child_value = env_value
+        logger.info(f"Mapped Task Manager environment {env_value} to {parent_value} parent with child {child_value}")
+        return parent_value, child_value
+
+    # Handle App Manager environments
+    if 'App Manager' in env_value:
+        parent_value = "MIP Research and Innovation"
+        child_value = env_value
+        logger.info(f"Mapped App Manager environment {env_value} to {parent_value} parent with child {child_value}")
+        return parent_value, child_value
+
+    # Handle Auth Gateway environments
+    if 'Auth Gateway' in env_value:
+        parent_value = "MIP Research and Innovation"
+        child_value = env_value
+        logger.info(f"Mapped Auth Gateway environment {env_value} to {parent_value} parent with child {child_value}")
+        return parent_value, child_value
+
+    # Handle MIP Intranet
+    if 'MIP Intranet' in env_value:
+        parent_value = "MIP Research and Innovation"
+        child_value = "MIP Intranet"
+        logger.info(f"Mapped MIP Intranet environment {env_value} to {parent_value} parent with child {child_value}")
+        return parent_value, child_value
+
+    # Handle Pilotfish
+    if 'Pilotfish' in env_value:
+        parent_value = "MIP Research and Innovation"
+        child_value = "Pilotfish"
+        logger.info(f"Mapped Pilotfish environment {env_value} to {parent_value} parent with child {child_value}")
         return parent_value, child_value
 
     # Default case - try to find matching values
@@ -973,10 +1197,8 @@ def map_customer_value(environment_value, customer_values):
     return "MIP Research and Innovation", "R&I General"
 
 def create_regression_task(project_key, summary, description, environment, filtered_scenarios_df):
-    logger.debug(f"Entering create_regression_task with project_key={project_key}, summary={summary}, environment={environment}, DF_shape={filtered_scenarios_df.shape}")
     logger.info("=== Starting create_regression_task function ===")
     logger.info(f"Project: {project_key}, Summary: {summary}, Environment: {environment}")
-    logger.info(f"Filtered DF shape: {filtered_scenarios_df.shape if filtered_scenarios_df is not None else 'None'}")
 
     try:
         # Get metadata first to access field values
@@ -987,9 +1209,74 @@ def create_regression_task(project_key, summary, description, environment, filtered_scenarios_df):
             st.error(error_msg)
             return None
 
+        # Add environment to metadata for use in mapping functions
+        metadata['environment'] = environment
+
+        # Check if environment is ILR-related
+        is_ilr_environment = any(env in environment.upper() if environment else False
+                                 for env in ["LEGAL_WISE_NR", "LIFE_WISE_NR", "SCORPION_NR", "TALKSURE"])
+        logger.info(f"Environment check - Is ILR: {is_ilr_environment}")
+
+        # Extract functional area from filtered scenarios
+        functional_areas = []
+        if "Functional area" in filtered_scenarios_df.columns:
+            functional_areas = filtered_scenarios_df["Functional area"].unique().tolist()
+            logger.info(f"Extracted functional areas: {functional_areas}")
+
+        # Handle ILR environments
+        if is_ilr_environment:
+            functional_area_parent = "ILR"
+            # Define ILR child values
+            ilr_child_values = [
+                "Products", "New Business", "Policy Maintenance", "Collections", "Payments",
+                "Claims", "Commission", "Month End", "Integration", "Campaigns",
+                "Claims (Non Legal)", "Claims (Legal) (Including WF)", "Claim Invoicing (Legal)",
+                "Bulk Processing", "Manual Transactions", "Entity Maintenance", "General Queries"
+            ]
+
+            # Default to General Queries
+            functional_area_child = "General Queries"
+
+            # Try to find a better match based on the functional area
+            if functional_areas:
+                func_area = functional_areas[0].lower()
+                for child in ilr_child_values:
+                    if child.lower() in func_area:
+                        functional_area_child = child
+                        break
+
+            logger.info(f"Forced functional area to ILR parent with child: {functional_area_child}")
+        else:
+            # Map functional area using metadata (now with environment info)
+            logger.info("Not an ILR environment, using standard mapping")
+            functional_area_parent, functional_area_child = map_functional_area(
+                functional_areas[0] if functional_areas else "Data Exchange",
+                metadata
+            )
+
+        logger.info(f"Final functional area mapping - Parent: {functional_area_parent}, Child: {functional_area_child}")
+
         # Get customer field values and map environment
         customer_values = get_customer_field_values(metadata)
-        parent_value, child_value = map_customer_value(environment, customer_values)
+
+        # If functional area is ILR or environment is ILR-related, set customer to MIP ILR
+        if functional_area_parent == "ILR" or is_ilr_environment:
+            parent_value = "MIP ILR"
+            # Set child value based on environment
+            if "LEGAL_WISE_NR" in environment.upper():
+                child_value = "LEZA - LegalWise"
+            elif "LIFE_WISE_NR" in environment.upper():
+                child_value = "LEZA - LifeWise"
+            elif "SCORPION_NR" in environment.upper():
+                child_value = "LEZA - Scorpion"
+            elif "TALKSURE" in environment.upper():
+                child_value = "Talksure"
+            else:
+                child_value = "General Queries"
+        else:
+            # Use the normal mapping for non-ILR functional areas
+            parent_value, child_value = map_customer_value(environment, customer_values)
+
         logger.info(f"Mapped customer values - Parent: {parent_value}, Child: {child_value}")
 
         # Get Jira client
@@ -1010,28 +1297,10 @@ def create_regression_task(project_key, summary, description, environment, filtered_scenarios_df):
 
         logger.info(f"Found active sprint: {active_sprint.name} (ID: {active_sprint.id})")
 
-        # Extract functional area from filtered scenarios
-        functional_areas = []
-        try:
-            if "Functional area" in filtered_scenarios_df.columns:
-                functional_areas = filtered_scenarios_df["Functional area"].unique().tolist()
-                logger.info(f"Extracted functional areas: {functional_areas}")
-        except Exception as e:
-            logger.exception(f"Error extracting functional areas: {str(e)}")
-            st.error(f"Error extracting functional areas: {str(e)}")
-            return None
-
         # Calculate story points based on number of scenarios
         story_points = calculate_story_points(len(filtered_scenarios_df))
         logger.info(f"Calculated story points: {story_points}")
 
-        # Map functional area using metadata
-        functional_area_parent, functional_area_child = map_functional_area(
-            functional_areas[0] if functional_areas else "Data Exchange",
-            metadata
-        )
-        logger.info(f"Mapped functional area to parent: {functional_area_parent}, child: {functional_area_child}")
-
         # Prepare issue dictionary with all required fields
         issue_dict = {
             "project": {"key": project_key},
multiple.py CHANGED
@@ -17,7 +17,9 @@ from jira_integration import (
     get_dependent_field_value,
     get_boards,
     get_functional_area_values,
-    map_functional_area
+    map_functional_area,
+    get_customer_field_values,
+    map_customer_value
 )
 from datetime import datetime, timedelta
 import plotly.express as px
@@ -55,85 +57,113 @@ logger = logging.getLogger("multiple")
 
 # Function to capture button clicks with manual callback
 def handle_task_button_click(summary, description, formatted_env, filtered_df):
-    logger.info("=== Task button clicked - Starting callback function ===")
+    logger.info("=== Task button clicked - Starting debug logging ===")
     try:
         logger.info(f"Summary: {summary}")
         logger.info(f"Description length: {len(description)}")
         logger.info(f"Environment: {formatted_env}")
         logger.info(f"DataFrame shape: {filtered_df.shape}")
 
-        # Format environment for customer field - simple conversion from RI to R&I
-        if isinstance(formatted_env, str):
-            if formatted_env.startswith('RI') and 'FIN' in formatted_env:
-                # Extract the number between RI and FIN
-                number = formatted_env[2:formatted_env.index('FIN')]
-                formatted_env = f"R&I {number} FIN"
-            elif formatted_env.startswith('RI'):
-                number = formatted_env[2:]
-                formatted_env = f"R&I {number}"
-
-        # Import here to avoid circular imports
-        from jira_integration import create_regression_task
-
-        logger.info("Imported create_regression_task function")
-
-        # Call the actual function
-        with st.spinner("Creating task in Jira..."):
-            logger.info("About to call create_regression_task function")
-            task = create_regression_task(
-                project_key="RS",
-                summary=summary,
-                description=description,
-                environment=formatted_env,
-                filtered_scenarios_df=filtered_df
+        # Get metadata for field values
+        metadata = get_project_metadata("RS")
+        if not metadata:
+            logger.error("Could not get project metadata")
+            return False
+
+        # Check if this is an ILR environment
+        is_ilr_environment = any(env in formatted_env.upper() if formatted_env else False
+                                 for env in ["LEGAL_WISE_NR", "LIFE_WISE_NR", "SCORPION_NR", "TALKSURE"])
+
+        # Extract functional area from filtered scenarios
+        functional_areas = []
+        if "Functional area" in filtered_df.columns:
+            functional_areas = filtered_df["Functional area"].unique().tolist()
+            logger.info(f"Extracted functional areas: {functional_areas}")
+
+        # Map functional area using metadata
+        functional_area_parent = "ILR" if is_ilr_environment else "R&I"
+        functional_area_child = None
+
+        # Set child value based on environment for ILR
+        if is_ilr_environment:
+            if "LEGAL_WISE_NR" in formatted_env.upper():
+                functional_area_child = "LEZA - LegalWise"
+            elif "LIFE_WISE_NR" in formatted_env.upper():
+                functional_area_child = "LEZA - LifeWise"
+            elif "SCORPION_NR" in formatted_env.upper():
+                functional_area_child = "LEZA - Scorpion"
+            elif "TALKSURE" in formatted_env.upper():
+                functional_area_child = "Talksure"
+        else:
+            # Use standard R&I mapping
+            _, functional_area_child = map_functional_area(
+                functional_areas[0] if functional_areas else "Data Exchange",
+                metadata
             )
-
-            logger.info(f"create_regression_task returned: {task}")
-
-            if task:  # Store task information in session state
-                st.session_state.last_task_key = task.key
-                st.session_state.last_task_url = f"{JIRA_SERVER}/browse/{task.key}"
-                st.session_state.show_success = True
-
-                # Display success message and task details
-                st.success("✅ Task created successfully!")
-                st.markdown(
-                    f"""
-                    <div style='padding: 10px; border-radius: 5px; border: 1px solid #90EE90; margin: 10px 0;'>
-                        <h3 style='margin: 0; color: #90EE90;'>Task Details</h3>
-                        <p style='margin: 10px 0;'>Task Key: {task.key}</p>
-                        <a href='{JIRA_SERVER}/browse/{task.key}' target='_blank'
-                           style='background-color: #90EE90; color: black; padding: 5px 10px;
-                           border-radius: 3px; text-decoration: none; display: inline-block;'>
-                            View Task in Jira
-                        </a>
-                    </div>
-                    """,
-                    unsafe_allow_html=True
-                )
-
-                # Clear task content
-                st.session_state.task_content = None
-
-                # Add button to create another task
-                if st.button("Create Another Task", key="create_another"):
-                    # Clear all task-related state
-                    st.session_state.task_content = None
-                    st.session_state.last_task_key = None
-                    st.session_state.last_task_url = None
-                    st.session_state.show_success = False
-                    st.rerun()
-
-                logger.info("Task creation process completed successfully")
-                return True
-            else:
-                logger.error("Task creation failed (returned None)")
-                st.error("❌ Task creation failed. Please check the error messages and try again.")
-                return False
+
+        logger.info(f"Mapped functional area to parent: {functional_area_parent}, child: {functional_area_child}")
+
+        # Get customer field values and map environment
+        customer_values = get_customer_field_values(metadata)
+        parent_value, child_value = map_customer_value(formatted_env, customer_values)
+        logger.info(f"Mapped customer values - Parent: {parent_value}, Child: {child_value}")
+
+        # Calculate story points based on number of scenarios
+        story_points = calculate_story_points(len(filtered_df))
+        logger.info(f"Calculated story points: {story_points}")
+
+        # Prepare issue dictionary with all required fields
+        issue_dict = {
+            "project": {"key": "RS"},
+            "summary": summary,
+            "description": description,
+            "issuetype": {"name": "Story"},
+            "components": [{"name": "Maintenance (Regression)"}],
+            "customfield_10427": {
+                "value": parent_value,
+                "child": {
+                    "value": child_value
+                }
+            },
+            "customfield_12730": {"value": "Non-Business Critical"},  # Regression Type field
+            "customfield_13430": {"value": str(len(filtered_df))},  # Number of Scenarios
+            "customfield_13100": {
+                "value": functional_area_parent,
+                "child": {
+                    "value": functional_area_child
+                }
+            },
+            "assignee": {"name": st.session_state.jira_username},
+            "customfield_10002": story_points  # Story Points field
+        }
+
+        # Log the complete issue dictionary
+        logger.info("=== Task Creation Values ===")
+        logger.info(f"Complete issue dictionary: {json.dumps(issue_dict, indent=2)}")
+
+        # Create the actual Jira task
+        task_key = create_regression_task(
+            summary=summary,
+            description=description,
+            environment=formatted_env,
+            filtered_scenarios_df=filtered_df,
+            project_key="RS"
+        )
+
+        if task_key:
+            # Set session state variables for success message
+            st.session_state.last_task_key = task_key
+            st.session_state.last_task_url = f"{JIRA_SERVER}/browse/{task_key}"
+            st.session_state.show_success = True
+            logger.info(f"Successfully created task: {task_key}")
+            return True
+        else:
+            st.error("❌ Failed to create Jira task. Check logs for details.")
+            return False
 
     except Exception as e:
         logger.exception(f"Error in handle_task_button_click: {str(e)}")
-        st.error(f"❌ Error creating task: {str(e)}")
+        st.error(f"❌ Error preparing task: {str(e)}")
         import traceback
         error_trace = traceback.format_exc()
         logger.error(f"Full traceback: {error_trace}")
@@ -292,7 +322,6 @@ def perform_analysis(uploaded_dataframes):
     col1, col2, col3 = st.columns([1, 2, 1])
     with col2:
         if st.button("📝 Log Jira Task", use_container_width=True):
-            st.write("Debug: Button clicked")  # Debug line
             # Use the properly structured DataFrame for task creation
             task_df = grouped_filtered_scenarios.copy()
             expected_columns = [
@@ -314,7 +343,9 @@ def perform_analysis(uploaded_dataframes):
             summary, description = generate_task_content(task_df)
             if summary and description:
                 # Call the task creation function
-                handle_task_button_click(summary, description, environment, task_df)
+                success = handle_task_button_click(summary, description, environment, task_df)
+                if success:
+                    st.rerun()  # Refresh the page to show success message
 
     # Check if selected_status is 'Failed' and show bar graph
     if selected_status != 'Passed':
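
For context, handle_task_button_click now assembles issue_dict in the same shape that create_regression_task submits to Jira. A hedged sketch of how a dictionary of this shape is typically submitted with the jira package; the server URL and credentials are placeholders, and whether create_regression_task uses exactly this call is an assumption:

    from jira import JIRA

    # Placeholder connection details, not values from this repo
    jira_client = JIRA(server="https://jira.example.com", basic_auth=("user", "api-token"))
    issue_dict = {
        "project": {"key": "RS"},
        "summary": "Regression: failed scenarios",
        "description": "Automated regression task",
        "issuetype": {"name": "Story"},
        # Cascading select fields take a parent value plus a nested child value
        "customfield_13100": {"value": "R&I", "child": {"value": "Data Exchange"}},
    }
    new_issue = jira_client.create_issue(fields=issue_dict)
    print(new_issue.key)

The nested {"value": ..., "child": {"value": ...}} structure mirrors how the diff populates the customer (customfield_10427) and functional area (customfield_13100) cascading fields.
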
pre.py CHANGED
@@ -43,138 +43,143 @@ def preprocess_xlsx(uploaded_file):
         'Failed Scenario': 'string'
     }
 
-    # Read both the first sheet for error messages and "Time Taken" sheet
-    excel_file = pd.ExcelFile(uploaded_file, engine='openpyxl')
-
-    # Read detailed step data from first sheet (contains error messages)
-    error_df = pd.read_excel(excel_file, sheet_name=0)
-
-    # Read time taken data from the "Time Taken" sheet
-    df = pd.read_excel(
-        excel_file,
-        sheet_name='Time Taken',
-        dtype=dtype_dict
-    )
-
-    # Print column names and sample values for debugging
-    # st.write("Excel columns:", df.columns.tolist())
-    # st.write("Sample data from Time Taken sheet:", df.head())
-    # st.write("Unique Feature Names:", df['Feature Name'].unique())
-    # st.write("Feature Name count:", df['Feature Name'].nunique())
-
-    # # Check for any empty or NaN values in Feature Name
-    # empty_features = df['Feature Name'].isna().sum()
-    # st.write(f"Empty Feature Names: {empty_features}")
-
-    # Convert Failed Scenario column to boolean after reading
-    # Handle different possible values (TRUE/FALSE, True/False, etc.)
-    df['Failed Scenario'] = df['Failed Scenario'].astype(str).str.upper()
-    df['Status'] = df['Failed Scenario'].map(
-        lambda x: 'FAILED' if x in ['TRUE', 'YES', 'Y', '1'] else 'PASSED'
-    )
-
-    # Count failed and passed scenarios
-    failed_count = (df['Status'] == 'FAILED').sum()
-    passed_count = (df['Status'] == 'PASSED').sum()
-
-
-    # Extract error messages from the first sheet
-    # Find rows with FAILED result and group by Scenario Name to get the error message
-    if 'Result' in error_df.columns:
-        failed_steps = error_df[error_df['Result'] == 'FAILED'].copy()
-
-        # If there are failed steps, get the error messages
-        if not failed_steps.empty:
-            # Group by Scenario Name and get the first error message and step for each scenario
-            error_messages = failed_steps.groupby('Scenario Name').agg({
-                'Error Message': 'first',
-                'Step': 'first'  # Capture the step where it failed
-            }).reset_index()
+    try:
+        # Read both the first sheet for error messages and "Time Taken" sheet
+        excel_file = pd.ExcelFile(uploaded_file, engine='openpyxl')
+
+        # Read detailed step data from first sheet (contains error messages)
+        error_df = pd.read_excel(excel_file, sheet_name=0)
+
+        # Read time taken data from the "Time Taken" sheet
+        df = pd.read_excel(
+            excel_file,
+            sheet_name='Time Taken',
+            dtype=dtype_dict
+        )
+
+        # Convert Failed Scenario column to boolean after reading
+        # Handle different possible values (TRUE/FALSE, True/False, etc.)
+        df['Failed Scenario'] = df['Failed Scenario'].astype(str).str.upper()
+        # Replace 'NAN' string with empty string to avoid conversion issues
+        df['Failed Scenario'] = df['Failed Scenario'].replace('NAN', '')
+        df['Status'] = df['Failed Scenario'].map(
+            lambda x: 'FAILED' if x in ['TRUE', 'YES', 'Y', '1'] else 'PASSED'
+        )
+
+        # Count failed and passed scenarios
+        failed_count = (df['Status'] == 'FAILED').sum()
+        passed_count = (df['Status'] == 'PASSED').sum()
+
+
+        # Extract error messages from the first sheet
+        # Find rows with FAILED result and group by Scenario Name to get the error message
+        if 'Result' in error_df.columns:
+            failed_steps = error_df[error_df['Result'] == 'FAILED'].copy()
+
+            # If there are failed steps, get the error messages
+            if not failed_steps.empty:
+                # Group by Scenario Name and get the first error message and step for each scenario
+                error_messages = failed_steps.groupby('Scenario Name').agg({
+                    'Error Message': 'first',
+                    'Step': 'first'  # Capture the step where it failed
+                }).reset_index()
+            else:
+                # Create empty DataFrame with required columns
+                error_messages = pd.DataFrame(columns=['Scenario Name', 'Error Message', 'Step'])
         else:
-            # Create empty DataFrame with required columns
+            # If Result column doesn't exist, create empty DataFrame
             error_messages = pd.DataFrame(columns=['Scenario Name', 'Error Message', 'Step'])
-    else:
-        # If Result column doesn't exist, create empty DataFrame
-        error_messages = pd.DataFrame(columns=['Scenario Name', 'Error Message', 'Step'])
-
-    # Extract date from filename (e.g., RI2211_batch_20250225_27031.xlsx)
-    filename = uploaded_file.name
-    date_match = re.search(r'_(\d{8})_', filename)
-    if date_match:
-        date_str = date_match.group(1)
-        file_date = datetime.strptime(date_str, '%Y%m%d').date()
-    else:
-        st.warning(f"Could not extract date from filename: {filename}. Using current date.")
-        file_date = datetime.now().date()
-
-    # Extract environment from filename
-    if any(pattern in filename for pattern in ['_batch_', '_fin_', '_priority_', '_Puppeteer_']):
-        # Get everything before _batch, _fin, or _priority
-        if '_batch_' in filename:
-            environment = filename.split('_batch_')[0]
-        elif '_fin_' in filename:
-            environment = filename.split('_fin_')[0]
-        elif '_priority_' in filename:
-            environment = filename.split('_priority_')[0]
-        elif '_Puppeteer_' in filename:
-            environment = filename.split('_Puppeteer_')[0]
-    else:
-        environment = filename.split('.')[0]
-
-    # Create result dataframe
-    result_df = pd.DataFrame({
-        'Functional area': df['Feature Name'],
-        'Scenario Name': df['Scenario Name'],
-        'Status': df['Status'],
-        'Time spent': df['Total Time Taken (ms)'] / 1000  # Convert ms to seconds
-    })
-
-    # Fill any NaN values in Functional area
-    result_df['Functional area'] = result_df['Functional area'].fillna('Unknown')
-
-    # Merge error messages with result dataframe
-    if not error_messages.empty:
-        result_df = result_df.merge(error_messages[['Scenario Name', 'Error Message', 'Step']],
-                                    on='Scenario Name', how='left')
-
-    # Add environment column
-    result_df['Environment'] = environment
-
-    # Calculate formatted time spent
-    result_df['Time spent(m:s)'] = pd.to_datetime(result_df['Time spent'], unit='s').dt.strftime('%M:%S')
-
-
-    result_df['Start datetime'] = pd.to_datetime(file_date)
-    result_df['End datetime'] = result_df['Start datetime'] + pd.to_timedelta(result_df['Time spent'], unit='s')
-
-    # Add failed step information if available
-    if 'Step' in result_df.columns:
-        result_df['Failed Step'] = result_df['Step']
-        result_df.drop('Step', axis=1, inplace=True)
-
-    # Extract start time from the first sheet
-    before_steps = error_df[error_df['Step'].str.contains('before', case=False, na=False)]
-    if not before_steps.empty:
-        # Get the first 'before' step for each scenario
-        before_steps['Time Stamp'] = pd.to_datetime(before_steps['Time Stamp'], format='%H:%M:%S', errors='coerce')
-        start_times = before_steps.groupby('Scenario Name').agg({'Time Stamp': 'first'}).reset_index()
-        # Store the timestamps in a variable for efficient reuse
-        result_df = result_df.merge(start_times, on='Scenario Name', how='left')
-        result_df.rename(columns={'Time Stamp': 'Scenario Start Time'}, inplace=True)
-        scenario_start_times = result_df['Scenario Start Time']
-        # Combine the date from the filename with the time stamp
-        result_df['Start datetime'] = pd.to_datetime(scenario_start_times.dt.strftime('%H:%M:%S') + ' ' + file_date.strftime('%Y-%m-%d'))
-
-    # Print counts for debugging
-    # st.write(f"Processed data - Failed: {len(result_df[result_df['Status'] == 'FAILED'])}, Passed: {len(result_df[result_df['Status'] == 'PASSED'])}")
-    # st.write(f"Unique functional areas in processed data: {result_df['Functional area'].nunique()}")
-    # st.write(f"Unique functional areas: {result_df['Functional area'].unique()}")
-
-    # Debugging: Print the columns of the first sheet
-    # st.write("Columns in the first sheet:", error_df.columns.tolist())
-    # st.write("Sample data from the first sheet:", error_df.head())
-
-    return result_df
+
+        # Extract date from filename (e.g., RI2211_batch_20250225_27031.xlsx)
+        filename = uploaded_file.name
+        date_match = re.search(r'_(\d{8})_', filename)
+        if date_match:
+            date_str = date_match.group(1)
+            file_date = datetime.strptime(date_str, '%Y%m%d').date()
+        else:
+            st.warning(f"Could not extract date from filename: {filename}. Using current date.")
+            file_date = datetime.now().date()
+
+        # Extract environment from filename
+        if any(pattern in filename for pattern in ['_batch_', '_fin_', '_priority_', '_Puppeteer_']):
+            # Get everything before _batch, _fin, or _priority
+            if '_batch_' in filename:
+                environment = filename.split('_batch_')[0]
+            elif '_fin_' in filename:
+                environment = filename.split('_fin_')[0]
+            elif '_priority_' in filename:
+                environment = filename.split('_priority_')[0]
+            elif '_Puppeteer_' in filename:
+                environment = filename.split('_Puppeteer_')[0]
+        else:
+            environment = filename.split('.')[0]
+
+        # Create result dataframe
+        result_df = pd.DataFrame({
+            'Functional area': df['Feature Name'],
+            'Scenario Name': df['Scenario Name'],
+            'Status': df['Status'],
+            'Time spent': df['Total Time Taken (ms)'] / 1000  # Convert ms to seconds
+        })
+
+        # Fill any NaN values in Functional area
+        result_df['Functional area'] = result_df['Functional area'].fillna('Unknown')
+
+        # Ensure Time spent is a numeric value and handle NaN
+        result_df['Time spent'] = pd.to_numeric(result_df['Time spent'], errors='coerce')
+        result_df['Time spent'] = result_df['Time spent'].fillna(0)
+
+        # Merge error messages with result dataframe
+        if not error_messages.empty:
+            result_df = result_df.merge(error_messages[['Scenario Name', 'Error Message', 'Step']],
+                                        on='Scenario Name', how='left')
+
+        # Add environment column
+        result_df['Environment'] = environment
+
+        # Calculate formatted time spent
+        result_df['Time spent(m:s)'] = pd.to_datetime(result_df['Time spent'], unit='s').dt.strftime('%M:%S')
+
+
+        result_df['Start datetime'] = pd.to_datetime(file_date)
+        result_df['End datetime'] = result_df['Start datetime'] + pd.to_timedelta(result_df['Time spent'], unit='s')
+
+        # Add failed step information if available
+        if 'Step' in result_df.columns:
+            result_df['Failed Step'] = result_df['Step']
+            result_df.drop('Step', axis=1, inplace=True)
+
+        # Extract start time from the first sheet
+        before_steps = error_df[error_df['Step'].str.contains('before', case=False, na=False)].copy()
+        if not before_steps.empty:
+            # Get the first 'before' step for each scenario
+            before_steps.loc[:, 'Time Stamp'] = pd.to_datetime(before_steps['Time Stamp'], format='%H:%M:%S', errors='coerce')
+            start_times = before_steps.groupby('Scenario Name').agg({'Time Stamp': 'first'}).reset_index()
+            # Store the timestamps in a variable for efficient reuse
+            result_df = result_df.merge(start_times, on='Scenario Name', how='left')
+            result_df.rename(columns={'Time Stamp': 'Scenario Start Time'}, inplace=True)
+
+            # Convert Scenario Start Time to datetime if it's not already
+            result_df['Scenario Start Time'] = pd.to_datetime(result_df['Scenario Start Time'], errors='coerce')
+
+            # Combine the date from the filename with the time stamp
+            result_df['Start datetime'] = pd.to_datetime(
+                result_df['Scenario Start Time'].dt.strftime('%H:%M:%S') + ' ' + file_date.strftime('%Y-%m-%d'),
+                errors='coerce'
+            )
+
+        return result_df
+
+    except Exception as e:
+        st.error(f"Error processing Excel file: {str(e)}")
+        # Log more detailed error information
+        import traceback
+        st.error(f"Detailed error: {traceback.format_exc()}")
+        # Return empty DataFrame with expected columns to avoid further errors
+        return pd.DataFrame(columns=[
+            'Functional area', 'Scenario Name', 'Status', 'Time spent',
+            'Time spent(m:s)', 'Environment', 'Start datetime', 'End datetime'
+        ])
 
 def fill_missing_data(data, column_index, value):
     data.iloc[:, column_index] = data.iloc[:, column_index].fillna(value)
@@ -187,30 +192,53 @@ def to_camel_case(s):
 
 # Define the function to preprocess a file (CSV or XLSX)
 def preprocess_uploaded_file(uploaded_file):
-    # Commenting out the spinner to disable it
-    # with st.spinner(f'Processing {uploaded_file.name}...'):
-    # Determine file type based on extension
-    if uploaded_file.name.lower().endswith('.xlsx'):
-        data = preprocess_xlsx(uploaded_file)
-    else:
-        # Original CSV processing
-        file_content = uploaded_file.read()
-        processed_output = preprocess_csv(file_content)
-        processed_file = io.StringIO(processed_output.getvalue())
-        data = load_data(processed_file)
-        data = fill_missing_data(data, 4, 0)
-        data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
-        data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
-        data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
-        data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
-
-    # Extract environment name from filename
-    filename = uploaded_file.name
-    environment = filename.split('_Puppeteer')[0]
+    try:
+        # Determine file type based on extension
+        if uploaded_file.name.lower().endswith('.xlsx'):
+            data = preprocess_xlsx(uploaded_file)
+        else:
+            # Original CSV processing
+            file_content = uploaded_file.read()
+            processed_output = preprocess_csv(file_content)
+            processed_file = io.StringIO(processed_output.getvalue())
+            data = load_data(processed_file)
+            data = fill_missing_data(data, 4, 0)
+            data['Start datetime'] = pd.to_datetime(data['Start datetime'], dayfirst=True, errors='coerce')
+            data['End datetime'] = pd.to_datetime(data['End datetime'], dayfirst=True, errors='coerce')
+            data['Time spent'] = (data['End datetime'] - data['Start datetime']).dt.total_seconds()
+            data['Time spent(m:s)'] = pd.to_datetime(data['Time spent'], unit='s').dt.strftime('%M:%S')
+
+            # Extract environment name from filename
+            filename = uploaded_file.name
+            environment = filename.split('_Puppeteer')[0]
+
+            # Add environment column to the dataframe
+            data['Environment'] = environment
 
-    # Add environment column to the dataframe
-    data['Environment'] = environment
-    return data
+        # Make sure all required columns exist and have proper values
+        if data is not None and not data.empty:
+            # Ensure Time spent is numeric
+            if 'Time spent' in data.columns:
+                data['Time spent'] = pd.to_numeric(data['Time spent'], errors='coerce')
+                data['Time spent'] = data['Time spent'].fillna(0)
+
+            # Replace any NaN string values
+            for col in data.columns:
+                if data[col].dtype == 'object':
+                    data[col] = data[col].replace('NaN', '').replace('nan', '')
+
+        return data
+
+    except Exception as e:
+        st.error(f"Error processing {uploaded_file.name}: {str(e)}")
+        # Provide more detailed error information
+        import traceback
+        st.error(f"Detailed error: {traceback.format_exc()}")
+        # Return empty DataFrame with expected columns to avoid cascading errors
+        return pd.DataFrame(columns=[
+            'Functional area', 'Scenario Name', 'Status', 'Time spent',
+            'Time spent(m:s)', 'Environment', 'Start datetime', 'End datetime'
+        ])
 
 def add_app_description():
     app_title = '<p style="font-family:Roboto, sans-serif; color:#004E7C; font-size: 42px;">DataLink Compare</p>'
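
The date and environment extraction in preprocess_xlsx depend entirely on the report filename convention (e.g. RI2211_batch_20250225_27031.xlsx). A small self-contained sketch of that parsing, assuming the same markers as the diff (the helper name is illustrative, not from the repo):

    import re
    from datetime import datetime

    def parse_report_filename(filename):
        """Split 'RI2211_batch_20250225_27031.xlsx' into (environment, run_date)."""
        date_match = re.search(r'_(\d{8})_', filename)
        run_date = (datetime.strptime(date_match.group(1), '%Y%m%d').date()
                    if date_match else datetime.now().date())
        environment = filename.split('.')[0]  # fallback: whole file stem
        for marker in ['_batch_', '_fin_', '_priority_', '_Puppeteer_']:
            if marker in filename:
                environment = filename.split(marker)[0]
                break
        return environment, run_date

    print(parse_report_filename('RI2211_batch_20250225_27031.xlsx'))
    # ('RI2211', datetime.date(2025, 2, 25))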