File size: 2,901 Bytes
193db9d
 
 
 
ea7575f
193db9d
ea7575f
 
 
 
 
 
 
 
 
 
 
3b39b49
 
193db9d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
from typing import Any, Dict, List

import pandas as pd

from shared.workflows.errors import ProviderAPIError, WorkflowExecutionError


def create_error_message(e: Exception) -> str:
    """Translate an exception into a message suitable for showing to a user.

    The checks run from most specific to most generic, and the first one
    that matches decides the wording:

    * ``ProviderAPIError`` -- names the affected provider (``e.provider``).
    * ``WorkflowExecutionError`` -- includes the exception text.
    * ``ValueError`` -- reported as invalid input, including the exception text.
    * anything else -- a generic message that reveals no details.

    Args:
        e: The exception to describe.

    Returns:
        The user-facing error message.
    """
    if isinstance(e, ProviderAPIError):
        return f"Our {e.provider} models are currently experiencing issues. Please try again later. \n\nIf the problem persists, please contact support."

    if isinstance(e, WorkflowExecutionError):
        return f"Workflow execution failed: {e}. Please try again later. \n\nIf the problem persists, please contact support."

    if isinstance(e, ValueError):
        return f"Invalid input -- {e}. Please try again. \n\nIf the problem persists, please contact support."

    # Unknown failure: do not leak internal details to the user.
    return "An unexpected error occurred. Please contact support."


def _create_confidence_plot_data(results: List[Dict], top_k_mode: bool = False) -> pd.DataFrame:
    """Build the DataFrame that backs the confidence plot.

    Args:
        results: One dict per position. Outside top-k mode each dict must
            provide the keys ``"position"``, ``"confidence"`` and ``"answer"``.
        top_k_mode: When true, the work is delegated to
            ``_create_top_k_plot_data`` instead.

    Returns:
        A DataFrame with the columns ``position``, ``confidence`` and
        ``answer`` (in that order) and one row per entry of ``results``
        when ``top_k_mode`` is false; otherwise whatever
        ``_create_top_k_plot_data`` returns.

    Raises:
        KeyError: Outside top-k mode, if an entry lacks one of the three keys.
    """
    if top_k_mode:
        # Top-k results carry several guesses per position; handled separately.
        return _create_top_k_plot_data(results)

    column_names = ("position", "confidence", "answer")
    return pd.DataFrame({name: [entry[name] for entry in results] for name in column_names})


def _create_top_k_plot_data(
    results: List[Dict],
    *,
    guesses_per_position: int = 3,
    max_answers: int = 5,
) -> pd.DataFrame:
    """Create long-format plot data that tracks a few answers across positions.

    Candidate answers are taken from the first ``guesses_per_position``
    guesses of every result, de-duplicated in first-seen order and capped at
    ``max_answers``. For each result, one row is emitted per selected answer
    holding the confidence of the first guess with that answer, or ``0`` when
    the result has no such guess.

    Args:
        results: One dict per position. Each must contain ``"position"`` and
            may contain ``"guesses"``, a list of dicts with optional
            ``"answer"`` and ``"confidence"`` keys. (Presumably the guesses
            are ordered best-first -- verify against the producer.)
        guesses_per_position: How many leading guesses of each result are
            considered when collecting candidate answers.
        max_answers: Maximum number of distinct answers to plot.

    Returns:
        A DataFrame with the columns ``position``, ``confidence`` and
        ``answer``. The columns are present even when there are no rows.

    Raises:
        KeyError: If a result lacks the ``"position"`` key.
    """
    # A dict preserves insertion order, so the selected answers (and their
    # order) are stable. Iterating a set of strings is not: its order can
    # change from one interpreter run to the next because of hash
    # randomization, which made the plotted answers vary between runs.
    candidates = {}
    for result in results:
        for guess in result.get("guesses", [])[:guesses_per_position]:
            answer = guess.get("answer")
            if answer:  # skip missing/empty answers
                candidates[answer] = None
    top_answers = list(candidates)[:max_answers]

    rows = []
    for result in results:
        position = result["position"]
        guesses = result.get("guesses", [])
        for answer in top_answers:
            # The first matching guess wins; absent answers plot at 0.
            confidence = next(
                (g.get("confidence", 0) for g in guesses if g.get("answer") == answer),
                0,
            )
            rows.append({"position": position, "confidence": confidence, "answer": answer})

    # Explicit columns keep the schema intact when there are no rows.
    return pd.DataFrame(rows, columns=["position", "confidence", "answer"])


def _create_top_k_dataframe(results: List[Dict]) -> pd.DataFrame:
    """Flatten top-k results into one DataFrame row per guess.

    Args:
        results: One dict per position. Each must contain ``"position"`` and
            may contain ``"guesses"``, a list of dicts with optional
            ``"answer"`` and ``"confidence"`` keys.

    Returns:
        A DataFrame with the columns ``position``, ``answer``, ``confidence``
        and ``rank``. ``rank`` is the 1-based index of the guess within its
        result's ``"guesses"`` list; a missing answer becomes ``""`` and a
        missing confidence becomes ``0``. The columns are present even when
        there are no rows.

    Raises:
        KeyError: If a result lacks the ``"position"`` key.
    """
    rows = []
    for result in results:
        position = result["position"]
        rows.extend(
            {
                "position": position,
                "answer": guess.get("answer", ""),
                "confidence": guess.get("confidence", 0),
                "rank": rank,
            }
            for rank, guess in enumerate(result.get("guesses", []), start=1)
        )

    # Explicit columns keep the schema intact when there are no guesses,
    # so callers can always select columns by name.
    return pd.DataFrame(rows, columns=["position", "answer", "confidence", "rank"])