Spaces:

qanta-challenge
/

quizbowl-submission

Running

Maharshi Gor committed 7 days ago

Commit

ea7575f

1 Parent(s): b5b12d3

Error handling for Anthropic models

Files changed (4) hide show

shared/workflows CHANGED Viewed

	@@ -1 +1 @@
1	- Subproject commit ~~3b211faf9bd3aa769a00f9ac40f6f377e77517c8~~


1	+ Subproject commit 873b0e6bc80052921fa2061ef9fbcd4e1c4f057e

src/components/quizbowl/bonus.py CHANGED Viewed

@@ -13,12 +13,13 @@ from components.model_pipeline.model_pipeline import PipelineInterface, Pipeline
 from components.typed_dicts import PipelineStateDict
 from display.formatting import styled_error
 from shared.workflows import factory
 from shared.workflows.qb_agents import QuizBowlBonusAgent
 from submission import submit
 from . import populate, validation
 from .plotting import create_bonus_confidence_plot, create_bonus_html
-from .utils import evaluate_prediction
 from .validation import UserInputWorkflowValidator
@@ -252,9 +253,8 @@ class BonusInterface:
                 gr.update(visible=False),
             )
         except Exception as e:
-            import traceback
-            error_msg = f"Error: {str(e)}\n{traceback.format_exc()}"
             return (
                 gr.skip(),
                 gr.skip(),
@@ -304,8 +304,8 @@ class BonusInterface:
                 gr.update(visible=False),
             )
         except Exception as e:
-            error_msg = styled_error(f"Error evaluating bonus: {e.args}")
-            logger.exception(f"Error evaluating bonus: {e.args}")
             return gr.skip(), gr.skip(), gr.update(visible=True, value=error_msg)
     def submit_model(

 from components.typed_dicts import PipelineStateDict
 from display.formatting import styled_error
 from shared.workflows import factory
+from shared.workflows.metrics import evaluate_prediction
 from shared.workflows.qb_agents import QuizBowlBonusAgent
 from submission import submit
 from . import populate, validation
 from .plotting import create_bonus_confidence_plot, create_bonus_html
+from .utils import create_error_message
 from .validation import UserInputWorkflowValidator
                 gr.update(visible=False),
             )
         except Exception as e:
+            error_msg = styled_error(create_error_message(e))
+            logger.exception(f"Error running tossup: {e}")
             return (
                 gr.skip(),
                 gr.skip(),
                 gr.update(visible=False),
             )
         except Exception as e:
+            error_msg = styled_error(create_error_message(e))
+            logger.exception(f"Error evaluating tossups: {e}")
             return gr.skip(), gr.skip(), gr.update(visible=True, value=error_msg)
     def submit_model(

src/components/quizbowl/tossup.py CHANGED Viewed

@@ -13,6 +13,7 @@ from components.model_pipeline.tossup_pipeline import TossupPipelineInterface, T
 from components.typed_dicts import TossupInterfaceDefaults, TossupPipelineStateDict
 from display.formatting import styled_error
 from shared.workflows import factory
 from shared.workflows.qb_agents import QuizBowlTossupAgent, TossupResult
 from submission import submit
@@ -24,7 +25,7 @@ from .plotting import (
     create_tossup_html,
     prepare_tossup_results_df,
 )
-from .utils import evaluate_prediction
 from .validation import UserInputWorkflowValidator
 # TODO: Error handling on run tossup and evaluate tossup and show correct messages
@@ -313,9 +314,8 @@ class TossupInterface:
                 gr.update(visible=False),
             )
         except Exception as e:
-            import traceback
-            error_msg = styled_error(f"Error: {str(e)}\n{traceback.format_exc()}")
             return (
                 gr.skip(),
                 gr.skip(),
@@ -346,14 +346,13 @@ class TossupInterface:
                 gr.update(visible=False),
             )
         except Exception as e:
-            import traceback
-            logger.exception(f"Error evaluating tossups: {e.args}")
             return (
                 gr.skip(),
                 gr.update(visible=False),
                 gr.update(visible=False),
-                gr.update(visible=True, value=styled_error(f"Error: {str(e)}")),
             )
     def submit_model(

 from components.typed_dicts import TossupInterfaceDefaults, TossupPipelineStateDict
 from display.formatting import styled_error
 from shared.workflows import factory
+from shared.workflows.metrics import evaluate_prediction
 from shared.workflows.qb_agents import QuizBowlTossupAgent, TossupResult
 from submission import submit
     create_tossup_html,
     prepare_tossup_results_df,
 )
+from .utils import create_error_message
 from .validation import UserInputWorkflowValidator
 # TODO: Error handling on run tossup and evaluate tossup and show correct messages
                 gr.update(visible=False),
             )
         except Exception as e:
+            error_msg = styled_error(create_error_message(e))
+            logger.exception(f"Error running tossup: {e}")
             return (
                 gr.skip(),
                 gr.skip(),
                 gr.update(visible=False),
             )
         except Exception as e:
+            error_msg = styled_error(create_error_message(e))
+            logger.exception(f"Error evaluating tossups: {e}")
             return (
                 gr.skip(),
                 gr.update(visible=False),
                 gr.update(visible=False),
+                gr.update(visible=True, value=error_msg),
             )
     def submit_model(

src/components/quizbowl/utils.py CHANGED Viewed

@@ -2,20 +2,19 @@ from typing import Any, Dict, List
 import pandas as pd
-def evaluate_prediction(prediction: str, clean_answers: list[str] | str) -> int:
-    """Evaluate the buzz of a prediction against the clean answers."""
-    if isinstance(clean_answers, str):
-        print("clean_answers is a string")
-        clean_answers = [clean_answers]
-    pred = prediction.lower().strip()
-    if not pred:
-        return 0
-    for answer in clean_answers:
-        answer = answer.strip().lower()
-        if answer and answer in pred:
-            return 1
-    return 0
 def _create_confidence_plot_data(results: List[Dict], top_k_mode: bool = False) -> pd.DataFrame:

 import pandas as pd
+from shared.workflows.errors import ProviderAPIError, WorkflowExecutionError
+def create_error_message(e: Exception) -> str:
+    """Create an error message for a given exception."""
+    if isinstance(e, ProviderAPIError):
+        return f"Our {e.provider} models are currently experiencing issues. Please try again later. \n\nIf the problem persists, please contact support."
+    elif isinstance(e, WorkflowExecutionError):
+        return f"Workflow execution failed: {e}. Please try again later. \n\nIf the problem persists, please contact support."
+    elif isinstance(e, ValueError):
+        return f"Invalid input -- {e}. Please try again. \n\nIf the problem persists, please contact support."
+    else:
+        return "An unexpected error occurred. Please contact support."
 def _create_confidence_plot_data(results: List[Dict], top_k_mode: bool = False) -> pd.DataFrame: