Maharshi Gor committed
Commit 5f3e7d5 · 1 Parent(s): 5d637a7

Made workflows a submodule

.gitignore CHANGED
@@ -16,8 +16,8 @@ __pycache__/
 *ipynb
 .vscode/
 
-eval-queue/
-eval-results/
-eval-queue-bk/
-eval-results-bk/
+eval-*/
 logs/
+data/
+outputs/
+hf_cache/
.gitmodules ADDED
@@ -0,0 +1,3 @@
+[submodule "shared/workflows"]
+	path = shared/workflows
+	url = https://github.com/qanta-challenge/ai_workflows
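With `shared/workflows` tracked as a submodule, a plain clone or pull no longer fetches its contents. A minimal sketch of the extra checkout step (standard git commands; `<repo-url>` is a placeholder for this repo's clone URL):

```bash
# Fresh clone: fetch the main repo and the shared/workflows submodule together
git clone --recurse-submodules <repo-url>

# Existing checkout: initialize and fetch the submodule after pulling this commit
git submodule update --init shared/workflows
```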
app.py CHANGED
@@ -30,8 +30,8 @@ from envs import (
     RESULTS_REPO,
     SERVER_REFRESH_INTERVAL,
 )
-from workflows import factory
-from workflows.configs import AVAILABLE_MODELS
+from shared.workflows import factory
+from shared.workflows.configs import AVAILABLE_MODELS
 
 
 def restart_space():
check_repos.py DELETED
@@ -1,26 +0,0 @@
-from huggingface_hub import HfApi
-
-from src.envs import LLM_CACHE_REPO, QUEUE_REPO, RESULTS_REPO, TOKEN
-
-
-def check_and_create_dataset_repo(repo_id: str):
-    api = HfApi(token=TOKEN)
-    try:
-        api.repo_info(repo_id=repo_id, repo_type="dataset")
-        print(f"{repo_id} exists")
-    except Exception:
-        print(f"Creating {repo_id}")
-        api.create_repo(repo_id=repo_id, repo_type="dataset", exist_ok=True, private=True)
-
-
-def check_and_create_repos():
-    print("1. QUEUE Repository")
-    check_and_create_dataset_repo(QUEUE_REPO)
-    print("2. RESULTS Repository")
-    check_and_create_dataset_repo(RESULTS_REPO)
-    print("3. LLM Cache Repository")
-    check_and_create_dataset_repo(LLM_CACHE_REPO)
-
-
-if __name__ == "__main__":
-    check_and_create_repos()
shared/__init__.py ADDED
File without changes
shared/workflows ADDED
@@ -0,0 +1 @@
+Subproject commit 9d8bfae31f4db8b25165c950742d13e6c4e80de8
src/components/model_pipeline/model_pipeline.py CHANGED
@@ -12,8 +12,8 @@ from components.model_pipeline.state_manager import (
 from components.model_step.model_step import ModelStepComponent
 from components.structs import ModelStepUIState, PipelineState, PipelineUIState
 from components.utils import make_state
-from workflows.structs import ModelStep, Workflow
-from workflows.validators import WorkflowValidationError, WorkflowValidator
+from shared.workflows.structs import ModelStep, Workflow
+from shared.workflows.validators import WorkflowValidationError, WorkflowValidator
 
 from .state_manager import get_output_panel_state
 
src/components/model_pipeline/state_manager.py CHANGED
@@ -13,8 +13,8 @@ from components import typed_dicts as td
 from components import utils
 from components.structs import ModelStepUIState, PipelineState, PipelineUIState, TossupPipelineState
 from envs import DOCS_REPO_BRANCH, DOCS_REPO_URL
-from workflows.factory import create_new_llm_step
-from workflows.structs import Buzzer, BuzzerMethod, ModelStep, TossupWorkflow, Workflow
+from shared.workflows.factory import create_new_llm_step
+from shared.workflows.structs import Buzzer, BuzzerMethod, ModelStep, TossupWorkflow, Workflow
 
 
 def get_output_panel_state(workflow: Workflow) -> dict:
@@ -113,9 +113,7 @@ class PipelineStateManager:
         pipeline_change = not pipeline_change
         return new_state_dict, pipeline_change
 
-    def move_down(
-        self, state_dict: td.PipelineStateDict, pipeline_change: bool, position: int
-    ) -> td.PipelineStateDict:
+    def move_down(self, state_dict: td.PipelineStateDict, pipeline_change: bool, position: int) -> td.PipelineStateDict:
         """Move a step down in the pipeline."""
         new_state_dict, change = self._move_step(state_dict, position, "down")
         if change:
@@ -189,7 +187,9 @@ class PipelineStateManager:
             help_text = f"Refer to the <a href='{repo_files_url}/pipeline-schema.md' target='_blank'>documentation</a> for the correct pipeline schema."
         else:
             error_type = "Unexpected Error"
-            help_text = f"Please report this issue to us at <a href='{DOCS_REPO_URL}/issues' target='_blank'>GitHub Issues</a>."
+            help_text = (
+                f"Please report this issue to us at <a href='{DOCS_REPO_URL}/issues' target='_blank'>GitHub Issues</a>."
+            )
 
         return error_template.format(error_type=error_type, error_message=str(e), help_text=help_text)
 
src/components/model_pipeline/tossup_pipeline.py CHANGED
@@ -6,15 +6,13 @@ from components import commons
 from components.structs import PipelineUIState, TossupPipelineState
 from components.typed_dicts import TossupPipelineStateDict
 from display.formatting import tiny_styled_warning
-from workflows.structs import Buzzer, TossupWorkflow
+from shared.workflows.structs import Buzzer, TossupWorkflow
 
 from .model_pipeline import PipelineInterface
 from .state_manager import BasePipelineValidator, TossupPipelineStateManager
 
 
-def toggleable_slider(
-    value, minimum, maximum, step, toggle_value=False, label=None, info=None, min_width=200, scale=1
-):
+def toggleable_slider(value, minimum, maximum, step, toggle_value=False, label=None, info=None, min_width=200, scale=1):
     with gr.Column(elem_classes="toggleable", min_width=min_width, scale=scale):
         show_label = label is not None
         checkbox = gr.Checkbox(label=label, value=toggle_value, container=False, info=info, show_label=show_label)
@@ -90,9 +88,7 @@ class TossupPipelineInterface(PipelineInterface):
             ),
         )
 
-    def _render_buzzer_panel(
-        self, buzzer: Buzzer, prob_slider_supported: bool, selected_model_name: str | None = None
-    ):
+    def _render_buzzer_panel(self, buzzer: Buzzer, prob_slider_supported: bool, selected_model_name: str | None = None):
         with gr.Row(elem_classes="control-panel"):
             self.confidence_slider = gr.Slider(
                 minimum=0.0,
src/components/model_step/model_step.py CHANGED
@@ -7,8 +7,8 @@ from gradio.components import FormComponent
 from app_configs import UNSELECTED_VAR_NAME
 from components.model_pipeline.state_manager import ModelStepUIState, PipelineStateManager
 from components.typed_dicts import PipelineStateDict
+from shared.workflows.structs import ModelStep
 from utils import get_full_model_name
-from workflows.structs import ModelStep
 
 from .state_manager import ModelStepStateManager
 from .ui_components import InputRowButtonGroup, OutputRowButtonGroup
src/components/model_step/state_manager.py CHANGED
@@ -6,8 +6,8 @@ from loguru import logger
 from app_configs import UNSELECTED_VAR_NAME
 from components.model_pipeline.state_manager import ModelStepUIState
 from components.utils import DIRECTIONS, move_item
+from shared.workflows.structs import FieldType, ModelStep
 from utils import get_model_and_provider
-from workflows.structs import FieldType, ModelStep
 
 
 class ModelStepStateManager:
src/components/quizbowl/bonus.py CHANGED
@@ -12,9 +12,9 @@ from components import commons
 from components.model_pipeline.model_pipeline import PipelineInterface, PipelineState
 from components.typed_dicts import PipelineStateDict
 from display.formatting import styled_error
+from shared.workflows import factory
+from shared.workflows.qb_agents import QuizBowlBonusAgent
 from submission import submit
-from workflows import factory
-from workflows.qb_agents import QuizBowlBonusAgent
 
 from . import populate, validation
 from .plotting import create_bonus_confidence_plot, create_bonus_html
src/components/quizbowl/tossup.py CHANGED
@@ -12,9 +12,9 @@ from components import commons
 from components.model_pipeline.tossup_pipeline import TossupPipelineInterface, TossupPipelineState
 from components.typed_dicts import TossupInterfaceDefaults, TossupPipelineStateDict
 from display.formatting import styled_error
+from shared.workflows import factory
+from shared.workflows.qb_agents import QuizBowlTossupAgent, TossupResult
 from submission import submit
-from workflows import factory
-from workflows.qb_agents import QuizBowlTossupAgent, TossupResult
 
 from . import populate, validation
 from .plotting import (
src/components/quizbowl/validation.py CHANGED
@@ -3,8 +3,8 @@ from typing import Literal
 from app_configs import AVAILABLE_MODELS, CONFIGS
 from components.structs import PipelineState, TossupPipelineState
 from components.typed_dicts import PipelineStateDict, TossupPipelineStateDict
-from workflows.structs import TossupWorkflow, Workflow
-from workflows.validators import WorkflowValidationError, WorkflowValidator
+from shared.workflows.structs import TossupWorkflow, Workflow
+from shared.workflows.validators import WorkflowValidationError, WorkflowValidator
 
 
 def validate_workflow(
src/components/structs.py CHANGED
@@ -2,7 +2,7 @@ from typing import Any, Literal
 
 from pydantic import BaseModel, Field, model_validator
 
-from workflows.structs import ModelStep, TossupWorkflow, Workflow
+from shared.workflows.structs import ModelStep, TossupWorkflow, Workflow
 
 
 def make_step_id(step_number: int):
src/components/typed_dicts.py CHANGED
@@ -1,6 +1,6 @@
 from typing import Any, Dict, List, Literal, Optional, TypedDict, Union
 
-from workflows.structs import TossupWorkflow, Workflow
+from shared.workflows.structs import TossupWorkflow, Workflow
 
 
 # TypedDicts for workflows/structs.py
src/populate.py CHANGED
@@ -49,7 +49,7 @@ def get_tossups_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
             row["Win Rate w/ Human (Aggressive)"] = metrics["human_win_rate_strict"]
             eval_results.append(row)
         except Exception as e:
-            logger.error(f"Error processing model result: {e}")
+            logger.error(f"Error processing model result '{username}/{model_name}': {e}")
             continue
 
     return pd.DataFrame(eval_results)
@@ -72,7 +72,7 @@ def get_bonuses_leaderboard_df(repo_dir: str, eval_split: str) -> pd.DataFrame:
             }
             eval_results.append(row)
         except Exception as e:
-            logger.error(f"Error processing model result: {e}")
+            logger.error(f"Error processing model result '{username}/{model_name}': {e}")
             continue
 
     return pd.DataFrame(eval_results)
@@ -96,9 +96,7 @@ def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
             all_evals.append(data)
         elif ".md" not in entry:
             # this is a folder
-            sub_entries = [
-                e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")
-            ]
+            sub_entries = [e for e in os.listdir(f"{save_path}/{entry}") if os.path.isfile(e) and not e.startswith(".")]
             for sub_entry in sub_entries:
                 file_path = os.path.join(save_path, entry, sub_entry)
                 with open(file_path) as fp:
src/submission/_submit.py ADDED
@@ -0,0 +1,119 @@
+import json
+import os
+from datetime import datetime, timezone
+
+from src.display.formatting import styled_error, styled_message, styled_warning
+from src.envs import API, EVAL_REQUESTS_PATH, TOKEN, QUEUE_REPO
+from src.submission.check_validity import (
+    already_submitted_models,
+    check_model_card,
+    get_model_size,
+    is_model_on_hub,
+)
+
+REQUESTED_MODELS = None
+USERS_TO_SUBMISSION_DATES = None
+
+def add_new_eval(
+    model: str,
+    base_model: str,
+    revision: str,
+    precision: str,
+    weight_type: str,
+    model_type: str,
+):
+    global REQUESTED_MODELS
+    global USERS_TO_SUBMISSION_DATES
+    if not REQUESTED_MODELS:
+        REQUESTED_MODELS, USERS_TO_SUBMISSION_DATES = already_submitted_models(EVAL_REQUESTS_PATH)
+
+    user_name = ""
+    model_path = model
+    if "/" in model:
+        user_name = model.split("/")[0]
+        model_path = model.split("/")[1]
+
+    precision = precision.split(" ")[0]
+    current_time = datetime.now(timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
+
+    if model_type is None or model_type == "":
+        return styled_error("Please select a model type.")
+
+    # Does the model actually exist?
+    if revision == "":
+        revision = "main"
+
+    # Is the model on the hub?
+    if weight_type in ["Delta", "Adapter"]:
+        base_model_on_hub, error, _ = is_model_on_hub(model_name=base_model, revision=revision, token=TOKEN, test_tokenizer=True)
+        if not base_model_on_hub:
+            return styled_error(f'Base model "{base_model}" {error}')
+
+    if not weight_type == "Adapter":
+        model_on_hub, error, _ = is_model_on_hub(model_name=model, revision=revision, token=TOKEN, test_tokenizer=True)
+        if not model_on_hub:
+            return styled_error(f'Model "{model}" {error}')
+
+    # Is the model info correctly filled?
+    try:
+        model_info = API.model_info(repo_id=model, revision=revision)
+    except Exception:
+        return styled_error("Could not get your model information. Please fill it up properly.")
+
+    model_size = get_model_size(model_info=model_info, precision=precision)
+
+    # Were the model card and license filled?
+    try:
+        license = model_info.cardData["license"]
+    except Exception:
+        return styled_error("Please select a license for your model")
+
+    modelcard_OK, error_msg = check_model_card(model)
+    if not modelcard_OK:
+        return styled_error(error_msg)
+
+    # Seems good, creating the eval
+    print("Adding new eval")
+
+    eval_entry = {
+        "model": model,
+        "base_model": base_model,
+        "revision": revision,
+        "precision": precision,
+        "weight_type": weight_type,
+        "status": "PENDING",
+        "submitted_time": current_time,
+        "model_type": model_type,
+        "likes": model_info.likes,
+        "params": model_size,
+        "license": license,
+        "private": False,
+    }
+
+    # Check for duplicate submission
+    if f"{model}_{revision}_{precision}" in REQUESTED_MODELS:
+        return styled_warning("This model has been already submitted.")
+
+    print("Creating eval file")
+    OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
+    os.makedirs(OUT_DIR, exist_ok=True)
+    out_path = f"{OUT_DIR}/{model_path}_eval_request_False_{precision}_{weight_type}.json"
+
+    with open(out_path, "w") as f:
+        f.write(json.dumps(eval_entry))
+
+    print("Uploading eval file")
+    API.upload_file(
+        path_or_fileobj=out_path,
+        path_in_repo=out_path.split("eval-queue/")[1],
+        repo_id=QUEUE_REPO,
+        repo_type="dataset",
+        commit_message=f"Add {model} to eval queue",
+    )
+
+    # Remove the local file
+    os.remove(out_path)
+
+    return styled_message(
+        "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
+    )
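For orientation, a hypothetical call to `add_new_eval` might look like the sketch below (the argument values are illustrative, not taken from this repo): the function validates the model on the Hub, writes a PENDING request JSON under `EVAL_REQUESTS_PATH/<user>`, uploads it to `QUEUE_REPO`, and returns a styled HTML status string.

```python
# Hypothetical usage sketch of add_new_eval; values are illustrative only.
from src.submission._submit import add_new_eval

message = add_new_eval(
    model="some-org/some-model",        # "<user>/<model>" form is assumed here
    base_model="",                       # only consulted for Delta/Adapter weights
    revision="",                         # empty string falls back to "main"
    precision="float16 (recommended)",   # only text before the first space is kept
    weight_type="Original",
    model_type="fine-tuned",
)
print(message)  # styled success/warning/error HTML
```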
src/submission/check_validity.py ADDED
@@ -0,0 +1,99 @@
+import json
+import os
+import re
+from collections import defaultdict
+from datetime import datetime, timedelta, timezone
+
+import huggingface_hub
+from huggingface_hub import ModelCard
+from huggingface_hub.hf_api import ModelInfo
+from transformers import AutoConfig
+from transformers.models.auto.tokenization_auto import AutoTokenizer
+
+def check_model_card(repo_id: str) -> tuple[bool, str]:
+    """Checks if the model card and license exist and have been filled"""
+    try:
+        card = ModelCard.load(repo_id)
+    except huggingface_hub.utils.EntryNotFoundError:
+        return False, "Please add a model card to your model to explain how you trained/fine-tuned it."
+
+    # Enforce license metadata
+    if card.data.license is None:
+        if not ("license_name" in card.data and "license_link" in card.data):
+            return False, (
+                "License not found. Please add a license to your model card using the `license` metadata or a"
+                " `license_name`/`license_link` pair."
+            )
+
+    # Enforce card content
+    if len(card.text) < 200:
+        return False, "Please add a description to your model card, it is too short."
+
+    return True, ""
+
+def is_model_on_hub(model_name: str, revision: str, token: str = None, trust_remote_code=False, test_tokenizer=False) -> tuple[bool, str]:
+    """Checks if the model model_name is on the hub, and whether it (and its tokenizer) can be loaded with AutoClasses."""
+    try:
+        config = AutoConfig.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+        if test_tokenizer:
+            try:
+                tk = AutoTokenizer.from_pretrained(model_name, revision=revision, trust_remote_code=trust_remote_code, token=token)
+            except ValueError as e:
+                return (
+                    False,
+                    f"uses a tokenizer which is not in a transformers release: {e}",
+                    None
+                )
+            except Exception as e:
+                return (False, "'s tokenizer cannot be loaded. Is your tokenizer class in a stable transformers release, and correctly configured?", None)
+        return True, None, config
+
+    except ValueError:
+        return (
+            False,
+            "needs to be launched with `trust_remote_code=True`. For safety reason, we do not allow these models to be automatically submitted to the leaderboard.",
+            None
+        )
+
+    except Exception as e:
+        return False, "was not found on hub!", None
+
+
+def get_model_size(model_info: ModelInfo, precision: str):
+    """Gets the model size from the configuration, or the model name if the configuration does not contain the information."""
+    try:
+        model_size = round(model_info.safetensors["total"] / 1e9, 3)
+    except (AttributeError, TypeError):
+        return 0  # Unknown model sizes are indicated as 0, see NUMERIC_INTERVALS in app.py
+
+    size_factor = 8 if (precision == "GPTQ" or "gptq" in model_info.modelId.lower()) else 1
+    model_size = size_factor * model_size
+    return model_size
+
+def get_model_arch(model_info: ModelInfo):
+    """Gets the model architecture from the configuration"""
+    return model_info.config.get("architectures", "Unknown")
+
+def already_submitted_models(requested_models_dir: str) -> set[str]:
+    """Gather a list of already submitted models to avoid duplicates"""
+    depth = 1
+    file_names = []
+    users_to_submission_dates = defaultdict(list)
+
+    for root, _, files in os.walk(requested_models_dir):
+        current_depth = root.count(os.sep) - requested_models_dir.count(os.sep)
+        if current_depth == depth:
+            for file in files:
+                if not file.endswith(".json"):
+                    continue
+                with open(os.path.join(root, file), "r") as f:
+                    info = json.load(f)
+                    file_names.append(f"{info['model']}_{info['revision']}_{info['precision']}")
+
+                    # Select organisation
+                    if info["model"].count("/") == 0 or "submitted_time" not in info:
+                        continue
+                    organisation, _ = info["model"].split("/")
+                    users_to_submission_dates[organisation].append(info["submitted_time"])
+
+    return set(file_names), users_to_submission_dates
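A quick sketch of how these helpers compose (hypothetical model name; note that `is_model_on_hub` and `already_submitted_models` actually return a 3-tuple and a `(set, defaultdict)` pair respectively, despite their narrower annotations):

```python
# Hypothetical usage sketch of the validity helpers above.
from src.submission.check_validity import already_submitted_models, check_model_card, is_model_on_hub

on_hub, error, config = is_model_on_hub("some-org/some-model", revision="main", test_tokenizer=True)
if not on_hub:
    print(f"Model some-org/some-model {error}")  # error strings are phrased to follow the model name

card_ok, card_error = check_model_card("some-org/some-model")

# Gather prior submissions to de-duplicate against (directory name assumed)
seen, users_to_dates = already_submitted_models("eval-queue")
```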
src/submission/structs.py CHANGED
@@ -3,7 +3,7 @@ from typing import Dict, List, Literal, Optional
 
 from pydantic import BaseModel, Field
 
-from workflows.structs import TossupWorkflow, Workflow
+from shared.workflows.structs import TossupWorkflow, Workflow
 
 CompetitionType = Literal["tossup", "bonus"]
 SubmissionType = Literal["python_file", "simple_workflow", "complex_workflow"]
src/submission/submit.py CHANGED
@@ -13,8 +13,8 @@ from loguru import logger
 from app_configs import DAILY_SUBMISSION_LIMIT_PER_USER
 from display.formatting import styled_error, styled_message
 from envs import API, EVAL_REQUESTS_PATH, EXAMPLES_PATH, OWNER, QUEUE_REPO
+from shared.workflows.structs import TossupWorkflow, Workflow
 from submission.structs import CompetitionType, Submission, SubmissionStatus
-from workflows.structs import TossupWorkflow, Workflow
 
 
 def get_user_submissions(username: str, competition_type: str, pattern: str = None) -> list[Submission]:
@@ -238,7 +238,7 @@ def load_submission(model_name: str, competition_type: CompetitionType, profile:
 
 if __name__ == "__main__":
     # Example usage
-    from workflows.factory import create_quizbowl_simple_step_initial_setup
+    from shared.workflows.factory import create_quizbowl_simple_step_initial_setup
 
     # Create workflow
     model_step = create_quizbowl_simple_step_initial_setup()
src/workflows/README.md DELETED
@@ -1,129 +0,0 @@
-# Workflows Subpackage
-
-This subpackage provides a framework for defining, validating, and executing workflows composed of interconnected model steps with dependency management.
-
-## Overview
-
-The workflows subpackage enables the creation and execution of workflows where multiple model steps can be combined, with outputs from earlier steps feeding into inputs of later steps. The package handles dependency resolution, execution order, and error handling.
-
-## Components
-
-### `structs.py`
-
-Contains the core data structures used throughout the workflow system:
-
-- `InputField`: Represents an input field with name, description, and variable reference
-- `OutputField`: Represents an output field with name, type, and description
-- `ModelStep`: Represents a single step in a workflow with input fields, output fields, and model details
-- `Workflow`: A collection of ModelSteps with their identifiers
-- `TossupWorkflow`: Specialized workflow for quizbowl tossup questions with buzzing capability
-
-### `configs.py`
-
-Provides configuration settings and constants:
-
-- `AVAILABLE_MODELS`: Supported model configurations from various providers
-- `TYPE_MAP`: Mapping of supported field types to Python types
-- `FUNCTION_MAP`: Built-in transformation functions for input/output processing
-
-### `utils.py`
-
-Provides utility functions for workflow operations:
-
-- `create_dependency_graph`: Builds a dependency graph representing the execution order constraints
-- `topological_sort`: Sorts steps in execution order based on their dependencies
-- `detect_cycles`: Identifies cyclic dependencies in workflow definitions
-
-### `executors.py`
-
-Handles the execution of workflows:
-
-- `execute_model_step`: Executes a single model step with input processing and output collection
-- `execute_simple_workflow`: Handles single-step workflows
-- `execute_multi_step_workflow`: Manages multi-step workflows with dependency resolution
-- `execute_workflow`: Main entry point that routes to appropriate executor based on workflow complexity
-
-### `validators.py`
-
-Provides workflow validation functionality:
-
-- `ValidationErrorType`: Enumeration of possible validation error types
-- `WorkflowValidationError`: Base class for validation errors
-- Validation functions for steps, DAGs, variables, and types
-
-### `errors.py`
-
-Defines custom exceptions for workflow-related errors:
-
-- `WorkflowError`: Base class for workflow errors
-- `CyclicDependencyError`: Raised when detecting cycles in the workflow graph
-- `UnknownVariableError`: Raised when a step requires a variable that's not provided or produced
-
-## Usage Example
-
-```python
-from workflows.structs import InputField, ModelStep, OutputField, Workflow
-
-# Define a workflow with two steps
-step1 = ModelStep(
-    id="step1",
-    model="gpt-4o-mini",
-    provider="OpenAI",
-    call_type="llm",
-    system_prompt="Step1 processing",
-    input_fields=[InputField(name="value", description="Input value", variable="input.value")],
-    output_fields=[OutputField(name="result", description="Processed result", type="str", func="upper")],
-)
-
-step2 = ModelStep(
-    id="step2",
-    model="gpt-4o-mini",
-    provider="OpenAI",
-    call_type="llm",
-    system_prompt="Step2 processing",
-    input_fields=[InputField(name="result", description="Result from step1", variable="step1.result")],
-    output_fields=[OutputField(name="final", description="Final output", type="str", func="lower")],
-)
-
-workflow = Workflow(
-    steps={"step1": step1, "step2": step2},
-    inputs=["input.value"],
-    outputs={"final": "step2.final"}
-)
-
-# Execute the workflow
-from workflows.executors import execute_workflow
-
-result = execute_workflow(
-    workflow=workflow,
-    input_values={"input.value": "Hello, World!"},
-    return_full_content=True,
-    logprob_step="step2"
-)
-
-# Access results
-final_output = result["final_outputs"]["final"]
-intermediate_results = result["intermediate_outputs"]
-step_contents = result["step_contents"]
-logprob = result["logprob"]
-```
-
-## Error Handling
-
-The workflows system provides robust error handling:
-
-- Detects cyclic dependencies in workflow definitions
-- Validates input/output variable references
-- Ensures all required inputs are provided
-- Supports custom validation rules through the validation system
-- Provides detailed error messages for debugging
-
-## Extending the Workflows System
-
-To extend the workflows system:
-
-1. Add new model step types by extending the `ModelStep` class
-2. Create custom field types by extending validation in the execution logic
-3. Implement additional error types in `errors.py` for specialized error handling
-4. Add new transformation functions to `FUNCTION_MAP` in `configs.py`
-5. Create specialized workflow types by extending the `Workflow` class
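The deleted README's usage example still applies against the submodule, modulo the new package root; a minimal sketch of the updated imports (the `structs` path is confirmed by the diffs above, while the `executors` path is assumed to move along with the rest of the package):

```python
# Old: from workflows.structs import ...  /  from workflows.executors import ...
from shared.workflows.structs import InputField, ModelStep, OutputField, Workflow
from shared.workflows.executors import execute_workflow  # assumed post-move path
```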
src/workflows/configs.py DELETED
@@ -1,56 +0,0 @@
-"""
-Configuration settings for the workflows package.
-
-This module contains configuration settings and constants used across the workflows package,
-including model configurations, workflow settings, and other package-wide constants.
-"""
-
-AVAILABLE_MODELS = {
-    "OpenAI/gpt-4o": {
-        "model": "gpt-4o-2024-11-20",
-        "logprobs": True,
-    },
-    "OpenAI/gpt-4o-mini": {
-        "model": "gpt-4o-mini-2024-07-18",
-        "logprobs": True,
-    },
-    "OpenAI/gpt-3.5-turbo": {
-        "model": "gpt-3.5-turbo-0125",
-    },
-    "Anthropic/claude-3-7-sonnet": {
-        "model": "claude-3-7-sonnet-20250219",
-    },
-    "Anthropic/claude-3-5-sonnet": {
-        "model": "claude-3-5-sonnet-20241022",
-    },
-    "Anthropic/claude-3-5-haiku": {
-        "model": "claude-3-5-haiku-20241022",
-    },
-    "Cohere/command-r": {
-        "model": "command-r-08-2024",
-        "logprobs": True,
-    },
-    "Cohere/command-r-plus": {
-        "model": "command-r-plus-08-2024",
-        "logprobs": True,
-    },
-    "Cohere/command-r7b": {
-        "model": "command-r7b-12-2024",
-        "logprobs": False,
-    },
-}
-
-# Function mapping for input/output transformations
-TYPE_MAP = {
-    "str": str,
-    "int": int,
-    "float": float,
-    "bool": bool,
-}
-
-FUNCTION_MAP = {
-    "upper": str.upper,
-    "lower": str.lower,
-    "len": len,
-    "split": str.split,
-}
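These tables were consumed by the executors when coercing output types and applying input transforms (see `get_type` and `create_processed_inputs` in `executors.py` below); a small sketch of the lookup pattern, using the mappings exactly as defined above:

```python
# Lookup pattern mirrored from executors.py: named entries first, eval() fallback.
TYPE_MAP = {"str": str, "int": int, "float": float, "bool": bool}
FUNCTION_MAP = {"upper": str.upper, "lower": str.lower, "len": len, "split": str.split}

field_type = TYPE_MAP.get("str", eval("str"))  # -> <class 'str'>
transform = FUNCTION_MAP.get("upper")          # -> str.upper
print(transform("hello"))                      # HELLO
```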
src/workflows/errors.py DELETED
@@ -1,63 +0,0 @@
-"""
-Custom exceptions for workflow validation and execution errors.
-
-This module defines the exception hierarchy for the workflows package, enabling
-specific error types to be raised and caught during workflow validation and execution.
-Each exception provides detailed error messages to help diagnose and fix issues in
-workflow definitions or execution.
-
-Exception hierarchy:
-- WorkflowError (base class)
-- UnknownVariableError (missing variable reference)
-- CyclicDependencyError (circular dependencies)
-- FunctionNotFoundError (missing function reference)
-"""
-
-
-# Define custom exceptions for workflow errors
-class WorkflowError(Exception):
-    """
-    Base exception class for all workflow-related errors.
-
-    This is the parent class for all workflow-specific exceptions and can be used
-    to catch any error from the workflows package.
-    """
-
-    pass
-
-
-class UnknownVariableError(WorkflowError):
-    """
-    Raised when a workflow step references a variable that doesn't exist.
-
-    This typically occurs when a step's input field references a variable that is neither
-    provided as an external input nor produced as an output by any previous step.
-    """
-
-    def __init__(self, var: str):
-        super().__init__(f"Unknown variable referenced: {var}")
-
-
-class CyclicDependencyError(WorkflowError):
-    """
-    Raised when a cyclic dependency is detected in a workflow.
-
-    A cyclic dependency occurs when there is a circular reference in the workflow graph,
-    such as step A depending on step B, which depends on step A. Such workflows cannot
-    be executed because there's no valid order to process the steps.
-    """
-
-    def __init__(self):
-        super().__init__("Cyclic dependency detected in workflow")
-
-
-class FunctionNotFoundError(WorkflowError):
-    """
-    Raised when a referenced function cannot be found during workflow execution.
-
-    This typically occurs when a step references a function that doesn't exist in
-    the available function registry or namespace.
-    """
-
-    def __init__(self, func_name: str):
-        super().__init__(f"Function not found: {func_name}")
src/workflows/executors.py DELETED
@@ -1,673 +0,0 @@
-"""
-Core workflow execution functionality.
-
-This module handles the execution of defined workflows, including input processing,
-dependency-based execution order, model calling, and output collection. It integrates
-with the litellm library to handle model interactions.
-
-Key components:
-- Utility functions for input/output transformation
-- Input processing and validation
-- Model step execution with support for log probabilities
-- Complete workflow execution with dependency resolution
-- Support for both simple (single-step) and multi-step workflows
-- Structured output collection with intermediate results
-
-The module orchestrates the execution of steps in the correct order based on their
-dependencies and manages the flow of data between steps. It supports:
-- Full content tracking for debugging
-- Log probability calculation for specific steps
-- Flexible input/output transformations
-- Error handling and validation
-"""
-
-from typing import Any, TypedDict
-
-import pydantic
-
-from .configs import FUNCTION_MAP, TYPE_MAP
-from .errors import WorkflowError
-from .llms import completion
-from .structs import InputField, ModelStep, OutputField, Workflow
-from .utils import create_dependency_graph, topological_sort
-
-def get_type(type_str: str) -> type:
-    """
-    Converts a type string to its corresponding Python type.
-
-    This function maps type strings to their actual Python type objects. It first checks
-    the TYPE_MAP dictionary for predefined mappings, and if not found, falls back to
-    evaluating the type string directly.
-
-    Args:
-        type_str (str): A string representation of a type (e.g., "str", "int", "list[str]")
-
-    Returns:
-        type: The corresponding Python type object
-
-    Note:
-        Uses eval() for non-predefined types, which has security implications if used
-        with untrusted input. This is intended for internal use with validated type strings.
-    """
-    return TYPE_MAP.get(type_str, eval(type_str))
-
-
-def create_processed_inputs(model_step: ModelStep, available_vars: dict[str, Any]) -> dict[str, Any]:
-    """
-    Creates processed inputs for a model step.
-
-    This function extracts and processes the required inputs for a model step based on
-    its input field definitions. It retrieves values from the available variables dictionary
-    and applies any specified transformations.
-
-    Args:
-        model_step (ModelStep): The model step for which to create processed inputs.
-        available_vars (dict[str, Any]): Dictionary of variables available for use as inputs.
-            Keys are variable names, values are the variable values.
-
-    Returns:
-        dict[str, Any]: A dictionary of processed inputs ready for use by the model step.
-            Keys are input field names, values are the processed input values.
-
-    Raises:
-        WorkflowError: If a required variable is not found in available_vars,
-            or if a specified transformation function is not available.
-
-    Example:
-        >>> available_vars = {"step1.output": "Hello World"}
-        >>> create_processed_inputs(model_step, available_vars)
-        {"input_field_name": "HELLO WORLD"}  # If upper transformation was specified
-    """
-    processed_inputs: dict[str, Any] = {}
-    for input_field in model_step.input_fields:
-        var = input_field.variable
-        value = available_vars[var]
-        if input_field.func is not None:
-            func = FUNCTION_MAP.get(input_field.func)
-            func = func or eval(input_field.func)
-            value = func(value)
-        processed_inputs[input_field.name] = value
-    return processed_inputs
-
-
-class ModelStepResult(TypedDict):
-    """
-    Result of executing a model step.
-
-    This TypedDict contains the outputs and metadata from executing a single model step,
-    including the processed output values, the full response content, and log probability
-    information when requested.
-
-    Attributes:
-        outputs (dict[str, Any]): A dictionary of processed outputs from the model step,
-            with keys matching the output field names.
-        content (str | None): The full content of the model's response, only populated
-            if return_full_content is True.
-        logprob (float | None): The log probability of the model step output, only populated
-            if logprobs is True.
-    """
-
-    # A dictionary of processed outputs from the model step,
-    # with keys matching the output field names.
-    outputs: dict[str, Any]
-
-    # The full content of the model step.
-    content: str | None
-
-    # The log probability of the model step output if requested.
-    logprob: float | None
-
-
-class WorkflowOutput(TypedDict):
-    """
-    Result of executing a complete workflow.
-
-    This TypedDict contains the outputs and metadata from executing a workflow,
-    including final outputs, intermediate values, step contents, and log probabilities.
-
-    Attributes:
-        final_outputs (dict[str, Any]): The final output values produced by the workflow,
-            with keys matching the names defined in workflow.outputs.
-        intermediate_outputs (dict[str, Any]): All computed values during workflow execution,
-            including both external inputs and outputs from all steps.
-        step_contents (dict[str, Any]): Full response content for each step, keyed by step ID.
-            Only populated if return_full_content is True.
-        logprob (float | None): The log probability of the specified step's output.
-            Only populated if logprob_step is specified.
-    """
-
-    # A dictionary of the workflow's outputs, with keys matching the variables defined in workflow.outputs.
-    final_outputs: dict[str, Any]
-
-    # A dictionary of all computed values during workflow execution, including intermediate results.
-    intermediate_outputs: dict[str, Any]
-
-    # A dictionary of step contents, only populated if return_full_content is True.
-    step_contents: dict[str, Any]
-
-    # The log probability of the workflow output if requested.
-    logprob: float | None
-
-
-# %%
-def execute_model_step(
-    model_step: ModelStep,
-    available_vars: dict[str, Any],
-    return_full_content: bool = False,
-    logprobs: bool = False,
-) -> ModelStepResult:
-    """
-    Executes a model step using the provided available variables.
-
-    This function handles the complete execution of a model step, including:
-    1. Processing inputs using variable references and transformations
-    2. Constructing the appropriate prompt for the model
-    3. Calling the model via litellm with structured output
-    4. Processing and validating the model's response
-    5. Applying any output transformations
-
-    The function supports different providers and model types through the litellm
-    integration, allowing for a consistent interface regardless of the underlying model.
-
-    Args:
-        model_step (ModelStep): The model step to execute, containing model details,
-            input/output specifications, and system prompt.
-        available_vars (dict[str, Any]): A dictionary of all variables available to this step,
-            including outputs from previous steps and external inputs.
-        return_full_content (bool, optional): If True, includes the full model response content
-            in the result. Defaults to False.
-        logprobs (bool, optional): If True, calculates and returns log probability information
-            for the model response. Defaults to False.
-
-    Returns:
-        ModelStepResult: A TypedDict containing processed outputs, optional full content,
-            and optional log probability information.
-
-    Raises:
-        WorkflowError: If there's an error in input processing, model execution,
-            or output validation.
-
-    Example:
-        >>> step = ModelStep(
-        ...     id="summarize",
-        ...     model="gpt-3.5-turbo",
-        ...     provider="openai",
-        ...     call_type="llm",
-        ...     system_prompt="Summarize the text",
-        ...     input_fields=[InputField(name="text", variable="input_text", description="Text to summarize")],
-        ...     output_fields=[OutputField(name="summary", type="str", description="Summary of the text")]
-        ... )
-        >>> result = execute_model_step(step, {"input_text": "Long text to be summarized..."})
-        >>> summary = result["outputs"]["summary"]
-    """
-    # Ensure inputs are processed using the specified functions in input_fields.
-    processed_inputs = create_processed_inputs(model_step, available_vars)
-
-    # Construct the input prompt for the model
-    input_str = "\n".join(f"{k}: {v}" for k, v in processed_inputs.items())
-    step_result = f"Inputs: \n{input_str}"
-
-    # Define the expected output fields and their types
-    fields = {
-        field.name: (get_type(field.type), pydantic.Field(..., description=field.description))
-        for field in model_step.output_fields
-    }
-    ModelResponse = pydantic.create_model("ModelResponse", **fields)
-
-    # Execute the model step using litellm
-    api_response = completion(
-        model=f"{model_step.provider}/{model_step.model}",
-        system=model_step.system_prompt,
-        prompt=step_result,
-        response_format=ModelResponse,
-        temperature=model_step.temperature,
-        logprobs=logprobs,
-    )
-
-    # Map the parsed response to the output fields
-    outputs = {field.name: api_response["output"][field.name] for field in model_step.output_fields}
-    result = ModelStepResult(outputs=outputs, content=None, logprob=None)
-    if return_full_content:
-        result["content"] = api_response["content"]
-    if logprobs:
-        result["logprob"] = api_response.get("logprob")
-    return result
-
-
-def execute_multi_step_workflow(
-    workflow: Workflow,
-    input_values: dict[str, Any],
-    return_full_content: bool = False,
-    logprob_step: str | None = None,
-) -> WorkflowOutput:
-    """
-    Execute the given workflow as a computational graph.
-
-    This function orchestrates the complete execution of a workflow by:
-
-    1. Validating and populating initial values using the provided external inputs
-    2. Building a dependency graph between workflow steps
-    3. Determining a valid execution order using topological sorting
-    4. Executing each step in the correct order, with inputs from previous steps
-    5. Collecting and returning the final outputs
-
-    The execution process ensures that all dependencies are satisfied before a step
-    is executed, and that the data flows correctly between steps according to the
-    variable references defined in each step's input fields.
-
-    Args:
-        workflow (Workflow): The workflow to execute, containing steps, their
-            dependencies, and input/output specifications.
-        input_values (dict[str, Any]): External input values to be used by the workflow.
-            Keys should match the required workflow.inputs.
-        return_full_content (bool, optional): If True, returns the full content of each step.
-            Defaults to False.
-        logprob_step (str, optional): The ID of the step to use for log probability calculation.
-            Defaults to None.
-
-    Returns:
-        WorkflowOutput: A dictionary of workflow outputs, including final outputs, intermediate outputs, and step contents.
-
-    Raises:
-        UnknownVariableError: If an input_field references a variable that is not
-            provided externally nor produced by any step.
-        CyclicDependencyError: If the workflow contains a circular dependency that
-            prevents a valid execution order.
-        FunctionNotFoundError: If a transformation function specified in input_fields.func
-            or output_fields.func is not available.
-        WorkflowError: For any other workflow-related errors, such as missing required inputs.
-
-    Example:
-        >>> workflow = Workflow(
-        ...     steps={
-        ...         "extract": ModelStep(...),  # A step that extracts entities
-        ...         "analyze": ModelStep(...)   # A step that analyzes the entities
-        ...     },
-        ...     inputs=["text"],
-        ...     outputs={"sentiment": "analyze.sentiment", "entities": "extract.entities"}
-        ... )
-        >>> final_outputs, computed_values, step_contents = execute_workflow(workflow, {"text": "Apple is launching a new product tomorrow."})
-        >>> print(final_outputs["sentiment"])
-        "positive"
-        >>> print(final_outputs["entities"])
-        ["Apple", "product"]
-    """
-    # Step 1: Pre-populate computed values with external workflow inputs.
-    computed_values: dict[str, Any] = {}
-    for var in workflow.inputs:
-        if var not in input_values:
-            raise WorkflowError(f"Missing required workflow input: {var}")
-        computed_values[var] = input_values[var]
-
-    # Step 2: Build dependency graph among model steps.
-    # For each step, examine its input_fields. If an input is not in the pre-populated external inputs,
-    # then it is expected to be produced by some step. Otherwise, raise an error.
-    dependencies = create_dependency_graph(workflow, input_values)
-
-    # Step 3: Determine the execution order of the steps using topological sort.
-    # Raises an error if a cycle is detected.
-    execution_order = topological_sort(dependencies)
-
-    # Step 4: Execute steps in topological order.
-    step_contents: dict[str, Any] = {}
-    logprob = None
-    for step_id in execution_order:
-        step = workflow.steps[step_id]
-        return_logprobs = logprob_step == step_id
-        # Execute the step
-        result = execute_model_step(
-            step, computed_values, return_full_content=return_full_content, logprobs=return_logprobs
-        )
-        if return_logprobs:
-            logprob = result["logprob"]
-        if return_full_content:
-            step_contents[step_id] = result["content"]
-        outputs = {f"{step_id}.{k}": v for k, v in result["outputs"].items()}
-        computed_values.update(outputs)
-
-    # Step 5: Gather and return workflow outputs.
-    final_outputs: dict[str, Any] = {}
-    for target, var in workflow.outputs.items():
-        if var not in computed_values:
-            raise WorkflowError(
-                f"Workflow output variable {var} was not produced. Computed values: {computed_values.keys()}"
-            )
-        final_outputs[target] = computed_values[var]
-
-    return WorkflowOutput(
-        final_outputs=final_outputs,
-        intermediate_outputs=computed_values,
-        step_contents=step_contents,
-        logprob=logprob,
-    )
-
-
-def execute_simple_workflow(
-    workflow: Workflow,
-    input_values: dict[str, Any],
-    return_full_content: bool = False,
-    logprob_step: bool | str = False,
-) -> WorkflowOutput:
-    """
-    Execute a simple workflow with a single step.
-
-    This is an optimized version of workflow execution for workflows containing only one step.
-    It bypasses the dependency graph building and topological sorting steps, providing a more
-    direct execution path for simple workflows.
-
-    Args:
-        workflow (Workflow): The workflow to execute, which must contain exactly one step.
-        input_values (dict[str, Any]): External input values to be used by the workflow.
-            Keys should match the required workflow.inputs.
-        return_full_content (bool, optional): If True, includes the full model response content
-            in the result. Defaults to False.
-        logprobs (bool, optional): If True, calculates and returns log probability information
-            for the model response. Defaults to False.
-
-    Returns:
-        WorkflowOutput: A TypedDict containing the workflow outputs, intermediate values,
-            optional step contents, and optional log probability information.
-
-    Raises:
-        WorkflowError: If the workflow has more than one step or if required inputs are missing.
-
-    Example:
-        >>> workflow = Workflow(
-        ...     steps={"extract": ModelStep(...)},
-        ...     inputs=["text"],
-        ...     outputs={"entities": "extract.entities"}
-        ... )
-        >>> result = execute_simple_workflow(workflow, {"text": "Apple is launching a new product."})
-        >>> entities = result["final_outputs"]["entities"]
-    """
-    if len(workflow.steps) != 1:
-        raise WorkflowError("Simple workflow must have exactly one step")
-
-    # Get the single step
-    step = list(workflow.steps.values())[0]
-
-    logprobs = logprob_step is True or logprob_step == step.id
-
-    # Validate inputs
-    for var in workflow.inputs:
-        if var not in input_values:
-            raise WorkflowError(f"Missing required workflow input: {var}")
-
-    # Execute the step
-    step_result = execute_model_step(step, input_values, return_full_content=return_full_content, logprobs=logprobs)
-    step_outputs = step_result["outputs"]
-    step_contents = {step.id: step_result["content"]} if return_full_content else {}
-    # Prepare the final outputs
-    final_outputs = {}
-    for target, var in workflow.outputs.items():
-        if var.startswith(f"{step.id}."):
-            output_key = var.split(".", 1)[1]
-            if output_key in step_outputs:
-                final_outputs[target] = step_outputs[output_key]
-            else:
-                raise WorkflowError(f"Workflow output variable {var} was not produced")
-        else:
-            raise WorkflowError(f"Invalid output mapping: {var} does not match step ID {step.id}")
-
-    # Prepare computed values (prefixed with step ID)
-    computed_values = input_values | {f"{step.id}.{k}": v for k, v in step_outputs.items()}
-
-    return WorkflowOutput(
-        final_outputs=final_outputs,
-        intermediate_outputs=computed_values,
-        step_contents=step_contents,
-        logprob=step_result.get("logprob"),
-    )
-
-
-def execute_workflow(
-    workflow: Workflow,
-    input_values: dict[str, Any],
-    return_full_content: bool = False,
-    logprob_step: str | bool = False,
-) -> WorkflowOutput:
-    """
-    Main entry point for executing workflows of any complexity.
-
-    This function serves as a router that delegates to the appropriate specialized
-    execution function based on the complexity of the workflow:
-    - For single-step workflows, it calls execute_simple_workflow
-    - For multi-step workflows, it calls execute_multi_step_workflow
-
-    This abstraction allows callers to use a consistent interface regardless of
-    the workflow's complexity.
-
-    Args:
-        workflow (Workflow): The workflow to execute, containing steps, their
-            dependencies, and input/output specifications.
-        input_values (dict[str, Any]): External input values to be used by the workflow.
-            Keys should match the required workflow.inputs.
-        return_full_content (bool, optional): If True, includes the full model response
-            content in the result. Defaults to False.
-        logprob_step (str | bool, optional): Either a string with the ID of the step for which
-            to calculate log probability, or a boolean flag.
-            If False, no log probabilities are calculated.
-            Defaults to False.
-
-    Returns:
-        WorkflowOutput: A TypedDict containing the workflow outputs, intermediate values,
-            optional step contents, and optional log probability information.
-
-    Raises:
-        WorkflowError: For any workflow-related errors, such as missing required inputs,
-            circular dependencies, or invalid variable references.
-
-    Example:
-        >>> workflow = Workflow(
-        ...     steps={"extract": ModelStep(...), "analyze": ModelStep(...)},
-        ...     inputs=["text"],
-        ...     outputs={"sentiment": "analyze.sentiment"}
-        ... )
-        >>> result = execute_workflow(
-        ...     workflow,
-        ...     {"text": "Apple is launching a new product."},
-        ...     return_full_content=True,
-        ...     logprob_step="analyze"
-        ... )
-        >>> print(result["final_outputs"]["sentiment"])
-        "positive"
-    """
-    if len(workflow.steps) > 1:
-        return execute_multi_step_workflow(workflow, input_values, return_full_content, logprob_step)
-    else:
-        return execute_simple_workflow(workflow, input_values, return_full_content, logprob_step)
-
-
-def run_examples():
-    """
-    Runs example workflows demonstrating key functionality and error handling.
-
-    This function creates and executes three different example workflows to showcase:
-
-    1. Successful workflow execution:
-       - A linear two-step workflow with proper dependency flow
-       - Input transformation using the 'upper' function
-       - Output transformation using the 'lower' function
-       - Proper variable passing between steps
-
-    2. Cyclic dependency detection:
-       - A workflow with two steps that depend on each other circularly
-       - Demonstrates the error handling for cyclic dependencies
-       - Shows how the system prevents infinite execution loops
-
-    3. Unknown variable detection:
-       - A workflow that references a variable not provided as input or by any step
-       - Demonstrates validation of variable references
-       - Shows error handling for missing dependencies
-
-    Each example prints its result or the error encountered, making this function
-    useful for testing and demonstration purposes.
-
-    Returns:
-        None: This function prints its results and doesn't return a value.
-    """
-    print("Example 1: Successful Workflow Execution")
-    # Example 1: Simple linear workflow.
-    # External input "input.value" is provided. Two steps:
-    # - step1 takes "input.value" and produces "step1.result".
-    # - step2 uses "step1.result" and produces "step2.final".
-    from workflows.structs import ModelStep, Workflow
-
-    workflow_success = Workflow(
-        steps={
-            "step1": ModelStep(
-                id="step1",
-                model="gpt-4o-mini",
-                provider="OpenAI",
-                call_type="llm",
-                system_prompt="Step1 processing",
-                input_fields=[InputField(name="value", description="Input value", variable="input.value")],
-                output_fields=[OutputField(name="result", description="Processed result", type="str", func="upper")],
-            ),
-            "step2": ModelStep(
-                id="step2",
-                model="gpt-4o-mini",
-                provider="OpenAI",
-                call_type="llm",
-                system_prompt="Step2 processing",
-                input_fields=[InputField(name="result", description="Result from step1", variable="step1.result")],
-                output_fields=[OutputField(name="final", description="Final output", type="str", func="lower")],
-            ),
-        },
-        inputs=["input.value"],
-        outputs={"final": "step2.final"},
-    )
-    input_values_success = {"input.value": "Hello, World!"}
-    try:
-        outputs = execute_workflow(workflow_success, input_values_success)
-        print("Workflow outputs:", outputs)
-    except WorkflowError as e:
-        print("Workflow failed with error:", e)
-
-    print("\nExample 2: Cyclic Dependency Workflow")
-    # Example 2: Cyclic dependency.
-    # stepA depends on an output from stepB and vice versa.
-    workflow_cycle = Workflow(
-        steps={
-            "stepA": ModelStep(
-                id="stepA",
-                model="gpt-4o-mini",
-                provider="OpenAI",
-                call_type="llm",
-                system_prompt="StepA processing",
-                input_fields=[
-                    InputField(name="input", description="Input from stepB", variable="stepB.output", func="identity")
-                ],
-                output_fields=[OutputField(name="output", description="Output from A", type="str", func="upper")],
-            ),
-            "stepB": ModelStep(
-                id="stepB",
-                model="gpt-4o-mini",
-                provider="OpenAI",
-                call_type="llm",
-                system_prompt="StepB processing",
-                input_fields=[
-                    InputField(name="input", description="Input from stepA", variable="stepA.output", func="identity")
-                ],
-                output_fields=[OutputField(name="output", description="Output from B", type="str", func="upper")],
-            ),
-        },
-        inputs=[],  # no external inputs
-        outputs={"output": "stepB.output"},
-    )
-    try:
-        outputs = execute_workflow(workflow_cycle, {})
-        print("Workflow outputs:", outputs)
-    except WorkflowError as e:
-        print("Workflow failed with error:", e)
-
-    print("\nExample 3: Unknown Variable Dependency Workflow")
-    # Example 3: A workflow that references a variable not provided as an input or produced by any step.
-    workflow_unknown = Workflow(
-        steps={
-            "stepX": ModelStep(
-                id="stepX",
-                model="gpt-4o-mini",
-                provider="OpenAI",
-                call_type="llm",
-                system_prompt="StepX processing",
- input_fields=[
596
- InputField(
597
- name="input", description="Non-existent input", variable="nonexistent.value", func="identity"
598
- )
599
- ],
600
- output_fields=[OutputField(name="output", description="Output from X", type="str", func="upper")],
601
- )
602
- },
603
- inputs=[], # no external inputs
604
- outputs={"output": "stepX.output"},
605
- )
606
- try:
607
- outputs = execute_workflow(workflow_unknown, {})
608
- print("Workflow outputs:", outputs)
609
- except WorkflowError as e:
610
- print("Workflow failed with error:", e)
611
-
612
-
613
- if __name__ == "__main__":
614
- # create example of model_step
615
- model_step = ModelStep(
616
- id="step1",
617
- model="gpt-4o-mini",
618
- provider="OpenAI",
619
- call_type="llm",
620
- system_prompt="You are a simple NLP tool that takes a string, and a number N, and return the first N entities in the string, and the total count of entities in the string.",
621
- input_fields=[
622
- InputField(name="sentence", description="The sentence to process", variable="sentence", func=None),
623
- InputField(name="n", description="The number of entities to return", variable="n", func=None),
624
- ],
625
- output_fields=[
626
- OutputField(
627
- name="entities",
628
- description="The first N entities in the string as a list of strings",
629
- type="list[str]",
630
- func=None,
631
- ),
632
- OutputField(name="count", description="The total count of entities in the string", type="int", func=None),
633
- ],
634
- )
635
-
636
- processed_inputs = {"sentence": "Abdul Akbar is a good person, but Jesus is the son of God.", "n": 3}
637
- processed_inputs = create_processed_inputs(model_step, processed_inputs)
638
- print(processed_inputs)
639
-
640
- run_examples()
641
-
642
- # %%
643
-
644
- # Example usage
645
- if __name__ == "__main__":
646
- # Define a simple model step
647
- model_step = ModelStep(
648
- id="step1",
649
- model="gpt-4o-mini",
650
- provider="OpenAI",
651
- call_type="llm",
652
- system_prompt="You are a simple NLP tool that takes a string, and a number N, and return the first N entities in the string, and the total count of entities in the string.",
653
- input_fields=[
654
- InputField(name="sentence", description="The sentence to process", variable="sentence", func=None),
655
- InputField(name="n", description="The number of entities to return", variable="n", func=None),
656
- ],
657
- output_fields=[
658
- OutputField(
659
- name="entities",
660
- description="The first N entities in the string as a list of strings",
661
- type="list[str]",
662
- func=None,
663
- ),
664
- OutputField(name="count", description="The total count of entities in the string", type="int", func=None),
665
- ],
666
- )
667
-
668
- # Define processed inputs
669
- processed_inputs = {"sentence": "Abdul Akbar is a good person, but Jesus is the son of God.", "n": 3}
670
-
671
- # Execute the model step
672
- outputs = execute_model_step(model_step, processed_inputs)
673
- print(outputs)
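
For quick reference, a minimal sketch of calling this executor after the move (it assumes the module is now importable as shared.workflows.executors with an unchanged API; the step, model, and input names are illustrative):

    # Hypothetical usage sketch; assumes the executors API is unchanged
    # after the move into the shared/workflows submodule.
    from shared.workflows.executors import execute_workflow
    from shared.workflows.structs import InputField, ModelStep, OutputField, Workflow

    workflow = Workflow(
        steps={
            "A": ModelStep(
                id="A",
                name="Summarizer",
                model="gpt-4o-mini",
                provider="OpenAI",
                call_type="llm",
                system_prompt="Summarize the given text in one sentence.",
                input_fields=[InputField(name="text", description="Text to summarize", variable="text")],
                output_fields=[OutputField(name="summary", description="One-sentence summary", type="str")],
            )
        },
        inputs=["text"],
        outputs={"summary": "A.summary"},
    )
    # Single-step workflows are routed to execute_simple_workflow internally.
    result = execute_workflow(workflow, {"text": "Quizbowl is a buzzer-based trivia game."})
    print(result["final_outputs"]["summary"])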
 
src/workflows/factory.py DELETED
@@ -1,176 +0,0 @@
- # %%
- from .structs import (
-     Buzzer,
-     BuzzerMethod,
-     CallType,
-     InputField,
-     ModelStep,
-     OutputField,
-     TossupWorkflow,
-     Workflow,
- )
- 
- INITIAL_SYS_PROMPT = """You are a helpful performant question answering bot.
- Given a question clue, output your most likely guess in a couple words with a calibrated confidence for the guess.
- """
- 
- 
- def create_empty_bonus_workflow():
-     return Workflow(
-         inputs=["leadin", "part"],
-         outputs={"answer": None, "confidence": None, "explanation": None},
-         steps={},
-     )
- 
- 
- def create_empty_tossup_workflow():
-     return TossupWorkflow(
-         inputs=["question_text"],
-         outputs={"answer": None, "confidence": None},
-         steps={},
-     )
- 
- 
- def create_first_step_input_fields() -> list[InputField]:
-     return [
-         InputField(
-             name="question",
-             description="The question text progressively revealed to the agent so far.",
-             variable="question_text",
-         )
-     ]
- 
- 
- def create_empty_input_field() -> list[InputField]:
-     return [InputField(name="", description="", variable="question_text")]
- 
- 
- def create_quizbowl_simple_step_initial_setup():
-     return ModelStep(
-         id="simple_step",
-         name="Quizbowl Simple Step",
-         model="",
-         provider="",
-         temperature=0.7,
-         call_type="llm",
-         system_prompt=INITIAL_SYS_PROMPT,
-         input_fields=[
-             InputField(name="question", description="The question to answer", variable="question"),
-         ],
-         output_fields=[
-             OutputField(name="answer", description="The most likely answer", type="str"),
-             OutputField(name="confidence", description="The confidence of the answer", type="float"),
-         ],
-     )
- 
- 
- def create_new_llm_step(step_id: str, name: str) -> ModelStep:
-     return ModelStep(
-         id=step_id,
-         name=name,
-         model="gpt-4o",
-         provider="OpenAI",
-         call_type="llm",
-         temperature=0.7,
-         system_prompt="",
-         input_fields=create_empty_input_field(),
-         output_fields=[OutputField(name="", description="")],
-     )
- 
- 
- def create_first_llm_step() -> ModelStep:
-     return ModelStep(
-         id="A",
-         name="",
-         model="gpt-4o",
-         provider="OpenAI",
-         call_type="llm",
-         temperature=0.7,
-         system_prompt="",
-         input_fields=create_first_step_input_fields(),  # already returns a list of InputField
-         output_fields=[OutputField(name="", description="")],
-     )
- 
- 
- def create_simple_qb_tossup_workflow():
-     return TossupWorkflow(
-         inputs=["question_text"],
-         outputs={"answer": "A.answer", "confidence": "A.confidence"},
-         steps={
-             "A": ModelStep(
-                 id="A",
-                 name="Tossup Agent",
-                 model="gpt-4o-mini",
-                 provider="OpenAI",
-                 call_type="llm",
-                 temperature=0.3,
-                 system_prompt="You are a helpful assistant that can answer questions.",
-                 input_fields=[InputField(name="question", description="The question text", variable="question_text")],
-                 output_fields=[
-                     OutputField(
-                         name="answer",
-                         description="The best guess at the answer to the question",
-                         type="str",
-                     ),
-                     OutputField(
-                         name="confidence",
-                         description="The confidence in the answer, ranging from 0 to 1 in increments of 0.05.",
-                         type="float",
-                     ),
-                 ],
-             )
-         },
-         buzzer=Buzzer(
-             confidence_threshold=0.75,
-             prob_threshold=None,
-             method=BuzzerMethod.AND,
-         ),
-     )
- 
- 
- BONUS_SYS_PROMPT = """You are a quizbowl player answering bonus questions. For each part:
- 1. Read the leadin and part carefully
- 2. Provide a concise answer
- 3. Rate your confidence (0-1)
- 4. Explain your reasoning
- 
- Format your response as:
- ANSWER: <your answer>
- CONFIDENCE: <0-1>
- EXPLANATION: <your reasoning>"""
- 
- 
- def create_simple_qb_bonus_workflow() -> Workflow:
-     """Create a simple model step for bonus questions."""
-     return Workflow(
-         inputs=["leadin", "part"],
-         outputs={"answer": "A.answer", "confidence": "A.confidence", "explanation": "A.explanation"},
-         steps={
-             "A": ModelStep(
-                 id="A",
-                 name="Bonus Agent",
-                 model="gpt-4o-mini",
-                 provider="OpenAI",
-                 temperature=0.3,
-                 call_type=CallType.LLM,
-                 system_prompt=BONUS_SYS_PROMPT,
-                 input_fields=[
-                     InputField(
-                         name="question_leadin",
-                         description="The leadin text for the bonus question",
-                         variable="leadin",
-                     ),
-                     InputField(
-                         name="question_part",
-                         description="The specific part text to answer",
-                         variable="part",
-                     ),
-                 ],
-                 output_fields=[
-                     OutputField(name="answer", description="The predicted answer", type="str"),
-                     OutputField(name="confidence", description="Confidence in the answer (0-1)", type="float"),
-                     OutputField(name="explanation", description="Short explanation for the answer", type="str"),
-                 ],
-             )
-         },
-     )
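
A minimal sketch of how these factory helpers plug into the tossup agent (the function names are the ones defined above; the import path assumes the post-move shared.workflows layout):

    # Hypothetical usage sketch; the import path is an assumption.
    from shared.workflows.factory import create_simple_qb_tossup_workflow

    tossup_workflow = create_simple_qb_tossup_workflow()
    # The attached Buzzer gates buzzing on the step's confidence output.
    print(tossup_workflow.buzzer.confidence_threshold)  # 0.75
    print(tossup_workflow.outputs)  # {'answer': 'A.answer', 'confidence': 'A.confidence'}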
 
src/workflows/llmcache.py DELETED
@@ -1,488 +0,0 @@
- import hashlib
- import json
- import os
- import sqlite3
- import threading
- import time
- from pathlib import Path
- from typing import Any, Optional
- 
- from datasets import Dataset, load_dataset, load_from_disk
- from huggingface_hub import snapshot_download
- from loguru import logger
- 
- 
- def load_dataset_from_hf(repo_id, local_dir):
-     snapshot_download(
-         repo_id=repo_id,
-         local_dir=local_dir,
-         repo_type="dataset",
-         tqdm_class=None,
-         etag_timeout=30,
-         token=os.environ["HF_TOKEN"],
-     )
-     return load_dataset(repo_id)
- 
- 
- class CacheDB:
-     """Handles database operations for storing and retrieving cache entries."""
- 
-     def __init__(self, db_path: Path):
-         """Initialize database connection.
- 
-         Args:
-             db_path: Path to SQLite database file
-         """
-         self.db_path = db_path
-         self.lock = threading.Lock()
- 
-         # Initialize the database
-         try:
-             self.initialize_db()
-         except Exception as e:
-             logger.exception(f"Failed to initialize database: {e}")
-             logger.warning(f"Please provide a different filepath or remove the file at {self.db_path}")
-             raise
- 
-     def initialize_db(self) -> None:
-         """Initialize SQLite database with the required table."""
-         # Check if database file already exists
-         if self.db_path.exists():
-             self._verify_existing_db()
-         else:
-             self._create_new_db()
- 
-     def _verify_existing_db(self) -> None:
-         """Verify and repair an existing database if needed."""
-         try:
-             with sqlite3.connect(self.db_path) as conn:
-                 cursor = conn.cursor()
-                 self._ensure_table_exists(cursor)
-                 self._verify_schema(cursor)
-                 self._ensure_index_exists(cursor)
-                 conn.commit()
-             logger.info(f"Using existing SQLite database at {self.db_path}")
-         except Exception as e:
-             logger.exception(f"Database corruption detected: {e}")
-             raise ValueError(f"Corrupted database at {self.db_path}: {str(e)}")
- 
-     def _create_new_db(self) -> None:
-         """Create a new database with the required schema."""
-         try:
-             with sqlite3.connect(self.db_path) as conn:
-                 cursor = conn.cursor()
-                 self._create_table(cursor)
-                 self._ensure_index_exists(cursor)
-                 conn.commit()
-             logger.info(f"Initialized new SQLite database at {self.db_path}")
-         except Exception as e:
-             logger.exception(f"Failed to initialize SQLite database: {e}")
-             raise
- 
-     def _ensure_table_exists(self, cursor) -> None:
-         """Check if the llm_cache table exists and create it if not."""
-         cursor.execute("SELECT name FROM sqlite_master WHERE type='table' AND name='llm_cache'")
-         if not cursor.fetchone():
-             self._create_table(cursor)
-             logger.info("Created missing llm_cache table")
- 
-     def _create_table(self, cursor) -> None:
-         """Create the llm_cache table with the required schema."""
-         cursor.execute("""
-             CREATE TABLE IF NOT EXISTS llm_cache (
-                 key TEXT PRIMARY KEY,
-                 request_json TEXT,
-                 response_json TEXT,
-                 created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
-             )
-         """)
- 
-     def _verify_schema(self, cursor) -> None:
-         """Verify that the table schema has all required columns."""
-         cursor.execute("PRAGMA table_info(llm_cache)")
-         columns = {row[1] for row in cursor.fetchall()}
-         required_columns = {"key", "request_json", "response_json", "created_at"}
- 
-         if not required_columns.issubset(columns):
-             missing = required_columns - columns
-             raise ValueError(f"Database schema is corrupted. Missing columns: {missing}")
- 
-     def _ensure_index_exists(self, cursor) -> None:
-         """Create an index on the key column for faster lookups."""
-         cursor.execute("CREATE INDEX IF NOT EXISTS idx_llm_cache_key ON llm_cache (key)")
- 
-     def get(self, key: str) -> Optional[dict[str, Any]]:
-         """Get cached entry by key.
- 
-         Args:
-             key: Cache key to look up
- 
-         Returns:
-             Dict containing the request and response or None if not found
-         """
-         try:
-             with sqlite3.connect(self.db_path) as conn:
-                 conn.row_factory = sqlite3.Row
-                 cursor = conn.cursor()
-                 cursor.execute("SELECT request_json, response_json FROM llm_cache WHERE key = ?", (key,))
-                 result = cursor.fetchone()
- 
-                 if result:
-                     logger.debug(f"Cache hit for key: {key}. Response: {result['response_json']}")
-                     return {
-                         "request": result["request_json"],
-                         "response": result["response_json"],
-                     }
- 
-                 logger.debug(f"Cache miss for key: {key}")
-                 return None
-         except Exception as e:
-             logger.error(f"Error retrieving from cache: {e}")
-             return None
- 
-     def set(self, key: str, request_json: str, response_json: str) -> bool:
-         """Set entry in cache.
- 
-         Args:
-             key: Cache key
-             request_json: JSON string of request parameters
-             response_json: JSON string of response
- 
-         Returns:
-             True if successful, False otherwise
-         """
-         with self.lock:
-             try:
-                 with sqlite3.connect(self.db_path) as conn:
-                     cursor = conn.cursor()
-                     cursor.execute(
-                         "INSERT OR REPLACE INTO llm_cache (key, request_json, response_json) VALUES (?, ?, ?)",
-                         (key, request_json, response_json),
-                     )
-                     conn.commit()
-                 logger.debug(f"Saved response to cache with key: {key}, response: {response_json}")
-                 return True
-             except Exception as e:
-                 logger.error(f"Failed to save to SQLite cache: {e}")
-                 return False
- 
-     def get_all_entries(self) -> dict[str, dict[str, Any]]:
-         """Get all cache entries from the database."""
-         cache = {}
-         try:
-             with sqlite3.connect(self.db_path) as conn:
-                 conn.row_factory = sqlite3.Row
-                 cursor = conn.cursor()
-                 cursor.execute("SELECT key, request_json, response_json FROM llm_cache ORDER BY created_at")
- 
-                 for row in cursor.fetchall():
-                     cache[row["key"]] = {
-                         "request": row["request_json"],
-                         "response": row["response_json"],
-                     }
- 
-             logger.debug(f"Retrieved {len(cache)} entries from cache database")
-             return cache
-         except Exception as e:
-             logger.error(f"Error retrieving all cache entries: {e}")
-             return {}
- 
-     def clear(self) -> bool:
-         """Clear all cache entries.
- 
-         Returns:
-             True if successful, False otherwise
-         """
-         with self.lock:
-             try:
-                 with sqlite3.connect(self.db_path) as conn:
-                     cursor = conn.cursor()
-                     cursor.execute("DELETE FROM llm_cache")
-                     conn.commit()
-                 logger.info("Cache cleared")
-                 return True
-             except Exception as e:
-                 logger.error(f"Failed to clear cache: {e}")
-                 return False
- 
-     def get_existing_keys(self) -> set:
-         """Get all existing keys in the database.
- 
-         Returns:
-             Set of keys
-         """
-         existing_keys = set()
-         try:
-             with sqlite3.connect(self.db_path) as conn:
-                 cursor = conn.cursor()
-                 cursor.execute("SELECT key FROM llm_cache")
-                 for row in cursor.fetchall():
-                     existing_keys.add(row[0])
-             return existing_keys
-         except Exception as e:
-             logger.error(f"Error retrieving existing keys: {e}")
-             return set()
- 
-     def bulk_insert(self, items: list, update: bool = False) -> int:
-         """Insert multiple items into the cache.
- 
-         Args:
-             items: List of (key, request_json, response_json) tuples
-             update: Whether to update existing entries
- 
-         Returns:
-             Number of items inserted
-         """
-         count = 0
-         UPDATE_OR_IGNORE = "INSERT OR REPLACE" if update else "INSERT OR IGNORE"
-         with self.lock:
-             try:
-                 with sqlite3.connect(self.db_path) as conn:
-                     cursor = conn.cursor()
-                     cursor.executemany(
-                         f"{UPDATE_OR_IGNORE} INTO llm_cache (key, request_json, response_json) VALUES (?, ?, ?)",
-                         items,
-                     )
-                     count = cursor.rowcount
-                     conn.commit()
-                 return count
-             except Exception as e:
-                 logger.error(f"Error during bulk insert: {e}")
-                 return 0
- 
- 
- class LLMCache:
-     def __init__(
-         self, cache_dir: str = ".", hf_repo: str | None = None, cache_sync_interval: int = 3600, reset: bool = False
-     ):
-         self.cache_dir = Path(cache_dir)
-         self.db_path = self.cache_dir / "llm_cache.db"
-         self.hf_repo_id = hf_repo
-         self.cache_sync_interval = cache_sync_interval
-         self.last_sync_time = time.time()
- 
-         # Create cache directory if it doesn't exist
-         self.cache_dir.mkdir(exist_ok=True, parents=True)
- 
-         # Initialize CacheDB
-         self.db = CacheDB(self.db_path)
-         if reset:
-             self.db.clear()
- 
-         # Try to load from HF dataset if available
-         try:
-             self._load_cache_from_hf()
-         except Exception as e:
-             logger.warning(f"Failed to load cache from HF dataset: {e}")
- 
-     def response_format_to_dict(self, response_format: Any) -> dict[str, Any]:
-         """Convert a response format to a dict."""
-         # If it's a Pydantic model, use its schema
-         if hasattr(response_format, "model_json_schema"):
-             response_format = response_format.model_json_schema()
- 
-         # If it's a Pydantic model, use its dump
-         elif hasattr(response_format, "model_dump"):
-             response_format = response_format.model_dump()
- 
-         if not isinstance(response_format, dict):
-             response_format = {"value": str(response_format)}
- 
-         return response_format
- 
-     def _generate_key(
-         self, model: str, system: str, prompt: str, response_format: Any, temperature: float | None = None
-     ) -> str:
-         """Generate a unique key for caching based on inputs."""
-         response_format_dict = self.response_format_to_dict(response_format)
-         response_format_str = json.dumps(response_format_dict, sort_keys=True)
-         # Include temperature in the key
-         key_content = f"{model}:{system}:{prompt}:{response_format_str}"
-         if temperature is not None:
-             key_content += f":{temperature:.2f}"
-         return hashlib.md5(key_content.encode()).hexdigest()
- 
-     def _create_request_json(
-         self, model: str, system: str, prompt: str, response_format: Any, temperature: float | None
-     ) -> str:
-         """Create JSON string from request parameters."""
-         logger.info(f"Creating request JSON with temperature: {temperature}")
-         request_data = {
-             "model": model,
-             "system": system,
-             "prompt": prompt,
-             "response_format": self.response_format_to_dict(response_format),
-             "temperature": temperature,
-         }
-         return json.dumps(request_data)
- 
-     def _check_request_match(
-         self,
-         cached_request: dict[str, Any],
-         model: str,
-         system: str,
-         prompt: str,
-         response_format: Any,
-         temperature: float | None,
-     ) -> bool:
-         """Check if the cached request matches the new request."""
-         # Check each field and log any mismatches
-         if cached_request["model"] != model:
-             logger.debug(f"Cache mismatch: model - cached: {cached_request['model']}, new: {model}")
-             return False
-         if cached_request["system"] != system:
-             logger.debug(f"Cache mismatch: system - cached: {cached_request['system']}, new: {system}")
-             return False
-         if cached_request["prompt"] != prompt:
-             logger.debug(f"Cache mismatch: prompt - cached: {cached_request['prompt']}, new: {prompt}")
-             return False
-         response_format_dict = self.response_format_to_dict(response_format)
-         if cached_request["response_format"] != response_format_dict:
-             logger.debug(
-                 f"Cache mismatch: response_format - cached: {cached_request['response_format']}, new: {response_format_dict}"
-             )
-             return False
-         if cached_request["temperature"] != temperature:
-             logger.debug(f"Cache mismatch: temperature - cached: {cached_request['temperature']}, new: {temperature}")
-             return False
- 
-         return True
- 
-     def get(
-         self, model: str, system: str, prompt: str, response_format: dict[str, Any], temperature: float | None = None
-     ) -> Optional[dict[str, Any]]:
-         """Get cached response if it exists."""
-         key = self._generate_key(model, system, prompt, response_format, temperature)
-         result = self.db.get(key)
- 
-         if not result:
-             return None
-         request_dict = json.loads(result["request"])
-         if not self._check_request_match(request_dict, model, system, prompt, response_format, temperature):
-             logger.warning(f"Cached request does not match new request for key: {key}")
-             return None
- 
-         return json.loads(result["response"])
- 
-     def set(
-         self,
-         model: str,
-         system: str,
-         prompt: str,
-         response_format: dict[str, Any],
-         temperature: float | None,
-         response: dict[str, Any],
-     ) -> None:
-         """Set response in cache and sync if needed."""
-         key = self._generate_key(model, system, prompt, response_format, temperature)
-         request_json = self._create_request_json(model, system, prompt, response_format, temperature)
-         response_json = json.dumps(response)
- 
-         success = self.db.set(key, request_json, response_json)
- 
-         # Check if we should sync to HF
-         if success and self.hf_repo_id and (time.time() - self.last_sync_time > self.cache_sync_interval):
-             try:
-                 self.sync_to_hf()
-                 self.last_sync_time = time.time()
-             except Exception as e:
-                 logger.error(f"Failed to sync cache to HF dataset: {e}")
- 
-     def _load_cache_from_hf(self) -> None:
-         """Load cache from HF dataset if it exists and merge with local cache."""
-         if not self.hf_repo_id:
-             return
- 
-         try:
-             # Check for new commits before loading the dataset
-             ds_path = (self.cache_dir / "hf_cache").as_posix()
-             dataset = load_dataset_from_hf(self.hf_repo_id, ds_path)["train"]
-             if not dataset:
-                 logger.info("No new items to merge from HF dataset")
-                 return
- 
-             existing_keys = self.db.get_existing_keys()
- 
-             logger.info(f"Found {len(dataset)} items in HF dataset. Existing keys: {len(existing_keys)}")
- 
-             # Prepare batch items for insertion
-             items_to_insert = []
-             for item in dataset:
-                 key = item["key"]
-                 # Only update if not in local cache to prioritize local changes
-                 if key in existing_keys:
-                     continue
-                 # Create request JSON
-                 request_data = {
-                     "model": item["model"],
-                     "system": item["system"],
-                     "prompt": item["prompt"],
-                     "temperature": item["temperature"],
-                     "response_format": None,  # We can't fully reconstruct this
-                 }
- 
-                 items_to_insert.append(
-                     (
-                         key,
-                         json.dumps(request_data),
-                         item["response"],  # This is already a JSON string
-                     )
-                 )
-                 logger.info(
-                     f"Inserting item: {key} with temperature: {item['temperature']} and response: {item['response']}"
-                 )
- 
-             # Bulk insert new items
-             if items_to_insert:
-                 inserted_count = self.db.bulk_insert(items_to_insert)
-                 logger.info(f"Merged {inserted_count} items from HF dataset into SQLite cache")
-             else:
-                 logger.info("No new items to merge from HF dataset")
-         except Exception as e:
-             logger.warning(f"Could not load cache from HF dataset: {e}")
- 
-     def get_all_entries(self) -> dict[str, dict[str, Any]]:
-         """Get all cache entries from the database."""
-         cache = self.db.get_all_entries()
-         entries = {}
-         for key, entry in cache.items():
-             request = json.loads(entry["request"])
-             response = json.loads(entry["response"])
-             entries[key] = {"request": request, "response": response}
-         return entries
- 
-     def sync_to_hf(self) -> None:
-         """Sync cache to HF dataset."""
-         if not self.hf_repo_id:
-             return
- 
-         self._load_cache_from_hf()
- 
-         # Get all entries from the database
-         cache = self.db.get_all_entries()
- 
-         # Convert cache to dataset format
-         entries = []
-         for key, entry in cache.items():
-             request = json.loads(entry["request"])
-             response_str = entry["response"]
-             entries.append(
-                 {
-                     "key": key,
-                     "model": request["model"],
-                     "system": request["system"],
-                     "prompt": request["prompt"],
-                     "response_format": request["response_format"],
-                     "temperature": request["temperature"],
-                     "response": response_str,
-                 }
-             )
- 
-         # Create and push dataset
-         dataset = Dataset.from_list(entries)
-         dataset.push_to_hub(self.hf_repo_id, private=True)
-         logger.info(f"Synced {len(cache)} cached items to HF dataset {self.hf_repo_id}")
- 
-     def clear(self) -> None:
-         """Clear all cache entries."""
-         self.db.clear()
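
A minimal sketch of the cache's read-through behavior (the import path is assumed for the post-move layout; the model name, prompts, and directory are illustrative; response_format may be any Pydantic model, as handled by response_format_to_dict):

    # Hypothetical usage sketch; paths and names are illustrative.
    from pydantic import BaseModel

    from shared.workflows.llmcache import LLMCache  # assumed post-move path

    class Answer(BaseModel):
        text: str

    cache = LLMCache(cache_dir="hf_cache", hf_repo=None)  # no HF syncing in this sketch
    args = ("gpt-4o-mini", "You are terse.", "Capital of France?", Answer, 0.0)
    if cache.get(*args) is None:  # first lookup is a cache miss
        cache.set(*args, response={"text": "Paris"})
    print(cache.get(*args))  # {'text': 'Paris'}

The key is an MD5 over model, system prompt, user prompt, the JSON schema of the response format, and (when set) the temperature, so any change to one of those inputs produces a fresh cache entry.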
 
src/workflows/llms.py DELETED
@@ -1,285 +0,0 @@
- # %%
- 
- import json
- import os
- from typing import Any, Optional
- 
- import cohere
- import numpy as np
- from langchain_anthropic import ChatAnthropic
- from langchain_cohere import ChatCohere
- from langchain_core.language_models import BaseChatModel
- from langchain_openai import ChatOpenAI
- from loguru import logger
- from openai import OpenAI
- from pydantic import BaseModel, Field
- from pydantic._internal._core_utils import CoreSchemaOrField, is_core_schema
- from pydantic.json_schema import GenerateJsonSchema
- from rich import print as rprint
- 
- # Initialize global cache
- from src.envs import CACHE_PATH, LLM_CACHE_REPO
- 
- from .configs import AVAILABLE_MODELS
- from .llmcache import LLMCache
- 
- llm_cache = LLMCache(cache_dir=CACHE_PATH, hf_repo=LLM_CACHE_REPO)
- 
- 
- class CohereSchemaGenerator(GenerateJsonSchema):
-     """Generates JSON schema for Cohere models without default titles."""
- 
-     def field_title_should_be_set(self, schema: CoreSchemaOrField) -> bool:
-         return_value = super().field_title_should_be_set(schema)
-         if return_value and is_core_schema(schema):
-             return False
-         return return_value
- 
- 
- def _openai_is_json_mode_supported(model_name: str) -> bool:
-     if model_name.startswith("gpt-4"):
-         return True
-     if model_name.startswith("gpt-3.5"):
-         return False
-     logger.warning(f"OpenAI model {model_name} is not available in this app, skipping JSON mode, returning False")
-     return False
- 
- 
- class LLMOutput(BaseModel):
-     content: str = Field(description="The content of the response")
-     logprob: Optional[float] = Field(None, description="The log probability of the response")
- 
- 
- def _get_langchain_chat_output(llm: BaseChatModel, system: str, prompt: str) -> dict[str, Any]:
-     output = llm.invoke([("system", system), ("human", prompt)])
-     ai_message = output["raw"]
-     content = {"content": ai_message.content, "tool_calls": ai_message.tool_calls}
-     content_str = json.dumps(content)
-     return {"content": content_str, "output": output["parsed"].model_dump()}
- 
- 
- def _cohere_completion(
-     model: str, system: str, prompt: str, response_model, temperature: float | None = None, logprobs: bool = True
- ) -> dict[str, Any]:
-     messages = [
-         {"role": "system", "content": system},
-         {"role": "user", "content": prompt},
-     ]
-     client = cohere.ClientV2(api_key=os.getenv("COHERE_API_KEY"))
-     schema = response_model.model_json_schema(schema_generator=CohereSchemaGenerator)
-     if "title" in schema:
-         del schema["title"]
-     response_format = {
-         "type": "json_object",
-         "schema": schema,
-     }
-     response = client.chat(
-         model=model,
-         messages=messages,
-         response_format=response_format,
-         logprobs=logprobs,
-         temperature=temperature,
-     )
-     output = {}
-     output["content"] = response.message.content[0].text
-     output["output"] = response_model.model_validate_json(response.message.content[0].text).model_dump()
-     if logprobs:
-         output["logprob"] = sum(lp.logprobs[0] for lp in response.logprobs)
-         output["prob"] = np.exp(output["logprob"])
-     return output
- 
- 
- def _openai_langchain_completion(
-     model: str, system: str, prompt: str, response_model, temperature: float | None = None
- ) -> dict[str, Any]:
-     llm = ChatOpenAI(model=model, temperature=temperature).with_structured_output(response_model, include_raw=True)
-     return _get_langchain_chat_output(llm, system, prompt)
- 
- 
- def _openai_completion(
-     model: str, system: str, prompt: str, response_model, temperature: float | None = None, logprobs: bool = True
- ) -> dict[str, Any]:
-     messages = [
-         {"role": "system", "content": system},
-         {"role": "user", "content": prompt},
-     ]
-     client = OpenAI(api_key=os.getenv("OPENAI_API_KEY"))
-     response = client.beta.chat.completions.parse(
-         model=model,
-         messages=messages,
-         response_format=response_model,
-         logprobs=logprobs,
-         temperature=temperature,
-     )
-     output = {}
-     output["content"] = response.choices[0].message.content
-     output["output"] = response.choices[0].message.parsed.model_dump()
-     if logprobs:
-         output["logprob"] = sum(lp.logprob for lp in response.choices[0].logprobs.content)
-         output["prob"] = np.exp(output["logprob"])
-     return output
- 
- 
- def _anthropic_completion(
-     model: str, system: str, prompt: str, response_model, temperature: float | None = None
- ) -> dict[str, Any]:
-     llm = ChatAnthropic(model=model, temperature=temperature).with_structured_output(response_model, include_raw=True)
-     return _get_langchain_chat_output(llm, system, prompt)
- 
- 
- def _llm_completion(
-     model: str, system: str, prompt: str, response_format, temperature: float | None = None, logprobs: bool = False
- ) -> dict[str, Any]:
-     """
-     Generate a completion from an LLM provider with structured output without caching.
- 
-     Args:
-         model (str): Provider and model name in format "provider/model" (e.g. "OpenAI/gpt-4")
-         system (str): System prompt/instructions for the model
-         prompt (str): User prompt/input
-         response_format: Pydantic model defining the expected response structure
-         logprobs (bool, optional): Whether to return log probabilities. Defaults to False.
-             Note: Not supported by Anthropic models.
- 
-     Returns:
-         dict: Contains:
-             - output: The structured response matching response_format
-             - logprob: (optional) Sum of log probabilities if logprobs=True
-             - prob: (optional) Exponential of logprob if logprobs=True
- 
-     Raises:
-         ValueError: If logprobs=True with Anthropic models
-     """
-     model_name = AVAILABLE_MODELS[model]["model"]
-     provider = model.split("/")[0]
-     if provider == "Cohere":
-         return _cohere_completion(model_name, system, prompt, response_format, temperature, logprobs)
-     elif provider == "OpenAI":
-         if _openai_is_json_mode_supported(model_name):
-             return _openai_completion(model_name, system, prompt, response_format, temperature, logprobs)
-         elif logprobs:
-             raise ValueError(f"{model} does not support logprobs feature.")
-         else:
-             return _openai_langchain_completion(model_name, system, prompt, response_format, temperature)
-     elif provider == "Anthropic":
-         if logprobs:
-             raise ValueError("Anthropic models do not support logprobs")
-         return _anthropic_completion(model_name, system, prompt, response_format, temperature)
-     else:
-         raise ValueError(f"Provider {provider} not supported")
- 
- 
- def completion(
-     model: str, system: str, prompt: str, response_format, temperature: float | None = None, logprobs: bool = False
- ) -> dict[str, Any]:
-     """
-     Generate a completion from an LLM provider with structured output with caching.
- 
-     Args:
-         model (str): Provider and model name in format "provider/model" (e.g. "OpenAI/gpt-4")
-         system (str): System prompt/instructions for the model
-         prompt (str): User prompt/input
-         response_format: Pydantic model defining the expected response structure
-         logprobs (bool, optional): Whether to return log probabilities. Defaults to False.
-             Note: Not supported by Anthropic models.
- 
-     Returns:
-         dict: Contains:
-             - output: The structured response matching response_format
-             - logprob: (optional) Sum of log probabilities if logprobs=True
-             - prob: (optional) Exponential of logprob if logprobs=True
- 
-     Raises:
-         ValueError: If logprobs=True with Anthropic models
-     """
-     if model not in AVAILABLE_MODELS:
-         raise ValueError(f"Model {model} not supported")
-     if logprobs and not AVAILABLE_MODELS[model].get("logprobs", False):
-         logger.warning(f"{model} does not support logprobs feature, setting logprobs to False")
-         logprobs = False
- 
-     # Check cache first
-     cached_response = llm_cache.get(model, system, prompt, response_format, temperature)
-     if cached_response and (not logprobs or cached_response.get("logprob")):
-         logger.debug(f"Cache hit for model {model}")
-         return cached_response
- 
-     logger.debug(f"Cache miss for model {model}, calling API. Logprobs: {logprobs}")
- 
-     # Continue with the original implementation for cache miss
-     response = _llm_completion(model, system, prompt, response_format, temperature, logprobs)
- 
-     # Update cache with the new response
-     llm_cache.set(
-         model,
-         system,
-         prompt,
-         response_format,
-         temperature,
-         response,
-     )
- 
-     return response
- 
- 
- # %%
- if __name__ == "__main__":
-     from tqdm import tqdm
- 
-     class ExplainedAnswer(BaseModel):
-         """
-         The answer to the question and a terse explanation of the answer.
-         """
- 
-         answer: str = Field(description="The short answer to the question")
-         explanation: str = Field(description="5 words terse best explanation of the answer.")
- 
-     models = list(AVAILABLE_MODELS.keys())[:1]  # Just use the first model for testing
-     system = "You are an accurate and concise explainer of scientific concepts."
-     prompt = "Which planet is closest to the sun in the Milky Way galaxy? Answer directly, no explanation needed."
- 
-     llm_cache = LLMCache(cache_dir=".", hf_repo="qanta-challenge/advcal-llm-cache", reset=True)
- 
-     # First call - should be a cache miss
-     logger.info("First call - should be a cache miss")
-     for model in tqdm(models):
-         response = completion(model, system, prompt, ExplainedAnswer, logprobs=False)
-         rprint(response)
- 
-     # Second call - should be a cache hit
-     logger.info("Second call - should be a cache hit")
-     for model in tqdm(models):
-         response = completion(model, system, prompt, ExplainedAnswer, logprobs=False)
-         rprint(response)
- 
-     # Slightly different prompt - should be a cache miss
-     logger.info("Different prompt - should be a cache miss")
-     prompt2 = "Which planet is closest to the sun? Answer directly."
-     for model in tqdm(models):
-         response = completion(model, system, prompt2, ExplainedAnswer, logprobs=False)
-         rprint(response)
- 
-     # Get cache entries count from SQLite
-     try:
-         cache_entries = llm_cache.get_all_entries()
-         logger.info(f"Cache now has {len(cache_entries)} items")
-     except Exception as e:
-         logger.error(f"Failed to get cache entries: {e}")
- 
-     # Test adding entry with temperature parameter
-     logger.info("Testing with temperature parameter")
-     response = completion(models[0], system, "What is Mars?", ExplainedAnswer, temperature=0.7, logprobs=False)
-     rprint(response)
- 
-     # Demonstrate forced sync to HF if repo is configured
-     if llm_cache.hf_repo_id:
-         logger.info("Forcing sync to HF dataset")
-         try:
-             llm_cache.sync_to_hf()
-             logger.info("Successfully synced to HF dataset")
-         except Exception as e:
-             logger.exception(f"Failed to sync to HF: {e}")
-     else:
-         logger.info("HF repo not configured, skipping sync test")
- 
- # %%
 
src/workflows/qb_agents.py DELETED
@@ -1,232 +0,0 @@
- import time
- from typing import Any, Iterable, TypedDict
- 
- from loguru import logger
- 
- from .executors import WorkflowOutput, execute_workflow
- from .structs import TossupWorkflow, Workflow
- 
- 
- def _get_workflow_response(
-     workflow: Workflow, available_vars: dict[str, Any], logprob_step: bool | str = False
- ) -> tuple[WorkflowOutput, float]:
-     """Get response from executing a complete workflow."""
-     start_time = time.time()
-     workflow_output = execute_workflow(workflow, available_vars, return_full_content=True, logprob_step=logprob_step)
-     response_time = time.time() - start_time
-     return workflow_output, response_time
- 
- 
- class TossupResult(TypedDict):
-     answer: str  # the model's answer
-     confidence: float  # confidence score
-     logprob: float | None  # log probability of the answer
-     buzz: bool  # whether the agent buzzed
-     question_fragment: str  # prefix of the question text so far
-     position: int  # 1-indexed question run index
-     step_contents: list[str]  # string content outputs of each step
-     response_time: float
-     step_outputs: dict[str, Any]
- 
- 
- class BonusResult(TypedDict):
-     answer: str
-     confidence: float
-     explanation: str
-     response_time: float
-     step_contents: list[str]
-     step_outputs: dict[str, Any]
- 
- 
- class QuizBowlTossupAgent:
-     """Agent for handling tossup questions with multiple steps in the workflow."""
- 
-     external_input_variable = "question_text"
-     output_variables = ["answer", "confidence"]
- 
-     def __init__(self, workflow: TossupWorkflow):
-         """Initialize the multi-step tossup agent.
- 
-         Args:
-             workflow: The workflow containing multiple steps
-         """
-         self.workflow = workflow
-         self.output_variables = list(workflow.outputs.keys())
- 
-         # Validate input variables
-         if self.external_input_variable not in workflow.inputs:
-             raise ValueError(f"External input variable {self.external_input_variable} not found in workflow inputs")
- 
-         # Validate output variables
-         for out_var in self.output_variables:
-             if out_var not in workflow.outputs:
-                 raise ValueError(f"Output variable {out_var} not found in workflow outputs")
- 
-     def _single_run(self, question_run: str, position: int) -> TossupResult:
-         """Process a single question run.
-         Args:
-             question_run: The question run to process
-             position: The position of the question run
- 
-         Returns:
-             A TossupResult containing the answer, confidence, logprob, buzz, question fragment, position, step contents, response time, and step outputs
-         """
-         answer_var_step = self.workflow.outputs["answer"].split(".")[0]
-         workflow_output, response_time = _get_workflow_response(
-             self.workflow, {self.external_input_variable: question_run}, logprob_step=answer_var_step
-         )
-         final_outputs = workflow_output["final_outputs"]
-         buzz = self.workflow.buzzer.run(final_outputs["confidence"], logprob=workflow_output["logprob"])
-         result: TossupResult = {
-             "position": position,
-             "answer": final_outputs["answer"],
-             "confidence": final_outputs["confidence"],
-             "logprob": workflow_output["logprob"],
-             "buzz": buzz,
-             "question_fragment": question_run,
-             "step_contents": workflow_output["step_contents"],
-             "step_outputs": workflow_output["intermediate_outputs"],  # Include intermediate step outputs
-             "response_time": response_time,
-         }
-         return result
- 
-     def run(self, question_runs: list[str], early_stop: bool = True) -> Iterable[TossupResult]:
-         """Process a tossup question and decide when to buzz based on confidence.
- 
-         Args:
-             question_runs: Progressive reveals of the question text
-             early_stop: Whether to stop after the first buzz
- 
-         Yields:
-             Dict containing:
-             - answer: The model's answer
-             - confidence: Confidence score
-             - buzz: Whether to buzz
-             - question_fragment: Current question text
-             - position: Current position in question
-             - step_contents: String content outputs of each step
-             - response_time: Time taken for response
-             - step_outputs: Outputs from each step
-         """
-         for i, question_text in enumerate(question_runs):
-             # Execute the complete workflow
-             result = self._single_run(question_text, i + 1)
- 
-             yield result
- 
-             # If we've reached the confidence threshold, buzz and stop
-             if early_stop and result["buzz"]:
-                 if i + 1 < len(question_runs):
-                     yield self._single_run(question_runs[-1], len(question_runs))
-                 return
- 
- 
- class QuizBowlBonusAgent:
-     """Agent for handling bonus questions with multiple steps in the workflow."""
- 
-     external_input_variables = ["leadin", "part"]
-     output_variables = ["answer", "confidence", "explanation"]
- 
-     def __init__(self, workflow: Workflow):
-         """Initialize the multi-step bonus agent.
- 
-         Args:
-             workflow: The workflow containing multiple steps
-         """
-         self.workflow = workflow
-         self.output_variables = list(workflow.outputs.keys())
- 
-         # Validate input variables
-         for input_var in self.external_input_variables:
-             if input_var not in workflow.inputs:
-                 raise ValueError(f"External input variable {input_var} not found in workflow inputs")
- 
-         # Validate output variables
-         for out_var in self.output_variables:
-             if out_var not in workflow.outputs:
-                 raise ValueError(f"Output variable {out_var} not found in workflow outputs")
- 
-     def run(self, leadin: str, part: str) -> BonusResult:
-         """Process a bonus part with the given leadin.
- 
-         Args:
-             leadin: The leadin text for the bonus question
-             part: The specific part text to answer
- 
-         Returns:
-             Dict containing:
-             - answer: The model's answer
-             - confidence: Confidence score
-             - explanation: Explanation for the answer
-             - step_contents: String content outputs of each step
-             - response_time: Time taken for response
-             - step_outputs: Outputs from each step
-         """
-         workflow_output, response_time = _get_workflow_response(
-             self.workflow,
-             {
-                 "leadin": leadin,
-                 "part": part,
-             },
-         )
-         final_outputs = workflow_output["final_outputs"]
-         return {
-             "answer": final_outputs["answer"],
-             "confidence": final_outputs["confidence"],
-             "explanation": final_outputs["explanation"],
-             "step_contents": workflow_output["step_contents"],
-             "response_time": response_time,
-             "step_outputs": workflow_output["intermediate_outputs"],  # Include intermediate step outputs
-         }
- 
- 
- # Example usage
- if __name__ == "__main__":
-     # Load the Quizbowl dataset
-     from datasets import load_dataset
- 
-     from workflows.factory import create_simple_qb_bonus_workflow, create_simple_qb_tossup_workflow
- 
-     ds_name = "qanta-challenge/leaderboard_co_set"
-     ds = load_dataset(ds_name, split="train")
- 
-     # Create the agents with multi-step workflows
-     tossup_workflow = create_simple_qb_tossup_workflow()
-     tossup_agent = QuizBowlTossupAgent(workflow=tossup_workflow)  # buzzing is configured on the workflow's Buzzer
- 
-     bonus_workflow = create_simple_qb_bonus_workflow()
-     bonus_agent = QuizBowlBonusAgent(workflow=bonus_workflow)
- 
-     # Example for tossup mode
-     print("\n=== TOSSUP MODE EXAMPLE ===")
-     sample_question = ds[30]
-     print(sample_question["question_runs"][-1])
-     print(sample_question["gold_label"])
-     print()
-     question_runs = sample_question["question_runs"]
- 
-     results = tossup_agent.run(question_runs, early_stop=True)
-     for result in results:
-         print(result["step_contents"])
-         print(f"Guess at position {result['position']}: {result['answer']}")
-         print(f"Confidence: {result['confidence']}")
-         print("Step outputs:", result["step_outputs"])
-         if result["buzz"]:
-             print("Buzzed!\n")
- 
-     # Example for bonus mode
-     print("\n=== BONUS MODE EXAMPLE ===")
-     sample_bonus = ds[31]  # Assuming this is a bonus question
-     leadin = sample_bonus["leadin"]
-     parts = sample_bonus["parts"]
- 
-     print(f"Leadin: {leadin}")
-     for i, part in enumerate(parts):
-         print(f"\nPart {i + 1}: {part['part']}")
-         result = bonus_agent.run(leadin, part["part"])
-         print(f"Answer: {result['answer']}")
-         print(f"Confidence: {result['confidence']}")
-         print(f"Explanation: {result['explanation']}")
-         print(f"Response time: {result['response_time']:.2f}s")
-         print("Step outputs:", result["step_outputs"])
 
src/workflows/structs.py DELETED
@@ -1,370 +0,0 @@
1
- # %%
2
- from copy import deepcopy
3
- from enum import Enum
4
- from typing import Any, Literal, Optional
5
-
6
- import numpy as np
7
- from pydantic import BaseModel, Field, model_validator
8
-
9
- from .configs import AVAILABLE_MODELS
10
-
11
- """
12
- Core data structures for defining workflows and their components.
13
-
14
- This module defines the primary classes used to model workflows, steps, and their
15
- input/output fields. These data structures serve as the foundation for workflow
16
- definition, validation, and execution throughout the workflows package.
17
-
18
- The primary components are:
19
- - InputField: Represents an input to a model step with name and source variable
20
- - OutputField: Represents an output from a model step with name and type
21
- - ModelStep: Represents a single step in a workflow with inputs and outputs
22
- - Workflow: A collection of interconnected steps with defined inputs and outputs
23
-
24
- All classes use Pydantic's BaseModel for validation and serialization support.
25
- """
26
- FieldType = Literal["input", "output"]
27
-
28
-
29
- SUPPORTED_TYPES = Literal["str", "int", "float", "bool", "list[str]", "list[int]", "list[float]", "list[bool]"]
30
- """Supported field types for input and output fields"""
31
-
32
-
33
- class InputField(BaseModel):
34
- """
35
- Defines an input field for a model step.
36
-
37
- An input field specifies what data a step requires, where it comes from,
38
- and optional pre-processing to apply before use.
39
-
40
- Attributes:
41
- name: The name of the input field within the step's context
42
- description: Human-readable description of the input's purpose
43
- variable: Reference to the source variable (format: "{step_id}.{field_name}" or external input name)
44
- func: Optional function name to transform the input value before use
45
- """
46
-
47
- name: str
48
- description: str
49
- variable: str
50
-
51
- # function to call on the input before passing it to the model
52
- func: str | None = None
53
-
54
- class Config:
55
- frozen = True
56
-
57
-
58
- class OutputField(BaseModel):
59
- """
60
- Defines an output field produced by a model step.
61
-
62
- An output field specifies a value that the step will produce, including
63
- its data type and optional post-processing.
64
-
65
- Attributes:
66
- name: The name of the output field within the step's context
67
- description: Human-readable description of the output's purpose
68
- type: The data type of the output (one of SUPPORTED_TYPES)
69
- func: Optional function name to transform the raw output value
70
- """
71
-
72
- name: str
73
- type: SUPPORTED_TYPES = Field(default="str")
74
- description: str
75
-
76
- # function to call on the output string from the model
77
- func: str | None = None
78
-
79
- class Config:
80
- frozen = True
81
-
82
-
83
- class CallType(str, Enum):
84
- LLM = "llm"
85
- SEARCH = "search"
86
- PYTHON_FUNC = "python_func"
87
-
88
-
89
- class ModelStep(BaseModel):
-     """
-     Represents a single step in a workflow.
-
-     A model step encapsulates the details of a specific operation within a workflow,
-     including what model to use, what inputs it requires, and what outputs it produces.
-
-     Attributes:
-         id: Unique identifier for this step within a workflow
-         name: Human-readable name of the step
-         model: The model to use for this step (e.g., "gpt-4")
-         provider: The provider of the model (e.g., "openai")
-         call_type: The type of operation (e.g., "llm", "search")
-         temperature: Sampling temperature (required when call_type is "llm")
-         system_prompt: Instructions for the model
-         input_fields: List of input fields required by this step
-         output_fields: List of output fields produced by this step
-     """
-
-     id: str
-     name: str
-     model: str
-     provider: str
-     call_type: CallType = CallType.LLM
-
-     # TODO: Validate that this is not None for call_type = llm
-     temperature: Optional[float] = None
-
-     system_prompt: str
-     input_fields: list[InputField]
-     output_fields: list[OutputField]
-
-     class Config:
-         use_enum_values = True
-
-     def fields(self, field_type: FieldType) -> list[InputField | OutputField]:
-         return self.input_fields if field_type == "input" else self.output_fields
-
-     def get_full_model_name(self) -> str:
-         return f"{self.provider}/{self.model}"
-
-     def get_produced_variables(self) -> list[str]:
-         return [f"{self.id}.{field.name}" for field in self.output_fields if field.name]
-
-     def update(self, update: dict[str, Any]) -> "ModelStep":
-         """Returns a new copy with the updated properties."""
-         return self.model_copy(update=update)
-
-     def update_property(self, field: str, value: Any) -> "ModelStep":
-         """Update the `field` key of the model step with `value`."""
-         return self.update({field: value})
-
-     def update_field(self, field_type: FieldType, index: int, key: str, value: str) -> "ModelStep":
-         """Update a single key of an input or output field at the given index."""
-         if field_type == "input":
-             fields = deepcopy(self.input_fields)
-         elif field_type == "output":
-             fields = deepcopy(self.output_fields)
-         else:
-             raise ValueError(f"Invalid field type: {field_type}")
-
-         # Copy the field list before editing so the original step stays untouched.
-         if index < len(fields):
-             fields[index] = fields[index].model_copy(update={key: value})
-         return self.model_copy(update={f"{field_type}_fields": fields})
-
-     @staticmethod
-     def create_new_field(field_type: FieldType, input_var: str | None = None) -> InputField | OutputField:
-         if field_type == "input":
-             # Default to an empty reference when no source variable is supplied.
-             return InputField(name="", description="", variable=input_var or "")
-         elif field_type == "output":
-             return OutputField(name="", description="")
-         else:
-             raise ValueError(f"Invalid field type: {field_type}")
-
-     def add_field(self, field_type: FieldType, index: int = -1, input_var: str | None = None) -> "ModelStep":
-         """Add a new field to the step.
-
-         Args:
-             field_type: Type of field to add ('input' or 'output').
-             index: Position to insert the new field (-1 to append).
-             input_var: Optional source variable for a new input field.
-
-         Returns:
-             A new ModelStep with the updated fields.
-         """
-         if field_type == "input":
-             fields = deepcopy(self.input_fields)
-             new_field = ModelStep.create_new_field(field_type, input_var)
-         else:
-             fields = deepcopy(self.output_fields)
-             new_field = ModelStep.create_new_field(field_type)
-         if index == -1:
-             fields.append(new_field)
-         else:
-             fields.insert(index + 1, new_field)
-         return self.model_copy(update={f"{field_type}_fields": fields})
-
-     def delete_field(self, field_type: FieldType, index: int) -> "ModelStep":
-         """
-         Delete an input or output field from the step.
-
-         Args:
-             field_type: Type of field to delete ('input' or 'output').
-             index: Index of the field to delete. [-1 to delete the last field]
-
-         Returns:
-             A new ModelStep with the updated fields.
-         """
-         fields = deepcopy(self.input_fields if field_type == "input" else self.output_fields)
-         fields.pop(index)
-         return self.model_copy(update={"input_fields": fields} if field_type == "input" else {"output_fields": fields})
-
-
- class Workflow(BaseModel):
-     """
-     Represents a complete workflow composed of interconnected steps.
-
-     A workflow defines a directed acyclic graph of model steps, where outputs
-     from earlier steps can be used as inputs to later steps.
-
-     Attributes:
-         inputs: List of input variables required by the workflow
-         outputs: Mapping from workflow output names to the step variables that produce them
-         steps: Dictionary mapping step IDs to ModelStep instances
-
-     The inputs list and the outputs mapping use the format "{step_id}.{field_name}"
-     to uniquely identify variables within the workflow.
-     """
-
-     # variables of form {node}.{field}
-     inputs: list[str] = Field(default_factory=list)
-
-     # variables of form {node}.{field}
-     outputs: dict[str, str | None] = Field(default_factory=dict)
-     steps: dict[str, ModelStep] = Field(default_factory=dict)
-
-     def model_dump(self, *args, **kwargs):
-         data = super().model_dump(*args, **kwargs)
-         if "steps" in data:
-             data["steps"] = list(data["steps"].values())
-         return data
-
-     @model_validator(mode="before")
-     def dictify_steps(cls, data):
-         if "steps" in data and isinstance(data["steps"], list):
-             steps_dict = {}
-             for step in data["steps"]:
-                 if isinstance(step, ModelStep):
-                     step_id = step.id
-                 else:
-                     step_id = step["id"]
-                 if step_id in steps_dict:
-                     raise ValueError(f"Duplicate step ID: {step_id}")
-                 steps_dict[step_id] = step
-             data["steps"] = steps_dict
-         return data
-
-     def get_step_variables(self, step_id: str) -> list[str]:
-         """Get all output variables produced by a specific step."""
-         step = self.steps[step_id]
-         variables = []
-         for output in step.output_fields:
-             if output.name == "":
-                 continue
-             output_var = f"{step.id}.{output.name}"
-             variables.append(output_var)
-         return variables
-
-     def get_available_variables(self) -> list[str]:
-         """Get all available variables: the workflow inputs plus the outputs of every step."""
-         variables = set(self.inputs)
-         for step in self.steps.values():
-             variables.update(self.get_step_variables(step.id))
-         return list(variables)
-
-     def get_step_model_selections(self) -> dict[str, str]:
-         """Get the selected model for each step, keyed by step ID."""
-         return {step_id: step.get_full_model_name() for step_id, step in self.steps.items()}
-
-     def get_output_model_selections(self) -> dict[str, str | None]:
-         """Get the ID of the step that produces each workflow output, keyed by output name."""
-         return {
-             output_var: target_var.split(".")[0] if target_var else None
-             for output_var, target_var in self.outputs.items()
-         }
-
-     # Step update methods
-
-     def add_step(self, step: ModelStep) -> "Workflow":
-         """Add a step to the workflow."""
-         steps = self.steps | {step.id: step}
-         return self.model_copy(update={"steps": steps})
-
-     def remove_step(self, step_id: str) -> "Workflow":
-         """Remove a step from the workflow."""
-         # Build a new mapping rather than mutating self.steps in place.
-         steps = {sid: step for sid, step in self.steps.items() if sid != step_id}
-         workflow = self.model_copy(update={"steps": steps})
-         workflow.refresh_output_variables()
-         return workflow
-
-     def update_step(self, step: ModelStep) -> "Workflow":
-         """Update a step in the workflow."""
-         steps = self.steps | {step.id: step}
-         workflow = self.model_copy(update={"steps": steps})
-         workflow.refresh_output_variables()
-         return workflow
-
-     # Output variables
-     def refresh_output_variables(self) -> "Workflow":
-         """Drop output references to variables that no longer exist in the workflow."""
-         produced_variables = self.get_available_variables()
-         self.outputs = {k: (v if v in produced_variables else None) for k, v in self.outputs.items()}
-         return self
-
-
- class BuzzerMethod(str, Enum):
-     AND = "AND"
-     OR = "OR"
-
-
- class Buzzer(BaseModel):
-     """Configuration for when to buzz in a tossup question."""
-
-     method: BuzzerMethod = BuzzerMethod.AND  # Logic to combine thresholds
-     confidence_threshold: float = Field(default=0.5, ge=0.0, le=1.0)  # Minimum confidence to trigger a buzz
-     prob_threshold: float | None = None  # Optional token probability threshold
-
-     class Config:
-         use_enum_values = True
-         frozen = True
-
-     def update(self, **kwargs) -> "Buzzer":
-         """Return a copy of the buzzer updated with the given kwargs."""
-         return self.model_copy(update=kwargs)
-
-     def run(self, confidence: float, prob: float | None = None, logprob: float | None = None) -> bool:
-         """Run the buzzer logic."""
-         if logprob is not None and prob is not None:
-             raise ValueError("Cannot provide both logprob and prob")
-         if self.prob_threshold is None:
-             return confidence >= self.confidence_threshold
-         if logprob is None and prob is None:
-             raise ValueError("Must provide either logprob or prob if prob_threshold is not None")
-         if prob is None:
-             # Check for None explicitly: `prob or ...` would wrongly fall through for prob == 0.0.
-             prob = float(np.exp(logprob))
-         if self.method == BuzzerMethod.AND:
-             return confidence >= self.confidence_threshold and prob >= self.prob_threshold
-         elif self.method == BuzzerMethod.OR:
-             return confidence >= self.confidence_threshold or prob >= self.prob_threshold
-         else:
-             raise ValueError(f"Invalid buzzer method: {self.method}")
-
-     @model_validator(mode="after")
-     def validate_method_with_log_prob(cls, data):
-         """Validate that the method is 'AND' whenever prob_threshold is None."""
-         if data.prob_threshold is None and data.method != BuzzerMethod.AND:
-             raise ValueError("If prob_threshold is None, method must be 'AND'")
-         return data
-
-
- class TossupWorkflow(Workflow):
-     """Workflow specialized for tossup questions with buzzing capability."""
-
-     buzzer: Buzzer = Field(default_factory=Buzzer)
-
-     def get_answer_model(self, answer_var: str | None = None) -> str | None:
-         answer_var = answer_var or self.outputs["answer"]
-         if answer_var is None:
-             return None
-         step_id = answer_var.split(".")[0]
-         return self.steps[step_id].get_full_model_name()
-
-     def is_token_probs_supported(self, answer_var: str | None = None) -> bool:
-         model_name = self.get_answer_model(answer_var)
-         if model_name is None:
-             return True
-         return AVAILABLE_MODELS[model_name].get("logprobs", False)
-
-     def update_buzzer(self, buzzer: Buzzer) -> "TossupWorkflow":
-         """Update the buzzer."""
-         return self.model_copy(update={"buzzer": buzzer})
-
-     def refresh_buzzer(self) -> "TossupWorkflow":
-         if not self.is_token_probs_supported():
-             return self.update_buzzer(self.buzzer.update(prob_threshold=None, method="AND"))
-         return self
 
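For orientation, here is a minimal sketch (not part of this commit) of how these structs compose. The model name, provider, prompt, and field names are placeholder values, and the import path assumes the new `shared.workflows` submodule location:

```python
from shared.workflows.structs import (
    Buzzer,
    InputField,
    ModelStep,
    OutputField,
    TossupWorkflow,
)

# A single LLM step that reads the external input "question_text"
# and produces an answer plus a confidence score.
step = ModelStep(
    id="answerer",
    name="Answerer",
    model="gpt-4o-mini",       # placeholder model
    provider="OpenAI",          # placeholder provider
    call_type="llm",
    temperature=0.2,
    system_prompt="Answer the question and rate your confidence.",
    input_fields=[
        InputField(name="question", description="Question text so far", variable="question_text"),
    ],
    output_fields=[
        OutputField(name="answer", description="Best-guess answer", type="str"),
        OutputField(name="confidence", description="Confidence in [0, 1]", type="float"),
    ],
)

workflow = TossupWorkflow(
    inputs=["question_text"],
    outputs={"answer": "answerer.answer", "confidence": "answerer.confidence"},
    steps=[step],  # the before-validator converts the list into an id-keyed dict
    buzzer=Buzzer(method="AND", confidence_threshold=0.8, prob_threshold=0.5),
)

print(workflow.get_answer_model())         # "OpenAI/gpt-4o-mini"
print(workflow.buzzer.run(0.9, prob=0.6))  # True: both thresholds are met
```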
src/workflows/utils.py DELETED
@@ -1,195 +0,0 @@
- from collections import deque
- from typing import Any, Iterable
-
- from .errors import CyclicDependencyError, UnknownVariableError, WorkflowError
- from .structs import Workflow
-
- """
- Utilities for workflow dependency management and execution order determination.
-
- This module provides functions for analyzing workflows, determining dependencies between steps,
- and calculating the correct execution order to ensure all dependencies are satisfied.
- Key functionality includes:
-
- - Variable to step mapping: Identifying which step produces each variable
- - Dependency graph creation: Building a graph representing dependencies between steps
- - Topological sorting: Determining a valid execution order based on dependencies
- - Cycle detection: Identifying cyclic dependencies that would prevent execution
-
- These utilities form the foundation for workflow validation and execution in the
- workflow_executor module.
- """
-
-
- def _create_variable_step_mapping(workflow: Workflow) -> dict[str, str]:
-     """
-     Creates a mapping from produced variable names to the model step that produces them.
-
-     Args:
-         workflow (Workflow): The workflow containing steps and their input/output fields.
-
-     Returns:
-         dict[str, str]: A dictionary where keys are variable names (formatted as "{step_id}.{output name}")
-             and values are the step IDs that produce them.
-
-     Raises:
-         WorkflowError: If there are duplicate step IDs or if a variable is produced by multiple steps.
-
-     Example:
-         For a workflow with steps "extract" and "summarize" each producing outputs:
-         >>> _create_variable_step_mapping(workflow)
-         {'extract.keywords': 'extract', 'summarize.summary': 'summarize'}
-     """
-     variable_step_map: dict[str, str] = {}  # variable name -> step id
-     for step_id, step in workflow.steps.items():
-         for output in step.output_fields:
-             var_name = f"{step_id}.{output.name}"
-             if var_name in variable_step_map:
-                 raise WorkflowError(f"Variable '{output.name}' has duplicate entry in step {step_id}")
-             variable_step_map[var_name] = step_id
-     return variable_step_map
-
-
- def create_dependency_graph(workflow: Workflow, input_values: dict[str, Any]) -> dict[str, set[str]]:
-     """
-     Creates a dependency graph from a workflow.
-
-     This function analyzes the workflow and determines which steps depend on others
-     based on their input/output relationships. A step depends on another if it requires
-     a variable that is produced by the other step. External inputs provided through
-     input_values don't create dependencies.
-
-     Args:
-         workflow (Workflow): The workflow containing steps and their input/output fields.
-         input_values (dict[str, Any]): A dictionary of external input values provided to the workflow.
-
-     Returns:
-         dict[str, set[str]]: A dictionary where keys are step IDs and values are sets of step IDs
-             that the key step depends on.
-
-     Raises:
-         UnknownVariableError: If an input field references a variable that is neither provided
-             externally nor produced by any step.
-
-     Example:
-         For a workflow where step "classify" depends on output from "extract":
-         >>> create_dependency_graph(workflow, {})
-         {'extract': set(), 'classify': {'extract'}}
-
-         With external input provided for the "text" variable:
-         >>> create_dependency_graph(workflow, {'text': 'Sample text'})
-         {'extract': set(), 'classify': {'extract'}}
-     """
-     produced_by = _create_variable_step_mapping(workflow)
-     dependencies: dict[str, set[str]] = {step_id: set() for step_id in workflow.steps}
-     for step_id, step in workflow.steps.items():
-         for input_field in step.input_fields:
-             var = input_field.variable
-             # If the variable was provided externally, then no dependency is needed.
-             if var in input_values:
-                 continue
-             # Otherwise, check if the variable is produced by a step.
-             if var in produced_by:
-                 producer_step_id = produced_by[var]
-                 if producer_step_id != step_id:  # Avoid self-dependency
-                     dependencies[step_id].add(producer_step_id)
-             else:
-                 raise UnknownVariableError(f"Variable '{var}' is not provided externally nor produced by any step")
-     return dependencies
-
-
- def detect_cycles(dep_graph: dict[str, Iterable[str]]) -> str | None:
-     """Detects cycles in the dependency graph.
-
-     Args:
-         dep_graph: A dictionary mapping each node ID to the node IDs it depends on
-     Returns:
-         The ID of the first node found to be part of a cycle, or None if no cycle is found
-     """
-     # Check for cycles in step dependencies
-     visited = set()
-     path = set()
-
-     def has_cycle(node: str) -> bool:
-         if node in path:
-             return True
-         if node in visited:
-             return False
-
-         visited.add(node)
-         path.add(node)
-
-         for neighbor in dep_graph.get(node, set()):
-             if has_cycle(neighbor):
-                 return True
-
-         path.remove(node)
-         return False
-
-     # Check each step for cycles
-     for node_id in dep_graph:
-         if has_cycle(node_id):
-             return node_id
-     return None
-
-
- def topological_sort(dependencies: dict[str, set[str]]) -> list[str]:
-     """
-     Performs a topological sort on a dependency graph and detects cycles using Kahn's algorithm.
-
-     A topological sort orders the steps such that for every dependency from step A to step B,
-     step A comes before step B in the ordering. This ensures that all dependencies are satisfied
-     when executing steps in the returned order.
-
-     Args:
-         dependencies (dict[str, set[str]]): A dictionary where each key is a node identifier and
-             each value is a set of nodes that the key node depends on.
-
-     Returns:
-         list[str]: A list representing the nodes in topological order if no cycle is detected.
-
-     Raises:
-         CyclicDependencyError: If a cycle is detected in the graph.
-
-     Example:
-         >>> topological_sort({'A': set(), 'B': {'A'}, 'C': {'B'}})
-         ['A', 'B', 'C']
-
-         >>> topological_sort({'A': {'B'}, 'B': {'A'}})  # Cyclic dependency
-         CyclicDependencyError
-
-     Algorithm:
-         This implementation uses Kahn's algorithm:
-         1. Calculate in-degree for all nodes (number of dependencies)
-         2. Start with nodes having 0 in-degree (no dependencies)
-         3. Process each node by removing its outgoing edges
-         4. Add newly dependency-free nodes to the processing queue
-         5. If not all nodes are processed, a cycle exists
-     """
-
-     nodes = list(dependencies.keys())
-     dependents: dict[str, list[str]] = {node: [] for node in nodes}
-     in_degree: dict[str, int] = dict.fromkeys(nodes, 0)
-
-     # Calculate in-degrees and build dependents list
-     for node, deps in dependencies.items():
-         in_degree[node] = len(deps)
-         for dep in deps:
-             dependents[dep].append(node)
-
-     # Initialize queue with nodes having zero in-degree
-     queue = deque([node for node, deg in in_degree.items() if deg == 0])
-     execution_order: list[str] = []
-
-     # Process nodes in topological order
-     while queue:
-         current = queue.popleft()
-         execution_order.append(current)
-         for dep in dependents[current]:
-             in_degree[dep] -= 1
-             if in_degree[dep] == 0:
-                 queue.append(dep)
-
-     # If the execution order does not include all nodes, a cycle exists
-     if len(execution_order) != len(nodes):
-         raise CyclicDependencyError()
-     return execution_order
 
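For a quick sense of the dependency utilities, a small sketch (not part of this commit): since `detect_cycles` and `topological_sort` operate on plain dicts, they can be exercised without building a full workflow. The import path assumes the new `shared.workflows` submodule location:

```python
from shared.workflows.utils import detect_cycles, topological_sort

# A toy dependency graph: 'classify' needs 'extract', 'report' needs both.
deps = {
    "extract": set(),
    "classify": {"extract"},
    "report": {"extract", "classify"},
}

print(detect_cycles(deps))     # None: the graph is acyclic
print(topological_sort(deps))  # ['extract', 'classify', 'report']

# Introducing a back-edge creates a cycle, which detect_cycles reports
# and topological_sort rejects with CyclicDependencyError.
deps["extract"].add("report")
print(detect_cycles(deps))     # 'extract' (or another node on the cycle)
```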
src/workflows/validators.py DELETED
@@ -1,615 +0,0 @@
- import keyword
- import re
- from dataclasses import dataclass
- from enum import Enum
- from typing import Optional
-
- from loguru import logger
-
- from .structs import CallType, InputField, ModelStep, OutputField, Workflow
- from .utils import detect_cycles
-
- SUPPORTED_TYPES = {"str", "int", "float", "bool", "list[str]", "list[int]", "list[float]", "list[bool]"}
-
- # Constants for validation
- MAX_FIELD_NAME_LENGTH = 50
- MAX_DESCRIPTION_LENGTH = 200
- MAX_SYSTEM_PROMPT_LENGTH = 4000
- MAX_TEMPERATURE = 10.0
-
-
- class ValidationErrorType(Enum):
-     """Types of validation errors that can occur"""
-
-     INPUTS = "inputs"
-     OUTPUTS = "outputs"
-     STEP = "step"
-     DAG = "dag"
-     VARIABLE = "variable"
-     TYPE = "type"
-     GENERAL = "general"
-     NAMING = "naming"
-     LENGTH = "length"
-     RANGE = "range"
-
-
- @dataclass
- class ValidationError:
-     """Represents a validation error with type and message"""
-
-     error_type: ValidationErrorType
-     message: str
-     step_id: Optional[str] = None
-     field_name: Optional[str] = None
-
-     def __str__(self):
-         subject = ""
-         if self.step_id:
-             subject = f"Model step '{self.step_id}'"
-         if self.field_name:
-             if self.step_id:
-                 subject = f"Field '{self.step_id}.{self.field_name}'"
-             else:
-                 subject = f"Field '{self.field_name}'"
-         return f"{self.error_type.value}: {subject} - {self.message}"
-
-
- class WorkflowValidationError(ValueError):
-     """Base class for workflow validation errors"""
-
-     def __init__(self, errors: list[ValidationError]):
-         self.errors = errors
-         super().__init__(f"Workflow validation failed with {len(errors)} errors")
-
-
- def _parse_variable_reference(var: str) -> tuple[Optional[str], str]:
-     """Extracts step_id and field_name from a variable reference"""
-     parts = var.split(".")
-     if len(parts) == 1:
-         return None, parts[0]
-     return parts[0], parts[1]
-
-
- def _get_step_dependencies(step: ModelStep) -> set[str]:
-     """Gets the set of step IDs that this step depends on"""
-     deps = set()
-     for field in step.input_fields:
-         step_id, _ = _parse_variable_reference(field.variable)
-         if step_id:
-             deps.add(step_id)
-     return deps
-
-
- def create_step_dep_graph(workflow: Workflow) -> dict[str, set[str]]:
-     """Creates a dependency graph of steps"""
-     dep_graph: dict[str, set[str]] = {}
-     for step_id, step in workflow.steps.items():
-         dep_graph[step_id] = _get_step_dependencies(step)
-     return dep_graph
-
-
- class WorkflowValidator:
-     """Validates workflows for correctness and consistency"""
-
-     def __init__(
-         self,
-         min_temperature: float = 0,
-         max_temperature: float = MAX_TEMPERATURE,
-         max_field_name_length: int = MAX_FIELD_NAME_LENGTH,
-         max_description_length: int = MAX_DESCRIPTION_LENGTH,
-         max_system_prompt_length: int = MAX_SYSTEM_PROMPT_LENGTH,
-         allowed_model_names: Optional[list[str]] = None,
-         required_input_vars: Optional[list[str]] = None,
-         required_output_vars: Optional[list[str]] = None,
-     ):
-         self.errors: list[ValidationError] = []
-         self.workflow: Optional[Workflow] = None
-         self.min_temperature = min_temperature
-         self.max_temperature = max_temperature
-         self.max_field_name_length = max_field_name_length
-         self.max_description_length = max_description_length
-         self.max_system_prompt_length = max_system_prompt_length
-         self.required_input_vars = required_input_vars
-         self.required_output_vars = required_output_vars
-         self.allowed_model_names = set(allowed_model_names) if allowed_model_names else None
-
-     def validate(self, workflow: Workflow, allow_empty: bool = False) -> bool:
-         validated = self._validate(workflow, allow_empty)
-         if not validated:
-             raise WorkflowValidationError(self.errors)
-         return True
-
-     def _validate(self, workflow: Workflow, allow_empty: bool = False) -> bool:
-         """Main validation entry point
-
-         Args:
-             workflow: The workflow to validate.
-             allow_empty: If True, an empty workflow is allowed. This flag is used to validate
-                 intermediate states while the user edits the workflow.
-         """
-         self.errors = []
-         self.workflow = workflow
-
-         # Basic workflow validation
-         if not self._validate_workflow_basic(workflow, allow_empty):
-             return False
-
-         # If it's a single-step workflow, use simple validation
-         if len(workflow.steps) == 1:
-             return self.validate_simple_workflow(workflow, allow_empty)
-
-         # Otherwise use complex validation
-         return self.validate_complex_workflow(workflow, allow_empty)
-
-     def _validate_required_inputs(self, workflow: Workflow, allow_empty: bool = False) -> bool:
-         """Validates that the workflow has the required inputs"""
-         required_input_vars = self.required_input_vars or []
-         input_vars = set(workflow.inputs)
-         for req_var in required_input_vars:
-             if req_var in input_vars:
-                 continue
-             self.errors.append(
-                 ValidationError(ValidationErrorType.INPUTS, f"Workflow must have '{req_var}' as an input")
-             )
-             return False
-
-         for input_var in input_vars:
-             if not self._is_valid_external_input(input_var):
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.VARIABLE, f"Invalid input variable format: {input_var}")
-                 )
-                 return False
-         return True
-
-     def _validate_required_outputs(self, workflow: Workflow, allow_empty: bool = False) -> bool:
-         """Validates that the workflow has the required outputs"""
-         required_output_vars = self.required_output_vars or []
-         output_vars = set(workflow.outputs)
-         for req_var in required_output_vars:
-             if req_var in output_vars:
-                 continue
-             self.errors.append(
-                 ValidationError(ValidationErrorType.OUTPUTS, f"Workflow must produce '{req_var}' as an output")
-             )
-             return False
-
-         # Validate output variables
-         for output_name, output_var in workflow.outputs.items():
-             logger.debug(f"Output name: {output_name}, Output var: {output_var}")
-             if not output_var:
-                 if allow_empty:
-                     continue
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.VARIABLE, f"Missing output variable for {output_name}")
-                 )
-                 return False
-
-             # Check if the output variable references a valid step output
-             if not self._is_valid_variable_reference(output_var):
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.VARIABLE, f"Invalid output variable reference: {output_var}")
-                 )
-                 return False
-
-             # Verify the output field exists in the referenced step
-             step_id, field_name = _parse_variable_reference(output_var)
-             logger.debug(f"Step ID: {step_id}, Field name: {field_name}, Workflow steps: {workflow.steps.keys()}")
-             if step_id not in workflow.steps:
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.VARIABLE, f"Referenced model step '{step_id}' not found")
-                 )
-                 return False
-
-             ref_step = workflow.steps[step_id]
-             if not any(field.name == field_name for field in ref_step.output_fields):
-                 self.errors.append(
-                     ValidationError(
-                         ValidationErrorType.VARIABLE,
-                         f"Output field '{field_name}' not found in model step '{step_id}'",
-                         step_id,
-                         field_name,
-                     )
-                 )
-                 return False
-         return True
-
-     def validate_input_outputs(self, workflow: Workflow, allow_empty: bool = False) -> bool:
-         """Validates the input and output variables"""
-         self._validate_required_inputs(workflow, allow_empty)
-         self._validate_required_outputs(workflow, allow_empty)
-
-         # Check for at least one input
-         if not workflow.inputs:
-             self.errors.append(
-                 ValidationError(ValidationErrorType.GENERAL, "Workflow must contain at least one input")
-             )
-
-         # Check for at least one output
-         if not workflow.outputs:
-             self.errors.append(
-                 ValidationError(ValidationErrorType.GENERAL, "Workflow must contain at least one output")
-             )
-
-         return len(self.errors) == 0
-
-     def validate_simple_workflow(self, workflow: Workflow, allow_empty: bool = False) -> bool:
-         """Validates a single-step workflow"""
-         if not self.workflow:
-             return False
-
-         # Get the single step
-         step = next(iter(workflow.steps.values()))
-
-         # Validate the step itself
-         if not self._validate_step(step, allow_empty):
-             return False
-
-         return True
-
-     def validate_complex_workflow(self, workflow: Workflow, allow_empty: bool = False) -> bool:
-         """Validates a multi-step workflow"""
-         if not self.workflow:
-             return False
-
-         # Validate each step
-         for step in workflow.steps.values():
-             if not self._validate_step(step, allow_empty):
-                 return False
-
-         dep_graph = create_step_dep_graph(workflow)
-         if cycle_step_id := detect_cycles(dep_graph):
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.DAG, f"Circular dependency detected involving step: {cycle_step_id}"
-                 )
-             )
-             return False
-
-         # Check for orphaned steps (steps whose outputs feed neither another step nor a workflow output)
-         used_steps = set()
-         for deps in dep_graph.values():
-             used_steps.update(deps)
-         for step_id in workflow.steps:
-             if step_id not in used_steps and not any(
-                 output_var and _parse_variable_reference(output_var)[0] == step_id
-                 for output_var in workflow.outputs.values()
-             ):
-                 self.errors.append(ValidationError(ValidationErrorType.DAG, f"Orphaned step detected: {step_id}"))
-                 return False
-
-         # Validate variable dependencies
-         if not self._validate_variable_dependencies(workflow):
-             return False
-
-         return True
-
-     def _validate_workflow_basic(self, workflow: Workflow, allow_empty: bool = False) -> bool:
-         """Validates basic workflow properties"""
-         # Check the workflow inputs and outputs
-         if not self.validate_input_outputs(workflow, allow_empty):
-             return False
-
-         # Check for empty workflow
-         if not workflow.steps:
-             if allow_empty:
-                 return True
-             self.errors.append(ValidationError(ValidationErrorType.GENERAL, "Workflow must contain at least one step"))
-             return False
-
-         # Check for step ID consistency
-         for step_id, step in workflow.steps.items():
-             if step_id != step.id:
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.STEP, f"Step ID mismatch: {step_id} != {step.id}", step_id)
-                 )
-                 return False
-         return True
-
-     def _validate_step(self, step: ModelStep, allow_empty: bool = False) -> bool:
-         """Validates a single step"""
-         # Validate required fields
-         model_name = step.get_full_model_name()
-
-         if model_name == "/" and not allow_empty:
-             self.errors.append(
-                 ValidationError(ValidationErrorType.STEP, "Model name and provider cannot be empty", step.id)
-             )
-             return False
-
-         # Check if the model name is allowed
-         if self.allowed_model_names and model_name not in self.allowed_model_names:
-             self.errors.append(
-                 ValidationError(ValidationErrorType.STEP, f"Model name '{model_name}' is not allowed", step.id)
-             )
-             return False
-
-         if not step.id or not step.call_type:
-             self.errors.append(ValidationError(ValidationErrorType.STEP, "Step missing required fields", step.id))
-             return False
-
-         # Validate step ID and name
-         if not self._is_valid_identifier(step.id):
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.NAMING,
-                     f"Invalid step ID format: {step.id}. Must be a valid identifier.",
-                     step.id,
-                 )
-             )
-             return False
-
-         # Validate temperature for LLM call type
-         if step.call_type == CallType.LLM:
-             if step.temperature is None:
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.STEP, "LLM step must specify temperature", step.id)
-                 )
-                 return False
-
-             if not self.min_temperature <= step.temperature <= self.max_temperature:
-                 self.errors.append(
-                     ValidationError(
-                         ValidationErrorType.RANGE,
-                         f"Temperature must be between {self.min_temperature} and {self.max_temperature}",
-                         step.id,
-                     )
-                 )
-                 return False
-
-         # Validate system prompt for LLM call type
-         if step.call_type == CallType.LLM:
-             if not step.system_prompt:
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.STEP, "LLM step must specify system prompt", step.id)
-                 )
-                 return False
-
-             if len(step.system_prompt) > self.max_system_prompt_length:
-                 self.errors.append(
-                     ValidationError(
-                         ValidationErrorType.LENGTH,
-                         f"System prompt exceeds maximum length of {self.max_system_prompt_length} characters",
-                         step.id,
-                     )
-                 )
-                 return False
-
-         # Validate input fields
-         input_names = set()
-         for field in step.input_fields:
-             if not self._validate_input_field(field, allow_empty):
-                 return False
-             if field.name in input_names:
-                 self.errors.append(
-                     ValidationError(
-                         ValidationErrorType.STEP, f"Duplicate input field name: {field.name}", step.id, field.name
-                     )
-                 )
-                 return False
-             input_names.add(field.name)
-
-         # Validate output fields
-         output_names = set()
-         for field in step.output_fields:
-             if not self._validate_output_field(field, allow_empty):
-                 return False
-             if field.name in output_names:
-                 self.errors.append(
-                     ValidationError(
-                         ValidationErrorType.STEP, f"Duplicate output field name: {field.name}", step.id, field.name
-                     )
-                 )
-                 return False
-             output_names.add(field.name)
-
-         return True
-
-     def _validate_input_field(self, field: InputField, allow_empty: bool = False) -> bool:
-         """Validates an input field"""
-         # Validate required fields
-         if not field.name or not field.description or not field.variable:
-             self.errors.append(
-                 ValidationError(ValidationErrorType.STEP, "Input field missing required fields", field_name=field.name)
-             )
-             return False
-
-         # Validate field name
-         if not self._is_valid_identifier(field.name, allow_empty):
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.NAMING,
-                     f"Invalid field name format: {field.name}. Must be a valid Python identifier.",
-                     field_name=field.name,
-                 )
-             )
-             return False
-
-         # Validate field name length
-         if len(field.name) > self.max_field_name_length:
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.LENGTH,
-                     f"Field name exceeds maximum length of {self.max_field_name_length} characters",
-                     field_name=field.name,
-                 )
-             )
-             return False
-
-         # Validate description length
-         if len(field.description) > self.max_description_length:
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.LENGTH,
-                     f"Description exceeds maximum length of {self.max_description_length} characters",
-                     field_name=field.name,
-                 )
-             )
-             return False
-
-         # Validate variable reference
-         if not self._is_valid_variable_reference(field.variable):
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.VARIABLE,
-                     f"Invalid variable reference: {field.variable}",
-                     field_name=field.name,
-                 )
-             )
-             return False
-
-         return True
-
-     def _validate_output_field(self, field: OutputField, allow_empty: bool = False) -> bool:
-         """Validates an output field"""
-         # Validate required fields
-         if not field.name or not field.description:
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.STEP, "Output field missing required fields", field_name=field.name
-                 )
-             )
-             return False
-
-         # Validate field name
-         if not self._is_valid_identifier(field.name, allow_empty):
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.NAMING,
-                     f"Invalid field name format: {field.name}. Must be a valid Python identifier.",
-                     field_name=field.name,
-                 )
-             )
-             return False
-
-         # Validate field name length
-         if len(field.name) > self.max_field_name_length:
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.LENGTH,
-                     f"Field name exceeds maximum length of {self.max_field_name_length} characters",
-                     field_name=field.name,
-                 )
-             )
-             return False
-
-         # Validate description length
-         if len(field.description) > self.max_description_length:
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.LENGTH,
-                     f"Description exceeds maximum length of {self.max_description_length} characters",
-                     field_name=field.name,
-                 )
-             )
-             return False
-
-         # Validate type
-         if field.type not in SUPPORTED_TYPES:
-             self.errors.append(
-                 ValidationError(
-                     ValidationErrorType.TYPE, f"Unsupported output type: {field.type}", field_name=field.name
-                 )
-             )
-             return False
-
-         return True
-
-     def _validate_simple_workflow_variables(self, workflow: Workflow) -> bool:
-         """Validates variables in a simple workflow"""
-         step = next(iter(workflow.steps.values()))
-
-         # Validate input variables
-         for input_var in workflow.inputs:
-             if not self._is_valid_external_input(input_var):
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.VARIABLE, f"Invalid input variable format: {input_var}")
-                 )
-                 return False
-
-         # Validate output variables
-         for output_name, output_var in workflow.outputs.items():
-             if output_var and not self._is_valid_variable_reference(output_var):
-                 self.errors.append(
-                     ValidationError(ValidationErrorType.VARIABLE, f"Invalid output variable reference: {output_var}")
-                 )
-                 return False
-
-         return True
-
-     def _validate_variable_dependencies(self, workflow: Workflow) -> bool:
-         """Validates variable dependencies between steps"""
-
-         def create_var_dep_graph(workflow: Workflow) -> dict[str, set[str]]:
-             var_graph: dict[str, set[str]] = {}
-             for step_id, step in workflow.steps.items():
-                 for field in step.input_fields:
-                     if field.variable not in var_graph:
-                         var_graph[field.variable] = set()
-                     # Add dependency from input variable to step's outputs
-                     for output in step.output_fields:
-                         var_graph[field.variable].add(f"{step_id}.{output.name}")
-             return var_graph
-
-         # Check for cycles in variable dependencies
-         var_graph = create_var_dep_graph(workflow)
-         if cycle_var := detect_cycles(var_graph):
-             self.errors.append(
-                 ValidationError(ValidationErrorType.VARIABLE, f"Circular variable dependency detected: {cycle_var}")
-             )
-             return False
-
-         # Validate external input existence
-         external_inputs = set(workflow.inputs)
-         for step in workflow.steps.values():
-             for field in step.input_fields:
-                 step_id, field_name = _parse_variable_reference(field.variable)
-                 if not step_id and field_name not in external_inputs:
-                     self.errors.append(
-                         ValidationError(
-                             ValidationErrorType.VARIABLE,
-                             f"External input '{field_name}' not found in workflow inputs",
-                             field_name=field_name,
-                         )
-                     )
-                     return False
-
-         return True
-
-     def _is_valid_variable_reference(self, var: str | None, allow_empty: bool = True) -> bool:
-         """Validates that a variable reference is properly formatted"""
-         if not self.workflow:
-             return False
-         if var is None:
-             return allow_empty
-         parts = var.split(".")
-         if len(parts) == 1:
-             return True  # External input
-         if len(parts) != 2:
-             return False
-         step_id, field_name = parts
-         return step_id in self.workflow.steps and any(
-             field.name == field_name for field in self.workflow.steps[step_id].output_fields
-         )
-
-     def _is_valid_external_input(self, var: str) -> bool:
-         """Validates that a variable is a valid external input"""
-         if not var:
-             return False
-         if not self._is_valid_identifier(var):
-             return False
-         if keyword.iskeyword(var):
-             return False
-         if "." in var:  # External inputs should not contain dots
-             return False
-         return True
-
-     def _is_valid_identifier(self, name: str, allow_empty: bool = False) -> bool:
-         """Validates that a string is a valid Python identifier"""
-         if name and name.strip():
-             return bool(re.match(r"^[a-zA-Z_][a-zA-Z0-9_]*$", name))
-         return allow_empty
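To close the loop, a small sketch (not part of this commit) of how the validator is typically driven; the required variable names are illustrative, `workflow` is the object built in the structs sketch above, and the import path assumes the new `shared.workflows` submodule location:

```python
from shared.workflows.validators import WorkflowValidationError, WorkflowValidator

validator = WorkflowValidator(
    required_input_vars=["question_text"],  # illustrative requirements
    required_output_vars=["answer"],
)

try:
    # validate() returns True on success and raises on failure,
    # carrying the accumulated ValidationError list.
    validator.validate(workflow)
    print("workflow is valid")
except WorkflowValidationError as e:
    for error in e.errors:
        print(error)  # ValidationError.__str__ renders "type: subject - message"
```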