Spaces:

qanta-challenge
/

quizbowl-submission

Running

quizbowl-submission / src /components /quizbowl /utils.py

Maharshi Gor

First Working commit

193db9d about 2 months ago

3.26 kB

	from typing import Any, Dict, List

	import pandas as pd


	def _create_confidence_plot_data(results: List[Dict], top_k_mode: bool = False) -> pd.DataFrame:
	"""Create a DataFrame for the confidence plot."""
	if not top_k_mode:
	return pd.DataFrame(
	{
	"position": [r["position"] for r in results],
	"confidence": [r["confidence"] for r in results],
	"answer": [r["answer"] for r in results],
	}
	)

	# For top-k mode, extract and plot top answers
	return _create_top_k_plot_data(results)


	def _create_top_k_plot_data(results: List[Dict]) -> pd.DataFrame:
	"""Create plot data for top-k mode."""
	# Find top answers across all positions (limited to top 5)
	top_answers = set()
	for r in results:
	for g in r.get("guesses", [])[:3]: # Get top 3 from each position
	if g.get("answer"):
	top_answers.add(g.get("answer"))

	top_answers = list(top_answers)[:5] # Limit to 5 total answers

	# Create plot data for each answer
	all_data = []
	for position_idx, result in enumerate(results):
	position = result["position"]
	for answer in top_answers:
	confidence = 0
	for guess in result.get("guesses", []):
	if guess.get("answer") == answer:
	confidence = guess.get("confidence", 0)
	break
	all_data.append({"position": position, "confidence": confidence, "answer": answer})

	return pd.DataFrame(all_data)


	def _create_top_k_dataframe(results: List[Dict]) -> pd.DataFrame:
	"""Create a DataFrame for top-k results."""
	df_rows = []
	for result in results:
	position = result["position"]
	for i, guess in enumerate(result.get("guesses", [])):
	df_rows.append(
	{
	"position": position,
	"answer": guess.get("answer", ""),
	"confidence": guess.get("confidence", 0),
	"rank": i + 1,
	}
	)
	return pd.DataFrame(df_rows)


	def _format_buzz_result(buzzed: bool, results: List[Dict], gold_label: str, top_k_mode: bool) -> tuple[str, str, bool]:
	"""Format the result text based on whether the agent buzzed."""
	if not buzzed:
	return f"Did not buzz. Correct answer was: {gold_label}", "No buzz", False

	buzz_position = next(i for i, r in enumerate(results) if r.get("buzz", False))
	buzz_result = results[buzz_position]

	if top_k_mode:
	# For top-k, check if any of the top guesses match
	top_answers = [g.get("answer", "").lower() for g in buzz_result.get("guesses", [])]
	correct = gold_label.lower() in [a.lower() for a in top_answers]
	final_answer = top_answers[0] if top_answers else "No answer"
	else:
	# For regular mode
	final_answer = buzz_result["answer"]
	correct = final_answer.lower() == gold_label.lower()

	result_text = f"BUZZED at position {buzz_position + 1} with answer: {final_answer}\n"
	result_text += f"Correct answer: {gold_label}\n"
	result_text += f"Result: {'CORRECT' if correct else 'INCORRECT'}"

	return result_text, final_answer, correct