Spaces:
Sleeping
Sleeping
File size: 3,774 Bytes
e0f838b 018ff1c e0f838b 018ff1c 4c1e384 018ff1c e0f838b 7655fc3 e0f838b 4c1e384 e0f838b 0ed75dd 6484aeb e0f838b 822a378 018ff1c 7655fc3 018ff1c 7655fc3 018ff1c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 |
import os
import re
from pathlib import Path
import tempfile
import openai
import pandas as pd
from tabulate import tabulate
from smolagents.tools import Tool, PipelineTool
# Configure the OpenAI client from the OPENAI_API_KEY environment variable.
# os.getenv returns None when the variable is unset, so a missing key is not
# detected here.
openai.api_key = os.getenv("OPENAI_API_KEY")
# System prompt sent as the "system" message by AnswerTool.forward. It asks the
# model to end its reply with a "FINAL ANSWER:" line; AnswerTool.forward splits
# the reply on that exact marker, so the marker text must stay in sync.
SYSTEM_PROMPT = """You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template:
FINAL ANSWER: [YOUR FINAL ANSWER].
If you have enough confidence (≥70%) in your internal knowledge, answer directly without calling any other tool.
Otherwise, decide which tool (wiki, web search, python, audio, excel) is best and use it.
YOUR FINAL ANSWER should be:
- a number OR as few words as possible OR a comma separated list of numbers and/or strings.
- If you are asked for a number, don't use commas, symbols or units (e.g. %, $, km) unless explicitly asked.
- If you are asked for a string, don't use articles ("a", "the"), abbreviations (e.g. "NYC"), or extra words; write digits in plain text unless specified otherwise.
- If you are asked for a comma separated list, apply the above rules to each element.
"""
class AnswerTool(Tool):
    """Answer a question with a single OpenAI chat-completion call.

    The question is sent to ``gpt-4o`` together with ``SYSTEM_PROMPT``, and the
    text following the last ``FINAL ANSWER:`` marker in the reply is returned.
    """

    name = "answer_tool"
    description = "Answer GAIA Level 1 questions in exact-match format."
    inputs = {"question": {"type": "string", "description": "The GAIA question text."}}
    output_type = "string"

    def forward(self, question: str) -> str:
        """Ask the model and return its final answer.

        Args:
            question: The question text, sent verbatim as the user message.

        Returns:
            The stripped text after the last ``FINAL ANSWER:`` marker, or the
            whole stripped reply when the marker is absent. An empty string is
            returned when the reply carries no text content.
        """
        messages = [
            {"role": "system", "content": SYSTEM_PROMPT},
            {"role": "user", "content": question},
        ]
        resp = openai.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            temperature=0.0,
            # NOTE(review): SYSTEM_PROMPT asks the model to report its thoughts
            # before the final answer; 64 tokens may truncate the reply before
            # the "FINAL ANSWER:" line is emitted -- confirm this cap is intended.
            max_tokens=64,
        )
        # The API types `content` as optional; guard against None so the
        # .strip() below cannot raise AttributeError.
        text = (resp.choices[0].message.content or "").strip()
        # rpartition keeps only what follows the LAST marker, matching the
        # previous split(...)[-1] behaviour.
        _, marker, answer = text.rpartition("FINAL ANSWER:")
        return answer.strip() if marker else text
class SpeechToTextTool(PipelineTool):
    """Transcribes audio files using OpenAI Whisper.

    ``__call__`` is overridden so that invoking the tool goes straight to the
    OpenAI transcription endpoint via ``_transcribe``.
    """

    name = "transcriber"
    description = "Transcribes a local audio file to text using Whisper API."
    inputs = {"audio": {"type": "string", "description": "Path to audio file."}}
    output_type = "string"
    default_checkpoint = "openai/whisper-1"

    def __call__(self, audio: str, *, sanitize_inputs_outputs: bool = False) -> str:
        """Transcribe the audio file at ``audio``.

        Args:
            audio: Path to a local audio file.
            sanitize_inputs_outputs: Accepted and ignored. Input and output are
                plain strings here, so there is nothing to convert.
                NOTE(review): the base ``Tool.__call__`` takes this keyword and
                agents appear to pass it when invoking tools; without it such
                calls would raise ``TypeError`` -- confirm against the
                installed smolagents version.

        Returns:
            The transcription returned by the API.

        Raises:
            FileNotFoundError: If ``audio`` does not point to an existing file.
        """
        return self._transcribe(audio)

    @staticmethod
    def _transcribe(audio_path: str) -> str:
        """Send the file at ``audio_path`` to the ``whisper-1`` model.

        Raises:
            FileNotFoundError: If the resolved path is not an existing file.
        """
        path = Path(audio_path).expanduser().resolve()
        if not path.is_file():
            raise FileNotFoundError(f"Audio file not found: {path}")
        with path.open("rb") as fp:
            # response_format="text" requests the transcript as plain text
            # rather than a JSON payload.
            response = openai.audio.transcriptions.create(
                file=fp,
                model="whisper-1",
                response_format="text"
            )
        return response
class ExcelToTextTool(Tool):
    """Renders an Excel worksheet as Markdown text."""

    name = "excel_to_text"
    description = "Read an Excel file and return a Markdown table."
    inputs = {
        "excel_path": {"type": "string", "description": "Path to Excel file."},
        "sheet_name": {"type": "string", "description": "Sheet name or index as string.", "nullable": True},
    }
    output_type = "string"

    def forward(self, excel_path: str, sheet_name: str = None) -> str:
        """Load one worksheet and render it as a Markdown table.

        Args:
            excel_path: Path to the workbook; ``~`` is expanded and the path
                is resolved before use.
            sheet_name: Worksheet selector. ``None`` or an empty string selects
                the first sheet; an all-digit string is used as a zero-based
                sheet index; any other string is used as the sheet name.

        Returns:
            The worksheet as a Markdown table without the index column, or an
            ``"Error: ..."`` message when ``excel_path`` is not an existing
            file.
        """
        path = Path(excel_path).expanduser().resolve()
        # is_file() rather than exists(): a directory would pass an existence
        # check and then fail inside pd.read_excel. This also matches the
        # check used by SpeechToTextTool.
        if not path.is_file():
            return f"Error: Excel file not found at {path}"

        if not sheet_name:
            sheet = 0
        elif sheet_name.isdigit():
            sheet = int(sheet_name)
        else:
            sheet = sheet_name

        df = pd.read_excel(path, sheet_name=sheet)
        # Prefer pandas' own Markdown renderer; fall back to calling tabulate
        # directly when the DataFrame does not provide to_markdown.
        if hasattr(df, "to_markdown"):
            return df.to_markdown(index=False)
        return tabulate(df, headers="keys", tablefmt="github", showindex=False)
|