import os
import time
import random
import logging
from openai import OpenAI
from dotenv import load_dotenv
from utils import read_config
# --- Load environment & config ---
load_dotenv()
_config = read_config()["llm"]
# --- Logging setup ---
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
logger = logging.getLogger("polLLM")
logger.setLevel(LOG_LEVEL)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
logger.addHandler(handler)
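
# LOG_LEVEL is read from the environment (shell or .env), defaulting to INFO;
# e.g. (module name illustrative, not from this repo):
#   LOG_LEVEL=DEBUG python llm_module.py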
# --- LLM settings from config.yaml ---
_DEFAULT_MODEL = _config.get("model", "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8")
_SYSTEM_TEMPLATE = _config.get("system_prompt", "")
_CHAR = _config.get("char", "Eve")
_CHUTES_API_KEY = os.getenv("CHUTES_API_KEY")
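
# Assumed config.yaml shape, inferred from the .get() keys above (values other
# than the defaults already present in this file are placeholders):
#
#   llm:
#     model: "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8"
#     system_prompt: "You are {char}, a helpful assistant."  # placeholder
#     char: "Eve"
#
# CHUTES_API_KEY is expected in the environment or in a .env file picked up by
# load_dotenv() above.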
# --- Custom exception ---
class LLMBadRequestError(Exception):
    """Raised when the LLM returns HTTP 400 (Bad Request)."""
    pass
# --- OpenAI client init ---
client = OpenAI(
    base_url="https://llm.chutes.ai/v1/",
    api_key=_CHUTES_API_KEY,
)
def _build_system_prompt() -> str:
    """Substitute {char} into the system prompt template."""
    return _SYSTEM_TEMPLATE.replace("{char}", _CHAR)
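# Illustrative substitution (values assumed, not from the repo's config):
#   template "You are {char}." with char "Eve" -> "You are Eve."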
def generate_llm(prompt: str) -> str:
    """
    Send a chat-completion request to the LLM, with retries and
    exponential backoff. The model and system prompt come from config.yaml.
    """
    model = _DEFAULT_MODEL
    system_prompt = _build_system_prompt()
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]
    # Retry up to 5 times with exponential backoff: 1s, 2s, 4s, 8s between attempts.
    backoff = 1
    for attempt in range(1, 6):
        try:
            # Fresh seed per attempt so retries are not byte-identical requests.
            seed = random.randint(0, 2**31 - 1)
            logger.debug(f"LLM call attempt={attempt}, model={model}, seed={seed}")
            resp = client.chat.completions.create(
                model=model,
                messages=messages,
                seed=seed,
            )
            text = resp.choices[0].message.content.strip()
            logger.debug("LLM response received")
            return text
        except Exception as e:
            # HTTP 400 means the request itself is malformed; retrying won't help.
            if getattr(e, "status_code", None) == 400:
                logger.error("LLM error 400 (Bad Request): not retrying.")
                raise LLMBadRequestError("LLM returned HTTP 400") from e
            logger.error(f"LLM error on attempt {attempt}: {e}")
            if attempt < 5:
                time.sleep(backoff)
                backoff *= 2
            else:
                logger.critical("LLM failed after 5 attempts, raising")
                raise
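
# Hedged caller sketch: generate_llm() raises LLMBadRequestError for a
# malformed request and re-raises the last transport error after 5 failed
# attempts, so callers that must not crash should handle both:
#
#   try:
#       reply = generate_llm("Say hello")
#   except LLMBadRequestError:
#       reply = None  # prompt itself was rejected; don't resend as-is
#   except Exception:
#       reply = None  # transient/provider failure after all retries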
# Example local test
if __name__ == "__main__":
    logger.info("Testing generate_llm() with a sample prompt")
    try:
        print(generate_llm("generate 4 images of 1:1 profile picture"))
    except LLMBadRequestError as e:
        logger.warning(f"Test failed with bad request: {e}")