import os
import time
import random
import logging
from openai import OpenAI
from dotenv import load_dotenv
from utils import read_config

# --- Load environment & config ---
load_dotenv()
_config = read_config()["llm"]

# --- Logging setup ---
LOG_LEVEL = os.getenv("LOG_LEVEL", "INFO").upper()
logger = logging.getLogger("polLLM")
logger.setLevel(LOG_LEVEL)
handler = logging.StreamHandler()
handler.setFormatter(logging.Formatter("%(asctime)s [%(levelname)s] %(message)s"))
logger.addHandler(handler)

# --- LLM settings from config.yaml ---
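# The keys below come from the "llm" section of config.yaml. A minimal config
# matching these reads might look like the sketch here (an assumption based on
# the keys this module reads, not the project's actual file):
#
#   llm:
#     model: "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8"
#     system_prompt: "You are {char}, a concise assistant."
#     char: "Eve"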
_DEFAULT_MODEL     = _config.get("model", "chutesai/Llama-4-Maverick-17B-128E-Instruct-FP8")
_SYSTEM_TEMPLATE   = _config.get("system_prompt", "")
_CHAR              = _config.get("char", "Eve")
_CHUTES_API_KEY    = os.getenv("CHUTES_API_KEY")

# --- Custom exception ---
class LLMBadRequestError(Exception):
    """Raised when the LLM returns HTTP 400 (Bad Request)."""
    pass

# --- OpenAI client init ---
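# The Chutes endpoint is reached through the stock OpenAI client, pointed at
# the Chutes base_url (it exposes an OpenAI-compatible /v1 API). The API key
# is loaded from .env via load_dotenv() above.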
client = OpenAI(
    base_url="https://llm.chutes.ai/v1/",
    api_key=_CHUTES_API_KEY
)

def _build_system_prompt() -> str:
    """
    Substitute {char} into the system prompt template.
    """
    return _SYSTEM_TEMPLATE.replace("{char}", _CHAR)

def generate_llm(
    prompt: str,
) -> str:
    """
    Send a chat-completion request to the LLM, with retries and backoff.
    Reads defaults from config.yaml, but can be overridden per-call.
    """
    model = _DEFAULT_MODEL
    system_prompt = _build_system_prompt()
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": prompt},
    ]

    # Retry up to 5 times with exponential backoff: 1s, 2s, 4s, 8s between attempts.
    backoff = 1
    for attempt in range(1, 6):
        try:
            seed = random.randint(0, 2**31 - 1)
            logger.debug(f"LLM call attempt={attempt}, model={model}, seed={seed}")
            resp = client.chat.completions.create(
                model=model,
                messages=messages,
                seed=seed,
            )
            text = (resp.choices[0].message.content or "").strip()
            logger.debug("LLM response received")
            return text
        except Exception as e:
            # HTTP 400 means the request itself is malformed; retrying won't help.
            if getattr(e, "status_code", None) == 400:
                logger.error("LLM error 400 (Bad Request): not retrying.")
                raise LLMBadRequestError("LLM returned HTTP 400") from e
            logger.error(f"LLM error on attempt {attempt}: {e}")
            if attempt < 5:
                time.sleep(backoff)
                backoff *= 2
            else:
                logger.critical("LLM failed after 5 attempts, raising")
                raise

# Example local test
if __name__ == "__main__":
    logger.info("Testing generate_llm() with a sample prompt")
    try:
        print(generate_llm("generate 4 images of 1:1 profile picture"))
    except LLMBadRequestError as e:
        logger.warning(f"Test failed with bad request: {e}")