// app.js
const express = require('express');
const fetch = require('node-fetch');
const app = express();
const PORT = process.env.PORT || 7860;

// Julep API Base URL (fixed)
const JULEP_API_BASE = "https://api.julep.ai/api";

// Hardcoded list of models (Agent IDs in this context)
const HARDCODED_MODELS = [
    'mistral-large-2411', 'o1', 'text-embedding-3-large', 'vertex_ai/text-embedding-004',
    'claude-3.5-haiku', 'cerebras/llama-4-scout-17b-16e-instruct', 'llama-3.1-8b',
    'magnum-v4-72b', 'voyage-multilingual-2', 'claude-3-haiku', 'gpt-4o',
    'BAAI/bge-m3', 'openrouter/meta-llama/llama-4-maverick', 'openrouter/meta-llama/llama-4-scout',
    'claude-3.5-sonnet', 'hermes-3-llama-3.1-70b', 'claude-3.5-sonnet-20240620',
    'qwen-2.5-72b-instruct', 'l3.3-euryale-70b', 'gpt-4o-mini', 'cerebras/llama-3.3-70b',
    'o1-preview', 'gemini-1.5-pro-latest', 'l3.1-euryale-70b', 'claude-3-sonnet',
    'Alibaba-NLP/gte-large-en-v1.5', 'openrouter/meta-llama/llama-4-scout:free',
    'llama-3.1-70b', 'eva-qwen-2.5-72b', 'claude-3.5-sonnet-20241022', 'gemini-2.0-flash',
    'deepseek-chat', 'o1-mini', 'eva-llama-3.33-70b', 'gemini-2.5-pro-preview-03-25',
    'gemini-1.5-pro', 'gpt-4-turbo', 'openrouter/meta-llama/llama-4-maverick:free',
    'o3-mini', 'claude-3.7-sonnet', 'voyage-3', 'cerebras/llama-3.1-8b', 'claude-3-opus'
];

// --- Helper Functions ---

// Define acceptable log levels
const LogLevels = {
    DEBUG: 'debug',
    INFO: 'info',
    WARN: 'warn',
    ERROR: 'error',
    TRACE: 'trace'
};

function log(level, message, data = null) {
    // Basic check if the console object has the method
    if (typeof console[level] === 'function') {
        console[level](`[${level.toUpperCase()}] ${new Date().toISOString()} - ${message}${data !== null ? ':' : ''}`, data !== null ? JSON.stringify(data, null, 2) : '');
    } else {
        // Fallback for potentially missing methods like 'trace' in some environments
        console.log(`[${level.toUpperCase()}] ${new Date().toISOString()} - ${message}${data !== null ? ':' : ''}`, data !== null ? JSON.stringify(data, null, 2) : '');
    }
}
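
// Example log line produced by the helper above (timestamp illustrative):
//   [INFO] 2025-01-01T00:00:00.000Z - Handling /v1/models request.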

function getJulepApiKey(req) {
    const authHeader = req.headers.authorization;
    if (authHeader && authHeader.startsWith("Bearer ")) {
        log(LogLevels.DEBUG, 'Extracted Julep API Key successfully.');
        return authHeader.substring(7);
    }
    log(LogLevels.WARN, 'Could not extract Julep API Key from Authorization header.');
    return null;
}

// Helper for small delays
function sleep(ms) {
    return new Promise(resolve => setTimeout(resolve, ms));
}

// Note: Using fire-and-forget for background tasks
async function cleanupJulepResources(agentId, sessionId, headers) {
    log(LogLevels.INFO, 'Attempting Julep resource cleanup.', { agentId, sessionId });
    const cleanupPromises = [];

    // Define cleanup logic as separate async functions for clarity
    const deleteResource = async (url, type, id) => {
        try {
            log(LogLevels.DEBUG, `Sending DELETE request for ${type} ${id} to: ${url}`);
            const response = await fetch(url, { method: "DELETE", headers });
            const responseText = await response.text(); // Get text regardless of status
            if (!response.ok) {
                log(LogLevels.WARN, `Cleanup failed for ${type} ${id}: ${response.status} ${response.statusText}`, { body: responseText });
            } else {
                log(LogLevels.INFO, `Cleanup successful for ${type} ${id}.`, { status: response.status, body: responseText });
            }
        } catch (err) {
            log(LogLevels.ERROR, `Cleanup error during fetch for ${type} ${id}: ${err instanceof Error ? err.message : String(err)}`, { error: err });
        }
    };

    if (sessionId) {
        const sessionDeleteUrl = `${JULEP_API_BASE}/sessions/${sessionId}`;
        cleanupPromises.push(deleteResource(sessionDeleteUrl, 'session', sessionId));
    }
    if (agentId) {
        const agentDeleteUrl = `${JULEP_API_BASE}/agents/${agentId}`;
        // Add a small delay before deleting the agent, sometimes helps if session deletion is slow
        await sleep(100);
        cleanupPromises.push(deleteResource(agentDeleteUrl, 'agent', agentId));
    }

    if (cleanupPromises.length > 0) {
        log(LogLevels.DEBUG, `Waiting for ${cleanupPromises.length} cleanup promises.`);
        // Run cleanup in background
        Promise.allSettled(cleanupPromises)
            .then(results => {
                log(LogLevels.INFO, 'Cleanup promises settled.', { results });
            })
            .catch(error => {
                log(LogLevels.ERROR, 'Unexpected error during Promise.allSettled for cleanup.', { error });
            });
    } else {
        log(LogLevels.INFO, 'No Julep resources to clean up.');
    }
}

// Helper to format Julep ToolCall delta to OpenAI format
function toolCallDeltaToOpenAI(julepToolCalls) {
    if (!julepToolCalls) return undefined;
    return julepToolCalls.map((toolCall, index) => ({
        index: toolCall.index ?? index,
        id: toolCall.id,
        type: "function",
        function: {
            name: toolCall.function?.name,
            arguments: toolCall.function?.arguments,
        },
    }));
}

// Helper to format Julep ToolCall message to OpenAI format
function toolCallMessageToOpenAI(julepToolCalls) {
    if (!julepToolCalls) return undefined;
    return julepToolCalls.map(toolCall => ({
        id: toolCall.id,
        type: "function",
        function: {
            name: toolCall.function?.name,
            arguments: toolCall.function?.arguments,
        },
    }));
}
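
// Illustrative sketch of the mapping performed by the two helpers above (values are
// placeholders, assuming Julep already returns tool calls in an OpenAI-like shape):
//   input:   [{ id: "call_abc", function: { name: "get_weather", arguments: "{\"city\":\"Paris\"}" } }]
//   delta:   [{ index: 0, id: "call_abc", type: "function", function: { name: "get_weather", arguments: "{\"city\":\"Paris\"}" } }]
//   message: [{ id: "call_abc", type: "function", function: { name: "get_weather", arguments: "{\"city\":\"Paris\"}" } }]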

// Helper function to simulate streaming from a complete response
async function simulateStream(julepChatData, requestedModel, res) {
    log(LogLevels.INFO, 'Starting stream simulation.');
    try {
        const baseChunk = {
            id: julepChatData.id || `chatcmpl-sim-${Date.now()}`,
            object: "chat.completion.chunk",
            created: Math.floor(new Date(julepChatData.created_at || Date.now()).getTime() / 1000),
            model: requestedModel,
            system_fingerprint: julepChatData.system_fingerprint || null,
        };

        for (const [index, choice] of julepChatData.choices.entries()) {
            log(LogLevels.DEBUG, `Simulating stream for choice index ${index}.`);
            const role = choice.message?.role;
            const content = choice.message?.content;
            const toolCallsInput = choice.message?.tool_calls;
            const toolCallsDelta = toolCallsInput ? toolCallDeltaToOpenAI(toolCallsInput) : undefined; // Format as delta
            const finishReason = choice.finish_reason;

            // 1. Send role chunk
            if (role) {
                const roleChunk = { ...baseChunk, choices: [{ index: index, delta: { role: role }, finish_reason: null }] };
                log(LogLevels.DEBUG, 'Sending role chunk:', roleChunk);
                res.write(`data: ${JSON.stringify(roleChunk)}\n\n`);
                await sleep(5);
            }

            // 2. Send tool calls chunk(s) if they exist
            if (toolCallsDelta && toolCallsDelta.length > 0) {
                const toolCallDeltaChunk = { ...baseChunk, choices: [{ index: index, delta: { tool_calls: toolCallsDelta }, finish_reason: null }] };
                log(LogLevels.DEBUG, 'Sending tool_calls chunk:', toolCallDeltaChunk);
                res.write(`data: ${JSON.stringify(toolCallDeltaChunk)}\n\n`);
                await sleep(5);
            }

            // 3. Stream content
            if (content && typeof content === 'string') {
                log(LogLevels.DEBUG, `Streaming content for choice ${index} (length: ${content.length})`);
                for (const char of content) {
                    const contentChunk = { ...baseChunk, choices: [{ index: index, delta: { content: char }, finish_reason: null }] };
                    res.write(`data: ${JSON.stringify(contentChunk)}\n\n`);
                    await sleep(2); // Simulate typing delay
                }
                log(LogLevels.DEBUG, `Finished streaming content for choice ${index}`);
            } else if (content) {
                // Send non-string content as a single chunk (might be structured JSON etc.)
                const contentChunk = { ...baseChunk, choices: [{ index: index, delta: { content: JSON.stringify(content) }, finish_reason: null }] };
                log(LogLevels.DEBUG, 'Sending non-string content chunk:', contentChunk);
                res.write(`data: ${JSON.stringify(contentChunk)}\n\n`);
                await sleep(5);
            }

            // 4. Send finish reason chunk
            if (finishReason) {
                const finishChunk = { ...baseChunk, choices: [{ index: index, delta: {}, finish_reason: finishReason }] };
                log(LogLevels.DEBUG, 'Sending finish reason chunk:', finishChunk);
                res.write(`data: ${JSON.stringify(finishChunk)}\n\n`);
                await sleep(5);
            }
        }

        // 5. Send DONE marker
        log(LogLevels.INFO, 'Sending [DONE] marker.');
        res.write('data: [DONE]\n\n');
        res.end();
        log(LogLevels.INFO, 'Stream simulation completed successfully.');

    } catch (error) {
        log(LogLevels.ERROR, `Error during stream simulation: ${error instanceof Error ? error.message : String(error)}`, { error: error });
        // Try to end the response if possible
        try {
            if (!res.headersSent) {
                res.status(500).json({ error: 'Stream simulation error' });
            } else {
                res.end();
            }
        } catch (endError) {
            log(LogLevels.ERROR, 'Error ending response after stream error', { error: endError });
        }
    }
}
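
// For reference, simulateStream emits one SSE event per chunk built above. A single
// choice whose content is "Hi" would be sent roughly as (values illustrative):
//   data: {"object":"chat.completion.chunk","choices":[{"index":0,"delta":{"role":"assistant"},"finish_reason":null}],...}
//   data: {...,"choices":[{"index":0,"delta":{"content":"H"},"finish_reason":null}]}
//   data: {...,"choices":[{"index":0,"delta":{"content":"i"},"finish_reason":null}]}
//   data: {...,"choices":[{"index":0,"delta":{},"finish_reason":"stop"}]}
//   data: [DONE]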

// --- Middleware ---
app.use(express.json());

// CORS middleware
app.use((req, res, next) => {
    res.header('Access-Control-Allow-Origin', '*');
    res.header('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
    res.header('Access-Control-Allow-Headers', 'Authorization, Content-Type');
    
    if (req.method === 'OPTIONS') {
        return res.status(204).end();
    }
    next();
});

// --- Routes ---
app.get('/v1/models', async (req, res) => {
    log(LogLevels.INFO, 'Handling /v1/models request.');
    const julepApiKey = getJulepApiKey(req);
    if (!julepApiKey) {
        log(LogLevels.WARN, 'Missing API key on /v1/models request; returning hardcoded list anyway.');
        // The hardcoded model list is returned even without a key; add a 401 here to enforce auth if desired
    }

    const now = Math.floor(Date.now() / 1000);
    const openaiModels = HARDCODED_MODELS.map((modelId) => ({
        id: modelId, object: "model", created: now, owned_by: "julep",
        permission: [{
            id: `modelperm-${modelId}-${now}`, object: "model_permission", created: now,
            allow_create_engine: false, allow_sampling: true, allow_logprobs: true,
            allow_search_indices: false, allow_view: true, allow_fine_tuning: false,
            organization: "*", group: null, is_blocking: false,
        }],
        root: modelId, parent: null,
    }));
    log(LogLevels.DEBUG, 'Returning hardcoded models list.');
    res.json({ data: openaiModels, object: "list" });
});
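
// Quick check of the endpoint above (illustrative; the key value is a placeholder and is
// currently not enforced for this route):
//   curl -H "Authorization: Bearer $JULEP_API_KEY" http://localhost:7860/v1/models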

app.post('/v1/chat/completions', async (req, res) => {
    log(LogLevels.INFO, 'Handling /v1/chat/completions request.');
    const julepApiKey = getJulepApiKey(req);
    if (!julepApiKey) {
        log(LogLevels.ERROR, 'Unauthorized chat completions request: Missing Julep API Key.');
        return res.status(401).send("Unauthorized: Missing or invalid Authorization header");
    }

    // Define headers early, use this single object throughout
    const headers = {
        "Authorization": `Bearer ${julepApiKey}`,
        "Content-Type": "application/json",
    };
    log(LogLevels.DEBUG, 'Julep API request headers prepared (key omitted).', { "Content-Type": headers["Content-Type"] });

    let agentId = null;
    let sessionId = null;
    let requestBody = req.body;

    try {
        const { model, messages, stream, ...rest } = requestBody;
        const clientRequestedStream = stream === true;
        log(LogLevels.INFO, `Request details: model=${model}, clientRequestedStream=${clientRequestedStream}`);

        // 1. Validate essential parameters
        if (!model || !messages || !Array.isArray(messages) || messages.length === 0) {
            log(LogLevels.ERROR, 'Invalid request body: "model" and "messages" are required.', { model, messages });
            return res.status(400).send("Invalid request body. 'model' and 'messages' are required.");
        }
        if (!HARDCODED_MODELS.includes(model)) {
            log(LogLevels.ERROR, `Invalid model requested: ${model}`);
            return res.status(400).send(`Invalid model: ${model}. Please use one of the available models.`);
        }
        log(LogLevels.DEBUG, 'Request parameters validated.');

        // --- Agent and Session Creation ---
        // 2. Create Agent
        const createAgentUrl = `${JULEP_API_BASE}/agents`;
        const createAgentBody = {
            name: `temp-openai-${model}-${Date.now()}`,
            model: model,
            about: `Temporary agent for OpenAI model ${model}`,
        };
        log(LogLevels.INFO, 'Attempting to create Julep Agent.', { url: createAgentUrl, body: createAgentBody });
        const createAgentResponse = await fetch(createAgentUrl, { 
            method: "POST", 
            headers, 
            body: JSON.stringify(createAgentBody) 
        });
        log(LogLevels.DEBUG, `Create Agent response status: ${createAgentResponse.status}`);

        if (!createAgentResponse.ok) {
            const errorStatus = createAgentResponse.status;
            const errorStatusText = createAgentResponse.statusText;
            let errorText = "[Could not read error body]";
            try { 
                errorText = await createAgentResponse.text(); 
            } catch (e) { 
                log(LogLevels.WARN, `Could not read error text from createAgentResponse: ${e instanceof Error ? e.message : String(e)}`); 
            }
            log(LogLevels.ERROR, `Error creating Julep Agent: ${errorStatus} - ${errorText}`);
            return res.status(errorStatus).send(`Error creating Julep Agent: ${errorStatusText} - ${errorText}`);
        }

        let agentData;
        try {
            const agentResponseText = await createAgentResponse.text();
            log(LogLevels.DEBUG, 'Create Agent raw response text:', agentResponseText);
            agentData = JSON.parse(agentResponseText);
            log(LogLevels.INFO, 'Julep Agent created successfully.', { agentData });
            agentId = agentData.id;
        } catch (e) {
            log(LogLevels.ERROR, `Failed to parse Julep Agent creation response JSON: ${e instanceof Error ? e.message : String(e)}`, { error: e });
            // Attempt cleanup (fire-and-forget)
            cleanupJulepResources(agentId, sessionId, headers).catch(err => log(LogLevels.ERROR, 'Background cleanup failed after agent parse error', err));
            return res.status(500).send(`Internal Server Error: Failed to parse Julep Agent response. ${e instanceof Error ? e.message : String(e)}`);
        }

        // 3. Create Session
        const createSessionUrl = `${JULEP_API_BASE}/sessions`;
        const createSessionBody = { agent: agentId }; // Julep API uses agent
        log(LogLevels.INFO, 'Attempting to create Julep Session.', { url: createSessionUrl, body: createSessionBody });
        const createSessionResponse = await fetch(createSessionUrl, { 
            method: "POST", 
            headers, 
            body: JSON.stringify(createSessionBody) 
        });
        log(LogLevels.DEBUG, `Create Session response status: ${createSessionResponse.status}`);

        if (!createSessionResponse.ok) {
            const errorStatus = createSessionResponse.status;
            const errorStatusText = createSessionResponse.statusText;
            let errorText = "[Could not read error body]";
            try { 
                errorText = await createSessionResponse.text(); 
            } catch (e) { 
                log(LogLevels.WARN, `Could not read error text from createSessionResponse: ${e instanceof Error ? e.message : String(e)}`); 
            }
            log(LogLevels.ERROR, `Error creating Julep Session: ${errorStatus} - ${errorText}`);
            // Cleanup the agent we just created (fire-and-forget)
            cleanupJulepResources(agentId, null, headers).catch(err => log(LogLevels.ERROR, 'Background cleanup failed after session creation error', err));
            return res.status(errorStatus).send(`Error creating Julep Session: ${errorStatusText} - ${errorText}`);
        }

        let sessionData;
        try {
            const sessionResponseText = await createSessionResponse.text();
            log(LogLevels.DEBUG, 'Create Session raw response text:', sessionResponseText);
            sessionData = JSON.parse(sessionResponseText);
            log(LogLevels.INFO, 'Julep Session created successfully.', { sessionData });
            sessionId = sessionData.id;
        } catch (e) {
            log(LogLevels.ERROR, `Failed to parse Julep Session creation response JSON: ${e instanceof Error ? e.message : String(e)}`, { error: e });
            // Cleanup agent and session (fire-and-forget)
            cleanupJulepResources(agentId, sessionId, headers).catch(err => log(LogLevels.ERROR, 'Background cleanup failed after session parse error', err));
            return res.status(500).send(`Internal Server Error: Failed to parse Julep Session response. ${e instanceof Error ? e.message : String(e)}`);
        }

        // --- Perform Chat Completion (ALWAYS non-streaming to Julep) ---
        // 4. Send Chat Request to Julep
        const chatUrl = `${JULEP_API_BASE}/sessions/${sessionId}/chat`;
        const chatBodyToJulep = {
            messages: messages.map((msg) => ({
                role: msg.role,
                content: typeof msg.content === 'string' ? msg.content : JSON.stringify(msg.content),
                // Include tool_calls if present in the input message (OpenAI format)
                tool_calls: msg.tool_calls, // Assuming Julep accepts OpenAI tool call format here
                tool_call_id: msg.tool_call_id // If it's a tool response message
            })),
            stream: false, // Force non-streaming
            ...rest, // Pass through other OpenAI parameters like temperature, top_p, etc.
        };
        log(LogLevels.INFO, 'Sending Chat request to Julep (forced non-stream).', { url: chatUrl });
        log(LogLevels.DEBUG, 'Julep Chat Request Body:', chatBodyToJulep);
        const chatResponse = await fetch(chatUrl, { 
            method: "POST", 
            headers, 
            body: JSON.stringify(chatBodyToJulep) 
        });
        log(LogLevels.DEBUG, `Julep Chat response status: ${chatResponse.status}`);

        // --- Handle Julep Response ---
        if (!chatResponse.ok) {
            const errorStatus = chatResponse.status;
            const errorStatusText = chatResponse.statusText;
            let errorText = "[Could not read error body]";
            try { 
                errorText = await chatResponse.text(); 
            } catch (e) { 
                log(LogLevels.WARN, `Could not read error text from chatResponse: ${e instanceof Error ? e.message : String(e)}`); 
            }
            log(LogLevels.ERROR, `Error during Julep Chat Completion: ${errorStatus} - ${errorText}`);
            // Cleanup agent and session (fire-and-forget)
            cleanupJulepResources(agentId, sessionId, headers).catch(err => log(LogLevels.ERROR, 'Background cleanup failed after chat error', err));
            return res.status(errorStatus).send(`Error during Julep Chat Completion: ${errorStatusText} - ${errorText}`);
        }

        // Julep request was successful, get the full JSON body
        let julepChatData;
        try {
            const chatResponseText = await chatResponse.text();
            log(LogLevels.DEBUG, 'Julep Chat raw response text:', chatResponseText);
            julepChatData = JSON.parse(chatResponseText);
            log(LogLevels.INFO, 'Julep chat completion successful.', { responseId: julepChatData.id });
            log(LogLevels.DEBUG, 'Julep Chat response data:', julepChatData);
        } catch (e) {
            log(LogLevels.ERROR, `Failed to parse Julep Chat response JSON (status was OK): ${e instanceof Error ? e.message : String(e)}`, { error: e });
            // Cleanup agent and session (fire-and-forget)
            cleanupJulepResources(agentId, sessionId, headers).catch(err => log(LogLevels.ERROR, 'Background cleanup failed after chat parse error', err));
            return res.status(500).send(`Internal Server Error: Failed to parse Julep Chat response. ${e instanceof Error ? e.message : String(e)}`);
        }

        // *** Trigger cleanup NOW (fire-and-forget), before returning the response/stream ***
        log(LogLevels.INFO, 'Julep chat successful, queueing cleanup.');
        cleanupJulepResources(agentId, sessionId, headers).catch(err => log(LogLevels.ERROR, 'Background cleanup failed after successful chat', err));

        // --- Format and Return Response to Client ---
        // The parsed Julep body is used directly as the chat response data
        const julepResponseData = julepChatData;
        if (!julepResponseData || !julepResponseData.choices) {
            log(LogLevels.ERROR, 'Julep response format unexpected. Missing "choices" array.', { julepChatData });
            return res.status(500).send('Internal Server Error: Unexpected format from Julep API.');
        }

        if (clientRequestedStream) {
            log(LogLevels.INFO, 'Client requested stream, starting simulation.');
            // Set headers for SSE
            res.setHeader('Content-Type', 'text/event-stream');
            res.setHeader('Cache-Control', 'no-cache');
            res.setHeader('Connection', 'keep-alive');
            res.flushHeaders(); // Flush the headers to establish SSE with client

            // Start simulation
            simulateStream(julepResponseData, model, res)
                .catch(streamErr => {
                    log(LogLevels.ERROR, 'Stream simulation task failed.', { error: streamErr });
                });
        } else {
            log(LogLevels.INFO, 'Client requested non-streaming response.');
            // Format julepResponseData to OpenAI format
            const openaiCompletion = {
                id: julepResponseData.id || `chatcmpl-${Date.now()}`,
                object: "chat.completion",
                created: Math.floor(new Date(julepResponseData.created_at || Date.now()).getTime() / 1000),
                model: model, // Use the originally requested model
                choices: julepResponseData.choices.map((choice) => ({
                    index: choice.index,
                    message: {
                        role: choice.message.role,
                        content: choice.message.content,
                        // Use toolCallMessageToOpenAI here for the completed message format
                        tool_calls: choice.message.tool_calls ? toolCallMessageToOpenAI(choice.message.tool_calls) : undefined
                    },
                    finish_reason: choice.finish_reason
                })),
                usage: julepResponseData.usage ? {
                    prompt_tokens: julepResponseData.usage.prompt_tokens,
                    completion_tokens: julepResponseData.usage.completion_tokens,
                    total_tokens: julepResponseData.usage.total_tokens
                } : undefined,
                system_fingerprint: julepResponseData.system_fingerprint || null,
            };
            log(LogLevels.DEBUG, 'Formatted non-streaming OpenAI response:', openaiCompletion);
            log(LogLevels.INFO, 'Returning non-streaming JSON response to client.');
            res.json(openaiCompletion);
        }

    } catch (error) {
        // Catch errors from initial parsing, validation, or unexpected issues within the try block
        log(LogLevels.ERROR, `Error in handleChatCompletions (outer catch): ${error instanceof Error ? error.message : String(error)}`, { error: error, agentId, sessionId });
        // 'headers' is defined before this try block, so it is always available here
        // Attempt cleanup (fire-and-forget)
        cleanupJulepResources(agentId, sessionId, headers).catch(err => log(LogLevels.ERROR, 'Background cleanup failed in outer catch block', err));
        res.status(500).send(`Internal Server Error: ${error instanceof Error ? error.message : String(error)}`);
    }
});
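
// Example OpenAI-style request against the route above (illustrative; the key is a
// placeholder and the model must be one of HARDCODED_MODELS):
//   curl http://localhost:7860/v1/chat/completions \
//     -H "Authorization: Bearer $JULEP_API_KEY" \
//     -H "Content-Type: application/json" \
//     -d '{"model": "gpt-4o-mini", "messages": [{"role": "user", "content": "Hello"}], "stream": false}'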

// Health check endpoint for Hugging Face
app.get('/', (req, res) => {
    res.send('Julep API Proxy is running');
});

// Start the server
app.listen(PORT, () => {
    log(LogLevels.INFO, `Server running on port ${PORT}`);
});
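
// To run locally (a minimal sketch, assuming Node.js with CommonJS):
//   npm install express node-fetch@2
//   node app.js
// node-fetch v2 is assumed here because the file uses require(); v3 is ESM-only.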