Update main.py
Browse files
main.py
CHANGED
@@ -6,68 +6,23 @@ import uuid
|
|
6 |
import json
|
7 |
import logging
|
8 |
import asyncio
|
9 |
-
import
|
|
|
10 |
from typing import List, Dict, Any, Optional, AsyncGenerator, Union
|
11 |
from datetime import datetime
|
12 |
|
13 |
-
from slowapi import Limiter, _rate_limit_exceeded_handler
|
14 |
-
from slowapi.util import get_remote_address
|
15 |
-
from slowapi.errors import RateLimitExceeded
|
16 |
-
from slowapi.middleware import SlowAPIMiddleware
|
17 |
-
from fastapi import FastAPI, HTTPException, Request, Depends, Security
|
18 |
-
from fastapi.responses import StreamingResponse, JSONResponse, RedirectResponse
|
19 |
-
from fastapi.security import HTTPBearer, HTTPAuthorizationCredentials
|
20 |
-
from pydantic import BaseModel, Field
|
21 |
-
|
22 |
from aiohttp import ClientSession, ClientTimeout, ClientError
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
# Custom logging filter to inject client_ip from context variable
|
28 |
-
class ContextFilter(logging.Filter):
    """Logging filter that stamps every record with the current client IP.

    The value is read from the module-level ``client_ip_var`` context
    variable so that ``%(client_ip)s`` can be used in a format string.
    """

    def filter(self, record):
        """Attach ``client_ip`` to *record*; never drops the record.

        Returns:
            Always ``True`` so the record continues through the handler.
        """
        try:
            record.client_ip = client_ip_var.get()
        except LookupError:
            # ContextVar.get() raises LookupError when no value was set in
            # this context and the variable has no default (e.g. a message
            # logged outside of a request). Logging must not crash on that.
            record.client_ip = 'N/A'
        return True
|
32 |
-
|
33 |
-
# Custom logging formatter to handle missing client_ip
|
34 |
-
class SafeFormatter(logging.Formatter):
    """Formatter that tolerates records lacking a ``client_ip`` attribute."""

    def format(self, record):
        """Render *record*, filling in ``client_ip = 'N/A'`` when it is absent."""
        absent = object()
        if getattr(record, 'client_ip', absent) is absent:
            record.client_ip = 'N/A'
        return super().format(record)
|
39 |
|
40 |
# Configure logging
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
handler = logging.StreamHandler()
|
46 |
-
handler.setLevel(logging.INFO)
|
47 |
-
|
48 |
-
# Create and set the custom formatter
|
49 |
-
formatter = SafeFormatter(
|
50 |
-
fmt="%(asctime)s [%(levelname)s] %(name)s [IP: %(client_ip)s]: %(message)s",
|
51 |
-
datefmt="%Y-%m-%d %H:%M:%S"
|
52 |
)
|
53 |
-
|
54 |
-
|
55 |
-
# Add the custom filter to the handler
|
56 |
-
handler.addFilter(ContextFilter())
|
57 |
-
|
58 |
-
# Add handlers to the logger
|
59 |
-
logger.addHandler(handler)
|
60 |
-
|
61 |
-
# Initialize the limiter with slowapi
|
62 |
-
# Limiter keyed by get_remote_address with a default limit of "60/minute".
limiter = Limiter(
    key_func=get_remote_address,
    default_limits=["60/minute"],
)
app = FastAPI()

# Store the limiter on the application state and register the handler
# used when RateLimitExceeded is raised.
app.state.limiter = limiter
app.add_exception_handler(RateLimitExceeded, _rate_limit_exceeded_handler)

# Add SlowAPI middleware
app.add_middleware(SlowAPIMiddleware)
|
71 |
|
72 |
# Load environment variables
|
73 |
API_KEYS = os.getenv('API_KEYS', '').split(',') # Comma-separated API keys
|
@@ -78,51 +33,49 @@ if not API_KEYS or API_KEYS == ['']:
|
|
78 |
logger.error("No API keys found. Please set the API_KEYS environment variable.")
|
79 |
raise Exception("API_KEYS environment variable not set.")
|
80 |
|
81 |
-
# Define API key security using HTTPBearer (Bearer token)
|
82 |
-
security = HTTPBearer()
|
83 |
-
|
84 |
-
async def get_api_key(credentials: HTTPAuthorizationCredentials = Depends(security)):
    """
    Dependency to extract and validate the API key from the Authorization header.
    Expects the header in the format: Authorization: Bearer YOUR_API_KEY

    Returns:
        The API key, once it has been found in ``API_KEYS``.

    Raises:
        HTTPException: 401 when the credentials are missing, use a scheme
            other than Bearer, or carry an empty token; 403 when the key is
            not listed in ``API_KEYS``.
    """
    import hashlib

    def _reject(status_code, detail):
        # Every rejection carries the same challenge header.
        return HTTPException(
            status_code=status_code,
            detail=detail,
            headers={"WWW-Authenticate": "Bearer"},
        )

    if not credentials:
        logger.warning("Authorization header missing")
        raise _reject(401, "Authorization header missing")

    if credentials.scheme.lower() != "bearer":
        logger.warning(f"Invalid authentication scheme: {credentials.scheme}")
        raise _reject(401, "Invalid authentication scheme. Expected 'Bearer'.")

    api_key = credentials.credentials
    if not api_key:
        logger.warning("API key missing in Authorization header")
        raise _reject(401, "API key missing")

    if api_key not in API_KEYS:
        # Never write the submitted secret to the log; a short digest is
        # enough to correlate repeated attempts.
        fingerprint = hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:8]
        logger.warning(f"Invalid API key (sha256 fingerprint: {fingerprint})")
        raise _reject(403, "Invalid API key")

    return api_key
|
119 |
-
|
120 |
# Process available models
|
121 |
if AVAILABLE_MODELS:
|
122 |
AVAILABLE_MODELS = [model.strip() for model in AVAILABLE_MODELS.split(',') if model.strip()]
|
123 |
else:
|
124 |
AVAILABLE_MODELS = [] # If empty, all models are available
|
125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
126 |
# Custom exception for model not working
|
127 |
class ModelNotWorkingException(Exception):
|
128 |
def __init__(self, model: str):
|
@@ -272,13 +225,13 @@ class Blackbox:
|
|
272 |
) -> AsyncGenerator[Any, None]:
|
273 |
model = cls.get_model(model)
|
274 |
if model is None:
|
275 |
-
logger.error(f"Model {model} is not available.
|
276 |
raise ModelNotWorkingException(model)
|
277 |
|
278 |
logger.info(f"Selected model: {model}")
|
279 |
|
280 |
if not cls.working or model not in cls.models:
|
281 |
-
logger.error(f"Model {model} is not working or not supported.
|
282 |
raise ModelNotWorkingException(model)
|
283 |
|
284 |
headers = {
|
@@ -290,13 +243,13 @@ class Blackbox:
|
|
290 |
"pragma": "no-cache",
|
291 |
"priority": "u=1, i",
|
292 |
"referer": cls.model_referers.get(model, cls.url),
|
293 |
-
"sec-ch-ua": '"Chromium";v="
|
294 |
"sec-ch-ua-mobile": "?0",
|
295 |
"sec-ch-ua-platform": '"Linux"',
|
296 |
"sec-fetch-dest": "empty",
|
297 |
"sec-fetch-mode": "cors",
|
298 |
"sec-fetch-site": "same-origin",
|
299 |
-
"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/
|
300 |
}
|
301 |
|
302 |
if model in cls.model_prefixes:
|
@@ -371,7 +324,7 @@ class Blackbox:
|
|
371 |
yield ImageResponse(image_url, alt=messages[-1]['content'])
|
372 |
else:
|
373 |
logger.error("Image URL not found in the response.")
|
374 |
-
raise Exception("Image URL not found in the response
|
375 |
else:
|
376 |
full_response = ""
|
377 |
search_results_json = ""
|
@@ -403,31 +356,30 @@ class Blackbox:
|
|
403 |
except json.JSONDecodeError as je:
|
404 |
logger.error("Failed to parse search results JSON.")
|
405 |
raise je
|
|
|
406 |
except ClientError as ce:
|
407 |
logger.error(f"Client error occurred: {ce}. Retrying attempt {attempt + 1}/{retry_attempts}")
|
408 |
if attempt == retry_attempts - 1:
|
409 |
-
raise HTTPException(status_code=502, detail="Error communicating with the external API.
|
410 |
except asyncio.TimeoutError:
|
411 |
logger.error(f"Request timed out. Retrying attempt {attempt + 1}/{retry_attempts}")
|
412 |
if attempt == retry_attempts - 1:
|
413 |
-
raise HTTPException(status_code=504, detail="External API request timed out.
|
414 |
except Exception as e:
|
415 |
logger.error(f"Unexpected error: {e}. Retrying attempt {attempt + 1}/{retry_attempts}")
|
416 |
if attempt == retry_attempts - 1:
|
417 |
raise HTTPException(status_code=500, detail=str(e))
|
418 |
|
419 |
-
# FastAPI app setup
|
|
|
|
|
|
|
420 |
@app.middleware("http")
|
421 |
async def security_middleware(request: Request, call_next):
|
422 |
-
client_ip = request.client.host
|
423 |
-
# Set the client_ip in the context variable
|
424 |
-
client_ip_var.set(client_ip)
|
425 |
-
|
426 |
# Enforce that POST requests to sensitive endpoints must have a valid Content-Type
|
427 |
-
if request.method == "POST" and request.url.path
|
428 |
content_type = request.headers.get("Content-Type")
|
429 |
if content_type != "application/json":
|
430 |
-
logger.warning("Invalid Content-Type for /v1/chat/completions")
|
431 |
return JSONResponse(
|
432 |
status_code=400,
|
433 |
content={
|
@@ -439,33 +391,24 @@ async def security_middleware(request: Request, call_next):
|
|
439 |
}
|
440 |
},
|
441 |
)
|
442 |
-
|
443 |
-
# Log the incoming request
|
444 |
-
logger.info(f"Incoming request: {request.method} {request.url.path}")
|
445 |
-
|
446 |
response = await call_next(request)
|
447 |
-
|
448 |
-
# Log the response status
|
449 |
-
logger.info(f"Response status: {response.status_code}")
|
450 |
-
|
451 |
return response
|
452 |
|
453 |
class Message(BaseModel):
    """One entry of a chat request's ``messages`` list."""

    # Role label of the message.
    role: str
    # Text of the message.
    content: str
    # Optional field as per OpenAI's API
    name: Optional[str] = None
|
457 |
|
458 |
class ChatRequest(BaseModel):
|
459 |
model: str
|
460 |
messages: List[Message]
|
461 |
-
temperature: Optional[float] =
|
462 |
-
top_p: Optional[float] =
|
463 |
-
n: Optional[int] =
|
464 |
stream: Optional[bool] = False
|
465 |
stop: Optional[Union[str, List[str]]] = None
|
466 |
-
max_tokens: Optional[int] =
|
467 |
-
presence_penalty: Optional[float] =
|
468 |
-
frequency_penalty: Optional[float] =
|
469 |
logit_bias: Optional[Dict[str, float]] = None
|
470 |
user: Optional[str] = None
|
471 |
webSearchMode: Optional[bool] = False # Custom parameter
|
@@ -486,15 +429,8 @@ def create_response(content: str, model: str, finish_reason: Optional[str] = Non
|
|
486 |
"usage": None,
|
487 |
}
|
488 |
|
489 |
-
@app.post("/v1/chat/completions", dependencies=[Depends(
|
490 |
-
async def chat_completions(
|
491 |
-
request: ChatRequest,
|
492 |
-
req: Request,
|
493 |
-
api_key: str = Depends(get_api_key)
|
494 |
-
):
|
495 |
-
"""
|
496 |
-
Handles the /v1/chat/completions endpoint, emulating OpenAI's API.
|
497 |
-
"""
|
498 |
# Redact user messages only for logging purposes
|
499 |
redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
|
500 |
|
@@ -504,7 +440,7 @@ async def chat_completions(
|
|
504 |
# Validate that the requested model is available
|
505 |
if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
|
506 |
logger.warning(f"Attempt to use unavailable model: {request.model}")
|
507 |
-
raise HTTPException(status_code=400, detail="Requested model is not available.
|
508 |
|
509 |
# Process the request with actual message content, but don't log it
|
510 |
async_generator = Blackbox.create_async_generator(
|
@@ -545,7 +481,7 @@ async def chat_completions(
|
|
545 |
else:
|
546 |
response_content += chunk
|
547 |
|
548 |
-
logger.info(f"Completed non-streaming response generation for API key: {api_key}
|
549 |
return {
|
550 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
551 |
"object": "chat.completion",
|
@@ -584,18 +520,18 @@ async def chat_completions_get():
|
|
584 |
return RedirectResponse(url='about:blank')
|
585 |
|
586 |
@app.get("/v1/models")
|
587 |
-
async def get_models(
|
588 |
-
logger.info(
|
589 |
return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
|
590 |
|
591 |
# Additional endpoints for better functionality
|
592 |
-
@app.get("/v1/health")
async def health_check(req: Request):
    """Liveness probe: logs the call and answers ``{"status": "ok"}``."""
    logger.info("Health check requested")
    status_payload = {"status": "ok"}
    return status_payload
|
596 |
|
597 |
@app.get("/v1/models/{model}/status")
|
598 |
-
async def model_status(model: str
|
599 |
logger.info(f"Model status requested for '{model}'")
|
600 |
if model in Blackbox.models:
|
601 |
return {"model": model, "status": "available"}
|
@@ -609,7 +545,6 @@ async def model_status(model: str, req: Request):
|
|
609 |
# Custom exception handler to match OpenAI's error format
|
610 |
@app.exception_handler(HTTPException)
|
611 |
async def http_exception_handler(request: Request, exc: HTTPException):
|
612 |
-
logger.error(f"HTTPException: {exc.detail}")
|
613 |
return JSONResponse(
|
614 |
status_code=exc.status_code,
|
615 |
content={
|
@@ -627,12 +562,68 @@ class TokenizerRequest(BaseModel):
|
|
627 |
text: str
|
628 |
|
629 |
@app.post("/v1/tokenizer")
|
630 |
-
async def tokenizer(request: TokenizerRequest
|
631 |
text = request.text
|
632 |
token_count = len(text.split())
|
633 |
-
logger.info(f"Tokenizer called | Tokens: {token_count}")
|
634 |
return {"text": text, "tokens": token_count}
|
635 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
636 |
if __name__ == "__main__":
    # Serve the app with uvicorn when this file is executed as a script.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )
|
|
|
6 |
import json
|
7 |
import logging
|
8 |
import asyncio
|
9 |
+
import time
|
10 |
+
from collections import defaultdict
|
11 |
from typing import List, Dict, Any, Optional, AsyncGenerator, Union
|
12 |
from datetime import datetime
|
13 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
14 |
from aiohttp import ClientSession, ClientTimeout, ClientError
|
15 |
+
from fastapi import FastAPI, HTTPException, Request, Depends, Header
|
16 |
+
from fastapi.responses import StreamingResponse, JSONResponse, RedirectResponse
|
17 |
+
from pydantic import BaseModel
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# Configure logging
|
20 |
+
# Root logging: INFO and above, timestamped, through a single StreamHandler.
logging.basicConfig(
    handlers=[logging.StreamHandler()],
    format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
    level=logging.INFO,
)

# Module-level logger used throughout this file.
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
26 |
|
27 |
# Load environment variables
|
28 |
API_KEYS = os.getenv('API_KEYS', '').split(',') # Comma-separated API keys
|
|
|
33 |
logger.error("No API keys found. Please set the API_KEYS environment variable.")
|
34 |
raise Exception("API_KEYS environment variable not set.")
|
35 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
36 |
# Process available models
|
37 |
if AVAILABLE_MODELS:
|
38 |
AVAILABLE_MODELS = [model.strip() for model in AVAILABLE_MODELS.split(',') if model.strip()]
|
39 |
else:
|
40 |
AVAILABLE_MODELS = [] # If empty, all models are available
|
41 |
|
42 |
+
# Simple in-memory rate limiter
|
43 |
+
def _new_rate_window():
    """Return a fresh counter record: zero requests, window starting now."""
    return {"count": 0, "timestamp": time.time()}


# Request counters for the current window, keyed by API key and by client IP.
rate_limit_store = defaultdict(_new_rate_window)
ip_rate_limit_store = defaultdict(_new_rate_window)
|
45 |
+
|
46 |
+
async def get_api_key(authorization: str = Header(None)) -> str:
    """Extract and validate the API key from the ``Authorization`` header.

    Expects ``Authorization: Bearer <key>`` and returns ``<key>`` when it
    matches one of the entries in ``API_KEYS``.

    Raises:
        HTTPException: 401 when the header is missing, does not start with
            ``Bearer ``, carries an empty token, or the key is unknown.
    """
    import hashlib
    import hmac

    challenge = {"WWW-Authenticate": "Bearer"}

    if authorization is None or not authorization.startswith('Bearer '):
        logger.warning("Invalid or missing authorization header.")
        raise HTTPException(
            status_code=401,
            detail='Invalid authorization header format',
            headers=challenge,
        )

    api_key = authorization[7:]

    # An empty token must never authenticate, even if API_KEYS happens to
    # contain '' (e.g. the environment variable ended with a comma).
    matched = False
    if api_key:
        candidate = api_key.encode("utf-8")
        for known in API_KEYS:
            # Constant-time comparison; scan every entry rather than
            # stopping at the first hit.
            if known and hmac.compare_digest(candidate, known.encode("utf-8")):
                matched = True

    if not matched:
        # Log a short digest instead of the submitted secret itself.
        fingerprint = hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:8]
        logger.warning(f"Invalid API key attempted (sha256 fingerprint: {fingerprint})")
        raise HTTPException(status_code=401, detail='Invalid API key', headers=challenge)

    return api_key
|
55 |
+
|
56 |
+
def _consume_rate_limit(store, key, now, limit, window=60):
    """Count one request for *key* in *store*; return False when over *limit*.

    *store* must create a ``{"count", "timestamp"}`` record on first access
    (the module's stores are ``defaultdict`` instances). A record older than
    *window* seconds is replaced by a new window holding this request.
    """
    entry = store[key]
    if now - entry["timestamp"] > window:
        store[key] = {"count": 1, "timestamp": now}
        return True
    if entry["count"] >= limit:
        return False
    entry["count"] += 1
    return True


async def rate_limiter(req: Request, api_key: str = Depends(get_api_key)):
    """Dependency enforcing ``RATE_LIMIT`` requests per 60 seconds.

    The limit is applied first per API key, then per client IP address.

    Raises:
        HTTPException: 429 when either the key or the IP is over its limit.
    """
    import hashlib

    current_time = time.time()

    # Rate limiting per API key
    if not _consume_rate_limit(rate_limit_store, api_key, current_time, RATE_LIMIT):
        # Identify the key by a short digest; the secret stays out of the log.
        fingerprint = hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:8]
        logger.warning(f"Rate limit exceeded for API key (sha256 fingerprint: {fingerprint})")
        raise HTTPException(status_code=429, detail='Rate limit exceeded for API key')

    # Rate limiting per IP address
    # req.client can be None when the server supplies no peer address; such
    # requests share one bucket instead of raising AttributeError.
    client_ip = req.client.host if req.client else "unknown"
    if not _consume_rate_limit(ip_rate_limit_store, client_ip, current_time, RATE_LIMIT):
        logger.warning(f"Rate limit exceeded for IP address: {client_ip}")
        raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address')
|
78 |
+
|
79 |
# Custom exception for model not working
|
80 |
class ModelNotWorkingException(Exception):
|
81 |
def __init__(self, model: str):
|
|
|
225 |
) -> AsyncGenerator[Any, None]:
|
226 |
model = cls.get_model(model)
|
227 |
if model is None:
|
228 |
+
logger.error(f"Model {model} is not available.")
|
229 |
raise ModelNotWorkingException(model)
|
230 |
|
231 |
logger.info(f"Selected model: {model}")
|
232 |
|
233 |
if not cls.working or model not in cls.models:
|
234 |
+
logger.error(f"Model {model} is not working or not supported.")
|
235 |
raise ModelNotWorkingException(model)
|
236 |
|
237 |
headers = {
|
|
|
243 |
"pragma": "no-cache",
|
244 |
"priority": "u=1, i",
|
245 |
"referer": cls.model_referers.get(model, cls.url),
|
246 |
+
"sec-ch-ua": '"Chromium";v="129", "Not=A?Brand";v="8"',
|
247 |
"sec-ch-ua-mobile": "?0",
|
248 |
"sec-ch-ua-platform": '"Linux"',
|
249 |
"sec-fetch-dest": "empty",
|
250 |
"sec-fetch-mode": "cors",
|
251 |
"sec-fetch-site": "same-origin",
|
252 |
+
"user-agent": "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/129.0.0.0 Safari/537.36",
|
253 |
}
|
254 |
|
255 |
if model in cls.model_prefixes:
|
|
|
324 |
yield ImageResponse(image_url, alt=messages[-1]['content'])
|
325 |
else:
|
326 |
logger.error("Image URL not found in the response.")
|
327 |
+
raise Exception("Image URL not found in the response")
|
328 |
else:
|
329 |
full_response = ""
|
330 |
search_results_json = ""
|
|
|
356 |
except json.JSONDecodeError as je:
|
357 |
logger.error("Failed to parse search results JSON.")
|
358 |
raise je
|
359 |
+
break # Exit the retry loop if successful
|
360 |
except ClientError as ce:
|
361 |
logger.error(f"Client error occurred: {ce}. Retrying attempt {attempt + 1}/{retry_attempts}")
|
362 |
if attempt == retry_attempts - 1:
|
363 |
+
raise HTTPException(status_code=502, detail="Error communicating with the external API.")
|
364 |
except asyncio.TimeoutError:
|
365 |
logger.error(f"Request timed out. Retrying attempt {attempt + 1}/{retry_attempts}")
|
366 |
if attempt == retry_attempts - 1:
|
367 |
+
raise HTTPException(status_code=504, detail="External API request timed out.")
|
368 |
except Exception as e:
|
369 |
logger.error(f"Unexpected error: {e}. Retrying attempt {attempt + 1}/{retry_attempts}")
|
370 |
if attempt == retry_attempts - 1:
|
371 |
raise HTTPException(status_code=500, detail=str(e))
|
372 |
|
373 |
+
# FastAPI app setup
|
374 |
+
app = FastAPI()
|
375 |
+
|
376 |
+
# Middleware to enhance security
|
377 |
@app.middleware("http")
|
378 |
async def security_middleware(request: Request, call_next):
|
|
|
|
|
|
|
|
|
379 |
# Enforce that POST requests to sensitive endpoints must have a valid Content-Type
|
380 |
+
if request.method == "POST" and request.url.path in ["/v1/chat/completions", "/v1/completions"]:
|
381 |
content_type = request.headers.get("Content-Type")
|
382 |
if content_type != "application/json":
|
|
|
383 |
return JSONResponse(
|
384 |
status_code=400,
|
385 |
content={
|
|
|
391 |
}
|
392 |
},
|
393 |
)
|
|
|
|
|
|
|
|
|
394 |
response = await call_next(request)
|
|
|
|
|
|
|
|
|
395 |
return response
|
396 |
|
397 |
class Message(BaseModel):
    """One entry of a chat request's ``messages`` list."""

    # Role label of the message.
    role: str
    # Text of the message.
    content: str
|
|
|
400 |
|
401 |
class ChatRequest(BaseModel):
|
402 |
model: str
|
403 |
messages: List[Message]
|
404 |
+
temperature: Optional[float] = 1.0
|
405 |
+
top_p: Optional[float] = 1.0
|
406 |
+
n: Optional[int] = 1
|
407 |
stream: Optional[bool] = False
|
408 |
stop: Optional[Union[str, List[str]]] = None
|
409 |
+
max_tokens: Optional[int] = None
|
410 |
+
presence_penalty: Optional[float] = 0.0
|
411 |
+
frequency_penalty: Optional[float] = 0.0
|
412 |
logit_bias: Optional[Dict[str, float]] = None
|
413 |
user: Optional[str] = None
|
414 |
webSearchMode: Optional[bool] = False # Custom parameter
|
|
|
429 |
"usage": None,
|
430 |
}
|
431 |
|
432 |
+
@app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter)])
|
433 |
+
async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
434 |
# Redact user messages only for logging purposes
|
435 |
redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
|
436 |
|
|
|
440 |
# Validate that the requested model is available
|
441 |
if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
|
442 |
logger.warning(f"Attempt to use unavailable model: {request.model}")
|
443 |
+
raise HTTPException(status_code=400, detail="Requested model is not available.")
|
444 |
|
445 |
# Process the request with actual message content, but don't log it
|
446 |
async_generator = Blackbox.create_async_generator(
|
|
|
481 |
else:
|
482 |
response_content += chunk
|
483 |
|
484 |
+
logger.info(f"Completed non-streaming response generation for API key: {api_key}")
|
485 |
return {
|
486 |
"id": f"chatcmpl-{uuid.uuid4()}",
|
487 |
"object": "chat.completion",
|
|
|
520 |
return RedirectResponse(url='about:blank')
|
521 |
|
522 |
@app.get("/v1/models")
async def get_models():
    """Return one ``{"id", "object"}`` entry per name in ``Blackbox.models``."""
    logger.info("Fetching available models")
    entries = []
    for model in Blackbox.models:
        entries.append({"id": model, "object": "model"})
    return {"data": entries}
|
526 |
|
527 |
# Additional endpoints for better functionality
|
528 |
+
@app.get("/v1/health", dependencies=[Depends(rate_limiter)])
async def health_check(req: Request, api_key: str = Depends(get_api_key)):
    """Authenticated, rate-limited liveness probe.

    Returns:
        ``{"status": "ok"}``.
    """
    import hashlib

    # Log a short digest of the key, never the secret itself.
    fingerprint = hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:8]
    logger.info(f"Health check requested by API key (sha256 fingerprint: {fingerprint})")
    return {"status": "ok"}
|
532 |
|
533 |
@app.get("/v1/models/{model}/status")
|
534 |
+
async def model_status(model: str):
|
535 |
logger.info(f"Model status requested for '{model}'")
|
536 |
if model in Blackbox.models:
|
537 |
return {"model": model, "status": "available"}
|
|
|
545 |
# Custom exception handler to match OpenAI's error format
|
546 |
@app.exception_handler(HTTPException)
|
547 |
async def http_exception_handler(request: Request, exc: HTTPException):
|
|
|
548 |
return JSONResponse(
|
549 |
status_code=exc.status_code,
|
550 |
content={
|
|
|
562 |
text: str
|
563 |
|
564 |
@app.post("/v1/tokenizer")
async def tokenizer(request: TokenizerRequest):
    """Echo the submitted text with its whitespace-separated word count."""
    words = request.text.split()
    return {"text": request.text, "tokens": len(words)}
|
569 |
|
570 |
+
# New endpoint: /v1/completions to support text completions
|
571 |
+
class CompletionRequest(BaseModel):
    """Request body accepted by the ``/v1/completions`` endpoint."""

    # Required fields.
    model: str
    prompt: str

    # Optional fields and their defaults.
    max_tokens: Optional[int] = 16
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    stream: Optional[bool] = False
    stop: Optional[Union[str, List[str]]] = None
    logprobs: Optional[int] = None
    echo: Optional[bool] = False
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
    best_of: Optional[int] = 1
    logit_bias: Optional[Dict[str, float]] = None
    user: Optional[str] = None
|
587 |
+
|
588 |
+
@app.post("/v1/completions", dependencies=[Depends(rate_limiter)])
async def completions(request: CompletionRequest, req: Request, api_key: str = Depends(get_api_key)):
    """Handle ``POST /v1/completions`` with a simulated text completion.

    The completion is the prompt followed by a ``[Completed by <model>]``
    marker; token counts in ``usage`` are whitespace-separated word counts.

    Raises:
        HTTPException: 400 when the model is neither in ``Blackbox.models``
            nor in ``Blackbox.model_aliases``; 500 on any unexpected error.
    """
    import hashlib

    # Identify the caller by a short digest so the secret stays out of logs.
    fingerprint = hashlib.sha256(api_key.encode("utf-8")).hexdigest()[:8]
    logger.info(
        f"Received completion request from API key (sha256 fingerprint: {fingerprint}) | Model: {request.model}"
    )

    try:
        # Validate that the requested model is available
        if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
            logger.warning(f"Attempt to use unavailable model: {request.model}")
            raise HTTPException(status_code=400, detail="Requested model is not available.")

        # Simulate a simple completion by echoing the prompt
        completion_text = f"{request.prompt} [Completed by {request.model}]"

        # Count each side once and reuse the numbers for the total.
        prompt_tokens = len(request.prompt.split())
        completion_tokens = len(completion_text.split())

        return {
            "id": f"cmpl-{uuid.uuid4()}",
            "object": "text_completion",
            "created": int(datetime.now().timestamp()),
            "model": request.model,
            "choices": [
                {
                    "text": completion_text,
                    "index": 0,
                    "logprobs": None,
                    "finish_reason": "length",
                }
            ],
            "usage": {
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
            },
        }
    except HTTPException as he:
        logger.warning(f"HTTPException: {he.detail}")
        raise
    except Exception as e:
        logger.exception("An unexpected error occurred while processing the completions request.")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
626 |
+
|
627 |
if __name__ == "__main__":
    # Serve the app with uvicorn when this file is executed as a script.
    import uvicorn

    uvicorn.run(
        app,
        host="0.0.0.0",
        port=8000,
    )
|