Update main.py
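This commit adds a background asyncio task that periodically evicts stale entries from the in-memory rate-limit stores, and threads the client IP address through authentication, rate limiting, the security middleware, and every endpoint's logging.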
main.py CHANGED
@@ -43,32 +43,55 @@ else:
 rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
 ip_rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
 
-async def get_api_key(authorization: str = Header(None)) -> str:
+# Define cleanup interval and window
+CLEANUP_INTERVAL = 60  # seconds
+RATE_LIMIT_WINDOW = 60  # seconds
+
+async def cleanup_rate_limit_stores():
+    while True:
+        current_time = time.time()
+        # Clean API key rate limit store
+        keys_to_delete = [key for key, value in rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
+        for key in keys_to_delete:
+            del rate_limit_store[key]
+            logger.debug(f"Cleaned up rate_limit_store for API key: {key}")
+
+        # Clean IP rate limit store
+        ips_to_delete = [ip for ip, value in ip_rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
+        for ip in ips_to_delete:
+            del ip_rate_limit_store[ip]
+            logger.debug(f"Cleaned up ip_rate_limit_store for IP: {ip}")
+
+        await asyncio.sleep(CLEANUP_INTERVAL)
+
+async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
+    client_ip = request.client.host
     if authorization is None or not authorization.startswith('Bearer '):
-        logger.warning("Invalid or missing authorization header")
+        logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
         raise HTTPException(status_code=401, detail='Invalid authorization header format')
     api_key = authorization[7:]
     if api_key not in API_KEYS:
-        logger.warning(f"Invalid API key attempted: {api_key}")
+        logger.warning(f"Invalid API key attempted: {api_key} from IP: {client_ip}")
         raise HTTPException(status_code=401, detail='Invalid API key')
     return api_key
 
-async def rate_limiter(req: Request, api_key: str = Depends(get_api_key)):
+async def rate_limiter(request: Request, api_key: str = Depends(get_api_key)):
+    client_ip = request.client.host
     current_time = time.time()
+
     # Rate limiting per API key
     window_start = rate_limit_store[api_key]["timestamp"]
-    if current_time - window_start > 60:
+    if current_time - window_start > RATE_LIMIT_WINDOW:
         rate_limit_store[api_key] = {"count": 1, "timestamp": current_time}
     else:
         if rate_limit_store[api_key]["count"] >= RATE_LIMIT:
-            logger.warning(f"Rate limit exceeded for API key: {api_key}")
+            logger.warning(f"Rate limit exceeded for API key: {api_key} from IP: {client_ip}")
             raise HTTPException(status_code=429, detail='Rate limit exceeded for API key')
         rate_limit_store[api_key]["count"] += 1
 
     # Rate limiting per IP address
-    client_ip = req.client.host
     window_start_ip = ip_rate_limit_store[client_ip]["timestamp"]
-    if current_time - window_start_ip > 60:
+    if current_time - window_start_ip > RATE_LIMIT_WINDOW:
         ip_rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
     else:
         if ip_rate_limit_store[client_ip]["count"] >= RATE_LIMIT:

@@ -380,6 +403,8 @@ async def security_middleware(request: Request, call_next):
     if request.method == "POST" and request.url.path in ["/v1/chat/completions", "/v1/completions"]:
         content_type = request.headers.get("Content-Type")
         if content_type != "application/json":
+            client_ip = request.client.host
+            logger.warning(f"Invalid Content-Type from IP: {client_ip} for path: {request.url.path}")
             return JSONResponse(
                 status_code=400,
                 content={

@@ -431,15 +456,16 @@ def create_response(content: str, model: str, finish_reason: Optional[str] = Non
 
 @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter)])
 async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
+    client_ip = req.client.host
     # Redact user messages only for logging purposes
     redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
 
-    logger.info(f"Received chat completions request from API key: {api_key} | Model: {request.model} | Messages: {redacted_messages}")
+    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
 
     try:
         # Validate that the requested model is available
         if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
-            logger.warning(f"Attempt to use unavailable model: {request.model}")
+            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
             raise HTTPException(status_code=400, detail="Requested model is not available.")
 
         # Process the request with actual message content, but don't log it

@@ -468,7 +494,7 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
                 error_response = {"error": he.detail}
                 yield f"data: {json.dumps(error_response)}\n\n"
             except Exception as e:
-                logger.exception("Error during streaming response generation.")
+                logger.exception(f"Error during streaming response generation from IP: {client_ip}.")
                 error_response = {"error": str(e)}
                 yield f"data: {json.dumps(error_response)}\n\n"
 
@@ -481,7 +507,7 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
             else:
                 response_content += chunk
 
-        logger.info(f"Completed non-streaming response generation for API key: {api_key}")
+        logger.info(f"Completed non-streaming response generation for API key: {api_key} | IP: {client_ip}")
         return {
             "id": f"chatcmpl-{uuid.uuid4()}",
             "object": "chat.completion",

@@ -504,47 +530,53 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
             },
         }
     except ModelNotWorkingException as e:
-        logger.warning(f"Model not working: {e}")
+        logger.warning(f"Model not working: {e} | IP: {client_ip}")
         raise HTTPException(status_code=503, detail=str(e))
     except HTTPException as he:
-        logger.warning(f"HTTPException: {he.detail}")
+        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
         raise he
     except Exception as e:
-        logger.exception("An unexpected error occurred while processing the chat completions request.")
+        logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
         raise HTTPException(status_code=500, detail=str(e))
 
 # Return 'about:blank' when accessing the endpoint via GET
 @app.get("/v1/chat/completions")
-async def chat_completions_get():
-    logger.info("GET request made to /v1/chat/completions, redirecting to 'about:blank'")
+async def chat_completions_get(req: Request):
+    client_ip = req.client.host
+    logger.info(f"GET request made to /v1/chat/completions from IP: {client_ip}, redirecting to 'about:blank'")
     return RedirectResponse(url='about:blank')
 
 @app.get("/v1/models")
-async def get_models():
-    logger.info("Fetching available models")
+async def get_models(req: Request):
+    client_ip = req.client.host
+    logger.info(f"Fetching available models from IP: {client_ip}")
     return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
 
 # Additional endpoints for better functionality
 @app.get("/v1/health", dependencies=[Depends(rate_limiter)])
 async def health_check(req: Request, api_key: str = Depends(get_api_key)):
-    logger.info(f"Health check requested by API key: {api_key}")
+    client_ip = req.client.host
+    logger.info(f"Health check requested by API key: {api_key} | IP: {client_ip}")
     return {"status": "ok"}
 
 @app.get("/v1/models/{model}/status")
-async def model_status(model: str):
-    logger.info(f"Model status requested for '{model}'")
+async def model_status(model: str, req: Request):
+    client_ip = req.client.host
+    logger.info(f"Model status requested for '{model}' from IP: {client_ip}")
     if model in Blackbox.models:
         return {"model": model, "status": "available"}
     elif model in Blackbox.model_aliases and Blackbox.model_aliases[model] in Blackbox.models:
         actual_model = Blackbox.model_aliases[model]
         return {"model": actual_model, "status": "available via alias"}
     else:
-        logger.warning(f"Model not found: {model}")
+        logger.warning(f"Model not found: {model} from IP: {client_ip}")
         raise HTTPException(status_code=404, detail="Model not found")
 
 # Custom exception handler to match OpenAI's error format
 @app.exception_handler(HTTPException)
 async def http_exception_handler(request: Request, exc: HTTPException):
+    client_ip = request.client.host
+    logger.error(f"HTTPException: {exc.detail} | Path: {request.url.path} | IP: {client_ip}")
     return JSONResponse(
         status_code=exc.status_code,
         content={

@@ -562,9 +594,11 @@ class TokenizerRequest(BaseModel):
     text: str
 
 @app.post("/v1/tokenizer")
-async def tokenizer(request: TokenizerRequest):
+async def tokenizer(request: TokenizerRequest, req: Request, api_key: str = Depends(get_api_key)):
+    client_ip = req.client.host
     text = request.text
     token_count = len(text.split())
+    logger.info(f"Tokenizer requested by API key: {api_key} | IP: {client_ip} | Text length: {len(text)}")
     return {"text": text, "tokens": token_count}
 
 # New endpoint: /v1/completions to support text completions

@@ -587,12 +621,13 @@ class CompletionRequest(BaseModel):
 
 @app.post("/v1/completions", dependencies=[Depends(rate_limiter)])
 async def completions(request: CompletionRequest, req: Request, api_key: str = Depends(get_api_key)):
-    logger.info(f"Received completion request from API key: {api_key} | Model: {request.model}")
+    client_ip = req.client.host
+    logger.info(f"Received completion request from API key: {api_key} | IP: {client_ip} | Model: {request.model}")
 
     try:
         # Validate that the requested model is available
         if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
-            logger.warning(f"Attempt to use unavailable model: {request.model}")
+            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
             raise HTTPException(status_code=400, detail="Requested model is not available.")
 
         # Simulate a simple completion by echoing the prompt

@@ -618,12 +653,18 @@ async def completions(request: CompletionRequest, req: Request, api_key: str = D
             }
         }
     except HTTPException as he:
-        logger.warning(f"HTTPException: {he.detail}")
+        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
         raise he
     except Exception as e:
-        logger.exception("An unexpected error occurred while processing the completions request.")
+        logger.exception(f"An unexpected error occurred while processing the completions request from IP: {client_ip}.")
         raise HTTPException(status_code=500, detail=str(e))
 
+# Add the cleanup task when the app starts
+@app.on_event("startup")
+async def startup_event():
+    asyncio.create_task(cleanup_rate_limit_stores())
+    logger.info("Started rate limit store cleanup task.")
+
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
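For context, here is a minimal, self-contained sketch of the eviction pattern the new cleanup_rate_limit_stores task uses, runnable outside FastAPI. The store shape and the two constants mirror the diff; the seeded keys and the harness in main() are hypothetical, for illustration only.

import asyncio
import time
from collections import defaultdict

RATE_LIMIT_WINDOW = 60  # seconds, matching the diff
CLEANUP_INTERVAL = 60   # seconds, matching the diff

rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})

async def cleanup_rate_limit_stores():
    # Same loop as in the commit: evict entries idle for more than two windows.
    while True:
        current_time = time.time()
        stale = [key for key, value in rate_limit_store.items()
                 if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
        for key in stale:
            del rate_limit_store[key]
        await asyncio.sleep(CLEANUP_INTERVAL)

async def main():
    # Hypothetical harness: seed one fresh and one long-idle entry,
    # let the cleanup task run one pass, then inspect the store.
    rate_limit_store["fresh-key"]["timestamp"] = time.time()
    rate_limit_store["stale-key"]["timestamp"] = time.time() - 5 * RATE_LIMIT_WINDOW
    task = asyncio.create_task(cleanup_rate_limit_stores())
    await asyncio.sleep(0.1)  # one cleanup pass runs before the first sleep finishes
    task.cancel()
    print(sorted(rate_limit_store))  # expected: ['fresh-key']

if __name__ == "__main__":
    asyncio.run(main())

Evicting only entries older than RATE_LIMIT_WINDOW * 2 is a conservative choice: the limiter itself resets any entry older than one window on its next access, so the doubled threshold bounds memory growth without ever touching a window that could still influence a limiting decision.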