Niansuh committed
Commit ca2768f · verified · 1 Parent(s): dad805d

Update main.py

Files changed (1):
  main.py  +229 -205
main.py CHANGED
@@ -16,15 +16,19 @@ from fastapi.responses import JSONResponse, StreamingResponse
 from pydantic import BaseModel
 from datetime import datetime
 
-# Configure logging
+# =====================
+# 1. Configure Logging
+# =====================
 logging.basicConfig(
-    level=logging.DEBUG,  # Set to DEBUG to capture detailed logs
+    level=logging.DEBUG,  # Set to DEBUG for detailed logs during development
     format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
     handlers=[logging.StreamHandler()]
 )
 logger = logging.getLogger(__name__)
 
-# Load environment variables
+# ============================
+# 2. Load Environment Variables
+# ============================
 API_KEYS = os.getenv('API_KEYS', '').split(',')  # Comma-separated API keys
 RATE_LIMIT = int(os.getenv('RATE_LIMIT', '60'))  # Requests per minute
 
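With level=logging.DEBUG, the format string above produces records like the one in the comment below; a minimal standalone sketch for reference:

    import logging

    logging.basicConfig(
        level=logging.DEBUG,
        format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
        handlers=[logging.StreamHandler()]
    )
    logging.getLogger(__name__).debug("payload built")
    # 2024-01-01 12:00:00,000 [DEBUG] __main__: payload built   (timestamp illustrative)
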
@@ -32,19 +36,22 @@ if not API_KEYS or API_KEYS == ['']:
     logger.error("No API keys found. Please set the API_KEYS environment variable.")
     raise Exception("API_KEYS environment variable not set.")
 
-# Simple in-memory rate limiter based solely on IP addresses
+# ====================================
+# 3. Define Rate Limiting Structures
+# ====================================
 rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
 
 # Define cleanup interval and window
 CLEANUP_INTERVAL = 60  # seconds
 RATE_LIMIT_WINDOW = 60  # seconds
 
-# Define ImageResponse using Pydantic
-class ImageResponse(BaseModel):
+# ========================
+# 4. Define Pydantic Models
+# ========================
+class ImageResponseModel(BaseModel):
     images: str
     alt: str
 
-# Request Models
 class Message(BaseModel):
     role: str
     content: str
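The guard in this hunk's context, if not API_KEYS or API_KEYS == [''], exists because splitting an empty string still yields a one-element list, so an unset variable never parses to []. A minimal sketch of the parsing and the expected setup:

    import os

    assert ''.split(',') == ['']               # unset API_KEYS parses to [''], not []
    assert 'k1,k2'.split(',') == ['k1', 'k2']

    # Expected environment before launch (values illustrative):
    #   export API_KEYS="k1,k2"
    #   export RATE_LIMIT="60"                 # requests per minute; 60 is the default
    keys = os.getenv('API_KEYS', '').split(',')
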
@@ -61,14 +68,18 @@ class ChatRequest(BaseModel):
     logit_bias: Optional[Dict[str, float]] = None
     user: Optional[str] = None
 
-# Custom exception for model not working
+# ===============================
+# 5. Define Custom Exceptions
+# ===============================
 class ModelNotWorkingException(Exception):
     def __init__(self, model: str):
         self.model = model
         self.message = f"The model '{model}' is currently not working. Please try another model or wait for it to be fixed."
         super().__init__(self.message)
 
-# Updated Blackbox class with new models and functionalities
+# =======================
+# 6. Define the Blackbox
+# =======================
 class Blackbox:
     label = "Blackbox AI"
     url = "https://www.blackbox.ai"
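For reference, the exception above carries a ready-made message, and the endpoint handlers later in this diff catch it and convert it to HTTP 503. A self-contained sketch (the model name is made up):

    class ModelNotWorkingException(Exception):  # copied from the hunk above
        def __init__(self, model: str):
            self.model = model
            self.message = f"The model '{model}' is currently not working. Please try another model or wait for it to be fixed."
            super().__init__(self.message)

    try:
        raise ModelNotWorkingException("example-model")
    except ModelNotWorkingException as e:
        print(e.message)
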
@@ -344,7 +355,7 @@ class Blackbox:
         proxy: Optional[str] = None,
         websearch: bool = False,
         **kwargs
-    ) -> AsyncGenerator[Union[str, ImageResponse], None]:
+    ) -> AsyncGenerator[Union[str, ImageResponseModel], None]:
         """
         Creates an asynchronous generator for streaming responses from Blackbox AI.
 
@@ -356,7 +367,7 @@
             **kwargs: Additional keyword arguments.
 
         Yields:
-            Union[str, ImageResponse]: Segments of the generated response or ImageResponse objects.
+            Union[str, ImageResponseModel]: Segments of the generated response or ImageResponseModel objects.
         """
         model = cls.get_model(model)
 
@@ -432,7 +443,7 @@
             "clickedForceWebSearch": False,
             "visitFromDelta": False,
             "mobileClient": False,
-            "webSearchMode": False,
+            "webSearchMode": websearch,
             "userSelectedModel": cls.userSelectedModel.get(model, model)
         }
 
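This change threads the method's websearch parameter into the payload instead of hard-coding False. Under the signature shown in the earlier hunk, a caller could plausibly enable it like so (a sketch; the model name and messages are illustrative, and the Blackbox class from main.py must be in scope):

    import asyncio

    async def demo():
        async for chunk in Blackbox.create_async_generator(
            model="blackbox",    # illustrative model name
            messages=[{"role": "user", "content": "Any FastAPI news?"}],
            websearch=True,      # now carried through as "webSearchMode" in the payload
        ):
            print(chunk)         # str segments or ImageResponseModel objects

    # asyncio.run(demo())
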
@@ -466,7 +477,7 @@
                 match = re.search(r'!\[.*?\]\((https?://[^\)]+)\)', cleaned_response)
                 if match:
                     image_url = match.group(1)
-                    image_response = ImageResponse(images=image_url, alt="Generated Image")
+                    image_response = ImageResponseModel(images=image_url, alt="Generated Image")
                     yield image_response
                 else:
                     yield cleaned_response
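The surrounding code pulls an image URL out of a markdown-style image in the model's reply; the regex itself can be sanity-checked in isolation:

    import re

    sample = "Here it is: ![Generated Image](https://example.com/cat.png)"
    match = re.search(r'!\[.*?\]\((https?://[^\)]+)\)', sample)
    assert match and match.group(1) == "https://example.com/cat.png"
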
@@ -533,201 +544,214 @@ class Blackbox:
         except Exception as e:
             yield f"Unexpected error during /chat/{chat_id} request: {str(e)}"
 
-# FastAPI app setup
-app = FastAPI()
-
-# Add the cleanup task when the app starts
-@app.on_event("startup")
-async def startup_event():
-    asyncio.create_task(cleanup_rate_limit_stores())
-    logger.info("Started rate limit store cleanup task.")
-
-# Middleware to enhance security and enforce Content-Type for specific endpoints
-@app.middleware("http")
-async def security_middleware(request: Request, call_next):
-    client_ip = request.client.host
-    # Enforce that POST requests to /v1/chat/completions must have Content-Type: application/json
-    if request.method == "POST" and request.url.path == "/v1/chat/completions":
-        content_type = request.headers.get("Content-Type")
-        if content_type != "application/json":
-            logger.warning(f"Invalid Content-Type from IP: {client_ip} for path: {request.url.path}")
-            return JSONResponse(
-                status_code=400,
-                content={
-                    "error": {
-                        "message": "Content-Type must be application/json",
-                        "type": "invalid_request_error",
-                        "param": None,
-                        "code": None
-                    }
-                },
-            )
-    response = await call_next(request)
-    return response
-
-async def cleanup_rate_limit_stores():
-    """
-    Periodically cleans up stale entries in the rate_limit_store to prevent memory bloat.
-    """
-    while True:
-        current_time = time.time()
-        ips_to_delete = [ip for ip, value in rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
-        for ip in ips_to_delete:
-            del rate_limit_store[ip]
-            logger.debug(f"Cleaned up rate_limit_store for IP: {ip}")
-        await asyncio.sleep(CLEANUP_INTERVAL)
-
-async def rate_limiter_per_ip(request: Request):
-    """
-    Rate limiter that enforces a limit based on the client's IP address.
-    """
-    client_ip = request.client.host
-    current_time = time.time()
-
-    # Initialize or update the count and timestamp
-    if current_time - rate_limit_store[client_ip]["timestamp"] > RATE_LIMIT_WINDOW:
-        rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
-    else:
-        if rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
-            logger.warning(f"Rate limit exceeded for IP address: {client_ip}")
-            raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address | NiansuhAI')
-        rate_limit_store[client_ip]["count"] += 1
-
-async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
-    """
-    Dependency to extract and validate the API key from the Authorization header.
-    """
-    client_ip = request.client.host
-    if authorization is None or not authorization.startswith('Bearer '):
-        logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
-        raise HTTPException(status_code=401, detail='Invalid authorization header format')
-    api_key = authorization[7:]
-    if api_key not in API_KEYS:
-        logger.warning(f"Invalid API key attempted: {api_key} from IP: {client_ip}")
-        raise HTTPException(status_code=401, detail='Invalid API key')
-    return api_key
-
-@app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
-async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
-    client_ip = req.client.host
-    # Redact user messages only for logging purposes
-    redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
-
-    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
-
-    try:
-        # Validate that the requested model is available
-        if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
-            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
-            raise HTTPException(status_code=400, detail="Requested model is not available.")
-
-        # Process the request with actual message content, but don't log it
-        response_content = await Blackbox.generate_response(
-            model=request.model,
-            messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
-            temperature=request.temperature,
-            max_tokens=request.max_tokens
-        )
-
-        logger.info(f"Completed response generation for API key: {api_key} | IP: {client_ip}")
-        return {
-            "id": f"chatcmpl-{uuid.uuid4()}",
-            "object": "chat.completion",
-            "created": int(datetime.now().timestamp()),
-            "model": request.model,
-            "choices": [
-                {
-                    "index": 0,
-                    "message": {
-                        "role": "assistant",
-                        "content": response_content
-                    },
-                    "finish_reason": "stop"
-                }
-            ],
-            "usage": {
-                "prompt_tokens": sum(len(msg.content.split()) for msg in request.messages),
-                "completion_tokens": len(response_content.split()),
-                "total_tokens": sum(len(msg.content.split()) for msg in request.messages) + len(response_content.split())
-            },
-        }
-    except ModelNotWorkingException as e:
-        logger.warning(f"Model not working: {e} | IP: {client_ip}")
-        raise HTTPException(status_code=503, detail=str(e))
-    except HTTPException as he:
-        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
-        raise he
-    except Exception as e:
-        logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
-        raise HTTPException(status_code=500, detail=str(e))
-
-@app.get("/v1/models", dependencies=[Depends(rate_limiter_per_ip)])
-async def get_models(req: Request):
-    client_ip = req.client.host
-    logger.info(f"Fetching available models from IP: {client_ip}")
-    return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
-
-@app.get("/v1/health", dependencies=[Depends(rate_limiter_per_ip)])
-async def health_check(req: Request):
-    client_ip = req.client.host
-    logger.info(f"Health check requested from IP: {client_ip}")
-    return {"status": "ok"}
-
-# Custom exception handler to match OpenAI's error format
-@app.exception_handler(HTTPException)
-async def http_exception_handler(request: Request, exc: HTTPException):
-    client_ip = request.client.host
-    logger.error(f"HTTPException: {exc.detail} | Path: {request.url.path} | IP: {client_ip}")
-    return JSONResponse(
-        status_code=exc.status_code,
-        content={
-            "error": {
-                "message": exc.detail,
-                "type": "invalid_request_error",
-                "param": None,
-                "code": None
-            }
-        },
-    )
-
-# Optional: Additional Endpoint for Streaming Responses (Using create_async_generator)
-# This endpoint leverages the new create_async_generator method for streaming responses.
-# Note: Streaming responses may require clients that support Server-Sent Events (SSE) or WebSockets.
-
-@app.post("/v1/chat/completions/stream", dependencies=[Depends(rate_limiter_per_ip)])
-async def chat_completions_stream(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
-    client_ip = req.client.host
-    # Redact user messages only for logging purposes
-    redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
-
-    logger.info(f"Received streaming chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
-
-    try:
-        # Validate that the requested model is available
-        if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
-            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
-            raise HTTPException(status_code=400, detail="Requested model is not available.")
-
-        # Create an asynchronous generator for the response
-        async_generator = Blackbox.create_async_generator(
-            model=request.model,
-            messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
-            temperature=request.temperature,
-            max_tokens=request.max_tokens
-        )
-
-        logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
-        return StreamingResponse(async_generator, media_type="text/event-stream")
-    except ModelNotWorkingException as e:
-        logger.warning(f"Model not working: {e} | IP: {client_ip}")
-        raise HTTPException(status_code=503, detail=str(e))
-    except HTTPException as he:
-        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
-        raise he
-    except Exception as e:
-        logger.exception(f"An unexpected error occurred while processing the streaming chat completions request from IP: {client_ip}.")
-        raise HTTPException(status_code=500, detail=str(e))
-
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+# ============================
+# 7. Initialize FastAPI App
+# ============================
+app = FastAPI()
+
+# ====================================
+# 8. Define Middleware and Dependencies
+# ====================================
+@app.middleware("http")
+async def security_middleware(request: Request, call_next):
+    client_ip = request.client.host
+    # Enforce that POST requests to /v1/chat/completions must have Content-Type: application/json
+    if request.method == "POST" and request.url.path == "/v1/chat/completions":
+        content_type = request.headers.get("Content-Type")
+        if content_type != "application/json":
+            logger.warning(f"Invalid Content-Type from IP: {client_ip} for path: {request.url.path}")
+            return JSONResponse(
+                status_code=400,
+                content={
+                    "error": {
+                        "message": "Content-Type must be application/json",
+                        "type": "invalid_request_error",
+                        "param": None,
+                        "code": None
+                    }
+                },
+            )
+    response = await call_next(request)
+    return response
+
+async def cleanup_rate_limit_stores():
+    """
+    Periodically cleans up stale entries in the rate_limit_store to prevent memory bloat.
+    """
+    while True:
+        current_time = time.time()
+        ips_to_delete = [ip for ip, value in rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
+        for ip in ips_to_delete:
+            del rate_limit_store[ip]
+            logger.debug(f"Cleaned up rate_limit_store for IP: {ip}")
+        await asyncio.sleep(CLEANUP_INTERVAL)
+
+async def rate_limiter_per_ip(request: Request):
+    """
+    Rate limiter that enforces a limit based on the client's IP address.
+    """
+    client_ip = request.client.host
+    current_time = time.time()
+
+    # Initialize or update the count and timestamp
+    if current_time - rate_limit_store[client_ip]["timestamp"] > RATE_LIMIT_WINDOW:
+        rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
+    else:
+        if rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
+            logger.warning(f"Rate limit exceeded for IP address: {client_ip}")
+            raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address | NiansuhAI')
+        rate_limit_store[client_ip]["count"] += 1
+
+async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
+    """
+    Dependency to extract and validate the API key from the Authorization header.
+    """
+    client_ip = request.client.host
+    if authorization is None or not authorization.startswith('Bearer '):
+        logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
+        raise HTTPException(status_code=401, detail='Invalid authorization header format')
+    api_key = authorization[7:]
+    if api_key not in API_KEYS:
+        logger.warning(f"Invalid API key attempted: {api_key} from IP: {client_ip}")
+        raise HTTPException(status_code=401, detail='Invalid API key')
+    return api_key
+
+# =====================================
+# 9. Define FastAPI Event Handlers
+# =====================================
+@app.on_event("startup")
+async def startup_event():
+    asyncio.create_task(cleanup_rate_limit_stores())
+    logger.info("Started rate limit store cleanup task.")
+
+# ==========================================
+# 10. Define FastAPI Endpoints
+# ==========================================
+@app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
+async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
+    client_ip = req.client.host
+    # Redact user messages only for logging purposes
+    redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
+
+    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
+
+    try:
+        # Validate that the requested model is available
+        if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
+            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
+            raise HTTPException(status_code=400, detail="Requested model is not available.")
+
+        # Process the request with actual message content, but don't log it
+        response_content = await Blackbox.generate_response(
+            model=request.model,
+            messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
+            temperature=request.temperature,
+            max_tokens=request.max_tokens
+        )
+
+        logger.info(f"Completed response generation for API key: {api_key} | IP: {client_ip}")
+        return {
+            "id": f"chatcmpl-{uuid.uuid4()}",
+            "object": "chat.completion",
+            "created": int(datetime.now().timestamp()),
+            "model": request.model,
+            "choices": [
+                {
+                    "index": 0,
+                    "message": {
+                        "role": "assistant",
+                        "content": response_content
+                    },
+                    "finish_reason": "stop"
+                }
+            ],
+            "usage": {
+                "prompt_tokens": sum(len(msg.content.split()) for msg in request.messages),
+                "completion_tokens": len(response_content.split()),
+                "total_tokens": sum(len(msg.content.split()) for msg in request.messages) + len(response_content.split())
+            },
+        }
+    except ModelNotWorkingException as e:
+        logger.warning(f"Model not working: {e} | IP: {client_ip}")
+        raise HTTPException(status_code=503, detail=str(e))
+    except HTTPException as he:
+        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
+        raise he
+    except Exception as e:
+        logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
+        raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/v1/models", dependencies=[Depends(rate_limiter_per_ip)])
+async def get_models(req: Request):
+    client_ip = req.client.host
+    logger.info(f"Fetching available models from IP: {client_ip}")
+    return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
+
+@app.get("/v1/health", dependencies=[Depends(rate_limiter_per_ip)])
+async def health_check(req: Request):
+    client_ip = req.client.host
+    logger.info(f"Health check requested from IP: {client_ip}")
+    return {"status": "ok"}
+
+# ===============================
+# 11. Define Custom Exception Handler
+# ===============================
+@app.exception_handler(HTTPException)
+async def http_exception_handler(request: Request, exc: HTTPException):
+    client_ip = request.client.host
+    logger.error(f"HTTPException: {exc.detail} | Path: {request.url.path} | IP: {client_ip}")
+    return JSONResponse(
+        status_code=exc.status_code,
+        content={
+            "error": {
+                "message": exc.detail,
+                "type": "invalid_request_error",
+                "param": None,
+                "code": None
+            }
+        },
+    )
+
+# ============================
+# 12. Optional: Streaming Endpoint
+# ============================
+@app.post("/v1/chat/completions/stream", dependencies=[Depends(rate_limiter_per_ip)])
+async def chat_completions_stream(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
+    client_ip = req.client.host
+    # Redact user messages only for logging purposes
+    redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
+
+    logger.info(f"Received streaming chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
+
+    try:
+        # Validate that the requested model is available
+        if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
+            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
+            raise HTTPException(status_code=400, detail="Requested model is not available.")
+
+        # Create an asynchronous generator for the response
+        async_generator = Blackbox.create_async_generator(
+            model=request.model,
+            messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
+            temperature=request.temperature,
+            max_tokens=request.max_tokens
+        )
+
+        logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
+        return StreamingResponse(async_generator, media_type="text/event-stream")
+    except ModelNotWorkingException as e:
+        logger.warning(f"Model not working: {e} | IP: {client_ip}")
+        raise HTTPException(status_code=503, detail=str(e))
+    except HTTPException as he:
+        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
+        raise he
+    except Exception as e:
+        logger.exception(f"An unexpected error occurred while processing the streaming chat completions request from IP: {client_ip}.")
+        raise HTTPException(status_code=500, detail=str(e))
+
+# ========================================
+# 13. Run the Application with Uvicorn
+# ========================================
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
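The rate limiter moved in this hunk is a fixed-window counter per client IP: the first request in a window stamps the time, subsequent requests increment a count, and a request beyond RATE_LIMIT inside the window is rejected (the server raises HTTP 429). The same logic as a standalone sketch with a small limit:

    import time
    from collections import defaultdict

    RATE_LIMIT = 3          # demo value; the server default is 60
    RATE_LIMIT_WINDOW = 60  # seconds

    store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})

    def allow(ip: str) -> bool:
        now = time.time()
        if now - store[ip]["timestamp"] > RATE_LIMIT_WINDOW:
            store[ip] = {"count": 1, "timestamp": now}  # window expired: reset
            return True
        if store[ip]["count"] >= RATE_LIMIT:
            return False                                # over the limit
        store[ip]["count"] += 1
        return True

    print([allow("203.0.113.7") for _ in range(5)])     # [True, True, True, False, False]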
 
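End-to-end, the reorganized app keeps the same surface: POST /v1/chat/completions requires a Bearer key and a JSON body (the middleware rejects any other Content-Type), while /v1/models and /v1/health are only rate-limited. A hedged smoke test against a local run, assuming the third-party requests package and a key matching an API_KEYS entry (all values illustrative):

    import requests  # pip install requests

    BASE = "http://localhost:8000"

    resp = requests.post(
        f"{BASE}/v1/chat/completions",
        headers={"Authorization": "Bearer k1"},  # must match an API_KEYS entry
        json={                                   # json= sets Content-Type: application/json
            "model": "blackbox",                 # illustrative; must be a model Blackbox knows
            "messages": [{"role": "user", "content": "Hello"}],
        },
    )
    print(resp.status_code, resp.json())

    print(requests.get(f"{BASE}/v1/models").json())
    print(requests.get(f"{BASE}/v1/health").json())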