Niansuh committed on
Commit
6929f59
·
verified ·
1 Parent(s): 370b45c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +204 -204
main.py CHANGED
@@ -330,214 +330,214 @@ class Blackbox:
330
  logger.exception(f"Unexpected error during /api/chat request: {str(e)}")
331
  return f"Unexpected error during /api/chat request: {str(e)}"
332
 
333
- # ===============================
334
- # 7. Initialize FastAPI App
335
- # ===============================
336
- app = FastAPI()
337
-
338
- # ====================================
339
- # 8. Define Middleware and Dependencies
340
- # ====================================
341
- @app.middleware("http")
342
- async def security_middleware(request: Request, call_next):
343
- client_ip = request.client.host
344
- # Enforce that POST requests to /v1/chat/completions must have Content-Type: application/json
345
- if request.method == "POST" and request.url.path == "/v1/chat/completions":
346
- content_type = request.headers.get("Content-Type")
347
- if content_type != "application/json":
348
- logger.warning(f"Invalid Content-Type from IP: {client_ip} for path: {request.url.path}")
349
- return JSONResponse(
350
- status_code=400,
351
- content={
352
- "error": {
353
- "message": "Content-Type must be application/json",
354
- "type": "invalid_request_error",
355
- "param": None,
356
- "code": None
357
- }
358
- },
359
- )
360
- response = await call_next(request)
361
- return response
362
-
363
- async def cleanup_rate_limit_stores():
364
- """
365
- Periodically cleans up stale entries in the rate_limit_store to prevent memory bloat.
366
- """
367
- while True:
368
- current_time = time.time()
369
- ips_to_delete = [ip for ip, value in rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
370
- for ip in ips_to_delete:
371
- del rate_limit_store[ip]
372
- logger.debug(f"Cleaned up rate_limit_store for IP: {ip}")
373
- await asyncio.sleep(CLEANUP_INTERVAL)
374
-
375
- async def rate_limiter_per_ip(request: Request):
376
- """
377
- Rate limiter that enforces a limit based on the client's IP address.
378
- """
379
- client_ip = request.client.host
380
- current_time = time.time()
381
-
382
- # Initialize or update the count and timestamp
383
- if current_time - rate_limit_store[client_ip]["timestamp"] > RATE_LIMIT_WINDOW:
384
- rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
385
- else:
386
- if rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
387
- logger.warning(f"Rate limit exceeded for IP address: {client_ip}")
388
- raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address | NiansuhAI')
389
- rate_limit_store[client_ip]["count"] += 1
390
-
391
- async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
392
- """
393
- Dependency to extract and validate the API key from the Authorization header.
394
- """
395
- client_ip = request.client.host
396
- if authorization is None or not authorization.startswith('Bearer '):
397
- logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
398
- raise HTTPException(status_code=401, detail='Invalid authorization header format')
399
- api_key = authorization[7:]
400
- if api_key not in API_KEYS:
401
- logger.warning(f"Invalid API key attempted: {api_key} from IP: {client_ip}")
402
- raise HTTPException(status_code=401, detail='Invalid API key')
403
- return api_key
404
-
405
- # =====================================
406
- # 9. Define FastAPI Event Handlers
407
- # =====================================
408
- @app.on_event("startup")
409
- async def startup_event():
410
- asyncio.create_task(cleanup_rate_limit_stores())
411
- logger.info("Started rate limit store cleanup task.")
412
-
413
- # ==========================================
414
- # 10. Define FastAPI Endpoints
415
- # ==========================================
416
- @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
417
- async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
418
- client_ip = req.client.host
419
- # Redact user messages only for logging purposes
420
- redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
421
-
422
- logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
423
-
424
- try:
425
- # Validate that the requested model is available
426
- if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
427
- logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
428
- raise HTTPException(status_code=400, detail="Requested model is not available.")
429
-
430
- # Process the request with actual message content, but don't log it
431
- response_content = await Blackbox.generate_response(
432
- model=request.model,
433
- messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
434
- temperature=request.temperature,
435
- max_tokens=request.max_tokens
436
- )
437
 
438
- logger.info(f"Completed response generation for API key: {api_key} | IP: {client_ip}")
439
- return {
440
- "id": f"chatcmpl-{uuid.uuid4()}",
441
- "object": "chat.completion",
442
- "created": int(datetime.now().timestamp()),
443
- "model": request.model,
444
- "choices": [
445
- {
446
- "index": 0,
447
- "message": {
448
- "role": "assistant",
449
- "content": response_content
450
- },
451
- "finish_reason": "stop"
 
 
 
 
 
452
  }
453
- ],
454
- "usage": {
455
- "prompt_tokens": sum(len(msg.content.split()) for msg in request.messages),
456
- "completion_tokens": len(response_content.split()),
457
- "total_tokens": sum(len(msg.content.split()) for msg in request.messages) + len(response_content.split())
458
  },
459
- }
460
- except ModelNotWorkingException as e:
461
- logger.warning(f"Model not working: {e} | IP: {client_ip}")
462
- raise HTTPException(status_code=503, detail=str(e))
463
- except HTTPException as he:
464
- logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
465
- raise he
466
- except Exception as e:
467
- logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
468
- raise HTTPException(status_code=500, detail=str(e))
469
-
470
- @app.get("/v1/models", dependencies=[Depends(rate_limiter_per_ip)])
471
- async def get_models(req: Request):
472
- client_ip = req.client.host
473
- logger.info(f"Fetching available models from IP: {client_ip}")
474
- return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
475
-
476
- @app.get("/v1/health", dependencies=[Depends(rate_limiter_per_ip)])
477
- async def health_check(req: Request):
478
- client_ip = req.client.host
479
- logger.info(f"Health check requested from IP: {client_ip}")
480
- return {"status": "ok"}
481
-
482
- # ========================================
483
- # 11. Define Custom Exception Handler
484
- # ========================================
485
- @app.exception_handler(HTTPException)
486
- async def http_exception_handler(request: Request, exc: HTTPException):
487
- client_ip = request.client.host
488
- logger.error(f"HTTPException: {exc.detail} | Path: {request.url.path} | IP: {client_ip}")
489
- return JSONResponse(
490
- status_code=exc.status_code,
491
- content={
492
- "error": {
493
- "message": exc.detail,
494
- "type": "invalid_request_error",
495
- "param": None,
496
- "code": None
497
- }
498
- },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
499
  )
500
 
501
- # ============================
502
- # 12. Optional: Streaming Endpoint
503
- # ============================
504
- @app.post("/v1/chat/completions/stream", dependencies=[Depends(rate_limiter_per_ip)])
505
- async def chat_completions_stream(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
506
- client_ip = req.client.host
507
- # Redact user messages only for logging purposes
508
- redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
509
-
510
- logger.info(f"Received streaming chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
511
 
512
- try:
513
- # Validate that the requested model is available
514
- if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
515
- logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
516
- raise HTTPException(status_code=400, detail="Requested model is not available.")
517
-
518
- # Create an asynchronous generator for the response
519
- async_generator = Blackbox.create_async_generator(
520
- model=request.model,
521
- messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
522
- temperature=request.temperature,
523
- max_tokens=request.max_tokens
524
- )
 
 
 
 
 
 
 
 
 
 
 
525
 
526
- logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
527
- return StreamingResponse(async_generator, media_type="text/event-stream")
528
- except ModelNotWorkingException as e:
529
- logger.warning(f"Model not working: {e} | IP: {client_ip}")
530
- raise HTTPException(status_code=503, detail=str(e))
531
- except HTTPException as he:
532
- logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
533
- raise he
534
- except Exception as e:
535
- logger.exception(f"An unexpected error occurred while processing the streaming chat completions request from IP: {client_ip}.")
536
- raise HTTPException(status_code=500, detail=str(e))
537
-
538
- # ========================================
539
- # 13. Run the Application with Uvicorn
540
- # ========================================
541
- if __name__ == "__main__":
542
- import uvicorn
543
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
330
  logger.exception(f"Unexpected error during /api/chat request: {str(e)}")
331
  return f"Unexpected error during /api/chat request: {str(e)}"
332
 
333
+ # ============================
334
+ # 7. Initialize FastAPI App
335
+ # ============================
336
+ app = FastAPI()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
337
 
338
+ # ====================================
339
+ # 8. Define Middleware and Dependencies
340
+ # ====================================
341
+ @app.middleware("http")
342
+ async def security_middleware(request: Request, call_next):
343
+ client_ip = request.client.host
344
+ # Enforce that POST requests to /v1/chat/completions must have Content-Type: application/json
345
+ if request.method == "POST" and request.url.path == "/v1/chat/completions":
346
+ content_type = request.headers.get("Content-Type")
347
+ if content_type != "application/json":
348
+ logger.warning(f"Invalid Content-Type from IP: {client_ip} for path: {request.url.path}")
349
+ return JSONResponse(
350
+ status_code=400,
351
+ content={
352
+ "error": {
353
+ "message": "Content-Type must be application/json",
354
+ "type": "invalid_request_error",
355
+ "param": None,
356
+ "code": None
357
  }
 
 
 
 
 
358
  },
359
+ )
360
+ response = await call_next(request)
361
+ return response
362
+
363
+ async def cleanup_rate_limit_stores():
364
+ """
365
+ Periodically cleans up stale entries in the rate_limit_store to prevent memory bloat.
366
+ """
367
+ while True:
368
+ current_time = time.time()
369
+ ips_to_delete = [ip for ip, value in rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
370
+ for ip in ips_to_delete:
371
+ del rate_limit_store[ip]
372
+ logger.debug(f"Cleaned up rate_limit_store for IP: {ip}")
373
+ await asyncio.sleep(CLEANUP_INTERVAL)
374
+
375
+ async def rate_limiter_per_ip(request: Request):
376
+ """
377
+ Rate limiter that enforces a limit based on the client's IP address.
378
+ """
379
+ client_ip = request.client.host
380
+ current_time = time.time()
381
+
382
+ # Initialize or update the count and timestamp
383
+ if current_time - rate_limit_store[client_ip]["timestamp"] > RATE_LIMIT_WINDOW:
384
+ rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
385
+ else:
386
+ if rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
387
+ logger.warning(f"Rate limit exceeded for IP address: {client_ip}")
388
+ raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address | NiansuhAI')
389
+ rate_limit_store[client_ip]["count"] += 1
390
+
391
+ async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
392
+ """
393
+ Dependency to extract and validate the API key from the Authorization header.
394
+ """
395
+ client_ip = request.client.host
396
+ if authorization is None or not authorization.startswith('Bearer '):
397
+ logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
398
+ raise HTTPException(status_code=401, detail='Invalid authorization header format')
399
+ api_key = authorization[7:]
400
+ if api_key not in API_KEYS:
401
+ logger.warning(f"Invalid API key attempted: {api_key} from IP: {client_ip}")
402
+ raise HTTPException(status_code=401, detail='Invalid API key')
403
+ return api_key
404
+
405
+ # =====================================
406
+ # 9. Define FastAPI Event Handlers
407
+ # =====================================
408
+ @app.on_event("startup")
409
+ async def startup_event():
410
+ asyncio.create_task(cleanup_rate_limit_stores())
411
+ logger.info("Started rate limit store cleanup task.")
412
+
413
+ # ==========================================
414
+ # 10. Define FastAPI Endpoints
415
+ # ==========================================
416
+ @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
417
+ async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
418
+ client_ip = req.client.host
419
+ # Redact user messages only for logging purposes
420
+ redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
421
+
422
+ logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
423
+
424
+ try:
425
+ # Validate that the requested model is available
426
+ if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
427
+ logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
428
+ raise HTTPException(status_code=400, detail="Requested model is not available.")
429
+
430
+ # Process the request with actual message content, but don't log it
431
+ response_content = await Blackbox.generate_response(
432
+ model=request.model,
433
+ messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
434
+ temperature=request.temperature,
435
+ max_tokens=request.max_tokens
436
  )
437
 
438
+ logger.info(f"Completed response generation for API key: {api_key} | IP: {client_ip}")
439
+ return {
440
+ "id": f"chatcmpl-{uuid.uuid4()}",
441
+ "object": "chat.completion",
442
+ "created": int(datetime.now().timestamp()),
443
+ "model": request.model,
444
+ "choices": [
445
+ {
446
+ "index": 0,
447
+ "message": {
448
+ "role": "assistant",
449
+ "content": response_content
450
+ },
451
+ "finish_reason": "stop"
452
+ }
453
+ ],
454
+ "usage": {
455
+ "prompt_tokens": sum(len(msg.content.split()) for msg in request.messages),
456
+ "completion_tokens": len(response_content.split()),
457
+ "total_tokens": sum(len(msg.content.split()) for msg in request.messages) + len(response_content.split())
458
+ },
459
+ }
460
+ except ModelNotWorkingException as e:
461
+ logger.warning(f"Model not working: {e} | IP: {client_ip}")
462
+ raise HTTPException(status_code=503, detail=str(e))
463
+ except HTTPException as he:
464
+ logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
465
+ raise he
466
+ except Exception as e:
467
+ logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
468
+ raise HTTPException(status_code=500, detail=str(e))
469
+
470
+ @app.get("/v1/models", dependencies=[Depends(rate_limiter_per_ip)])
471
+ async def get_models(req: Request):
472
+ client_ip = req.client.host
473
+ logger.info(f"Fetching available models from IP: {client_ip}")
474
+ return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
475
+
476
+ @app.get("/v1/health", dependencies=[Depends(rate_limiter_per_ip)])
477
+ async def health_check(req: Request):
478
+ client_ip = req.client.host
479
+ logger.info(f"Health check requested from IP: {client_ip}")
480
+ return {"status": "ok"}
481
+
482
+ # ========================================
483
+ # 11. Define Custom Exception Handler
484
+ # ========================================
485
+ @app.exception_handler(HTTPException)
486
+ async def http_exception_handler(request: Request, exc: HTTPException):
487
+ client_ip = request.client.host
488
+ logger.error(f"HTTPException: {exc.detail} | Path: {request.url.path} | IP: {client_ip}")
489
+ return JSONResponse(
490
+ status_code=exc.status_code,
491
+ content={
492
+ "error": {
493
+ "message": exc.detail,
494
+ "type": "invalid_request_error",
495
+ "param": None,
496
+ "code": None
497
+ }
498
+ },
499
+ )
500
 
501
+ # ============================
502
+ # 12. Optional: Streaming Endpoint
503
+ # ============================
504
+ @app.post("/v1/chat/completions/stream", dependencies=[Depends(rate_limiter_per_ip)])
505
+ async def chat_completions_stream(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
506
+ client_ip = req.client.host
507
+ # Redact user messages only for logging purposes
508
+ redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
509
+
510
+ logger.info(f"Received streaming chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
511
+
512
+ try:
513
+ # Validate that the requested model is available
514
+ if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
515
+ logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
516
+ raise HTTPException(status_code=400, detail="Requested model is not available.")
517
+
518
+ # Create an asynchronous generator for the response
519
+ async_generator = Blackbox.create_async_generator(
520
+ model=request.model,
521
+ messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
522
+ temperature=request.temperature,
523
+ max_tokens=request.max_tokens
524
+ )
525
 
526
+ logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
527
+ return StreamingResponse(async_generator, media_type="text/event-stream")
528
+ except ModelNotWorkingException as e:
529
+ logger.warning(f"Model not working: {e} | IP: {client_ip}")
530
+ raise HTTPException(status_code=503, detail=str(e))
531
+ except HTTPException as he:
532
+ logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
533
+ raise he
534
+ except Exception as e:
535
+ logger.exception(f"An unexpected error occurred while processing the streaming chat completions request from IP: {client_ip}.")
536
+ raise HTTPException(status_code=500, detail=str(e))
537
+
538
+ # ========================================
539
+ # 13. Run the Application with Uvicorn
540
+ # ========================================
541
+ if __name__ == "__main__":
542
+ import uvicorn
543
+ uvicorn.run(app, host="0.0.0.0", port=8000)