Niansuh committed on
Commit
c9b656e
·
verified ·
1 Parent(s): 9b9354c

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +70 -29
main.py CHANGED
@@ -43,32 +43,55 @@ else:
43
  rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
44
  ip_rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
45
 
46
- async def get_api_key(authorization: str = Header(None)) -> str:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  if authorization is None or not authorization.startswith('Bearer '):
48
- logger.warning("Invalid or missing authorization header.")
49
  raise HTTPException(status_code=401, detail='Invalid authorization header format')
50
  api_key = authorization[7:]
51
  if api_key not in API_KEYS:
52
- logger.warning(f"Invalid API key attempted: {api_key}")
53
  raise HTTPException(status_code=401, detail='Invalid API key')
54
  return api_key
55
 
56
- async def rate_limiter(req: Request, api_key: str = Depends(get_api_key)):
 
57
  current_time = time.time()
 
58
  # Rate limiting per API key
59
  window_start = rate_limit_store[api_key]["timestamp"]
60
- if current_time - window_start > 60:
61
  rate_limit_store[api_key] = {"count": 1, "timestamp": current_time}
62
  else:
63
  if rate_limit_store[api_key]["count"] >= RATE_LIMIT:
64
- logger.warning(f"Rate limit exceeded for API key: {api_key}")
65
  raise HTTPException(status_code=429, detail='Rate limit exceeded for API key')
66
  rate_limit_store[api_key]["count"] += 1
67
 
68
  # Rate limiting per IP address
69
- client_ip = req.client.host
70
  window_start_ip = ip_rate_limit_store[client_ip]["timestamp"]
71
- if current_time - window_start_ip > 60:
72
  ip_rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
73
  else:
74
  if ip_rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
@@ -380,6 +403,8 @@ async def security_middleware(request: Request, call_next):
380
  if request.method == "POST" and request.url.path in ["/v1/chat/completions", "/v1/completions"]:
381
  content_type = request.headers.get("Content-Type")
382
  if content_type != "application/json":
 
 
383
  return JSONResponse(
384
  status_code=400,
385
  content={
@@ -431,15 +456,16 @@ def create_response(content: str, model: str, finish_reason: Optional[str] = Non
431
 
432
  @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter)])
433
  async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
 
434
  # Redact user messages only for logging purposes
435
  redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
436
 
437
- logger.info(f"Received chat completions request from API key: {api_key} | Model: {request.model} | Messages: {redacted_messages}")
438
 
439
  try:
440
  # Validate that the requested model is available
441
  if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
442
- logger.warning(f"Attempt to use unavailable model: {request.model}")
443
  raise HTTPException(status_code=400, detail="Requested model is not available.")
444
 
445
  # Process the request with actual message content, but don't log it
@@ -468,7 +494,7 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
468
  error_response = {"error": he.detail}
469
  yield f"data: {json.dumps(error_response)}\n\n"
470
  except Exception as e:
471
- logger.exception("Error during streaming response generation.")
472
  error_response = {"error": str(e)}
473
  yield f"data: {json.dumps(error_response)}\n\n"
474
 
@@ -481,7 +507,7 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
481
  else:
482
  response_content += chunk
483
 
484
- logger.info(f"Completed non-streaming response generation for API key: {api_key}")
485
  return {
486
  "id": f"chatcmpl-{uuid.uuid4()}",
487
  "object": "chat.completion",
@@ -504,47 +530,53 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
504
  },
505
  }
506
  except ModelNotWorkingException as e:
507
- logger.warning(f"Model not working: {e}")
508
  raise HTTPException(status_code=503, detail=str(e))
509
  except HTTPException as he:
510
- logger.warning(f"HTTPException: {he.detail}")
511
  raise he
512
  except Exception as e:
513
- logger.exception("An unexpected error occurred while processing the chat completions request.")
514
  raise HTTPException(status_code=500, detail=str(e))
515
 
516
  # Return 'about:blank' when accessing the endpoint via GET
517
  @app.get("/v1/chat/completions")
518
- async def chat_completions_get():
519
- logger.info("GET request made to /v1/chat/completions, redirecting to 'about:blank'")
 
520
  return RedirectResponse(url='about:blank')
521
 
522
  @app.get("/v1/models")
523
- async def get_models():
524
- logger.info("Fetching available models")
 
525
  return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
526
 
527
  # Additional endpoints for better functionality
528
  @app.get("/v1/health", dependencies=[Depends(rate_limiter)])
529
  async def health_check(req: Request, api_key: str = Depends(get_api_key)):
530
- logger.info(f"Health check requested by API key: {api_key}")
 
531
  return {"status": "ok"}
532
 
533
  @app.get("/v1/models/{model}/status")
534
- async def model_status(model: str):
535
- logger.info(f"Model status requested for '{model}'")
 
536
  if model in Blackbox.models:
537
  return {"model": model, "status": "available"}
538
  elif model in Blackbox.model_aliases and Blackbox.model_aliases[model] in Blackbox.models:
539
  actual_model = Blackbox.model_aliases[model]
540
  return {"model": actual_model, "status": "available via alias"}
541
  else:
542
- logger.warning(f"Model not found: {model}")
543
  raise HTTPException(status_code=404, detail="Model not found")
544
 
545
  # Custom exception handler to match OpenAI's error format
546
  @app.exception_handler(HTTPException)
547
  async def http_exception_handler(request: Request, exc: HTTPException):
 
 
548
  return JSONResponse(
549
  status_code=exc.status_code,
550
  content={
@@ -562,9 +594,11 @@ class TokenizerRequest(BaseModel):
562
  text: str
563
 
564
  @app.post("/v1/tokenizer")
565
- async def tokenizer(request: TokenizerRequest):
 
566
  text = request.text
567
  token_count = len(text.split())
 
568
  return {"text": text, "tokens": token_count}
569
 
570
  # New endpoint: /v1/completions to support text completions
@@ -587,12 +621,13 @@ class CompletionRequest(BaseModel):
587
 
588
  @app.post("/v1/completions", dependencies=[Depends(rate_limiter)])
589
  async def completions(request: CompletionRequest, req: Request, api_key: str = Depends(get_api_key)):
590
- logger.info(f"Received completion request from API key: {api_key} | Model: {request.model}")
 
591
 
592
  try:
593
  # Validate that the requested model is available
594
  if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
595
- logger.warning(f"Attempt to use unavailable model: {request.model}")
596
  raise HTTPException(status_code=400, detail="Requested model is not available.")
597
 
598
  # Simulate a simple completion by echoing the prompt
@@ -618,12 +653,18 @@ async def completions(request: CompletionRequest, req: Request, api_key: str = D
618
  }
619
  }
620
  except HTTPException as he:
621
- logger.warning(f"HTTPException: {he.detail}")
622
  raise he
623
  except Exception as e:
624
- logger.exception("An unexpected error occurred while processing the completions request.")
625
  raise HTTPException(status_code=500, detail=str(e))
626
 
 
 
 
 
 
 
627
  if __name__ == "__main__":
628
  import uvicorn
629
- uvicorn.run(app, host="0.0.0.0", port=8000)
 
43
  rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
44
  ip_rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
45
 
46
# Housekeeping configuration for the in-memory rate-limit stores.
CLEANUP_INTERVAL = 60  # seconds between cleanup passes
RATE_LIMIT_WINDOW = 60  # seconds; length of one rate-limit window

async def cleanup_rate_limit_stores():
    """Background task: forever evict stale rate-limit entries.

    Every CLEANUP_INTERVAL seconds, drop any API-key or IP entry whose
    window timestamp is older than twice RATE_LIMIT_WINDOW, so the
    defaultdict stores do not grow without bound.
    """
    while True:
        now = time.time()
        cutoff = RATE_LIMIT_WINDOW * 2

        # Evict expired API-key entries (snapshot keys first, then delete).
        stale_keys = [k for k, v in rate_limit_store.items() if now - v["timestamp"] > cutoff]
        for key in stale_keys:
            del rate_limit_store[key]
            logger.debug(f"Cleaned up rate_limit_store for API key: {key}")

        # Evict expired IP entries the same way.
        stale_ips = [addr for addr, v in ip_rate_limit_store.items() if now - v["timestamp"] > cutoff]
        for ip in stale_ips:
            del ip_rate_limit_store[ip]
            logger.debug(f"Cleaned up ip_rate_limit_store for IP: {ip}")

        await asyncio.sleep(CLEANUP_INTERVAL)
67
async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
    """FastAPI dependency: extract and validate the Bearer API key.

    Expects an ``Authorization: Bearer <key>`` header; the key must be one
    of the configured API_KEYS. Raises HTTPException(401) otherwise,
    logging the client IP for audit purposes.
    """
    client_ip = request.client.host
    scheme = 'Bearer '
    if authorization is None or not authorization.startswith(scheme):
        logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
        raise HTTPException(status_code=401, detail='Invalid authorization header format')
    api_key = authorization[len(scheme):]
    if api_key in API_KEYS:
        return api_key
    logger.warning(f"Invalid API key attempted: {api_key} from IP: {client_ip}")
    raise HTTPException(status_code=401, detail='Invalid API key')
77
 
78
+ async def rate_limiter(request: Request, api_key: str = Depends(get_api_key)):
79
+ client_ip = request.client.host
80
  current_time = time.time()
81
+
82
  # Rate limiting per API key
83
  window_start = rate_limit_store[api_key]["timestamp"]
84
+ if current_time - window_start > RATE_LIMIT_WINDOW:
85
  rate_limit_store[api_key] = {"count": 1, "timestamp": current_time}
86
  else:
87
  if rate_limit_store[api_key]["count"] >= RATE_LIMIT:
88
+ logger.warning(f"Rate limit exceeded for API key: {api_key} from IP: {client_ip}")
89
  raise HTTPException(status_code=429, detail='Rate limit exceeded for API key')
90
  rate_limit_store[api_key]["count"] += 1
91
 
92
  # Rate limiting per IP address
 
93
  window_start_ip = ip_rate_limit_store[client_ip]["timestamp"]
94
+ if current_time - window_start_ip > RATE_LIMIT_WINDOW:
95
  ip_rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
96
  else:
97
  if ip_rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
 
403
  if request.method == "POST" and request.url.path in ["/v1/chat/completions", "/v1/completions"]:
404
  content_type = request.headers.get("Content-Type")
405
  if content_type != "application/json":
406
+ client_ip = request.client.host
407
+ logger.warning(f"Invalid Content-Type from IP: {client_ip} for path: {request.url.path}")
408
  return JSONResponse(
409
  status_code=400,
410
  content={
 
456
 
457
  @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter)])
458
  async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
459
+ client_ip = req.client.host
460
  # Redact user messages only for logging purposes
461
  redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
462
 
463
+ logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
464
 
465
  try:
466
  # Validate that the requested model is available
467
  if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
468
+ logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
469
  raise HTTPException(status_code=400, detail="Requested model is not available.")
470
 
471
  # Process the request with actual message content, but don't log it
 
494
  error_response = {"error": he.detail}
495
  yield f"data: {json.dumps(error_response)}\n\n"
496
  except Exception as e:
497
+ logger.exception(f"Error during streaming response generation from IP: {client_ip}.")
498
  error_response = {"error": str(e)}
499
  yield f"data: {json.dumps(error_response)}\n\n"
500
 
 
507
  else:
508
  response_content += chunk
509
 
510
+ logger.info(f"Completed non-streaming response generation for API key: {api_key} | IP: {client_ip}")
511
  return {
512
  "id": f"chatcmpl-{uuid.uuid4()}",
513
  "object": "chat.completion",
 
530
  },
531
  }
532
  except ModelNotWorkingException as e:
533
+ logger.warning(f"Model not working: {e} | IP: {client_ip}")
534
  raise HTTPException(status_code=503, detail=str(e))
535
  except HTTPException as he:
536
+ logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
537
  raise he
538
  except Exception as e:
539
+ logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
540
  raise HTTPException(status_code=500, detail=str(e))
541
 
542
# Return 'about:blank' when accessing the endpoint via GET
@app.get("/v1/chat/completions")
async def chat_completions_get(req: Request):
    """Handle browser GET hits on the chat endpoint by redirecting away."""
    logger.info(f"GET request made to /v1/chat/completions from IP: {req.client.host}, redirecting to 'about:blank'")
    return RedirectResponse(url='about:blank')
548
 
549
@app.get("/v1/models")
async def get_models(req: Request):
    """List every available model id in OpenAI-style ``{"data": [...]}`` form."""
    logger.info(f"Fetching available models from IP: {req.client.host}")
    model_entries = [{"id": model, "object": "model"} for model in Blackbox.models]
    return {"data": model_entries}
554
 
555
# Additional endpoints for better functionality
@app.get("/v1/health", dependencies=[Depends(rate_limiter)])
async def health_check(req: Request, api_key: str = Depends(get_api_key)):
    """Authenticated, rate-limited liveness probe."""
    logger.info(f"Health check requested by API key: {api_key} | IP: {req.client.host}")
    return {"status": "ok"}
561
 
562
@app.get("/v1/models/{model}/status")
async def model_status(model: str, req: Request):
    """Report whether *model* is served directly, via an alias, or not at all.

    Raises HTTPException(404) when the name matches neither a model nor a
    resolvable alias.
    """
    client_ip = req.client.host
    logger.info(f"Model status requested for '{model}' from IP: {client_ip}")

    if model in Blackbox.models:
        return {"model": model, "status": "available"}

    # Try to resolve the name as an alias pointing at a served model.
    alias_target = Blackbox.model_aliases.get(model)
    if alias_target is not None and alias_target in Blackbox.models:
        return {"model": alias_target, "status": "available via alias"}

    logger.warning(f"Model not found: {model} from IP: {client_ip}")
    raise HTTPException(status_code=404, detail="Model not found")
574
 
575
  # Custom exception handler to match OpenAI's error format
576
  @app.exception_handler(HTTPException)
577
  async def http_exception_handler(request: Request, exc: HTTPException):
578
+ client_ip = request.client.host
579
+ logger.error(f"HTTPException: {exc.detail} | Path: {request.url.path} | IP: {client_ip}")
580
  return JSONResponse(
581
  status_code=exc.status_code,
582
  content={
 
594
  text: str
595
 
596
  @app.post("/v1/tokenizer")
597
+ async def tokenizer(request: TokenizerRequest, req: Request, api_key: str = Depends(get_api_key)):
598
+ client_ip = req.client.host
599
  text = request.text
600
  token_count = len(text.split())
601
+ logger.info(f"Tokenizer requested by API key: {api_key} | IP: {client_ip} | Text length: {len(text)}")
602
  return {"text": text, "tokens": token_count}
603
 
604
  # New endpoint: /v1/completions to support text completions
 
621
 
622
  @app.post("/v1/completions", dependencies=[Depends(rate_limiter)])
623
  async def completions(request: CompletionRequest, req: Request, api_key: str = Depends(get_api_key)):
624
+ client_ip = req.client.host
625
+ logger.info(f"Received completion request from API key: {api_key} | IP: {client_ip} | Model: {request.model}")
626
 
627
  try:
628
  # Validate that the requested model is available
629
  if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
630
+ logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
631
  raise HTTPException(status_code=400, detail="Requested model is not available.")
632
 
633
  # Simulate a simple completion by echoing the prompt
 
653
  }
654
  }
655
  except HTTPException as he:
656
+ logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
657
  raise he
658
  except Exception as e:
659
+ logger.exception(f"An unexpected error occurred while processing the completions request from IP: {client_ip}.")
660
  raise HTTPException(status_code=500, detail=str(e))
661
 
662
+ # Add the cleanup task when the app starts
663
+ @app.on_event("startup")
664
+ async def startup_event():
665
+ asyncio.create_task(cleanup_rate_limit_stores())
666
+ logger.info("Started rate limit store cleanup task.")
667
+
# Script entry point: serve the app directly with uvicorn.
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=8000)