Update main.py
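This commit adds a background asyncio task that periodically evicts stale entries from the in-memory rate-limit stores, and threads the client IP address through authentication, rate limiting, the security middleware, and every endpoint's logging.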
main.py CHANGED
@@ -43,32 +43,55 @@ else:
 rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
 ip_rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
 
-async def get_api_key(authorization: str = Header(None)) -> str:
+# Define cleanup interval and window
+CLEANUP_INTERVAL = 60  # seconds
+RATE_LIMIT_WINDOW = 60  # seconds
+
+async def cleanup_rate_limit_stores():
+    while True:
+        current_time = time.time()
+        # Clean API key rate limit store
+        keys_to_delete = [key for key, value in rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
+        for key in keys_to_delete:
+            del rate_limit_store[key]
+            logger.debug(f"Cleaned up rate_limit_store for API key: {key}")
+
+        # Clean IP rate limit store
+        ips_to_delete = [ip for ip, value in ip_rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
+        for ip in ips_to_delete:
+            del ip_rate_limit_store[ip]
+            logger.debug(f"Cleaned up ip_rate_limit_store for IP: {ip}")
+
+        await asyncio.sleep(CLEANUP_INTERVAL)
+
+async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
+    client_ip = request.client.host
     if authorization is None or not authorization.startswith('Bearer '):
-        logger.warning("Invalid or missing authorization header")
+        logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
         raise HTTPException(status_code=401, detail='Invalid authorization header format')
     api_key = authorization[7:]
     if api_key not in API_KEYS:
-        logger.warning(f"Invalid API key attempted: {api_key}")
+        logger.warning(f"Invalid API key attempted: {api_key} from IP: {client_ip}")
         raise HTTPException(status_code=401, detail='Invalid API key')
     return api_key
 
-async def rate_limiter(req: Request, api_key: str = Depends(get_api_key)):
+async def rate_limiter(request: Request, api_key: str = Depends(get_api_key)):
+    client_ip = request.client.host
     current_time = time.time()
+
     # Rate limiting per API key
     window_start = rate_limit_store[api_key]["timestamp"]
-    if current_time - window_start > 60:
+    if current_time - window_start > RATE_LIMIT_WINDOW:
         rate_limit_store[api_key] = {"count": 1, "timestamp": current_time}
     else:
         if rate_limit_store[api_key]["count"] >= RATE_LIMIT:
-            logger.warning(f"Rate limit exceeded for API key: {api_key}")
+            logger.warning(f"Rate limit exceeded for API key: {api_key} from IP: {client_ip}")
             raise HTTPException(status_code=429, detail='Rate limit exceeded for API key')
         rate_limit_store[api_key]["count"] += 1
 
     # Rate limiting per IP address
-    client_ip = req.client.host
     window_start_ip = ip_rate_limit_store[client_ip]["timestamp"]
-    if current_time - window_start_ip > 60:
+    if current_time - window_start_ip > RATE_LIMIT_WINDOW:
         ip_rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
     else:
         if ip_rate_limit_store[client_ip]["count"] >= RATE_LIMIT:

@@ -380,6 +403,8 @@ async def security_middleware(request: Request, call_next):
     if request.method == "POST" and request.url.path in ["/v1/chat/completions", "/v1/completions"]:
         content_type = request.headers.get("Content-Type")
         if content_type != "application/json":
+            client_ip = request.client.host
+            logger.warning(f"Invalid Content-Type from IP: {client_ip} for path: {request.url.path}")
             return JSONResponse(
                 status_code=400,
                 content={

@@ -431,15 +456,16 @@ def create_response(content: str, model: str, finish_reason: Optional[str] = Non
 
 @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter)])
 async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
+    client_ip = req.client.host
     # Redact user messages only for logging purposes
     redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
 
-    logger.info(f"Received chat completions request from API key: {api_key} | Model: {request.model} | Messages: {redacted_messages}")
+    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
 
     try:
         # Validate that the requested model is available
         if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
-            logger.warning(f"Attempt to use unavailable model: {request.model}")
+            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
             raise HTTPException(status_code=400, detail="Requested model is not available.")
 
         # Process the request with actual message content, but don't log it

@@ -468,7 +494,7 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
                 error_response = {"error": he.detail}
                 yield f"data: {json.dumps(error_response)}\n\n"
             except Exception as e:
-                logger.exception("Error during streaming response generation.")
+                logger.exception(f"Error during streaming response generation from IP: {client_ip}.")
                 error_response = {"error": str(e)}
                 yield f"data: {json.dumps(error_response)}\n\n"
 
@@ -481,7 +507,7 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
             else:
                 response_content += chunk
 
-        logger.info(f"Completed non-streaming response generation for API key: {api_key}")
+        logger.info(f"Completed non-streaming response generation for API key: {api_key} | IP: {client_ip}")
         return {
             "id": f"chatcmpl-{uuid.uuid4()}",
             "object": "chat.completion",

@@ -504,47 +530,53 @@ async def chat_completions(request: ChatRequest, req: Request, api_key: str = De
             },
         }
     except ModelNotWorkingException as e:
-        logger.warning(f"Model not working: {e}")
+        logger.warning(f"Model not working: {e} | IP: {client_ip}")
         raise HTTPException(status_code=503, detail=str(e))
     except HTTPException as he:
-        logger.warning(f"HTTPException: {he.detail}")
+        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
         raise he
     except Exception as e:
-        logger.exception("An unexpected error occurred while processing the chat completions request.")
+        logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
         raise HTTPException(status_code=500, detail=str(e))
 
 # Return 'about:blank' when accessing the endpoint via GET
 @app.get("/v1/chat/completions")
-async def chat_completions_get():
-    logger.info("GET request made to /v1/chat/completions, redirecting to 'about:blank'")
+async def chat_completions_get(req: Request):
+    client_ip = req.client.host
+    logger.info(f"GET request made to /v1/chat/completions from IP: {client_ip}, redirecting to 'about:blank'")
     return RedirectResponse(url='about:blank')
 
 @app.get("/v1/models")
-async def get_models():
-    logger.info("Fetching available models")
+async def get_models(req: Request):
+    client_ip = req.client.host
+    logger.info(f"Fetching available models from IP: {client_ip}")
     return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
 
 # Additional endpoints for better functionality
 @app.get("/v1/health", dependencies=[Depends(rate_limiter)])
 async def health_check(req: Request, api_key: str = Depends(get_api_key)):
-    logger.info(f"Health check requested by API key: {api_key}")
+    client_ip = req.client.host
+    logger.info(f"Health check requested by API key: {api_key} | IP: {client_ip}")
     return {"status": "ok"}
 
 @app.get("/v1/models/{model}/status")
-async def model_status(model: str):
-    logger.info(f"Model status requested for '{model}'")
+async def model_status(model: str, req: Request):
+    client_ip = req.client.host
+    logger.info(f"Model status requested for '{model}' from IP: {client_ip}")
     if model in Blackbox.models:
         return {"model": model, "status": "available"}
     elif model in Blackbox.model_aliases and Blackbox.model_aliases[model] in Blackbox.models:
         actual_model = Blackbox.model_aliases[model]
         return {"model": actual_model, "status": "available via alias"}
     else:
-        logger.warning(f"Model not found: {model}")
+        logger.warning(f"Model not found: {model} from IP: {client_ip}")
         raise HTTPException(status_code=404, detail="Model not found")
 
 # Custom exception handler to match OpenAI's error format
 @app.exception_handler(HTTPException)
 async def http_exception_handler(request: Request, exc: HTTPException):
+    client_ip = request.client.host
+    logger.error(f"HTTPException: {exc.detail} | Path: {request.url.path} | IP: {client_ip}")
     return JSONResponse(
         status_code=exc.status_code,
         content={

@@ -562,9 +594,11 @@ class TokenizerRequest(BaseModel):
     text: str
 
 @app.post("/v1/tokenizer")
-async def tokenizer(request: TokenizerRequest):
+async def tokenizer(request: TokenizerRequest, req: Request, api_key: str = Depends(get_api_key)):
+    client_ip = req.client.host
     text = request.text
     token_count = len(text.split())
+    logger.info(f"Tokenizer requested by API key: {api_key} | IP: {client_ip} | Text length: {len(text)}")
     return {"text": text, "tokens": token_count}
 
 # New endpoint: /v1/completions to support text completions

@@ -587,12 +621,13 @@ class CompletionRequest(BaseModel):
 
 @app.post("/v1/completions", dependencies=[Depends(rate_limiter)])
 async def completions(request: CompletionRequest, req: Request, api_key: str = Depends(get_api_key)):
-    logger.info(f"Received completion request from API key: {api_key} | Model: {request.model}")
+    client_ip = req.client.host
+    logger.info(f"Received completion request from API key: {api_key} | IP: {client_ip} | Model: {request.model}")
 
     try:
         # Validate that the requested model is available
         if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
-            logger.warning(f"Attempt to use unavailable model: {request.model}")
+            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
             raise HTTPException(status_code=400, detail="Requested model is not available.")
 
         # Simulate a simple completion by echoing the prompt

@@ -618,12 +653,18 @@ async def completions(request: CompletionRequest, req: Request, api_key: str = D
             }
         }
     except HTTPException as he:
-        logger.warning(f"HTTPException: {he.detail}")
+        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
         raise he
     except Exception as e:
-        logger.exception("An unexpected error occurred while processing the completions request.")
+        logger.exception(f"An unexpected error occurred while processing the completions request from IP: {client_ip}.")
         raise HTTPException(status_code=500, detail=str(e))
 
+# Add the cleanup task when the app starts
+@app.on_event("startup")
+async def startup_event():
+    asyncio.create_task(cleanup_rate_limit_stores())
+    logger.info("Started rate limit store cleanup task.")
+
 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
+    uvicorn.run(app, host="0.0.0.0", port=8000)
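For context, here is a minimal, self-contained sketch of the eviction pattern the new cleanup_rate_limit_stores task uses, runnable outside FastAPI. The store shape and the two constants mirror the diff; the seeded keys and the harness in main() are hypothetical, for illustration only.

import asyncio
import time
from collections import defaultdict

RATE_LIMIT_WINDOW = 60  # seconds, matching the diff
CLEANUP_INTERVAL = 60   # seconds, matching the diff

rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})

async def cleanup_rate_limit_stores():
    # Same loop as in the commit: evict entries idle for more than two windows.
    while True:
        current_time = time.time()
        stale = [key for key, value in rate_limit_store.items()
                 if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
        for key in stale:
            del rate_limit_store[key]
        await asyncio.sleep(CLEANUP_INTERVAL)

async def main():
    # Hypothetical harness: seed one fresh and one long-idle entry,
    # let the cleanup task run one pass, then inspect the store.
    rate_limit_store["fresh-key"]["timestamp"] = time.time()
    rate_limit_store["stale-key"]["timestamp"] = time.time() - 5 * RATE_LIMIT_WINDOW
    task = asyncio.create_task(cleanup_rate_limit_stores())
    await asyncio.sleep(0.1)  # one cleanup pass runs before the first sleep finishes
    task.cancel()
    print(sorted(rate_limit_store))  # expected: ['fresh-key']

if __name__ == "__main__":
    asyncio.run(main())

Evicting only entries older than RATE_LIMIT_WINDOW * 2 is a conservative choice: the limiter itself resets any entry older than one window on its next access, so the doubled threshold bounds memory growth without ever touching a window that could still influence a limiting decision.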