test24

Sleeping

App Files Community

Niansuh committed on Oct 16, 2024

Commit

e813605

verified ·

1 Parent(s): 25032a9

Update main.py

Browse files

Files changed (1) hide show

main.py +28 -54

main.py CHANGED Viewed

@@ -39,72 +39,45 @@ if AVAILABLE_MODELS:
 else:
     AVAILABLE_MODELS = []  # If empty, all models are available
-# Simple in-memory rate limiter
 rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
-ip_rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
 # Define cleanup interval and window
 CLEANUP_INTERVAL = 60  # seconds
 RATE_LIMIT_WINDOW = 60  # seconds
 async def cleanup_rate_limit_stores():
     while True:
         current_time = time.time()
-        # Clean API key rate limit store
-        keys_to_delete = [key for key, value in rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
-        for key in keys_to_delete:
-            del rate_limit_store[key]
-            logger.debug(f"Cleaned up rate_limit_store for API key: {key}")
-        # Clean IP rate limit store
-        ips_to_delete = [ip for ip, value in ip_rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
         for ip in ips_to_delete:
-            del ip_rate_limit_store[ip]
-            logger.debug(f"Cleaned up ip_rate_limit_store for IP: {ip}")
         await asyncio.sleep(CLEANUP_INTERVAL)
-# Define rate limiter for endpoints requiring API key
-async def rate_limiter_with_api_key(request: Request, api_key: str = Depends(lambda request: get_api_key(request, authorization=request.headers.get('Authorization')))):
-    client_ip = request.client.host
-    current_time = time.time()
-    # Rate limiting per API key
-    window_start = rate_limit_store[api_key]["timestamp"]
-    if current_time - window_start > RATE_LIMIT_WINDOW:
-        rate_limit_store[api_key] = {"count": 1, "timestamp": current_time}
-    else:
-        if rate_limit_store[api_key]["count"] >= RATE_LIMIT:
-            logger.warning(f"Rate limit exceeded for API key: {api_key} from IP: {client_ip}")
-            raise HTTPException(status_code=429, detail='Rate limit exceeded for API key')
-        rate_limit_store[api_key]["count"] += 1
-    # Rate limiting per IP address
-    window_start_ip = ip_rate_limit_store[client_ip]["timestamp"]
-    if current_time - window_start_ip > RATE_LIMIT_WINDOW:
-        ip_rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
-    else:
-        if ip_rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
-            logger.warning(f"Rate limit exceeded for IP address: {client_ip}")
-            raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address')
-        ip_rate_limit_store[client_ip]["count"] += 1
-# Define rate limiter for endpoints NOT requiring API key
 async def rate_limiter_per_ip(request: Request):
     client_ip = request.client.host
     current_time = time.time()
-    # Rate limiting per IP address
-    window_start_ip = ip_rate_limit_store[client_ip]["timestamp"]
-    if current_time - window_start_ip > RATE_LIMIT_WINDOW:
-        ip_rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
     else:
-        if ip_rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
             logger.warning(f"Rate limit exceeded for IP address: {client_ip}")
             raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address')
-        ip_rate_limit_store[client_ip]["count"] += 1
 async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
     client_ip = request.client.host
     if authorization is None or not authorization.startswith('Bearer '):
         logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
@@ -412,11 +385,17 @@ class Blackbox:
 # FastAPI app setup
 app = FastAPI()
-# Middleware to enhance security
 @app.middleware("http")
 async def security_middleware(request: Request, call_next):
     client_ip = request.client.host
-    # Enforce that POST requests to sensitive endpoints must have a valid Content-Type
     if request.method == "POST" and request.url.path == "/v1/chat/completions":
         content_type = request.headers.get("Content-Type")
         if content_type != "application/json":
@@ -435,6 +414,7 @@ async def security_middleware(request: Request, call_next):
     response = await call_next(request)
     return response
 class Message(BaseModel):
     role: str
     content: str
@@ -470,7 +450,7 @@ def create_response(content: str, model: str, finish_reason: Optional[str] = Non
         "usage": None,
     }
-@app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_with_api_key)])
 async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
     client_ip = req.client.host
     # Redact user messages only for logging purposes
@@ -621,12 +601,6 @@ async def http_exception_handler(request: Request, exc: HTTPException):
         },
     )
-# Add the cleanup task when the app starts
-@app.on_event("startup")
-async def startup_event():
-    asyncio.create_task(cleanup_rate_limit_stores())
-    logger.info("Started rate limit store cleanup task.")
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)

 else:
     AVAILABLE_MODELS = []  # If empty, all models are available
+# Simple in-memory rate limiter based solely on IP addresses
 rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
 # Define cleanup interval and window
 CLEANUP_INTERVAL = 60  # seconds
 RATE_LIMIT_WINDOW = 60  # seconds
 async def cleanup_rate_limit_stores():
+    """
+    Periodically cleans up stale entries in the rate_limit_store to prevent memory bloat.
+    """
     while True:
         current_time = time.time()
+        ips_to_delete = [ip for ip, value in rate_limit_store.items() if current_time - value["timestamp"] > RATE_LIMIT_WINDOW * 2]
         for ip in ips_to_delete:
+            del rate_limit_store[ip]
+            logger.debug(f"Cleaned up rate_limit_store for IP: {ip}")
         await asyncio.sleep(CLEANUP_INTERVAL)
 async def rate_limiter_per_ip(request: Request):
+    """
+    Rate limiter that enforces a limit based on the client's IP address.
+    """
     client_ip = request.client.host
     current_time = time.time()
+    # Initialize or update the count and timestamp
+    if current_time - rate_limit_store[client_ip]["timestamp"] > RATE_LIMIT_WINDOW:
+        rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
     else:
+        if rate_limit_store[client_ip]["count"] >= RATE_LIMIT:
             logger.warning(f"Rate limit exceeded for IP address: {client_ip}")
             raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address')
+        rate_limit_store[client_ip]["count"] += 1
 async def get_api_key(request: Request, authorization: str = Header(None)) -> str:
+    """
+    Dependency to extract and validate the API key from the Authorization header.
+    """
     client_ip = request.client.host
     if authorization is None or not authorization.startswith('Bearer '):
         logger.warning(f"Invalid or missing authorization header from IP: {client_ip}")
 # FastAPI app setup
 app = FastAPI()
+# Add the cleanup task when the app starts
+@app.on_event("startup")
+async def startup_event():
+    asyncio.create_task(cleanup_rate_limit_stores())
+    logger.info("Started rate limit store cleanup task.")
+# Middleware to enhance security and enforce Content-Type for specific endpoints
 @app.middleware("http")
 async def security_middleware(request: Request, call_next):
     client_ip = request.client.host
+    # Enforce that POST requests to /v1/chat/completions must have Content-Type: application/json
     if request.method == "POST" and request.url.path == "/v1/chat/completions":
         content_type = request.headers.get("Content-Type")
         if content_type != "application/json":
     response = await call_next(request)
     return response
+# Request Models
 class Message(BaseModel):
     role: str
     content: str
         "usage": None,
     }
+@app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
 async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
     client_ip = req.client.host
     # Redact user messages only for logging purposes
         },
     )
 if __name__ == "__main__":
     import uvicorn
     uvicorn.run(app, host="0.0.0.0", port=8000)