Niansuh committed on
Commit
e6f4968
·
verified ·
1 Parent(s): 9ef03f2

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +13 -36
main.py CHANGED
@@ -1,40 +1,17 @@
1
- import os
2
- import re
3
- import random
4
- import string
5
- import uuid
6
- import json
7
- import logging
8
- import asyncio
9
- import time
10
- from collections import defaultdict
11
- from typing import List, Dict, Any, Optional, Union, AsyncGenerator
12
-
13
- from aiohttp import ClientSession, ClientResponseError
14
- from fastapi import FastAPI, HTTPException, Request, Depends, Header
15
- from fastapi.responses import JSONResponse
16
- from pydantic import BaseModel
17
-
18
- # Configure logging
19
- logging.basicConfig(
20
- level=logging.INFO,
21
- format="%(asctime)s [%(levelname)s] %(name)s: %(message)s",
22
- handlers=[logging.StreamHandler()]
23
- )
24
- logger = logging.getLogger(__name__)
25
-
26
- # Load environment variables
27
- API_KEYS = os.getenv('API_KEYS', '').split(',') # Comma-separated API keys
28
- RATE_LIMIT = int(os.getenv('RATE_LIMIT', '60')) # Requests per minute
29
-
30
- if not API_KEYS or API_KEYS == ['']:
31
- logger.error("No API keys found. Please set the API_KEYS environment variable.")
32
- raise Exception("API_KEYS environment variable not set.")
33
-
34
  # Simple in-memory rate limiter based solely on IP addresses
35
- rate_limit_store = defaultdict(lambda: {"count": 0, "timestamp": time.time()})
 
 
 
 
 
 
 
 
 
 
 
36
 
37
- # Define cleanup interval and window
38
  CLEANUP_INTERVAL = 60 # seconds
39
  RATE_LIMIT_WINDOW = 60 # seconds
40
 
@@ -383,7 +360,7 @@ class ChatRequest(BaseModel):
383
  logit_bias: Optional[Dict[str, float]] = None
384
  user: Optional[str] = None
385
 
386
- @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
387
  async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
388
  client_ip = req.client.host
389
  # Redact user messages only for logging purposes
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  # Simple in-memory rate limiter based solely on IP addresses
2
async def rate_limiter_per_ip(request: Request):
    """Enforce a fixed-window request limit per client IP address.

    Intended to be used as a FastAPI dependency. Each client IP gets a
    counter and a window-start timestamp in the module-level
    ``rate_limit_store``. When more than ``RATE_LIMIT_WINDOW`` seconds have
    passed since the window started, the window is reset and the current
    request counts as the first one in the new window.

    NOTE(review): ``rate_limit_store`` is not defined in the visible part of
    the file; this code assumes indexing an unseen key yields a fresh
    ``{"count": ..., "timestamp": ...}`` entry (e.g. a ``defaultdict``).
    Confirm against the module's definition.

    Args:
        request: The incoming request; only ``request.client`` is read.

    Raises:
        HTTPException: With status 429 when the client has already made
            ``RATE_LIMIT`` requests in the current window.
    """
    # ``request.client`` can be None when the server cannot determine the
    # peer address; bucket such requests together instead of crashing with
    # an AttributeError.
    client_ip = request.client.host if request.client else "unknown"
    current_time = time.time()

    entry = rate_limit_store[client_ip]

    # Window expired: start a new one with this request as the first hit.
    if current_time - entry["timestamp"] > RATE_LIMIT_WINDOW:
        rate_limit_store[client_ip] = {"count": 1, "timestamp": current_time}
        return

    # Still inside the window: reject once the quota is used up.
    if entry["count"] >= RATE_LIMIT:
        logger.warning("Rate limit exceeded for IP address: %s", client_ip)
        raise HTTPException(status_code=429, detail='Rate limit exceeded for IP address')

    entry["count"] += 1
14
 
 
15
  CLEANUP_INTERVAL = 60 # seconds
16
  RATE_LIMIT_WINDOW = 60 # seconds
17
 
 
360
  logit_bias: Optional[Dict[str, float]] = None
361
  user: Optional[str] = None
362
 
363
+ @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
364
  async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
365
  client_ip = req.client.host
366
  # Redact user messages only for logging purposes