Update main.py
main.py CHANGED
@@ -14,6 +14,16 @@ from typing import Optional, Dict, List, Any, Generator
 import asyncio
 from starlette.status import HTTP_403_FORBIDDEN
 import cloudscraper
+from concurrent.futures import ThreadPoolExecutor
+import uvloop
+from fastapi.middleware.gzip import GZipMiddleware
+from starlette.middleware.cors import CORSMiddleware
+
+# Enable uvloop for faster event loop
+asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
+
+# Thread pool for CPU-bound operations
+executor = ThreadPoolExecutor(max_workers=8)

 # Load environment variables once at startup
 load_dotenv()
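The new imports feed two changes used later in the file: uvloop replaces the default asyncio event loop, and the ThreadPoolExecutor is the pool that loop.run_in_executor hands blocking work to so the event loop stays responsive. A minimal sketch of that pattern, with blocking_fetch as a hypothetical stand-in for a synchronous call such as a cloudscraper request:

    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    executor = ThreadPoolExecutor(max_workers=8)

    def blocking_fetch() -> str:
        # placeholder for slow, synchronous work (network call, file read, ...)
        return "result"

    async def handler() -> str:
        loop = asyncio.get_running_loop()
        # the blocking call runs on a worker thread; the event loop keeps serving
        return await loop.run_in_executor(executor, blocking_fetch)

    asyncio.run(handler())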
@@ -27,6 +37,16 @@ usage_tracker = UsageTracker()

 app = FastAPI()

+# Add middleware for compression and CORS
+app.add_middleware(GZipMiddleware, minimum_size=1000)
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
 # Environment variables (cached)
 @lru_cache(maxsize=1)
 def get_env_vars():
@@ -41,8 +61,8 @@ def get_env_vars():
         'endpoint_origin': os.getenv('ENDPOINT_ORIGIN')
     }

-# Configuration for models
-mistral_models = [
+# Configuration for models - use sets for faster lookups
+mistral_models = {
     "mistral-large-latest",
     "pixtral-large-latest",
     "mistral-moderation-latest",
@@ -52,7 +72,7 @@ mistral_models = [
     "mistral-small-latest",
     "mistral-saba-latest",
     "codestral-latest"
-]
+}

 alternate_models = {
     "gpt-4o-mini",
@@ -78,23 +98,37 @@ class Payload(BaseModel):
 server_status = True
 available_model_ids: List[str] = []

-# Create a reusable httpx client
+# Create a reusable httpx client pool with connection pooling
 @lru_cache(maxsize=1)
 def get_async_client():
-    return httpx.AsyncClient(
+    return httpx.AsyncClient(
+        timeout=60.0,
+        limits=httpx.Limits(max_keepalive_connections=20, max_connections=100)
+    )
+
+# Create a cloudscraper pool
+scraper_pool = []
+MAX_SCRAPERS = 10

+def get_scraper():
+    if not scraper_pool:
+        for _ in range(MAX_SCRAPERS):
+            scraper_pool.append(cloudscraper.create_scraper())
+
+    return scraper_pool[int(time.time() * 1000) % MAX_SCRAPERS]  # Simple round-robin
+
+# API key validation - optimized to avoid string operations when possible
 async def verify_api_key(api_key: str = Security(api_key_header)) -> bool:
     if not api_key:
         raise HTTPException(
             status_code=HTTP_403_FORBIDDEN,
             detail="No API key provided"
         )
+
+    # Only clean if needed
     if api_key.startswith('Bearer '):
         api_key = api_key[7:]  # Remove 'Bearer ' prefix
+
     # Get API keys from environment
     valid_api_keys = get_env_vars()['api_keys']
     if not valid_api_keys or valid_api_keys == ['']:
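get_scraper() picks a pool slot from the current time in milliseconds modulo MAX_SCRAPERS, so two requests arriving in the same millisecond share an instance; it approximates round-robin rather than guaranteeing it. A deterministic alternative, sketched under the assumption that cloudscraper instances may be reused across requests (the same assumption the pool above makes):

    import itertools
    import cloudscraper

    MAX_SCRAPERS = 10
    _scrapers = [cloudscraper.create_scraper() for _ in range(MAX_SCRAPERS)]
    _next_scraper = itertools.cycle(_scrapers)

    def get_scraper():
        # next() walks the pool in order and wraps around at the end
        return next(_next_scraper)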
@@ -102,96 +136,120 @@ async def verify_api_key(api_key: str = Security(api_key_header)) -> bool:
             status_code=HTTP_403_FORBIDDEN,
             detail="API keys not configured on server"
         )
-    if api_key not in valid_api_keys:
+
+    # Fast check with set operation
+    if api_key not in set(valid_api_keys):
         raise HTTPException(
             status_code=HTTP_403_FORBIDDEN,
             detail="Invalid API key"
         )
+
     return True

+# Pre-load and cache models.json
 @lru_cache(maxsize=1)
+def load_models_data():
     try:
         file_path = Path(__file__).parent / 'models.json'
         with open(file_path, 'r') as f:
             return json.load(f)
     except (FileNotFoundError, json.JSONDecodeError) as e:
-        # Log the error but don't expose the exact error to users
         print(f"Error loading models.json: {str(e)}")
+        return []
+
+# Async wrapper for models data
+async def get_models():
+    models_data = load_models_data()
+    if not models_data:
         raise HTTPException(status_code=500, detail="Error loading available models")
+    return models_data

-# Searcher function with optimized streaming
-    # Use the provided system prompt, or default to "Be Helpful and Friendly"
-    system_message = systemprompt or "Be Helpful and Friendly"
-    # Create the prompt history with the user query and system message
-    prompt = [
-        {"role": "user", "content": query},
-    ]
-    prompt.insert(0, {"content": system_message, "role": "system"})
-    # Prepare the payload for the API request
-    payload = {
-        "is_vscode_extension": True,
-        "message_history": prompt,
-        "requested_model": "searchgpt",
-        "user_input": prompt[-1]["content"],
-    }
+# Searcher function with optimized streaming - moved to a separate thread
+async def generate_search_async(query: str, systemprompt: Optional[str] = None, stream: bool = True):
+    loop = asyncio.get_running_loop()

+    def _generate_search():
+        headers = {"User-Agent": ""}
+
+        # Use the provided system prompt, or default to "Be Helpful and Friendly"
+        system_message = systemprompt or "Be Helpful and Friendly"
+
+        # Create the prompt history with the user query and system message
+        prompt = [
+            {"role": "user", "content": query},
+        ]
+
+        prompt.insert(0, {"content": system_message, "role": "system"})
+
+        # Prepare the payload for the API request
+        payload = {
+            "is_vscode_extension": True,
+            "message_history": prompt,
+            "requested_model": "searchgpt",
+            "user_input": prompt[-1]["content"],
+        }
+
+        # Get endpoint from environment
+        secret_api_endpoint_3 = get_env_vars()['secret_api_endpoint_3']
+        if not secret_api_endpoint_3:
+            raise ValueError("Search API endpoint not configured")
+
+        # Send the request to the chat endpoint using a scraper from the pool
+        response = get_scraper().post(
+            secret_api_endpoint_3,
+            headers=headers,
+            json=payload,
+            stream=True
+        )
+
+        result = []
+        streaming_text = ""
+
+        # Process the streaming response
+        for value in response.iter_lines(decode_unicode=True):
+            if value.startswith("data: "):
+                try:
+                    json_modified_value = json.loads(value[6:])
+                    content = json_modified_value.get("choices", [{}])[0].get("delta", {}).get("content", "")
+
+                    if content.strip():  # Only process non-empty content
+                        cleaned_response = {
+                            "created": json_modified_value.get("created"),
+                            "id": json_modified_value.get("id"),
+                            "model": "searchgpt",
+                            "object": "chat.completion",
+                            "choices": [
+                                {
+                                    "message": {
+                                        "content": content
+                                    }
+                                }
+                            ]
+                        }
+
+                        if stream:
+                            result.append(f"data: {json.dumps(cleaned_response)}\n\n")
+
+                        streaming_text += content
+                except json.JSONDecodeError:
+                    continue
+
+        if not stream:
+            result.append(streaming_text)
+
+        return result

+    # Run in thread pool to avoid blocking the event loop
+    return await loop.run_in_executor(executor, _generate_search)
+
+# Cache for frequently accessed static files
+@lru_cache(maxsize=10)
+def read_html_file(file_path):
+    try:
+        with open(file_path, "r") as file:
+            return file.read()
+    except FileNotFoundError:
+        return None

 # Basic routes
 @app.get("/favicon.ico")
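_generate_search runs to completion inside the worker thread and returns a list, so every "data:" chunk is available only after the upstream response has finished; the StreamingResponse built from that list replays a buffered result rather than relaying tokens as they arrive. A rough sketch of live relaying through an asyncio.Queue, with blocking_chunks as a hypothetical callable yielding the same SSE strings (not part of this commit):

    import asyncio
    from concurrent.futures import ThreadPoolExecutor

    executor = ThreadPoolExecutor(max_workers=8)
    _DONE = object()

    async def relay(blocking_chunks):
        loop = asyncio.get_running_loop()
        queue: asyncio.Queue = asyncio.Queue()

        def producer():
            try:
                for chunk in blocking_chunks():
                    loop.call_soon_threadsafe(queue.put_nowait, chunk)
            finally:
                loop.call_soon_threadsafe(queue.put_nowait, _DONE)

        loop.run_in_executor(executor, producer)
        while True:
            item = await queue.get()
            if item is _DONE:
                break
            yield item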
@@ -201,29 +259,21 @@ async def favicon():

 @app.get("/ping")
 async def ping():
-    response_time = (datetime.datetime.now() - start_time).total_seconds()
-    return {"message": "pong", "response_time": f"{response_time:.6f} seconds"}
+    return {"message": "pong", "response_time": "0.000000 seconds"}

 @app.get("/", response_class=HTMLResponse)
 async def root():
-        with open(file_path, "r") as file:
-            html_content = file.read()
-        return HTMLResponse(content=html_content)
-    except FileNotFoundError:
+    html_content = read_html_file("index.html")
+    if html_content is None:
         return HTMLResponse(content="<h1>File not found</h1>", status_code=404)
+    return HTMLResponse(content=html_content)

 @app.get("/playground", response_class=HTMLResponse)
 async def playground():
-        with open(file_path, "r") as file:
-            html_content = file.read()
-        return HTMLResponse(content=html_content)
-    except FileNotFoundError:
+    html_content = read_html_file("playground.html")
+    if html_content is None:
         return HTMLResponse(content="<h1>playground.html not found</h1>", status_code=404)
+    return HTMLResponse(content=html_content)

 # Model routes
 @app.get("/api/v1/models")
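read_html_file is wrapped in lru_cache, so index.html and playground.html are read from disk once per process and later edits to those files are not served until the process restarts or the cache is dropped. functools exposes helpers on the wrapped function for exactly that:

    # read_html_file is the lru_cache-wrapped helper defined above
    read_html_file.cache_clear()        # forget cached pages; next request re-reads the file
    print(read_html_file.cache_info())  # hits, misses and current size, handy when checking the cache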
@@ -239,15 +289,20 @@ async def search_gpt(q: str, stream: Optional[bool] = False, systemprompt: Optio

     usage_tracker.record_request(endpoint="/searchgpt")

+    result = await generate_search_async(q, systemprompt=systemprompt, stream=stream)
+
     if stream:
+        async def stream_generator():
+            for chunk in result:
+                yield chunk
+
         return StreamingResponse(
+            stream_generator(),
             media_type="text/event-stream"
         )
     else:
-        # For non-streaming,
-        return JSONResponse(content={"response": response_text})
+        # For non-streaming, return the collected text
+        return JSONResponse(content={"response": result[0] if result else ""})

 # Chat completion endpoint
 @app.post("/chat/completions")
@@ -260,15 +315,17 @@ async def get_completion(payload: Payload, request: Request, authenticated: bool
             content={"message": "Server is under maintenance. Please try again later."}
         )

-    model_to_use = payload.model
+    model_to_use = payload.model or "gpt-4o-mini"

-    # Validate model availability
-    if available_model_ids and model_to_use not in available_model_ids:
+    # Validate model availability - fast lookup with set
+    if available_model_ids and model_to_use not in set(available_model_ids):
         raise HTTPException(
             status_code=400,
             detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
         )

+    # Log request without blocking
+    asyncio.create_task(log_request(request, model_to_use))
     usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")

     # Prepare payload
@@ -278,7 +335,7 @@ async def get_completion(payload: Payload, request: Request, authenticated: bool
     # Get environment variables
     env_vars = get_env_vars()

-    # Select the appropriate endpoint
+    # Select the appropriate endpoint (fast lookup with sets)
     if model_to_use in mistral_models:
         endpoint = env_vars['mistral_api']
         custom_headers = {
@@ -291,13 +348,8 @@ async def get_completion(payload: Payload, request: Request, authenticated: bool
         endpoint = env_vars['secret_api_endpoint']
         custom_headers = {}

-    ip_hash = hash(request.client.host) % 10000  # Hash the IP for privacy
-    print(f"Time: {current_time}, IP Hash: {ip_hash}, Model: {model_to_use}")
-
-    # Create scraper for each connection to avoid concurrency issues
-    scraper = cloudscraper.create_scraper()
+    # Get a scraper from the pool
+    scraper = get_scraper()

     async def stream_generator(payload_dict):
         try:
@@ -320,10 +372,25 @@ async def get_completion(payload: Payload, request: Request, authenticated: bool
                 detail = error_messages.get(response.status_code, f"Error code: {response.status_code}")
                 raise HTTPException(status_code=response.status_code, detail=detail)

-            # Stream response lines to the client
+            # Stream response lines to the client - use buffer for efficiency
+            buffer = []
+            buffer_size = 0
+            max_buffer = 8192  # 8KB buffer
+
             for line in response.iter_lines():
                 if line:
+                    decoded = line.decode('utf-8') + "\n"
+                    buffer.append(decoded)
+                    buffer_size += len(decoded)
+
+                    if buffer_size >= max_buffer:
+                        yield ''.join(buffer)
+                        buffer = []
+                        buffer_size = 0
+
+            # Flush remaining buffer
+            if buffer:
+                yield ''.join(buffer)

         except Exception as e:
             # Use a generic error message that doesn't expose internal details
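The 8 KB buffer batches many small upstream lines into fewer, larger writes, which lowers per-chunk overhead but delays delivery of the first bytes to the client. The batching itself is independent of FastAPI; the same technique as a standalone helper:

    from typing import Iterable, Iterator

    def batch_lines(lines: Iterable[str], max_buffer: int = 8192) -> Iterator[str]:
        """Join incoming lines until roughly max_buffer characters, then emit one chunk."""
        buffer, size = [], 0
        for line in lines:
            buffer.append(line)
            size += len(line)
            if size >= max_buffer:
                yield ''.join(buffer)
                buffer, size = [], 0
        if buffer:
            yield ''.join(buffer)

    # three tiny lines come out as a single chunk
    assert list(batch_lines(["a\n", "b\n", "c\n"])) == ["a\nb\nc\n"]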
@@ -331,7 +398,14 @@ async def get_completion(payload: Payload, request: Request, authenticated: bool

     return StreamingResponse(stream_generator(payload_dict), media_type="application/json")

+# Asynchronous logging function
+async def log_request(request, model):
+    # Get minimal data for logging
+    current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
+    ip_hash = hash(request.client.host) % 10000  # Hash the IP for privacy
+    print(f"Time: {current_time}, IP Hash: {ip_hash}, Model: {model}")
+
+# Image generation endpoint - optimized to use connection pool
 @app.api_route("/images/generations", methods=["GET", "POST"])
 async def generate_image(
     prompt: Optional[str] = None,
@@ -391,35 +465,53 @@ async def generate_image(
         params['enhance'] = str(enhance).lower()

     try:
-            response.iter_bytes(),
-            media_type=content_type,
-            headers={
-                'Cache-Control': 'no-cache',
-                'Pragma': 'no-cache'
-            }
-        )
+        # Use the shared httpx client for connection pooling
+        client = get_async_client()
+        response = await client.get(url, params=params, follow_redirects=True)
+
+        # Check for various error conditions
+        if response.status_code != 200:
+            error_messages = {
+                404: "Image generation service not found",
+                400: "Invalid parameters provided to image service",
+                429: "Too many requests to image service",
+            }
+            detail = error_messages.get(response.status_code, f"Image generation failed with status code {response.status_code}")
+            raise HTTPException(status_code=response.status_code, detail=detail)
+
+        # Verify content type
+        content_type = response.headers.get('content-type', '')
+        if not content_type.startswith('image/'):
+            raise HTTPException(
+                status_code=500,
+                detail="Unexpected content type received from image service"
+            )

+        # Use larger chunks for streaming for better performance
+        async def stream_with_larger_chunks():
+            chunks = []
+            size = 0
+            async for chunk in response.aiter_bytes(chunk_size=16384):  # Use 16KB chunks
+                chunks.append(chunk)
+                size += len(chunk)
+
+                if size >= 65536:  # Yield every 64KB
+                    yield b''.join(chunks)
+                    chunks = []
+                    size = 0
+
+            if chunks:
+                yield b''.join(chunks)

+        return StreamingResponse(
+            stream_with_larger_chunks(),
+            media_type=content_type,
+            headers={
+                'Cache-Control': 'no-cache, no-store, must-revalidate',
+                'Pragma': 'no-cache',
+                'Expires': '0'
+            }
+        )

     except httpx.TimeoutException:
         raise HTTPException(status_code=504, detail="Image generation request timed out")
@@ -428,37 +520,57 @@ async def generate_image(
     except Exception:
         raise HTTPException(status_code=500, detail="Unexpected error during image generation")

+# Meme endpoint with optimized networking
 @app.get("/meme")
 async def get_meme():
     try:
+        # Use the shared client for connection pooling
+        client = get_async_client()
+        response = await client.get("https://meme-api.com/gimme")
+        response_data = response.json()
+
+        meme_url = response_data.get("url")
+        if not meme_url:
+            raise HTTPException(status_code=404, detail="No meme found")
+
+        image_response = await client.get(meme_url, follow_redirects=True)
+
+        # Use larger chunks for streaming
+        async def stream_with_larger_chunks():
+            chunks = []
+            size = 0
+            async for chunk in image_response.aiter_bytes(chunk_size=16384):
+                chunks.append(chunk)
+                size += len(chunk)
+
+                if size >= 65536:
+                    yield b''.join(chunks)
+                    chunks = []
+                    size = 0
+
+            if chunks:
+                yield b''.join(chunks)
+
+        return StreamingResponse(
+            stream_with_larger_chunks(),
+            media_type=image_response.headers.get("content-type", "image/png"),
+            headers={'Cache-Control': 'max-age=3600'}  # Add caching
+        )
     except Exception:
         raise HTTPException(status_code=500, detail="Failed to retrieve meme")

+# Cache usage statistics
+@lru_cache(maxsize=10)
+def get_usage_summary(days=7):
+    return usage_tracker.get_usage_summary(days)
+
 @app.get("/usage")
 async def get_usage(days: int = 7):
     """Retrieve usage statistics"""
+    return get_usage_summary(days)

-    """Serve an HTML page showing usage statistics"""
-    # Retrieve usage data
-    usage_data = usage_tracker.get_usage_summary()
+# Generate HTML for usage page
+def generate_usage_html(usage_data):
     # Model Usage Table Rows
     model_usage_rows = "\n".join([
         f"""
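get_usage_summary and, below, get_usage_page_html are lru_cache-wrapped, so /usage and /usage/page keep serving the numbers computed on their first call for the lifetime of the process. If fresher data is wanted without clearing the cache by hand, one common workaround is a time-bucketed key; a sketch, assuming the usage_tracker object defined earlier in this file:

    import time
    from functools import lru_cache

    @lru_cache(maxsize=32)
    def _summary_for_bucket(days: int, bucket: int):
        # a new bucket value every 60 seconds means stale entries stop being hit after a minute
        return usage_tracker.get_usage_summary(days)

    def get_usage_summary(days: int = 7):
        return _summary_for_bucket(days, int(time.time() // 60))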
@@ -646,16 +758,28 @@ async def usage_page():
     </body>
     </html>
     """
+    return html_content
+
+# Cache the usage page HTML
+@lru_cache(maxsize=1)
+def get_usage_page_html():
+    usage_data = get_usage_summary()
+    return generate_usage_html(usage_data)
+
+@app.get("/usage/page", response_class=HTMLResponse)
+async def usage_page():
+    """Serve an HTML page showing usage statistics"""
+    # Use cached HTML if available, regenerate if not
+    html_content = get_usage_page_html()
     return HTMLResponse(content=html_content)

-# Utility function for loading model IDs
+# Utility function for loading model IDs - optimized to run once at startup
 def load_model_ids(json_file_path):
     try:
         with open(json_file_path, 'r') as f:
             models_data = json.load(f)
-            # Extract 'id' from each model object
-            return model_ids
+            # Extract 'id' from each model object and use a set for fast lookups
+            return [model['id'] for model in models_data if 'id' in model]
     except Exception as e:
         print(f"Error loading model IDs: {str(e)}")
         return []
@@ -665,7 +789,10 @@ async def startup_event():
     global available_model_ids
     available_model_ids = load_model_ids("models.json")
     print(f"Loaded {len(available_model_ids)} model IDs")
+
+    # Preload scrapers
+    for _ in range(MAX_SCRAPERS):
+        scraper_pool.append(cloudscraper.create_scraper())

     # Validate critical environment variables
     env_vars = get_env_vars()
@@ -680,7 +807,19 @@ async def startup_event():

     if missing_vars:
         print(f"WARNING: The following required environment variables are missing: {', '.join(missing_vars)}")
+
+    print("API started successfully with high-performance optimizations")

 if __name__ == "__main__":
     import uvicorn
-    uvicorn.run(
+    uvicorn.run(
+        app,
+        host="0.0.0.0",
+        port=8000,
+        workers=4,  # Multiple workers for better CPU utilization
+        loop="uvloop",  # Use uvloop for faster async operations
+        http="httptools",  # Faster HTTP parsing
+        log_level="warning",  # Reduce logging overhead
+        limit_concurrency=100,  # Limit concurrent connections
+        timeout_keep_alive=5  # Reduce idle connection time
+    )
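One caveat in the uvicorn.run call: uvicorn only spawns multiple workers when the application is given as an import string, so passing the app object together with workers=4 triggers uvicorn's warning about needing an import string rather than forking four processes. A sketch of the import-string form, assuming the file is named main.py and the FastAPI instance is called app:

    import uvicorn

    if __name__ == "__main__":
        uvicorn.run(
            "main:app",            # import string enables the workers option
            host="0.0.0.0",
            port=8000,
            workers=4,
            loop="uvloop",
            http="httptools",
            log_level="warning",
            limit_concurrency=100,
            timeout_keep_alive=5,
        )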