Update main.py
main.py CHANGED
@@ -1,886 +1,1056 @@
 import os
 import re
-from dotenv import load_dotenv
-from fastapi import FastAPI, HTTPException, Request, Depends, Security
-from fastapi.responses import StreamingResponse, HTMLResponse, JSONResponse, FileResponse
-from fastapi.security import APIKeyHeader
-from pydantic import BaseModel
-import httpx
-from functools import lru_cache
-from pathlib import Path
 import json
 import datetime
 import time
-import threading
-from typing import Optional, Dict, List, Any, Generator
 import asyncio
-import uvloop
 from concurrent.futures import ThreadPoolExecutor
 from fastapi.middleware.gzip import GZipMiddleware
 from starlette.middleware.cors import CORSMiddleware
-import cloudscraper
-import requests
 
 asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 
 load_dotenv()
 
 api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
 
-from usage_tracker import UsageTracker
-usage_tracker = UsageTracker()
 
-app = FastAPI(
 
-app.add_middleware(GZipMiddleware, minimum_size=1000)
 app.add_middleware(
-    CORSMiddleware,
-    allow_origins=["*"],
     allow_credentials=True,
     allow_methods=["*"],
     allow_headers=["*"],
 )
 
-# Environment
     return {
-        'api_keys': os.getenv('API_KEYS', '').split(','),
         'secret_api_endpoint': os.getenv('SECRET_API_ENDPOINT'),
         'secret_api_endpoint_2': os.getenv('SECRET_API_ENDPOINT_2'),
-        'secret_api_endpoint_3': os.getenv('SECRET_API_ENDPOINT_3'),
-        'secret_api_endpoint_4': "https://text.pollinations.ai/openai",
-        'secret_api_endpoint_5': os.getenv('SECRET_API_ENDPOINT_5'),
-        'mistral_api': "https://api.mistral.ai",
         'mistral_key': os.getenv('MISTRAL_KEY'),
     }
 
-mistral_models = {
-    "mistral-large-latest",
-    "pixtral-large-latest",
-    "mistral-moderation-latest",
-    "ministral-3b-latest",
-    "ministral-8b-latest",
-    "open-mistral-nemo",
-    "mistral-small-latest",
-    "mistral-saba-latest",
-    "codestral-latest"
 }
 
-pollinations_models = {
-    "openai",
-    "openai-large",
-    "openai-xlarge",
-    "openai-reasoning",
-    "qwen-coder",
-    "llama",
-    "mistral",
-    "searchgpt",
-    "deepseek",
-    "claude-hybridspace",
-    "deepseek-r1",
-    "deepseek-reasoner",
-    "llamalight",
-    "gemini",
-    "gemini-thinking",
-    "hormoz",
-    "phi",
-    "phi-mini",
-    "openai-audio",
-    "llama-scaleway"
 }
 
-alternate_models = {
-    "gpt-4o",
-    "deepseek-v3",
-    "llama-3.1-8b-instruct",
-    "llama-3.1-sonar-small-128k-online",
-    "deepseek-r1-uncensored",
-    "tinyswallow1.5b",
-    "andy-3.5",
-    "o3-mini-low",
-    "hermes-3-llama-3.2-3b",
-    "creitin-r1",
-    "fluffy.1-chat",
-    "plutotext-1-text",
-    "command-a",
-    "claude-3-7-sonnet-20250219",
-    "plutogpt-3.5-turbo"
 }
 
-claude_3_models = {
-    "claude-3-7-sonnet",
-    "claude-3-7-sonnet-thinking",
-    "claude 3.5 sonnet",
-    "claude 3.5 haiku",
-    "o3-mini-medium",
-    "o3-mini-high",
-    "grok-3",
-    "grok-3-thinking",
-    "grok 2"
 }
 
-supported_image_models = {
-    "Flux Pro Ultra",
-    "grok-2-aurora",
-    "Flux Pro",
-    "Flux Pro Ultra Raw",
-    "Flux Dev",
-    "Flux Schnell",
-    "stable-diffusion-3-large-turbo",
-    "Flux Realism",
-    "stable-diffusion-ultra",
-    "dall-e-3",
-    "sdxl-lightning-4step"
 }
 
-# Request payload model
 class Payload(BaseModel):
     model: str
-    messages:
     stream: bool = False
 
-# Image generation payload model
 class ImageGenerationPayload(BaseModel):
     model: str
     prompt: str
-    size:
 
-available_model_ids: List[str] = []
 
-# Create a reusable httpx client pool with connection pooling
 @lru_cache(maxsize=1)
-def get_async_client():
 
     if not scraper_pool:
         for _ in range(MAX_SCRAPERS):
             scraper_pool.append(cloudscraper.create_scraper())
 
-# API key validation - optimized to avoid string operations when possible
 async def verify_api_key(
     request: Request,
-    api_key: str = Security(api_key_header)
 ) -> bool:
     referer = request.headers.get("referer", "")
-    if referer.startswith((
         return True
     if not api_key:
-    # Only clean if needed
     if api_key.startswith('Bearer '):
-        api_key = api_key[7:]
-    if api_key not in set(valid_api_keys):
-        raise HTTPException(
-            status_code=HTTP_403_FORBIDDEN,
-            detail="Invalid API key"
-        )
     return True
 
 @lru_cache(maxsize=1)
-def load_models_data():
     try:
-        with open(file_path, 'r') as f:
             return json.load(f)
     except (FileNotFoundError, json.JSONDecodeError) as e:
         return []
 
     models_data = load_models_data()
     if not models_data:
         raise HTTPException(status_code=500, detail="Error loading available models")
     return models_data
 
-async def generate_search_async(query: str, systemprompt: Optional[str] = None, stream: bool = True):
-    # Create a streaming response channel using asyncio.Queue
-    queue = asyncio.Queue()
-
-    async def _fetch_search_data():
-        try:
-            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
-
-            # Use the provided system prompt, or default to "Be Helpful and Friendly"
-            system_message = systemprompt or "Be Helpful and Friendly"
-
-            # Create the prompt history
-            prompt = [
-                {"role": "user", "content": query},
-            ]
-            prompt.insert(0, {"content": system_message, "role": "system"})
-
-            # Prepare the payload for the API request
-            payload = {
-                "is_vscode_extension": True,
-                "message_history": prompt,
-                "requested_model": "searchgpt",
-                "user_input": prompt[-1]["content"],
-            }
-
-            # Get endpoint from environment
-            secret_api_endpoint_3 = get_env_vars()['secret_api_endpoint_3']
-            if not secret_api_endpoint_3:
-                await queue.put({"error": "Search API endpoint not configured"})
-                return
-
-            # Use AsyncClient for better performance
-            async with httpx.AsyncClient(timeout=30.0) as client:
-                async with client.stream("POST", secret_api_endpoint_3, json=payload, headers=headers) as response:
-                    if response.status_code != 200:
-                        await queue.put({"error": f"Search API returned status code {response.status_code}"})
-                        return
-
-                    # Process the streaming response in real-time
-                    buffer = ""
-                    async for line in response.aiter_lines():
-                        if line.startswith("data: "):
-                            try:
-                                json_data = json.loads(line[6:])
-                                content = json_data.get("choices", [{}])[0].get("delta", {}).get("content", "")
-
-                                if content.strip():
-                                    cleaned_response = {
-                                        "created": json_data.get("created"),
-                                        "id": json_data.get("id"),
-                                        "model": "searchgpt",
-                                        "object": "chat.completion",
-                                        "choices": [
-                                            {
-                                                "message": {
-                                                    "content": content
-                                                }
-                                            }
-                                        ]
-                                    }
-
-                                    # Send to queue immediately for streaming
-                                    await queue.put({"data": f"data: {json.dumps(cleaned_response)}\n\n", "text": content})
-                            except json.JSONDecodeError:
-                                continue
-
-            # Signal completion
-            await queue.put(None)
-
-        except Exception as e:
-            await queue.put({"error": str(e)})
-            await queue.put(None)
 
-    asyncio.create_task(_fetch_search_data())
-
-    # Return the queue for consumption
-    return queue
 
-# Cache for frequently accessed static files
 @lru_cache(maxsize=10)
-def
     try:
-        with open(
             return file.read()
-    except
         return None
 
-async def favicon():
-    favicon_path = Path(__file__).parent / "banner.jpg"
-    return FileResponse(favicon_path, media_type="image/x-icon")
 
 async def ping():
 
-@app.get("/", response_class=HTMLResponse)
 async def root():
 
-@app.get("/style.css", response_class=
-async def
 
 async def dynamic_ai_page(request: Request):
     prompt = f"""
-    Generate a dynamic HTML page for a user with the following details:
     - User-Agent: {user_agent}
-    - Location: {location}
-    - Style: Cyberpunk, minimalist,
-    Wrap the generated HTML in triple backticks (```).
     """
     payload = {
-        "model": "mistral-small-latest",
-        "messages": [{"role": "user", "content": prompt}]
     }
     headers = {
     }
-    response = requests.post("https://parthsadaria-lokiai.hf.space/chat/completions", json=payload, headers=headers)
-    data = response.json()
-
-    # Extract HTML from ``` blocks
-    html_content = re.search(r"```(.*?)```", data['choices'][0]['message']['content'], re.DOTALL)
-    if html_content:
-        html_content = html_content.group(1).strip()
-
-    # Remove the first word
-    if html_content:
-        html_content = ' '.join(html_content.split(' ')[1:])
-
-    return HTMLResponse(content=html_content)
 
-@app.get("/playground", response_class=HTMLResponse)
-async def playground():
-    html_content = read_html_file("playground.html")
-    if html_content is None:
-        return HTMLResponse(content="<h1>playground.html not found</h1>", status_code=404)
-    return HTMLResponse(content=html_content)
 
-@app.get("/image-playground", response_class=HTMLResponse)
-async def playground():
-    html_content = read_html_file("image-playground.html")
-    if html_content is None:
-        return HTMLResponse(content="<h1>image-playground.html not found</h1>", status_code=404)
-    return HTMLResponse(content=html_content)
 
-GITHUB_BASE = "https://raw.githubusercontent.com/Parthsadaria/Vetra/main"
 
-async def get_github_file(filename: str) -> str:
     url = f"{GITHUB_BASE}/{filename}"
         res = await client.get(url)
 
-@app.get("/vetra", response_class=HTMLResponse)
 async def serve_vetra():
 
     if not html:
 
-    ).replace(
-        "</body>",
-        f"<script>{js or '// JS not found'}</script></body>"
-    )
     return HTMLResponse(content=final_html)
 
-async def return_models():
-    return await get_models()
 
     if not q:
         raise HTTPException(status_code=400, detail="Query parameter 'q' is required")
 
     usage_tracker.record_request(endpoint="/searchgpt")
 
-    queue = await generate_search_async(q, systemprompt=systemprompt
 
     if stream:
         async def stream_generator():
             while True:
                 item = await queue.get()
-                if item is None:
                     break
                 if "error" in item:
                     break
                 if "data" in item:
                     yield item["data"]
 
         return StreamingResponse(
             stream_generator(),
-            media_type="text/event-stream"
         )
     else:
         while True:
             item = await queue.get()
             if item is None:
                 break
             if "error" in item:
 
-@app.post("/chat/completions")
     if not server_status:
-            status_code=503,
-            content={"message": "Server is under maintenance. Please try again later."}
-        )
 
-    model_to_use = payload.model or "gpt-4o-mini"
 
-    if available_model_ids and model_to_use not in
 
     asyncio.create_task(log_request(request, model_to_use))
     usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")
 
-    # Prepare payload
-    payload_dict = payload.dict()
-    payload_dict["model"] = model_to_use
 
-    # Ensure stream is True for real-time streaming (can be overridden by client)
-    stream_enabled = payload_dict.get("stream", True)
 
     env_vars = get_env_vars()
 
-    # Select the appropriate endpoint (fast lookup with sets)
     if model_to_use in mistral_models:
-        endpoint = env_vars
     elif model_to_use in pollinations_models:
-        endpoint = env_vars
     elif model_to_use in alternate_models:
-        endpoint = env_vars
-        endpoint = env_vars
 
     try:
-        async with
     except httpx.TimeoutException:
     except httpx.RequestError as e:
     except Exception as e:
 
-    if stream_enabled:
         return StreamingResponse(
             media_type="text/event-stream",
             headers={
                 "Content-Type": "text/event-stream",
                 "Cache-Control": "no-cache",
                 "Connection": "keep-alive",
-                "X-Accel-Buffering": "no"
             }
         )
     else:
 
-        return JSONResponse(content=json.loads(''.join(response_content)))
 
-@app.post("/images/generations")
-async def create_image(
     """
     """
-    # Check server status
     if not server_status:
-            status_code=503,
-            content={"message": "Server is under maintenance. Please try again later."}
-        )
 
-    # Validate model
     if payload.model not in supported_image_models:
         raise HTTPException(
             status_code=400,
-            detail=f"Model '{payload.model}' is not supported for image generation.
         )
 
-    # Log the request
     usage_tracker.record_request(model=payload.model, endpoint="/images/generations")
 
     api_payload = {
         "model": payload.model,
         "prompt": payload.prompt,
     }
 
-    # Target API endpoint
-    target_api_url = os.getenv('NEW_IMG')
 
-    async with httpx.AsyncClient(timeout=60.0) as client:
-        response = await client.post(target_api_url, json=api_payload)
 
-    # Return the response from the
     return JSONResponse(content=response.json())
 
     except httpx.TimeoutException:
         raise HTTPException(status_code=504, detail="Image generation request timed out.")
     except httpx.RequestError as e:
         raise HTTPException(status_code=502, detail=f"Error connecting to image generation service: {e}")
     except Exception as e:
         raise HTTPException(status_code=500, detail=f"An unexpected error occurred during image generation: {e}")
 
-# Cache usage statistics
-@lru_cache(maxsize=10)
-def get_usage_summary(days=7):
-    return usage_tracker.get_usage_summary(days)
 
-@app.get("/usage")
 async def get_usage(days: int = 7):
-    """
 
-# Generate HTML for usage page
-def generate_usage_html(usage_data):
-    # Model Usage Table Rows
     model_usage_rows = "\n".join([
         f"""
         <tr>
             <td>{model}</td>
-            <td>{model_data
-            <td>{model_data
-            <td>{model_data
         </tr>
-        """ for model, model_data in
-    ])
 
-    # API Endpoint Usage Table Rows
     api_usage_rows = "\n".join([
         f"""
         <tr>
             <td>{endpoint}</td>
-            <td>{endpoint_data
-            <td>{endpoint_data
-            <td>{endpoint_data
         </tr>
-        """ for endpoint, endpoint_data in
-    ])
 
-    # Daily Usage Table Rows
     daily_usage_rows = "\n".join([
-    ])
 
     html_content = f"""
     <!DOCTYPE html>
     <html lang="en">
     <head>
         <meta charset="UTF-8">
         <title>Lokiai AI - Usage Statistics</title>
         <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
         <style>
             :root {{
-                --bg-dark: #0f1011;
-                --text-primary: #e6e6e6;
-                --text-secondary: #8c8c8c;
-                --border-color: #2c2c2c;
-                --accent-color: #3a6ee0;
                 --accent-hover: #4a7ef0;
             }}
-            body {{
-            }}
-                margin-bottom: 30px;
-            }}
-            .logo h1 {{
-                font-weight: 600;
-                font-size: 2.5em;
-                color: var(--text-primary);
-                margin-left: 15px;
-            }}
-            .logo img {{
-                width: 60px;
-                height: 60px;
-                border-radius: 10px;
-            }}
-            .container {{
-                background-color: var(--bg-darker);
-                border-radius: 12px;
-                padding: 30px;
-                box-shadow: 0 15px 40px rgba(0,0,0,0.3);
-                border: 1px solid var(--border-color);
-            }}
-            h2, h3 {{
-                color: var(--text-primary);
-                border-bottom: 2px solid var(--border-color);
-                padding-bottom: 10px;
-                font-weight: 500;
-            }}
-            .total-requests {{
-                background-color: var(--accent-color);
-                color: white;
-                text-align: center;
-                padding: 15px;
-                border-radius: 8px;
-                margin-bottom: 30px;
-                font-weight: 600;
-                letter-spacing: -0.5px;
-            }}
-            table {{
-                width: 100%;
-                border-collapse: separate;
-                border-spacing: 0;
-                margin-bottom: 30px;
-                background-color: var(--bg-dark);
-                border-radius: 8px;
-                overflow: hidden;
-            }}
-            th, td {{
-                border: 1px solid var(--border-color);
-                padding: 12px;
-                text-align: left;
-                transition: background-color 0.3s ease;
-            }}
-            th {{
-                background-color: #1e1e1e;
-                color: var(--text-primary);
-                font-weight: 600;
-                text-transform: uppercase;
-                font-size: 0.9em;
-            }}
-            tr:nth-child(even) {{
-                background-color: rgba(255,255,255,0.05);
-            }}
-            tr:hover {{
-                background-color: rgba(62,100,255,0.1);
-            }}
-            @media (max-width: 768px) {{
-                .container {{
-                    padding: 15px;
-                }}
-                table {{
-                    font-size: 0.9em;
-                }}
-            }}
         </style>
     </head>
     <body>
     <div class="container">
         <div class="logo">
             <img src="data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMTAwIDM1TDUwIDkwaDEwMHoiIGZpbGw9IiMzYTZlZTAiLz48Y2lyY2xlIGN4PSIxMDAiIGN5PSIxNDAiIHI9IjMwIiBmaWxsPSIjM2E2ZWUwIi8+PC9zdmc+" alt="Lokai AI Logo">
-            <h1>Lokiai AI</h1>
         </div>
 
         <div class="total-requests">
-            Total API Requests: {
         </div>
 
         <h2>Model Usage</h2>
         <table>
-            <tr>
-            <th>Total Requests</th>
-            <th>First Used</th>
-            <th>Last Used</th>
-            </tr>
-            {model_usage_rows}
         </table>
 
         <h2>API Endpoint Usage</h2>
         <table>
-            <tr>
-            <th>Total Requests</th>
-            <th>First Used</th>
-            <th>Last Used</th>
-            </tr>
-            {api_usage_rows}
         </table>
 
-        <h2>Daily Usage (Last 7 Days)</h2>
         <table>
-            <th>Entity</th>
-            <th>Requests</th>
-            </tr>
-            {daily_usage_rows}
         </table>
     </div>
     </body>
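One detail worth noting in the template above: inside the f-string, doubled braces ({{ and }}) escape to single literal braces for the CSS, while single-brace fields such as {model_usage_rows} interpolate. A minimal sketch of that same pattern (the variable name here is illustrative):

rows = "<tr><td>demo</td></tr>"
# Doubled braces render as literal braces; {rows} interpolates.
html = f"<style>td {{ padding: 4px; }}</style><table>{rows}</table>"
print(html)  # <style>td { padding: 4px; }</style><table><tr><td>demo</td></tr></table>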
@@ -888,166 +1058,199 @@ def generate_usage_html(usage_data):
     """
     return html_content
 
-    return generate_usage_html(usage_data)
 
-@app.get("/usage/page", response_class=HTMLResponse)
 async def usage_page():
-    """
 
-# Meme
-@app.get("/meme")
 async def get_meme():
     try:
-        response
         response_data = response.json()
 
         meme_url = response_data.get("url")
-        if not meme_url:
 
-        image_response = await client.get(meme_url, follow_redirects=True)
 
-                size += len(chunk)
 
-        return StreamingResponse(
-            stream_with_larger_chunks(),
-            media_type=image_response.headers.get("content-type", "image/png"),
-            headers={'Cache-Control': 'max-age=3600'} # Add caching
-        )
-    except Exception:
-        raise HTTPException(status_code=500, detail="Failed to retrieve meme")
 
-def load_model_ids(json_file_path):
-    try:
-        with open(json_file_path, 'r') as f:
-            models_data = json.load(f)
-            # Extract 'id' from each model object and use a set for fast lookups
-            return [model['id'] for model in models_data if 'id' in model]
-    except Exception as e:
-        print(f"Error loading model IDs: {str(e)}")
-        return []
 
 @app.on_event("startup")
 async def startup_event():
     global available_model_ids
 
-    # Validate critical environment variables
     env_vars = get_env_vars()
-        missing_vars.append('SECRET_API_ENDPOINT_3')
-    if not env_vars['secret_api_endpoint_4']:
-        missing_vars.append('SECRET_API_ENDPOINT_4')
-    if not env_vars['secret_api_endpoint_5']: # Check the new endpoint
-        missing_vars.append('SECRET_API_ENDPOINT_5')
-    if not env_vars['mistral_api'] and any(model in mistral_models for model in available_model_ids):
-        missing_vars.append('MISTRAL_API')
-    if not env_vars['mistral_key'] and any(model in mistral_models for model in available_model_ids):
-        missing_vars.append('MISTRAL_KEY')
 
-    if missing_vars:
-        print(f"WARNING: The following environment variables are missing: {', '.join(missing_vars)}")
-        print("Some functionality may be limited.")
 
-    print("Server started successfully!")
 
 @app.on_event("shutdown")
 async def shutdown_event():
     client = get_async_client()
     await client.aclose()
 
-    # Clear scraper pool
     scraper_pool.clear()
 
     # Persist usage data
 
-    """Health check endpoint for monitoring"""
-    env_vars = get_env_vars()
-    missing_critical_vars = []
 
-    if not env_vars['api_keys'] or env_vars['api_keys'] == ['']:
-        missing_critical_vars.append('API_KEYS')
-    if not env_vars['secret_api_endpoint']:
-        missing_critical_vars.append('SECRET_API_ENDPOINT')
-    if not env_vars['secret_api_endpoint_2']:
-        missing_critical_vars.append('SECRET_API_ENDPOINT_2')
-    if not env_vars['secret_api_endpoint_3']:
-        missing_critical_vars.append('SECRET_API_ENDPOINT_3')
-    if not env_vars['secret_api_endpoint_4']:
-        missing_critical_vars.append('SECRET_API_ENDPOINT_4')
-    if not env_vars['secret_api_endpoint_5']: # Check the new endpoint
-        missing_critical_vars.append('SECRET_API_ENDPOINT_5')
-    if not env_vars['mistral_api']:
-        missing_critical_vars.append('MISTRAL_API')
-    if not env_vars['mistral_key']:
-        missing_critical_vars.append('MISTRAL_KEY')
 
-    health_status = {
-        "status": "healthy" if not missing_critical_vars else "unhealthy",
-        "missing_env_vars": missing_critical_vars,
-        "server_status": server_status,
-        "message": "Everything's lit! 🚀" if not missing_critical_vars else "Uh oh, some env vars are missing. 😬"
-    }
-    return JSONResponse(content=health_status)
 
 if __name__ == "__main__":
     import uvicorn
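Both the removed search code above and its replacement below hand chunks from a background producer task to the HTTP response through an asyncio.Queue, with None as the end-of-stream sentinel. A minimal, self-contained sketch of that hand-off (names are illustrative, not taken from the diff):

import asyncio

async def producer(queue: asyncio.Queue) -> None:
    for chunk in ("one", "two", "three"):
        await queue.put({"data": chunk})   # each item stands in for an SSE payload
    await queue.put(None)                  # sentinel: no more chunks

async def consumer() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    asyncio.create_task(producer(queue))
    while True:
        item = await queue.get()
        if item is None:                   # end-of-stream signal
            break
        print(item["data"])

asyncio.run(consumer())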
1 |
import os
|
2 |
import re
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
import json
|
4 |
import datetime
|
5 |
import time
|
|
|
|
|
6 |
import asyncio
|
7 |
+
import logging
|
8 |
+
from pathlib import Path
|
9 |
+
from functools import lru_cache
|
10 |
+
from typing import Optional, Dict, List, Any, Generator, Set
|
11 |
from concurrent.futures import ThreadPoolExecutor
|
12 |
+
|
13 |
+
# Third-party libraries (ensure these are in requirements.txt)
|
14 |
+
from dotenv import load_dotenv
|
15 |
+
from fastapi import FastAPI, HTTPException, Request, Depends, Security, Response
|
16 |
+
from fastapi.responses import StreamingResponse, HTMLResponse, JSONResponse, FileResponse
|
17 |
+
from fastapi.security import APIKeyHeader
|
18 |
+
from pydantic import BaseModel
|
19 |
+
import httpx
|
20 |
+
import uvloop # Use uvloop for performance
|
21 |
from fastapi.middleware.gzip import GZipMiddleware
|
22 |
from starlette.middleware.cors import CORSMiddleware
|
23 |
+
import cloudscraper # For bypassing Cloudflare, potentially unreliable
|
24 |
+
import requests # For synchronous requests like in /dynamo
|
25 |
+
|
26 |
+
# HF Space Note: Ensure usage_tracker.py is in your repository
|
27 |
+
try:
|
28 |
+
from usage_tracker import UsageTracker
|
29 |
+
usage_tracker = UsageTracker()
|
30 |
+
except ImportError:
|
31 |
+
print("Warning: usage_tracker.py not found. Usage tracking will be disabled.")
|
32 |
+
# Create a dummy tracker if the file is missing
|
33 |
+
class DummyUsageTracker:
|
34 |
+
def record_request(self, *args, **kwargs): pass
|
35 |
+
def get_usage_summary(self, *args, **kwargs): return {}
|
36 |
+
def save_data(self, *args, **kwargs): pass
|
37 |
+
usage_tracker = DummyUsageTracker()
|
38 |
+
|
39 |
+
|
40 |
+
# --- Configuration & Setup ---
|
41 |
+
|
42 |
+
# HF Space Note: uvloop can improve performance in I/O bound tasks common in web apps.
|
43 |
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
|
44 |
|
45 |
+
# HF Space Note: Adjust max_workers based on your HF Space resources (CPU).
|
46 |
+
# Higher tiers allow more workers. Start lower (e.g., 4) for free tier.
|
47 |
+
executor = ThreadPoolExecutor(max_workers=8)
|
48 |
|
49 |
+
# HF Space Note: load_dotenv() is useful for local dev but HF Spaces use Secrets.
|
50 |
+
# os.getenv will automatically pick up secrets set in the HF Space settings.
|
51 |
load_dotenv()
|
52 |
|
53 |
+
# Logging setup
|
54 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
|
55 |
+
logger = logging.getLogger(__name__)
|
56 |
+
|
57 |
+
# API key security
|
58 |
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
|
59 |
|
60 |
+
# --- FastAPI App Initialization ---
|
|
|
|
|
61 |
|
62 |
+
app = FastAPI(
|
63 |
+
title="LokiAI API",
|
64 |
+
description="API Proxy for various AI models with usage tracking and streaming.",
|
65 |
+
version="1.0.0"
|
66 |
+
)
|
67 |
|
68 |
+
# Middleware
|
69 |
+
app.add_middleware(GZipMiddleware, minimum_size=1000) # Compress large responses
|
70 |
app.add_middleware(
|
71 |
+
CORSMiddleware, # Allow cross-origin requests (useful for web playgrounds)
|
72 |
+
allow_origins=["*"], # Or restrict to specific origins
|
73 |
allow_credentials=True,
|
74 |
allow_methods=["*"],
|
75 |
allow_headers=["*"],
|
76 |
)
|
77 |
|
78 |
+
# --- Environment Variables & Model Config ---
|
79 |
+
|
80 |
+
@lru_cache(maxsize=1) # Cache environment variables
|
81 |
+
def get_env_vars() -> Dict[str, Any]:
|
82 |
+
"""Loads and returns essential environment variables."""
|
83 |
+
# HF Space Note: Set these as Secrets in your Hugging Face Space settings.
|
84 |
return {
|
85 |
+
'api_keys': set(filter(None, os.getenv('API_KEYS', '').split(','))), # Use set for faster lookup
|
86 |
'secret_api_endpoint': os.getenv('SECRET_API_ENDPOINT'),
|
87 |
'secret_api_endpoint_2': os.getenv('SECRET_API_ENDPOINT_2'),
|
88 |
+
'secret_api_endpoint_3': os.getenv('SECRET_API_ENDPOINT_3'), # Search endpoint
|
89 |
+
'secret_api_endpoint_4': os.getenv('SECRET_API_ENDPOINT_4', "https://text.pollinations.ai/openai"), # Pollinations
|
90 |
+
'secret_api_endpoint_5': os.getenv('SECRET_API_ENDPOINT_5'), # Claude 3 endpoint
|
91 |
+
'mistral_api': os.getenv('MISTRAL_API', "https://api.mistral.ai"),
|
92 |
'mistral_key': os.getenv('MISTRAL_KEY'),
|
93 |
+
'new_img_endpoint': os.getenv('NEW_IMG'), # Image generation endpoint
|
94 |
+
'hf_space_url': os.getenv('HF_SPACE_URL', 'https://your-space-name.hf.space') # HF Space Note: Set this! Used for Referer/Origin checks.
|
95 |
}
|
96 |
|
97 |
+
# Model sets for fast lookups
|
98 |
+
# HF Space Note: Consider moving these large sets to a separate config file (e.g., config.py or models_config.json)
|
99 |
+
# for better organization if they grow larger.
|
100 |
+
mistral_models: Set[str] = {
|
101 |
+
"mistral-large-latest", "pixtral-large-latest", "mistral-moderation-latest",
|
102 |
+
"ministral-3b-latest", "ministral-8b-latest", "open-mistral-nemo",
|
103 |
+
"mistral-small-latest", "mistral-saba-latest", "codestral-latest"
|
|
|
|
|
|
|
|
|
104 |
}
|
105 |
|
106 |
+
pollinations_models: Set[str] = {
|
107 |
+
"openai", "openai-large", "openai-xlarge", "openai-reasoning", "qwen-coder",
|
108 |
+
"llama", "mistral", "searchgpt", "deepseek", "claude-hybridspace",
|
109 |
+
"deepseek-r1", "deepseek-reasoner", "llamalight", "gemini", "gemini-thinking",
|
110 |
+
"hormoz", "phi", "phi-mini", "openai-audio", "llama-scaleway"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
}
|
112 |
|
113 |
+
alternate_models: Set[str] = {
|
114 |
+
"gpt-4o", "deepseek-v3", "llama-3.1-8b-instruct", "llama-3.1-sonar-small-128k-online",
|
115 |
+
"deepseek-r1-uncensored", "tinyswallow1.5b", "andy-3.5", "o3-mini-low",
|
116 |
+
"hermes-3-llama-3.2-3b", "creitin-r1", "fluffy.1-chat", "plutotext-1-text",
|
117 |
+
"command-a", "claude-3-7-sonnet-20250219", "plutogpt-3.5-turbo"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
118 |
}
|
119 |
|
120 |
+
claude_3_models: Set[str] = {
|
121 |
+
"claude-3-7-sonnet", "claude-3-7-sonnet-thinking", "claude 3.5 haiku",
|
122 |
+
"claude 3.5 sonnet", "claude 3.5 haiku", "o3-mini-medium", "o3-mini-high",
|
123 |
+
"grok-3", "grok-3-thinking", "grok 2"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
}
|
125 |
|
126 |
+
supported_image_models: Set[str] = {
|
127 |
+
"Flux Pro Ultra", "grok-2-aurora", "Flux Pro", "Flux Pro Ultra Raw", "Flux Dev",
|
128 |
+
"Flux Schnell", "stable-diffusion-3-large-turbo", "Flux Realism",
|
129 |
+
"stable-diffusion-ultra", "dall-e-3", "sdxl-lightning-4step"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
130 |
}
|
131 |
|
132 |
+
# --- Pydantic Models ---
|
133 |
+
|
134 |
+
class Message(BaseModel):
|
135 |
+
role: str
|
136 |
+
content: Any # Allow content to be string or potentially list for multimodal models
|
137 |
|
|
|
138 |
class Payload(BaseModel):
|
139 |
model: str
|
140 |
+
messages: List[Message]
|
141 |
stream: bool = False
|
142 |
+
# Add other potential OpenAI compatible parameters with defaults
|
143 |
+
max_tokens: Optional[int] = None
|
144 |
+
temperature: Optional[float] = None
|
145 |
+
top_p: Optional[float] = None
|
146 |
+
# ... add others as needed
|
147 |
|
|
|
|
|
148 |
class ImageGenerationPayload(BaseModel):
|
149 |
model: str
|
150 |
prompt: str
|
151 |
+
size: Optional[str] = "1024x1024" # Default size, make optional if API allows
|
152 |
+
n: Optional[int] = 1 # Number of images, OpenAI uses 'n'
|
153 |
+
# HF Space Note: Ensure these parameter names match the target NEW_IMG endpoint API
|
154 |
+
# Renaming from 'number' to 'n' and 'size' type hint correction.
|
155 |
|
156 |
+
# --- Global State & Clients ---
|
157 |
|
158 |
+
server_status: bool = True # For maintenance mode
|
159 |
+
available_model_ids: List[str] = [] # Loaded at startup
|
160 |
|
161 |
+
# HF Space Note: Reusable HTTP client with connection pooling is crucial for performance.
|
162 |
+
# Adjust limits based on expected load and HF Space resources.
|
|
|
|
|
|
|
163 |
@lru_cache(maxsize=1)
|
164 |
+
def get_async_client() -> httpx.AsyncClient:
|
165 |
+
"""Returns a cached instance of httpx.AsyncClient."""
|
166 |
+
# HF Space Note: Timeouts are important to prevent hanging requests.
|
167 |
+
# Keepalive connections reduce handshake overhead.
|
168 |
+
timeout = httpx.Timeout(30.0, connect=10.0) # 30s total, 10s connect
|
169 |
+
limits = httpx.Limits(max_keepalive_connections=20, max_connections=100)
|
170 |
+
return httpx.AsyncClient(timeout=timeout, limits=limits, follow_redirects=True)
|
171 |
+
|
172 |
+
# HF Space Note: cloudscraper pool. Be mindful of potential rate limits or blocks.
|
173 |
+
# Consider alternatives if this becomes unreliable.
|
174 |
+
scraper_pool: List[cloudscraper.CloudScraper] = []
|
175 |
+
MAX_SCRAPERS = 10 # Reduced pool size for potentially lower resource usage
|
176 |
+
|
177 |
+
def get_scraper() -> cloudscraper.CloudScraper:
|
178 |
+
"""Gets a cloudscraper instance from the pool."""
|
179 |
if not scraper_pool:
|
180 |
+
logger.info(f"Initializing {MAX_SCRAPERS} cloudscraper instances...")
|
181 |
for _ in range(MAX_SCRAPERS):
|
182 |
+
# HF Space Note: Scraper creation can be slow, doing it upfront is good.
|
183 |
scraper_pool.append(cloudscraper.create_scraper())
|
184 |
+
logger.info("Cloudscraper pool initialized.")
|
185 |
+
# Simple round-robin selection
|
186 |
+
return scraper_pool[int(time.monotonic() * 1000) % MAX_SCRAPERS]
|
187 |
|
188 |
+
# --- Security & Authentication ---
|
189 |
|
|
|
190 |
async def verify_api_key(
|
191 |
request: Request,
|
192 |
+
api_key: Optional[str] = Security(api_key_header)
|
193 |
) -> bool:
|
194 |
+
"""Verifies the provided API key against environment variables."""
|
195 |
+
env_vars = get_env_vars()
|
196 |
+
valid_api_keys = env_vars.get('api_keys', set())
|
197 |
+
hf_space_url = env_vars.get('hf_space_url', '')
|
198 |
+
|
199 |
+
# Allow bypass if the referer is from the known HF Space playground URLs
|
200 |
+
# HF Space Note: Make HF_SPACE_URL a secret for flexibility.
|
201 |
referer = request.headers.get("referer", "")
|
202 |
+
if hf_space_url and referer.startswith((f"{hf_space_url}/playground", f"{hf_space_url}/image-playground")):
|
203 |
+
logger.debug(f"API Key check bypassed for referer: {referer}")
|
204 |
return True
|
205 |
+
|
206 |
if not api_key:
|
207 |
+
logger.warning("API Key missing.")
|
208 |
+
raise HTTPException(status_code=403, detail="Not authenticated: No API key provided")
|
209 |
+
|
210 |
+
# Clean 'Bearer ' prefix if present
|
|
|
|
|
211 |
if api_key.startswith('Bearer '):
|
212 |
+
api_key = api_key[7:]
|
213 |
+
|
214 |
+
if not valid_api_keys:
|
215 |
+
logger.error("API keys are not configured on the server (API_KEYS secret missing?).")
|
216 |
+
raise HTTPException(status_code=500, detail="Server configuration error: API keys not set")
|
217 |
+
|
218 |
+
if api_key not in valid_api_keys:
|
219 |
+
logger.warning(f"Invalid API key received: {api_key[:4]}...") # Log prefix only
|
220 |
+
raise HTTPException(status_code=403, detail="Not authenticated: Invalid API key")
|
221 |
+
|
222 |
+
logger.debug("API Key verified successfully.")
|
|
|
|
|
|
|
|
|
|
|
|
|
223 |
return True
|
224 |
|
225 |
+
# --- Model & File Loading ---
|
226 |
+
|
227 |
@lru_cache(maxsize=1)
|
228 |
+
def load_models_data() -> List[Dict]:
|
229 |
+
"""Loads model data from models.json."""
|
230 |
+
# HF Space Note: Ensure models.json is in the root of your HF Space repo.
|
231 |
+
models_file = Path(__file__).parent / 'models.json'
|
232 |
+
if not models_file.is_file():
|
233 |
+
logger.error("models.json not found!")
|
234 |
+
return []
|
235 |
try:
|
236 |
+
with open(models_file, 'r') as f:
|
|
|
237 |
return json.load(f)
|
238 |
except (FileNotFoundError, json.JSONDecodeError) as e:
|
239 |
+
logger.error(f"Error loading models.json: {e}")
|
240 |
return []
|
241 |
|
242 |
+
async def get_models() -> List[Dict]:
|
243 |
+
"""Async wrapper to get models data."""
|
244 |
models_data = load_models_data()
|
245 |
if not models_data:
|
246 |
raise HTTPException(status_code=500, detail="Error loading available models")
|
247 |
return models_data
|
248 |
|
249 |
+
# --- Static File Serving ---
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
250 |
|
251 |
+
# HF Space Note: Cache frequently accessed static files in memory.
|
|
|
|
|
|
|
|
|
|
|
|
|
252 |
@lru_cache(maxsize=10)
|
253 |
+
def read_static_file(file_path: str) -> Optional[str]:
|
254 |
+
"""Reads a static file, caching the result."""
|
255 |
+
full_path = Path(__file__).parent / file_path
|
256 |
+
if not full_path.is_file():
|
257 |
+
logger.warning(f"Static file not found: {file_path}")
|
258 |
+
return None
|
259 |
try:
|
260 |
+
with open(full_path, "r", encoding="utf-8") as file:
|
261 |
return file.read()
|
262 |
+
except Exception as e:
|
263 |
+
logger.error(f"Error reading static file {file_path}: {e}")
|
264 |
return None
|
265 |
|
266 |
+
async def serve_static_html(file_path: str) -> HTMLResponse:
|
267 |
+
"""Serves a static HTML file."""
|
268 |
+
content = read_static_file(file_path)
|
269 |
+
if content is None:
|
270 |
+
return HTMLResponse(content=f"<h1>Error: {file_path} not found</h1>", status_code=404)
|
271 |
+
return HTMLResponse(content=content)
|
272 |
|
273 |
+
# --- API Endpoints ---
|
|
|
|
|
|
|
274 |
|
275 |
+
# Basic Routes & Static Files
|
276 |
+
@app.get("/favicon.ico", include_in_schema=False)
|
277 |
+
async def favicon():
|
278 |
+
favicon_path = Path(__file__).parent / "favicon.ico"
|
279 |
+
if favicon_path.is_file():
|
280 |
+
return FileResponse(favicon_path, media_type="image/vnd.microsoft.icon")
|
281 |
+
raise HTTPException(status_code=404, detail="favicon.ico not found")
|
282 |
+
|
283 |
+
@app.get("/banner.jpg", include_in_schema=False)
|
284 |
+
async def banner():
|
285 |
+
banner_path = Path(__file__).parent / "banner.jpg"
|
286 |
+
if banner_path.is_file():
|
287 |
+
return FileResponse(banner_path, media_type="image/jpeg") # Assuming JPEG
|
288 |
+
raise HTTPException(status_code=404, detail="banner.jpg not found")
|
289 |
+
|
290 |
+
@app.get("/ping", tags=["Utility"])
|
291 |
async def ping():
|
292 |
+
"""Simple health check endpoint."""
|
293 |
+
return {"message": "pong"}
|
294 |
|
295 |
+
@app.get("/", response_class=HTMLResponse, tags=["Frontend"])
|
296 |
async def root():
|
297 |
+
"""Serves the main index HTML page."""
|
298 |
+
return await serve_static_html("index.html")
|
299 |
+
|
300 |
+
@app.get("/script.js", response_class=Response, tags=["Frontend"], include_in_schema=False)
|
301 |
+
async def script_js():
|
302 |
+
content = read_static_file("script.js")
|
303 |
+
if content is None:
|
304 |
+
return Response(content="/* script.js not found */", status_code=404, media_type="application/javascript")
|
305 |
+
return Response(content=content, media_type="application/javascript")
|
306 |
+
|
307 |
+
@app.get("/style.css", response_class=Response, tags=["Frontend"], include_in_schema=False)
|
308 |
+
async def style_css():
|
309 |
+
content = read_static_file("style.css")
|
310 |
+
if content is None:
|
311 |
+
return Response(content="/* style.css not found */", status_code=404, media_type="text/css")
|
312 |
+
return Response(content=content, media_type="text/css")
|
313 |
+
|
314 |
+
@app.get("/playground", response_class=HTMLResponse, tags=["Frontend"])
|
315 |
+
async def playground():
|
316 |
+
"""Serves the chat playground HTML page."""
|
317 |
+
return await serve_static_html("playground.html")
|
318 |
+
|
319 |
+
@app.get("/image-playground", response_class=HTMLResponse, tags=["Frontend"])
|
320 |
+
async def image_playground():
|
321 |
+
"""Serves the image playground HTML page."""
|
322 |
+
return await serve_static_html("image-playground.html")
|
323 |
+
|
324 |
+
# Dynamic Page Example
|
325 |
+
@app.get("/dynamo", response_class=HTMLResponse, tags=["Examples"])
|
326 |
async def dynamic_ai_page(request: Request):
|
327 |
+
"""Generates a dynamic HTML page using an AI model (example)."""
|
328 |
+
# HF Space Note: This uses a hardcoded URL to *itself* if running in the space.
|
329 |
+
# Ensure the HF_SPACE_URL secret is set correctly.
|
330 |
+
env_vars = get_env_vars()
|
331 |
+
hf_space_url = env_vars.get('hf_space_url', '')
|
332 |
+
if not hf_space_url:
|
333 |
+
raise HTTPException(status_code=500, detail="HF_SPACE_URL environment variable not set.")
|
334 |
+
|
335 |
+
user_agent = request.headers.get('user-agent', 'Unknown')
|
336 |
+
client_ip = request.client.host if request.client else "Unknown"
|
337 |
+
location = f"IP: {client_ip}" # Basic IP, location requires GeoIP lookup (extra dependency)
|
338 |
+
|
339 |
prompt = f"""
|
340 |
+
Generate a cool, dynamic HTML page for a user with the following details:
|
341 |
+
- App Name: "LokiAI"
|
342 |
- User-Agent: {user_agent}
|
343 |
+
- Location Info: {location}
|
344 |
+
- Style: Cyberpunk aesthetic, minimalist layout, maybe some retro touches.
|
345 |
+
- Content: Include a heading, a short motivational or witty message, and perhaps a subtle animation. Use inline CSS for styling within a <style> tag.
|
346 |
+
- Output: Provide ONLY the raw HTML code, starting with <!DOCTYPE html>. Do not wrap it in backticks or add explanations.
|
|
|
347 |
"""
|
348 |
+
|
349 |
payload = {
|
350 |
+
"model": "mistral-small-latest", # Or another capable model
|
351 |
+
"messages": [{"role": "user", "content": prompt}],
|
352 |
+
"max_tokens": 1000,
|
353 |
+
"temperature": 0.7
|
354 |
}
|
|
|
355 |
headers = {
|
356 |
+
# HF Space Note: Use the space's own URL and a valid API key if required by your setup.
|
357 |
+
# Here, we assume the playground key bypass works or use a dedicated internal key.
|
358 |
+
"Authorization": f"Bearer {list(env_vars['api_keys'])[0] if env_vars['api_keys'] else 'dummy-key'}" # Use first key or dummy
|
359 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
360 |
|
361 |
+
try:
|
362 |
+
# HF Space Note: Use the async client for internal requests too.
|
363 |
+
client = get_async_client()
|
364 |
+
api_url = f"{hf_space_url}/chat/completions" # Call own endpoint
|
365 |
+
response = await client.post(api_url, json=payload, headers=headers)
|
366 |
+
response.raise_for_status() # Raise exception for bad status codes
|
367 |
+
data = response.json()
|
368 |
+
|
369 |
+
html_content = data.get('choices', [{}])[0].get('message', {}).get('content', '')
|
370 |
|
371 |
+
# Basic cleanup (remove potential markdown backticks if model adds them)
|
372 |
+
html_content = re.sub(r"^```html\s*", "", html_content, flags=re.IGNORECASE)
|
373 |
+
html_content = re.sub(r"\s*```$", "", html_content)
|
374 |
|
375 |
+
if not html_content.strip().lower().startswith("<!doctype html"):
|
376 |
+
logger.warning("Dynamo page generation might be incomplete or malformed.")
|
377 |
+
# Optionally return a fallback static page here
|
378 |
|
379 |
+
return HTMLResponse(content=html_content)
|
|
|
380 |
|
381 |
+
except httpx.HTTPStatusError as e:
|
382 |
+
logger.error(f"Error calling self API for /dynamo: {e.response.status_code} - {e.response.text}")
|
383 |
+
raise HTTPException(status_code=502, detail=f"Failed to generate dynamic content: Upstream API error {e.response.status_code}")
|
384 |
+
except Exception as e:
|
385 |
+
logger.error(f"Unexpected error in /dynamo: {e}", exc_info=True)
|
386 |
+
raise HTTPException(status_code=500, detail="Failed to generate dynamic content due to an internal error.")
|
387 |
+
|
388 |
+
|
389 |
+
# Vetra Example (Fetching from GitHub)
|
390 |
+
# HF Space Note: Ensure outbound requests to raw.githubusercontent.com are allowed.
|
391 |
+
GITHUB_BASE = "https://raw.githubusercontent.com/Parthsadaria/Vetra/main"
|
392 |
+
VETRA_FILES = {"html": "index.html", "css": "style.css", "js": "script.js"}
|
393 |
|
394 |
+
async def get_github_file(filename: str) -> Optional[str]:
|
395 |
+
"""Fetches a file from the Vetra GitHub repo."""
|
396 |
url = f"{GITHUB_BASE}/{filename}"
|
397 |
+
try:
|
398 |
+
client = get_async_client()
|
399 |
res = await client.get(url)
|
400 |
+
res.raise_for_status()
|
401 |
+
return res.text
|
402 |
+
except httpx.RequestError as e:
|
403 |
+
logger.error(f"Error fetching GitHub file {url}: {e}")
|
404 |
+
return None
|
405 |
+
except httpx.HTTPStatusError as e:
|
406 |
+
logger.error(f"GitHub file {url} returned status {e.response.status_code}")
|
407 |
+
return None
|
408 |
|
409 |
+
@app.get("/vetra", response_class=HTMLResponse, tags=["Examples"])
|
410 |
async def serve_vetra():
|
411 |
+
"""Serves the Vetra application by fetching components from GitHub."""
|
412 |
+
logger.info("Fetching Vetra files from GitHub...")
|
413 |
+
# Fetch files concurrently
|
414 |
+
html_task = asyncio.create_task(get_github_file(VETRA_FILES["html"]))
|
415 |
+
css_task = asyncio.create_task(get_github_file(VETRA_FILES["css"]))
|
416 |
+
js_task = asyncio.create_task(get_github_file(VETRA_FILES["js"]))
|
417 |
+
|
418 |
+
html, css, js = await asyncio.gather(html_task, css_task, js_task)
|
419 |
|
420 |
if not html:
|
421 |
+
logger.error("Failed to fetch Vetra index.html")
|
422 |
+
return HTMLResponse(content="<h1>Error: Could not load Vetra application (HTML missing)</h1>", status_code=502)
|
423 |
+
|
424 |
+
# Inject CSS and JS into HTML
|
425 |
+
css_content = f"<style>{css or '/* CSS failed to load */'}</style>"
|
426 |
+
js_content = f"<script>{js or '// JS failed to load'}</script>"
|
427 |
|
428 |
+
# Inject carefully before closing tags
|
429 |
+
final_html = html.replace("</head>", f"{css_content}\n</head>", 1)
|
430 |
+
final_html = final_html.replace("</body>", f"{js_content}\n</body>", 1)
|
|
|
|
|
|
|
|
|
431 |
|
432 |
+
logger.info("Successfully served Vetra application.")
|
433 |
return HTMLResponse(content=final_html)
|
434 |
|
435 |
|
436 |
+
# Model Info Endpoint
|
437 |
+
@app.get("/api/v1/models", tags=["Models"])
|
438 |
+
@app.get("/models", tags=["Models"])
|
439 |
+
async def return_models():
|
440 |
+
"""Returns the list of available models loaded from models.json."""
|
441 |
+
# HF Space Note: This endpoint now relies on models.json being present.
|
442 |
+
# It no longer dynamically adds models defined only in the script's sets.
|
443 |
+
# Ensure models.json is comprehensive or adjust startup logic if needed.
|
444 |
+
return await get_models()
|
445 |
|
446 |
+
# Search Endpoint (using cloudscraper)
|
447 |
+
# HF Space Note: This uses cloudscraper which might be blocked or require updates.
|
448 |
+
# Consider replacing with a more stable search API if possible.
|
449 |
+
async def generate_search_async(query: str, systemprompt: Optional[str] = None) -> asyncio.Queue:
|
450 |
+
"""Performs search using the configured backend and streams results."""
|
451 |
+
queue = asyncio.Queue()
|
452 |
+
env_vars = get_env_vars()
|
453 |
+
search_endpoint = env_vars.get('secret_api_endpoint_3')
|
454 |
|
455 |
+
async def _fetch_search_data():
|
456 |
+
if not search_endpoint:
|
457 |
+
await queue.put({"error": "Search API endpoint (SECRET_API_ENDPOINT_3) not configured"})
|
458 |
+
            await queue.put(None)  # Signal end
            return

        try:
            scraper = get_scraper()  # Get a scraper instance from the pool
            loop = asyncio.get_running_loop()

            system_message = systemprompt or "You are a helpful search assistant."
            messages = [
                {"role": "system", "content": system_message},
                {"role": "user", "content": query},
            ]
            payload = {
                "model": "searchgpt",  # Assuming the endpoint expects this model name
                "messages": messages,
                "stream": True  # Explicitly request streaming from the backend
            }
            headers = {"User-Agent": "Mozilla/5.0"}  # Standard user agent

            # HF Space Note: Run the synchronous scraper call in an executor thread.
            # run_in_executor() does not forward keyword arguments, so the call is
            # wrapped in a lambda.
            response = await loop.run_in_executor(
                executor,
                lambda: scraper.post(
                    search_endpoint,
                    json=payload,
                    headers=headers,
                    stream=True  # Request streaming from the requests library's perspective
                )
            )

            response.raise_for_status()

            # Process the SSE stream.
            # HF Space Note: Iterating lines on the response directly can block if not
            # handled carefully; iter_lines with decode_unicode=True is generally safe.
            for line in response.iter_lines(decode_unicode=True):
                if line.startswith("data: "):
                    try:
                        data_str = line[6:]
                        if data_str.strip() == "[DONE]":  # OpenAI-style completion marker
                            break
                        json_data = json.loads(data_str)
                        # Assuming an OpenAI-compatible streaming format
                        delta = json_data.get("choices", [{}])[0].get("delta", {})
                        content = delta.get("content")
                        if content:
                            # Reconstruct an OpenAI-like SSE chunk
                            chunk = {
                                "id": json_data.get("id"),
                                "object": "chat.completion.chunk",
                                "created": json_data.get("created", int(time.time())),
                                "model": "searchgpt",
                                "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]
                            }
                            await queue.put({"data": f"data: {json.dumps(chunk)}\n\n", "text": content})
                        # Check for a finish reason
                        finish_reason = json_data.get("choices", [{}])[0].get("finish_reason")
                        if finish_reason:
                            chunk = {
                                "id": json_data.get("id"),
                                "object": "chat.completion.chunk",
                                "created": json_data.get("created", int(time.time())),
                                "model": "searchgpt",
                                "choices": [{"index": 0, "delta": {}, "finish_reason": finish_reason}]
                            }
                            await queue.put({"data": f"data: {json.dumps(chunk)}\n\n", "text": ""})
                            break  # Stop processing after the finish reason

                    except json.JSONDecodeError:
                        logger.warning(f"Failed to decode JSON from search stream: {line}")
                        continue
                    except Exception as e:
                        logger.error(f"Error processing search stream chunk: {e}", exc_info=True)
                        await queue.put({"error": f"Error processing stream: {e}"})
                        break  # Stop on processing error

        except requests.exceptions.RequestException as e:
            logger.error(f"Search request failed: {e}")
            await queue.put({"error": f"Search request failed: {e}"})
        except Exception as e:
            logger.error(f"Unexpected error during search: {e}", exc_info=True)
            await queue.put({"error": f"An unexpected error occurred during search: {e}"})
        finally:
            await queue.put(None)  # Signal completion

    asyncio.create_task(_fetch_search_data())
    return queue
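
# The queue-based hand-off above decouples the blocking scraper iteration
# (running in the thread pool) from the async SSE response. A minimal,
# self-contained sketch of the same producer/consumer pattern for reference:
#
#   async def producer(q: asyncio.Queue):
#       for i in range(3):
#           await q.put(i)
#       await q.put(None)  # sentinel: end of stream
#
#   async def consumer(q: asyncio.Queue):
#       while (item := await q.get()) is not None:
#           print(item)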
546 |
+
@app.get("/searchgpt", tags=["Search"])
|
547 |
+
async def search_gpt(q: str, stream: bool = True, systemprompt: Optional[str] = None):
|
548 |
+
"""
|
549 |
+
Performs a search using the backend search model and streams results.
|
550 |
+
Pass `stream=false` to get the full response at once.
|
551 |
+
"""
|
552 |
if not q:
|
553 |
raise HTTPException(status_code=400, detail="Query parameter 'q' is required")
|
554 |
|
555 |
+
# HF Space Note: Ensure usage_tracker is thread-safe if used across async/sync boundaries.
|
556 |
+
# The dummy tracker used when the module isn't found is safe.
|
557 |
usage_tracker.record_request(endpoint="/searchgpt")
|
558 |
|
559 |
+
queue = await generate_search_async(q, systemprompt=systemprompt)
|
560 |
|
561 |
if stream:
|
562 |
async def stream_generator():
|
563 |
+
full_response_text = "" # Keep track for non-streaming case if needed
|
564 |
while True:
|
565 |
item = await queue.get()
|
566 |
+
if item is None: # End of stream signal
|
567 |
break
|
|
|
568 |
if "error" in item:
|
569 |
+
# HF Space Note: Log errors server-side, return generic error to client for security.
|
570 |
+
logger.error(f"Search stream error: {item['error']}")
|
571 |
+
# Send an error event in the stream
|
572 |
+
error_event = {"error": {"message": "Search failed.", "code": 500}}
|
573 |
+
yield f"data: {json.dumps(error_event)}\n\n"
|
574 |
break
|
|
|
575 |
if "data" in item:
|
576 |
yield item["data"]
|
577 |
+
full_response_text += item.get("text", "")
|
578 |
+
# Optionally yield a [DONE] message if backend doesn't guarantee it
|
579 |
+
# yield "data: [DONE]\n\n"
|
580 |
|
581 |
return StreamingResponse(
|
582 |
stream_generator(),
|
583 |
+
media_type="text/event-stream",
|
584 |
+
headers={
|
585 |
+
"Content-Type": "text/event-stream",
|
586 |
+
"Cache-Control": "no-cache",
|
587 |
+
"Connection": "keep-alive",
|
588 |
+
"X-Accel-Buffering": "no" # Crucial for Nginx/proxies in HF Spaces
|
589 |
+
}
|
590 |
)
|
591 |
else:
|
592 |
+
# Collect full response for non-streaming request
|
593 |
+
full_response_text = ""
|
594 |
while True:
|
595 |
item = await queue.get()
|
596 |
if item is None:
|
597 |
break
|
|
|
598 |
if "error" in item:
|
599 |
+
logger.error(f"Search non-stream error: {item['error']}")
|
600 |
+
raise HTTPException(status_code=502, detail=f"Search failed: {item['error']}")
|
601 |
+
full_response_text += item.get("text", "")
|
602 |
+
|
603 |
+
# Mimic OpenAI non-streaming response structure
|
604 |
+
return JSONResponse(content={
|
605 |
+
"id": f"search-{int(time.time())}",
|
606 |
+
"object": "chat.completion",
|
607 |
+
"created": int(time.time()),
|
608 |
+
"model": "searchgpt",
|
609 |
+
"choices": [{
|
610 |
+
"index": 0,
|
611 |
+
"message": {
|
612 |
+
"role": "assistant",
|
613 |
+
"content": full_response_text,
|
614 |
+
},
|
615 |
+
"finish_reason": "stop",
|
616 |
+
}],
|
617 |
+
"usage": { # Note: Token usage is unknown here
|
618 |
+
"prompt_tokens": None,
|
619 |
+
"completion_tokens": None,
|
620 |
+
"total_tokens": None,
|
621 |
+
}
|
622 |
+
})
|
623 |
|
624 |
|
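
# Client-side sketch (not part of this app) for consuming the /searchgpt SSE
# stream with httpx. The base URL is a placeholder; point it at your Space.
#
#   import asyncio, json, httpx
#
#   async def demo_search():
#       async with httpx.AsyncClient(timeout=None) as c:
#           async with c.stream("GET", "http://localhost:7860/searchgpt",
#                               params={"q": "what is FastAPI?"}) as r:
#               async for line in r.aiter_lines():
#                   data = line[6:].strip() if line.startswith("data: ") else ""
#                   if data and data != "[DONE]":
#                       chunk = json.loads(data)
#                       delta = chunk.get("choices", [{}])[0].get("delta", {})
#                       print(delta.get("content") or "", end="")
#
#   asyncio.run(demo_search())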

# Main Chat Completions Proxy
@app.post("/api/v1/chat/completions", tags=["Chat Completions"])
@app.post("/chat/completions", tags=["Chat Completions"])
async def get_completion(
    payload: Payload,
    request: Request,
    authenticated: bool = Depends(verify_api_key)  # Apply authentication
):
    """
    Proxies chat completion requests to the appropriate backend API based on the model.
    Supports streaming (SSE).
    """
    if not server_status:
        raise HTTPException(status_code=503, detail="Server is under maintenance.")

    model_to_use = payload.model or "gpt-4o-mini"  # Default model

    # HF Space Note: Check against the models loaded at startup.
    if available_model_ids and model_to_use not in available_model_ids:
        logger.warning(f"Requested model '{model_to_use}' not in available list.")
        # Check whether it belongs to a known category even if it isn't in models.json
        known_categories = mistral_models | pollinations_models | alternate_models | claude_3_models
        if model_to_use not in known_categories:
            raise HTTPException(
                status_code=400,
                detail=f"Model '{model_to_use}' is not available or recognized. Check /models."
            )
        logger.info(f"Allowing known category model '{model_to_use}' despite not being in models.json.")

    # Log the request asynchronously
    asyncio.create_task(log_request(request, model_to_use))
    usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")

    # Prepare the payload for the target API
    payload_dict = payload.dict(exclude_none=True)  # Exclude None values
    payload_dict["model"] = model_to_use  # Ensure the model is set

    env_vars = get_env_vars()
    hf_space_url = env_vars.get('hf_space_url', '')  # Needed for Referer/Origin

    # Determine the target endpoint and headers
    endpoint = None
    custom_headers = {}

    if model_to_use in mistral_models:
        endpoint = env_vars.get('mistral_api')
        api_key = env_vars.get('mistral_key')
        if not endpoint or not api_key:
            raise HTTPException(status_code=500, detail="Mistral API endpoint or key not configured.")
        custom_headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            "Accept": "application/json"
        }
        # Mistral-specific payload adjustments would go here if needed, e.g.:
        # payload_dict.pop('system', None)
    elif model_to_use in pollinations_models:
        endpoint = env_vars.get('secret_api_endpoint_4')
        if not endpoint:
            raise HTTPException(status_code=500, detail="Pollinations API endpoint (SECRET_API_ENDPOINT_4) not configured.")
        # Pollinations might need specific headers; add them here if so.
        custom_headers = {"Content-Type": "application/json"}
    elif model_to_use in alternate_models:
        endpoint = env_vars.get('secret_api_endpoint_2')
        if not endpoint:
            raise HTTPException(status_code=500, detail="Alternate API endpoint (SECRET_API_ENDPOINT_2) not configured.")
        custom_headers = {"Content-Type": "application/json"}
    elif model_to_use in claude_3_models:
        endpoint = env_vars.get('secret_api_endpoint_5')
        if not endpoint:
            raise HTTPException(status_code=500, detail="Claude 3 API endpoint (SECRET_API_ENDPOINT_5) not configured.")
        custom_headers = {"Content-Type": "application/json"}
        # Claude-specific headers (like anthropic-version) might be needed:
        # custom_headers["anthropic-version"] = "2023-06-01"
    else:  # Default endpoint
        endpoint = env_vars.get('secret_api_endpoint')
        if not endpoint:
            raise HTTPException(status_code=500, detail="Default API endpoint (SECRET_API_ENDPOINT) not configured.")
        # The default endpoint might need Origin/Referer headers
        custom_headers = {"Content-Type": "application/json"}
        if hf_space_url:
            custom_headers.update({"Origin": hf_space_url, "Referer": hf_space_url})

    target_url = f"{endpoint.rstrip('/')}/v1/chat/completions"  # Assume an OpenAI-compatible path
    logger.info(f"Proxying request for model '{model_to_use}' to endpoint: {endpoint}")

    client = get_async_client()

    async def stream_generator():
        """Generator for streaming the upstream response."""
        try:
            async with client.stream("POST", target_url, json=payload_dict, headers=custom_headers) as response:
                # Check for initial errors before streaming
                if response.status_code >= 400:
                    error_body = await response.aread()
                    logger.error(f"Upstream API error: {response.status_code} - {error_body.decode()}")
                    # Try to parse the error detail from upstream
                    detail = f"Upstream API error: {response.status_code}"
                    try:
                        error_json = json.loads(error_body)
                        detail = error_json.get('error', {}).get('message', detail)
                    except json.JSONDecodeError:
                        pass
                    # Send the error as an SSE event
                    error_event = {"error": {"message": detail, "code": response.status_code}}
                    yield f"data: {json.dumps(error_event)}\n\n"
                    return  # Stop generation

                # Stream the response line by line, passing the data through directly
                async for line in response.aiter_lines():
                    if line:
                        yield line + "\n"
                # Some backends might not send [DONE]; uncomment if needed:
                # yield "data: [DONE]\n\n"

        except httpx.TimeoutException:
            logger.error(f"Request to {target_url} timed out.")
            error_event = {"error": {"message": "Request timed out", "code": 504}}
            yield f"data: {json.dumps(error_event)}\n\n"
        except httpx.RequestError as e:
            logger.error(f"Failed to connect to upstream API {target_url}: {e}")
            error_event = {"error": {"message": f"Upstream connection error: {e}", "code": 502}}
            yield f"data: {json.dumps(error_event)}\n\n"
        except Exception as e:
            logger.error(f"An unexpected error occurred during streaming proxy: {e}", exc_info=True)
            error_event = {"error": {"message": f"Internal server error: {e}", "code": 500}}
            yield f"data: {json.dumps(error_event)}\n\n"

    if payload.stream:
        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream",
            headers={
                "Content-Type": "text/event-stream",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"  # Essential for HF Spaces proxying SSE
            }
        )
    else:
        # Handle a non-streaming request by collecting the streamed chunks
        full_response_content = ""
        final_json_response = None
        async for line in stream_generator():
            if line.startswith("data: "):
                data_str = line[6:].strip()
                if data_str == "[DONE]":
                    break
                try:
                    chunk = json.loads(data_str)
                    # Check for an error chunk
                    if "error" in chunk:
                        logger.error(f"Received error during non-stream collection: {chunk['error']}")
                        raise HTTPException(
                            status_code=chunk['error'].get('code', 502),
                            detail=chunk['error'].get('message', 'Upstream API error')
                        )

                    # Accumulate content from the delta
                    delta = chunk.get("choices", [{}])[0].get("delta", {})
                    content = delta.get("content")
                    if content:
                        full_response_content += content

                    # Keep the last chunk's structure to reconstruct the final
                    # response; it should carry id, created, etc., while the
                    # choices/message part is overwritten below.
                    final_json_response = chunk
                    # Check for a finish reason
                    finish_reason = chunk.get("choices", [{}])[0].get("finish_reason")
                    if finish_reason:
                        break  # Stop collecting

                except json.JSONDecodeError:
                    logger.warning(f"Could not decode JSON chunk in non-stream mode: {data_str}")
                except HTTPException:
                    raise  # Don't mask the upstream error raised above as a 500
                except Exception as e:
                    logger.error(f"Error processing chunk in non-stream mode: {e}")
                    raise HTTPException(status_code=500, detail="Error processing response stream.")

        if final_json_response is None:
            # No valid data chunks were received
            logger.error("No valid response chunks received for non-streaming request.")
            raise HTTPException(status_code=502, detail="Failed to get valid response from upstream API.")

        # Reconstruct an OpenAI-like non-streaming response
        final_response_obj = {
            "id": final_json_response.get("id", f"chatcmpl-{int(time.time())}"),
            "object": "chat.completion",
            "created": final_json_response.get("created", int(time.time())),
            "model": model_to_use,  # Use the requested model
            "choices": [{
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": full_response_content,
                },
                "finish_reason": final_json_response.get("choices", [{}])[0].get("finish_reason", "stop"),
            }],
            "usage": {  # Token usage might be in the last chunk for some APIs, otherwise unknown
                "prompt_tokens": None,
                "completion_tokens": None,
                "total_tokens": None,
            }
        }
        # Extract usage if present in the (potentially non-standard) final chunk
        usage_data = final_json_response.get("usage")
        if isinstance(usage_data, dict):
            final_response_obj["usage"].update(usage_data)

        return JSONResponse(content=final_response_obj)
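
# Request sketch (placeholders: the localhost base URL and the "sk-..." key;
# assumes verify_api_key accepts "Bearer <key>" in the Authorization header,
# with <key> drawn from the API_KEYS secret):
#
#   curl http://localhost:7860/chat/completions \
#     -H "Authorization: Bearer sk-..." \
#     -H "Content-Type: application/json" \
#     -d '{"model": "gpt-4o-mini", "stream": true,
#          "messages": [{"role": "user", "content": "Hello"}]}'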

# Image Generation Endpoint
@app.post("/images/generations", tags=["Image Generation"])
async def create_image(
    payload: ImageGenerationPayload,
    authenticated: bool = Depends(verify_api_key)
):
    """
    Generates images from a text prompt using the configured backend.
    """
    if not server_status:
        raise HTTPException(status_code=503, detail="Server is under maintenance.")

    if payload.model not in supported_image_models:
        raise HTTPException(
            status_code=400,
            detail=f"Model '{payload.model}' is not supported for image generation. Supported: {', '.join(supported_image_models)}"
        )

    usage_tracker.record_request(model=payload.model, endpoint="/images/generations")

    env_vars = get_env_vars()
    target_api_url = env_vars.get('new_img_endpoint')
    if not target_api_url:
        raise HTTPException(status_code=500, detail="Image generation endpoint (NEW_IMG) not configured.")

    # Prepare the payload for the target API (adjust keys if needed).
    # HF Space Note: Ensure the keys match what the NEW_IMG endpoint actually
    # expects; an OpenAI-compatible API is assumed here.
    api_payload = {
        "model": payload.model,
        "prompt": payload.prompt,
        "n": payload.n,
        "size": payload.size
    }
    # Remove None values the target API might not like
    api_payload = {k: v for k, v in api_payload.items() if v is not None}

    logger.info(f"Requesting image generation for model '{payload.model}' from {target_api_url}")
    client = get_async_client()

    try:
        # HF Space Note: Image generation can take time; use a longer timeout if
        # needed, and consider webhooks or polling if the backend supports them.
        response = await client.post(target_api_url, json=api_payload, timeout=120.0)  # 2-minute timeout
        response.raise_for_status()  # Raise on HTTP errors

        # Return the backend's response verbatim
        return JSONResponse(content=response.json())

    except httpx.TimeoutException:
        logger.error(f"Image generation request to {target_api_url} timed out.")
        raise HTTPException(status_code=504, detail="Image generation request timed out.")
    except httpx.HTTPStatusError as e:
        logger.error(f"Image generation API error: {e.response.status_code} - {e.response.text}")
        detail = f"Image generation failed: Upstream API error {e.response.status_code}"
        try:
            err_json = e.response.json()
            detail = err_json.get('error', {}).get('message', detail)
        except json.JSONDecodeError:
            pass
        raise HTTPException(status_code=e.response.status_code, detail=detail)
    except httpx.RequestError as e:
        logger.error(f"Error connecting to image generation service {target_api_url}: {e}")
        raise HTTPException(status_code=502, detail=f"Error connecting to image generation service: {e}")
    except Exception as e:
        logger.error(f"Unexpected error during image generation: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail=f"An unexpected error occurred during image generation: {e}")
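
# Request sketch (the model name and size are illustrative placeholders; check
# supported_image_models for what this deployment actually accepts):
#
#   curl http://localhost:7860/images/generations \
#     -H "Authorization: Bearer sk-..." \
#     -H "Content-Type: application/json" \
#     -d '{"model": "<image-model-id>", "prompt": "a lighthouse at dusk",
#          "n": 1, "size": "1024x1024"}'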

# --- Utility & Admin Endpoints ---

async def log_request(request: Request, model: Optional[str] = None):
    """Logs basic request information asynchronously."""
    # HF Space Note: Avoid logging sensitive info like the full IP or headers
    # unless necessary; hashing the IP provides some privacy.
    client_host = request.client.host if request.client else "unknown"
    ip_hash = hash(client_host) % 10000
    timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z")
    log_message = f"Timestamp: {timestamp}, IP Hash: {ip_hash}, Method: {request.method}, Path: {request.url.path}"
    if model:
        log_message += f", Model: {model}"
    logger.info(log_message)
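
# Note: Python's built-in hash() for strings is salted per process, so the IP
# hash above changes on every restart. If stable pseudonymous IDs are wanted,
# a keyed digest is one option (sketch; the salt value is a placeholder):
#
#   import hashlib
#
#   def stable_ip_hash(ip: str, salt: str = "change-me") -> int:
#       return int(hashlib.sha256(f"{salt}{ip}".encode()).hexdigest(), 16) % 10000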

@app.get("/usage", tags=["Admin"])
async def get_usage(days: int = 7):
    """Retrieves aggregated usage statistics."""
    # HF Space Note: Ensure usage_tracker methods are efficient, especially
    # get_usage_summary; caching might be needed if it becomes slow.
    if days <= 0:
        raise HTTPException(status_code=400, detail="Number of days must be positive.")
    try:
        # Run the potentially CPU-bound summary generation in the executor
        loop = asyncio.get_running_loop()
        summary = await loop.run_in_executor(executor, usage_tracker.get_usage_summary, days)
        return summary
    except Exception as e:
        logger.error(f"Error retrieving usage statistics: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to retrieve usage statistics.")
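
# Illustrative /usage response shape, inferred from the keys that
# generate_usage_html reads below (values are examples, not real output):
#
#   {
#     "total_requests": 1234,
#     "models": {"gpt-4o-mini": {"total_requests": 900, "first_used": "...", "last_used": "..."}},
#     "api_endpoints": {"/chat/completions": {"total_requests": 1100, "first_used": "...", "last_used": "..."}},
#     "recent_daily_usage": {"2024-01-01": {"gpt-4o-mini": 120}},
#     "days_analyzed": 7
#   }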

# HF Space Note: Generating HTML dynamically can be resource-intensive.
# Consider caching the generated HTML or serving a static page updated periodically.
def generate_usage_html(usage_data: Dict) -> str:
    """Generates an HTML report from usage data."""
    # Handle potentially missing keys gracefully
    models_usage = usage_data.get('models', {})
    endpoints_usage = usage_data.get('api_endpoints', {})
    daily_usage = usage_data.get('recent_daily_usage', {})
    total_requests = usage_data.get('total_requests', 0)

    model_usage_rows = "\n".join([
        f"""
        <tr>
            <td>{model}</td>
            <td>{model_data.get('total_requests', 'N/A')}</td>
            <td>{model_data.get('first_used', 'N/A')}</td>
            <td>{model_data.get('last_used', 'N/A')}</td>
        </tr>
        """ for model, model_data in models_usage.items()
    ]) if models_usage else "<tr><td colspan='4'>No model usage data</td></tr>"

    api_usage_rows = "\n".join([
        f"""
        <tr>
            <td>{endpoint}</td>
            <td>{endpoint_data.get('total_requests', 'N/A')}</td>
            <td>{endpoint_data.get('first_used', 'N/A')}</td>
            <td>{endpoint_data.get('last_used', 'N/A')}</td>
        </tr>
        """ for endpoint, endpoint_data in endpoints_usage.items()
    ]) if endpoints_usage else "<tr><td colspan='4'>No API endpoint usage data</td></tr>"

    daily_usage_rows = "\n".join([
        f"""
        <tr>
            <td>{date}</td>
            <td>{entity}</td>
            <td>{requests}</td>
        </tr>
        """
        for date, date_data in daily_usage.items()
        for entity, requests in date_data.items()
    ]) if daily_usage else "<tr><td colspan='3'>No daily usage data</td></tr>"

    # HF Space Note: An f-string is fine for a page this size, but consider a
    # template engine (Jinja2) for more complex pages. Keep CSS/JS inline or
    # serve them via separate endpoints.
    html_content = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <meta name="viewport" content="width=device-width, initial-scale=1.0">
        <title>Lokiai AI - Usage Statistics</title>
        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
        <style>
            :root {{
                --bg-dark: #0f1011; --bg-darker: #070708; --text-primary: #e6e6e6;
                --text-secondary: #8c8c8c; --border-color: #2c2c2c; --accent-color: #3a6ee0;
                --accent-hover: #4a7ef0;
            }}
            body {{ font-family: 'Inter', sans-serif; background-color: var(--bg-dark); color: var(--text-primary); max-width: 1200px; margin: 0 auto; padding: 40px 20px; line-height: 1.6; }}
            .logo {{ display: flex; align-items: center; justify-content: center; margin-bottom: 30px; }}
            .logo h1 {{ font-weight: 600; font-size: 2.5em; color: var(--text-primary); margin-left: 15px; }}
            .logo img {{ width: 60px; height: 60px; border-radius: 10px; }}
            .container {{ background-color: var(--bg-darker); border-radius: 12px; padding: 30px; box-shadow: 0 15px 40px rgba(0,0,0,0.3); border: 1px solid var(--border-color); }}
            h2, h3 {{ color: var(--text-primary); border-bottom: 2px solid var(--border-color); padding-bottom: 10px; font-weight: 500; }}
            .total-requests {{ background-color: var(--accent-color); color: white; text-align: center; padding: 15px; border-radius: 8px; margin-bottom: 30px; font-weight: 600; letter-spacing: -0.5px; }}
            table {{ width: 100%; border-collapse: separate; border-spacing: 0; margin-bottom: 30px; background-color: var(--bg-dark); border-radius: 8px; overflow: hidden; }}
            th, td {{ border: 1px solid var(--border-color); padding: 12px; text-align: left; transition: background-color 0.3s ease; }}
            th {{ background-color: #1e1e1e; color: var(--text-primary); font-weight: 600; text-transform: uppercase; font-size: 0.9em; }}
            tr:nth-child(even) {{ background-color: rgba(255,255,255,0.05); }}
            tr:hover {{ background-color: rgba(62,100,255,0.1); }}
            @media (max-width: 768px) {{ .container {{ padding: 15px; }} table {{ font-size: 0.9em; }} }}
        </style>
    </head>
    <body>
        <div class="container">
            <div class="logo">
                <img src="data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMTAwIDM1TDUwIDkwaDEwMHoiIGZpbGw9IiMzYTZlZTAiLz48Y2lyY2xlIGN4PSIxMDAiIGN5PSIxNDAiIHI9IjMwIiBmaWxsPSIjM2E2ZWUwIi8+PC9zdmc+" alt="Lokai AI Logo">
                <h1>Lokiai AI Usage</h1>
            </div>

            <div class="total-requests">
                Total API Requests Recorded: {total_requests}
            </div>

            <h2>Model Usage</h2>
            <table>
                <thead><tr><th>Model</th><th>Total Requests</th><th>First Used</th><th>Last Used</th></tr></thead>
                <tbody>{model_usage_rows}</tbody>
            </table>

            <h2>API Endpoint Usage</h2>
            <table>
                <thead><tr><th>Endpoint</th><th>Total Requests</th><th>First Used</th><th>Last Used</th></tr></thead>
                <tbody>{api_usage_rows}</tbody>
            </table>

            <h2>Daily Usage (Last {usage_data.get('days_analyzed', 7)} Days)</h2>
            <table>
                <thead><tr><th>Date</th><th>Entity (Model/Endpoint)</th><th>Requests</th></tr></thead>
                <tbody>{daily_usage_rows}</tbody>
            </table>
        </div>
    </body>
    </html>
    """
    return html_content

# HF Space Note: Caching the generated HTML page can save resources.
# Invalidate the cache periodically or when usage data changes significantly.
usage_html_cache = {"content": None, "timestamp": 0}
CACHE_DURATION = 300  # Cache the usage page for 5 minutes


@app.get("/usage/page", response_class=HTMLResponse, tags=["Admin"])
async def usage_page():
    """Serves an HTML page showing usage statistics."""
    now = time.monotonic()
    if usage_html_cache["content"] and (now - usage_html_cache["timestamp"] < CACHE_DURATION):
        logger.info("Serving cached usage page.")
        return HTMLResponse(content=usage_html_cache["content"])

    logger.info("Generating fresh usage page.")
    try:
        # Run the potentially slow parts in the executor
        loop = asyncio.get_running_loop()
        usage_data = await loop.run_in_executor(executor, usage_tracker.get_usage_summary, 7)  # Last 7 days
        html_content = await loop.run_in_executor(executor, generate_usage_html, usage_data)

        # Update the cache
        usage_html_cache["content"] = html_content
        usage_html_cache["timestamp"] = now

        return HTMLResponse(content=html_content)
    except Exception as e:
        logger.error(f"Failed to generate usage page: {e}", exc_info=True)
        # Serve a stale cache if available, otherwise error out
        if usage_html_cache["content"]:
            logger.warning("Serving stale usage page due to generation error.")
            return HTMLResponse(content=usage_html_cache["content"])
        raise HTTPException(status_code=500, detail="Failed to generate usage statistics page.")

# Meme Endpoint
@app.get("/meme", tags=["Fun"])
async def get_meme():
    """Fetches a random meme and streams the image."""
    # HF Space Note: Ensure meme-api.com is accessible from the HF Space network.
    from starlette.background import BackgroundTask  # used to close the upstream stream

    client = get_async_client()
    meme_api_url = "https://meme-api.com/gimme"
    try:
        logger.info("Fetching meme info...")
        response = await client.get(meme_api_url)
        response.raise_for_status()
        response_data = response.json()

        meme_url = response_data.get("url")
        if not meme_url or not isinstance(meme_url, str):
            logger.error(f"Invalid meme URL received from API: {meme_url}")
            raise HTTPException(status_code=502, detail="Failed to get valid meme URL from API.")

        logger.info(f"Fetching meme image: {meme_url}")
        # Use a streaming request for the image itself. The upstream response
        # must stay open while FastAPI iterates it, so it is opened manually
        # (an `async with` block would close it before streaming begins) and
        # closed via a background task once the response has been sent.
        image_request = client.build_request("GET", meme_url)
        image_response = await client.send(image_request, stream=True)
        try:
            image_response.raise_for_status()  # Check that the image URL is valid
        except httpx.HTTPStatusError:
            await image_response.aclose()
            raise

        # Get the content type, defaulting to image/png
        media_type = image_response.headers.get("content-type", "image/png")
        if not media_type.startswith("image/"):
            logger.warning(f"Unexpected content type '{media_type}' for meme URL: {meme_url}")
            # You might want to reject non-image types here:
            # raise HTTPException(status_code=502, detail="Meme URL did not return an image.")

        # Stream the image content directly
        return StreamingResponse(
            image_response.aiter_bytes(),
            media_type=media_type,
            headers={'Cache-Control': 'no-cache'},  # Don't cache the meme itself heavily
            background=BackgroundTask(image_response.aclose)
        )

    except HTTPException:
        raise  # Preserve deliberate HTTP errors raised above
    except httpx.HTTPStatusError as e:
        logger.error(f"HTTP error fetching meme ({e.request.url}): {e.response.status_code}")
        raise HTTPException(status_code=502, detail=f"Failed to fetch meme (HTTP {e.response.status_code})")
    except httpx.RequestError as e:
        logger.error(f"Network error fetching meme ({e.request.url}): {e}")
        raise HTTPException(status_code=502, detail="Failed to fetch meme (Network Error)")
    except Exception as e:
        logger.error(f"Unexpected error fetching meme: {e}", exc_info=True)
        raise HTTPException(status_code=500, detail="Failed to retrieve meme due to an internal error.")

# Health Check Endpoint
@app.get("/health", tags=["Utility"])
async def health_check():
    """Provides a health check status, including missing critical configurations."""
    env_vars = get_env_vars()
    missing_critical_vars = []

    # Critical vars needed for core functionality
    critical_vars = [
        'api_keys', 'secret_api_endpoint', 'secret_api_endpoint_2',
        'secret_api_endpoint_3', 'secret_api_endpoint_4', 'secret_api_endpoint_5',
        'new_img_endpoint', 'hf_space_url'
    ]
    # Conditionally critical vars
    if any(model in mistral_models for model in available_model_ids):
        critical_vars.extend(['mistral_api', 'mistral_key'])

    for var_name in critical_vars:
        value = env_vars.get(var_name)
        # Treat None and empty strings/lists/sets as missing
        if value is None or (isinstance(value, (str, list, set)) and not value):
            missing_critical_vars.append(var_name)

    is_healthy = not missing_critical_vars and server_status
    status_code = 200 if is_healthy else 503  # Service Unavailable when unhealthy

    health_status = {
        "status": "healthy" if is_healthy else "unhealthy",
        "server_mode": "online" if server_status else "maintenance",
        "missing_critical_env_vars": missing_critical_vars,
        "details": "All critical configurations seem okay. Ready to roll! 🚀" if is_healthy else "Service issues detected. Check missing env vars or server status. 🛠️"
    }
    return JSONResponse(content=health_status, status_code=status_code)
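
# Illustrative healthy response (shape taken from the handler above; the
# values are examples):
#
#   {
#     "status": "healthy",
#     "server_mode": "online",
#     "missing_critical_env_vars": [],
#     "details": "All critical configurations seem okay. Ready to roll! 🚀"
#   }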

# --- Startup and Shutdown Events ---

@app.on_event("startup")
async def startup_event():
    """Tasks to run when the application starts."""
    global available_model_ids
    logger.info("Application startup sequence initiated...")

    # Load models from JSON
    models_from_file = load_models_data()
    model_ids_from_file = {model['id'] for model in models_from_file if 'id' in model}

    # Combine models from the file with the predefined sets
    predefined_model_sets = mistral_models | pollinations_models | alternate_models | claude_3_models
    all_model_ids = model_ids_from_file.union(predefined_model_sets)
    available_model_ids = sorted(all_model_ids)  # Keep as a sorted list

    logger.info(f"Loaded {len(model_ids_from_file)} models from models.json.")
    logger.info(f"Total {len(available_model_ids)} unique models available.")

    # Initialize the scraper pool (can take time).
    # HF Space Note: Run potentially blocking I/O in the executor during startup.
    loop = asyncio.get_running_loop()
    await loop.run_in_executor(executor, get_scraper)  # This initializes the pool

    # Validate critical environment variables and log warnings
    get_env_vars()  # Warm the cached env var lookup
    logger.info("Checking critical environment variables (Secrets)...")
    await health_check()  # Reuse the health check logic to surface warnings

    # Pre-connecting the async client is optional; httpx connects on demand.
    # client = get_async_client()
    # await client.get("https://www.google.com")  # Example warm-up call

    logger.info("Startup complete. Server is ready to accept requests.")


@app.on_event("shutdown")
async def shutdown_event():
    """Tasks to run when the application shuts down."""
    logger.info("Application shutdown sequence initiated...")

    # Close the httpx client gracefully
    client = get_async_client()
    await client.aclose()
    logger.info("HTTP client closed.")

    # Shut down the thread pool executor
    executor.shutdown(wait=True)
    logger.info("Thread pool executor shut down.")

    # Clear the scraper pool (optional; resources would be reclaimed anyway)
    scraper_pool.clear()
    logger.info("Scraper pool cleared.")

    # Persist usage data.
    # HF Space Note: Ensure the file system is writable if saving locally;
    # consider HF Datasets or an external DB for persistent storage.
    try:
        logger.info("Saving usage data...")
        usage_tracker.save_data()
        logger.info("Usage data saved.")
    except Exception as e:
        logger.error(f"Failed to save usage data during shutdown: {e}")

    logger.info("Shutdown complete.")
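
# Note: recent FastAPI versions deprecate the @app.on_event hooks in favor of
# a lifespan context manager. An equivalent sketch (the startup/shutdown bodies
# above would move into the two halves of the function):
#
#   from contextlib import asynccontextmanager
#
#   @asynccontextmanager
#   async def lifespan(app: FastAPI):
#       ...  # startup work
#       yield
#       ...  # shutdown work
#
#   # app = FastAPI(lifespan=lifespan)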

# --- Main Execution Block ---
# HF Space Note: This block is mainly for local testing. HF Spaces usually run
# the app with `uvicorn main:app --host 0.0.0.0 --port 7860` (or similar),
# defined in the README metadata or a Procfile.
if __name__ == "__main__":
    import uvicorn

    logger.info("Starting server locally with uvicorn...")
    # HF Space Note: Port 7860 is the HF Spaces default; host 0.0.0.0 is required.
    uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")