ParthSadaria committed on
Commit 1f0a039 · verified · 1 Parent(s): 20b82c3

Update main.py

Files changed (1)
  1. main.py +757 -960
main.py CHANGED
@@ -1,1056 +1,886 @@
  import os
  import re
- import json
- import datetime
- import time
- import asyncio
- import logging
- from pathlib import Path
- from functools import lru_cache
- from typing import Optional, Dict, List, Any, Generator, Set
- from concurrent.futures import ThreadPoolExecutor
-
- # Third-party libraries (ensure these are in requirements.txt)
  from dotenv import load_dotenv
- from fastapi import FastAPI, HTTPException, Request, Depends, Security, Response
  from fastapi.responses import StreamingResponse, HTMLResponse, JSONResponse, FileResponse
  from fastapi.security import APIKeyHeader
  from pydantic import BaseModel
  import httpx
- import uvloop # Use uvloop for performance
  from fastapi.middleware.gzip import GZipMiddleware
  from starlette.middleware.cors import CORSMiddleware
- import cloudscraper # For bypassing Cloudflare, potentially unreliable
- import requests # For synchronous requests like in /dynamo
-
- # HF Space Note: Ensure usage_tracker.py is in your repository
- try:
-     from usage_tracker import UsageTracker
-     usage_tracker = UsageTracker()
- except ImportError:
-     print("Warning: usage_tracker.py not found. Usage tracking will be disabled.")
-     # Create a dummy tracker if the file is missing
-     class DummyUsageTracker:
-         def record_request(self, *args, **kwargs): pass
-         def get_usage_summary(self, *args, **kwargs): return {}
-         def save_data(self, *args, **kwargs): pass
-     usage_tracker = DummyUsageTracker()
-
-
- # --- Configuration & Setup ---
-
- # HF Space Note: uvloop can improve performance in I/O bound tasks common in web apps.
  asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

- # HF Space Note: Adjust max_workers based on your HF Space resources (CPU).
- # Higher tiers allow more workers. Start lower (e.g., 4) for free tier.
- executor = ThreadPoolExecutor(max_workers=8)

- # HF Space Note: load_dotenv() is useful for local dev but HF Spaces use Secrets.
- # os.getenv will automatically pick up secrets set in the HF Space settings.
  load_dotenv()

- # Logging setup
- logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
- logger = logging.getLogger(__name__)
-
- # API key security
  api_key_header = APIKeyHeader(name="Authorization", auto_error=False)

- # --- FastAPI App Initialization ---

- app = FastAPI(
-     title="LokiAI API",
-     description="API Proxy for various AI models with usage tracking and streaming.",
-     version="1.0.0"
- )

- # Middleware
- app.add_middleware(GZipMiddleware, minimum_size=1000) # Compress large responses
  app.add_middleware(
-     CORSMiddleware, # Allow cross-origin requests (useful for web playgrounds)
-     allow_origins=["*"], # Or restrict to specific origins
      allow_credentials=True,
      allow_methods=["*"],
      allow_headers=["*"],
  )

- # --- Environment Variables & Model Config ---
-
- @lru_cache(maxsize=1) # Cache environment variables
- def get_env_vars() -> Dict[str, Any]:
-     """Loads and returns essential environment variables."""
-     # HF Space Note: Set these as Secrets in your Hugging Face Space settings.
      return {
-         'api_keys': set(filter(None, os.getenv('API_KEYS', '').split(','))), # Use set for faster lookup
          'secret_api_endpoint': os.getenv('SECRET_API_ENDPOINT'),
          'secret_api_endpoint_2': os.getenv('SECRET_API_ENDPOINT_2'),
-         'secret_api_endpoint_3': os.getenv('SECRET_API_ENDPOINT_3'), # Search endpoint
-         'secret_api_endpoint_4': os.getenv('SECRET_API_ENDPOINT_4', "https://text.pollinations.ai/openai"), # Pollinations
-         'secret_api_endpoint_5': os.getenv('SECRET_API_ENDPOINT_5'), # Claude 3 endpoint
-         'mistral_api': os.getenv('MISTRAL_API', "https://api.mistral.ai"),
          'mistral_key': os.getenv('MISTRAL_KEY'),
-         'new_img_endpoint': os.getenv('NEW_IMG'), # Image generation endpoint
-         'hf_space_url': os.getenv('HF_SPACE_URL', 'https://your-space-name.hf.space') # HF Space Note: Set this! Used for Referer/Origin checks.
      }

- # Model sets for fast lookups
- # HF Space Note: Consider moving these large sets to a separate config file (e.g., config.py or models_config.json)
- # for better organization if they grow larger.
- mistral_models: Set[str] = {
-     "mistral-large-latest", "pixtral-large-latest", "mistral-moderation-latest",
-     "ministral-3b-latest", "ministral-8b-latest", "open-mistral-nemo",
-     "mistral-small-latest", "mistral-saba-latest", "codestral-latest"
  }

- pollinations_models: Set[str] = {
-     "openai", "openai-large", "openai-xlarge", "openai-reasoning", "qwen-coder",
-     "llama", "mistral", "searchgpt", "deepseek", "claude-hybridspace",
-     "deepseek-r1", "deepseek-reasoner", "llamalight", "gemini", "gemini-thinking",
-     "hormoz", "phi", "phi-mini", "openai-audio", "llama-scaleway"
  }

- alternate_models: Set[str] = {
-     "gpt-4o", "deepseek-v3", "llama-3.1-8b-instruct", "llama-3.1-sonar-small-128k-online",
-     "deepseek-r1-uncensored", "tinyswallow1.5b", "andy-3.5", "o3-mini-low",
-     "hermes-3-llama-3.2-3b", "creitin-r1", "fluffy.1-chat", "plutotext-1-text",
-     "command-a", "claude-3-7-sonnet-20250219", "plutogpt-3.5-turbo"
  }

- claude_3_models: Set[str] = {
-     "claude-3-7-sonnet", "claude-3-7-sonnet-thinking", "claude 3.5 haiku",
-     "claude 3.5 sonnet", "claude 3.5 haiku", "o3-mini-medium", "o3-mini-high",
-     "grok-3", "grok-3-thinking", "grok 2"
  }

- supported_image_models: Set[str] = {
-     "Flux Pro Ultra", "grok-2-aurora", "Flux Pro", "Flux Pro Ultra Raw", "Flux Dev",
-     "Flux Schnell", "stable-diffusion-3-large-turbo", "Flux Realism",
-     "stable-diffusion-ultra", "dall-e-3", "sdxl-lightning-4step"
  }

- # --- Pydantic Models ---
-
- class Message(BaseModel):
-     role: str
-     content: Any # Allow content to be string or potentially list for multimodal models

  class Payload(BaseModel):
      model: str
-     messages: List[Message]
      stream: bool = False
-     # Add other potential OpenAI compatible parameters with defaults
-     max_tokens: Optional[int] = None
-     temperature: Optional[float] = None
-     top_p: Optional[float] = None
-     # ... add others as needed

  class ImageGenerationPayload(BaseModel):
      model: str
      prompt: str
-     size: Optional[str] = "1024x1024" # Default size, make optional if API allows
-     n: Optional[int] = 1 # Number of images, OpenAI uses 'n'
-     # HF Space Note: Ensure these parameter names match the target NEW_IMG endpoint API
-     # Renaming from 'number' to 'n' and 'size' type hint correction.

- # --- Global State & Clients ---

- server_status: bool = True # For maintenance mode
- available_model_ids: List[str] = [] # Loaded at startup

- # HF Space Note: Reusable HTTP client with connection pooling is crucial for performance.
- # Adjust limits based on expected load and HF Space resources.
  @lru_cache(maxsize=1)
- def get_async_client() -> httpx.AsyncClient:
-     """Returns a cached instance of httpx.AsyncClient."""
-     # HF Space Note: Timeouts are important to prevent hanging requests.
-     # Keepalive connections reduce handshake overhead.
-     timeout = httpx.Timeout(30.0, connect=10.0) # 30s total, 10s connect
-     limits = httpx.Limits(max_keepalive_connections=20, max_connections=100)
-     return httpx.AsyncClient(timeout=timeout, limits=limits, follow_redirects=True)
-
- # HF Space Note: cloudscraper pool. Be mindful of potential rate limits or blocks.
- # Consider alternatives if this becomes unreliable.
- scraper_pool: List[cloudscraper.CloudScraper] = []
- MAX_SCRAPERS = 10 # Reduced pool size for potentially lower resource usage
-
- def get_scraper() -> cloudscraper.CloudScraper:
-     """Gets a cloudscraper instance from the pool."""
      if not scraper_pool:
-         logger.info(f"Initializing {MAX_SCRAPERS} cloudscraper instances...")
          for _ in range(MAX_SCRAPERS):
-             # HF Space Note: Scraper creation can be slow, doing it upfront is good.
              scraper_pool.append(cloudscraper.create_scraper())
-         logger.info("Cloudscraper pool initialized.")
-     # Simple round-robin selection
-     return scraper_pool[int(time.monotonic() * 1000) % MAX_SCRAPERS]

- # --- Security & Authentication ---

  async def verify_api_key(
      request: Request,
-     api_key: Optional[str] = Security(api_key_header)
  ) -> bool:
-     """Verifies the provided API key against environment variables."""
-     env_vars = get_env_vars()
-     valid_api_keys = env_vars.get('api_keys', set())
-     hf_space_url = env_vars.get('hf_space_url', '')
-
-     # Allow bypass if the referer is from the known HF Space playground URLs
-     # HF Space Note: Make HF_SPACE_URL a secret for flexibility.
      referer = request.headers.get("referer", "")
-     if hf_space_url and referer.startswith((f"{hf_space_url}/playground", f"{hf_space_url}/image-playground")):
-         logger.debug(f"API Key check bypassed for referer: {referer}")
          return True
-
      if not api_key:
-         logger.warning("API Key missing.")
-         raise HTTPException(status_code=403, detail="Not authenticated: No API key provided")
-
-     # Clean 'Bearer ' prefix if present
      if api_key.startswith('Bearer '):
-         api_key = api_key[7:]
-
-     if not valid_api_keys:
-         logger.error("API keys are not configured on the server (API_KEYS secret missing?).")
-         raise HTTPException(status_code=500, detail="Server configuration error: API keys not set")
-
-     if api_key not in valid_api_keys:
-         logger.warning(f"Invalid API key received: {api_key[:4]}...") # Log prefix only
-         raise HTTPException(status_code=403, detail="Not authenticated: Invalid API key")
-
-     logger.debug("API Key verified successfully.")
      return True

- # --- Model & File Loading ---
-
  @lru_cache(maxsize=1)
- def load_models_data() -> List[Dict]:
-     """Loads model data from models.json."""
-     # HF Space Note: Ensure models.json is in the root of your HF Space repo.
-     models_file = Path(__file__).parent / 'models.json'
-     if not models_file.is_file():
-         logger.error("models.json not found!")
-         return []
      try:
-         with open(models_file, 'r') as f:
              return json.load(f)
      except (FileNotFoundError, json.JSONDecodeError) as e:
-         logger.error(f"Error loading models.json: {e}")
          return []

- async def get_models() -> List[Dict]:
-     """Async wrapper to get models data."""
      models_data = load_models_data()
      if not models_data:
          raise HTTPException(status_code=500, detail="Error loading available models")
      return models_data

- # --- Static File Serving ---

- # HF Space Note: Cache frequently accessed static files in memory.
  @lru_cache(maxsize=10)
- def read_static_file(file_path: str) -> Optional[str]:
-     """Reads a static file, caching the result."""
-     full_path = Path(__file__).parent / file_path
-     if not full_path.is_file():
-         logger.warning(f"Static file not found: {file_path}")
-         return None
      try:
-         with open(full_path, "r", encoding="utf-8") as file:
              return file.read()
-     except Exception as e:
-         logger.error(f"Error reading static file {file_path}: {e}")
          return None

- async def serve_static_html(file_path: str) -> HTMLResponse:
-     """Serves a static HTML file."""
-     content = read_static_file(file_path)
-     if content is None:
-         return HTMLResponse(content=f"<h1>Error: {file_path} not found</h1>", status_code=404)
-     return HTMLResponse(content=content)
-
- # --- API Endpoints ---
-
- # Basic Routes & Static Files
- @app.get("/favicon.ico", include_in_schema=False)
  async def favicon():
      favicon_path = Path(__file__).parent / "favicon.ico"
-     if favicon_path.is_file():
-         return FileResponse(favicon_path, media_type="image/vnd.microsoft.icon")
-     raise HTTPException(status_code=404, detail="favicon.ico not found")
-
- @app.get("/banner.jpg", include_in_schema=False)
- async def banner():
-     banner_path = Path(__file__).parent / "banner.jpg"
-     if banner_path.is_file():
-         return FileResponse(banner_path, media_type="image/jpeg") # Assuming JPEG
-     raise HTTPException(status_code=404, detail="banner.jpg not found")
-
- @app.get("/ping", tags=["Utility"])
- async def ping():
-     """Simple health check endpoint."""
-     return {"message": "pong"}

- @app.get("/", response_class=HTMLResponse, tags=["Frontend"])
- async def root():
-     """Serves the main index HTML page."""
-     return await serve_static_html("index.html")
-
- @app.get("/script.js", response_class=Response, tags=["Frontend"], include_in_schema=False)
- async def script_js():
-     content = read_static_file("script.js")
-     if content is None:
-         return Response(content="/* script.js not found */", status_code=404, media_type="application/javascript")
-     return Response(content=content, media_type="application/javascript")
-
- @app.get("/style.css", response_class=Response, tags=["Frontend"], include_in_schema=False)
- async def style_css():
-     content = read_static_file("style.css")
-     if content is None:
-         return Response(content="/* style.css not found */", status_code=404, media_type="text/css")
-     return Response(content=content, media_type="text/css")
-
- @app.get("/playground", response_class=HTMLResponse, tags=["Frontend"])
- async def playground():
-     """Serves the chat playground HTML page."""
-     return await serve_static_html("playground.html")

- @app.get("/image-playground", response_class=HTMLResponse, tags=["Frontend"])
- async def image_playground():
-     """Serves the image playground HTML page."""
-     return await serve_static_html("image-playground.html")

- # Dynamic Page Example
- @app.get("/dynamo", response_class=HTMLResponse, tags=["Examples"])
  async def dynamic_ai_page(request: Request):
-     """Generates a dynamic HTML page using an AI model (example)."""
-     # HF Space Note: This uses a hardcoded URL to *itself* if running in the space.
-     # Ensure the HF_SPACE_URL secret is set correctly.
-     env_vars = get_env_vars()
-     hf_space_url = env_vars.get('hf_space_url', '')
-     if not hf_space_url:
-         raise HTTPException(status_code=500, detail="HF_SPACE_URL environment variable not set.")
-
-     user_agent = request.headers.get('user-agent', 'Unknown')
-     client_ip = request.client.host if request.client else "Unknown"
-     location = f"IP: {client_ip}" # Basic IP, location requires GeoIP lookup (extra dependency)
-
      prompt = f"""
-     Generate a cool, dynamic HTML page for a user with the following details:
-     - App Name: "LokiAI"
      - User-Agent: {user_agent}
-     - Location Info: {location}
-     - Style: Cyberpunk aesthetic, minimalist layout, maybe some retro touches.
-     - Content: Include a heading, a short motivational or witty message, and perhaps a subtle animation. Use inline CSS for styling within a <style> tag.
-     - Output: Provide ONLY the raw HTML code, starting with <!DOCTYPE html>. Do not wrap it in backticks or add explanations.
      """
-
      payload = {
-         "model": "mistral-small-latest", # Or another capable model
-         "messages": [{"role": "user", "content": prompt}],
-         "max_tokens": 1000,
-         "temperature": 0.7
      }
      headers = {
-         # HF Space Note: Use the space's own URL and a valid API key if required by your setup.
-         # Here, we assume the playground key bypass works or use a dedicated internal key.
-         "Authorization": f"Bearer {list(env_vars['api_keys'])[0] if env_vars['api_keys'] else 'dummy-key'}" # Use first key or dummy
      }

-     try:
-         # HF Space Note: Use the async client for internal requests too.
-         client = get_async_client()
-         api_url = f"{hf_space_url}/chat/completions" # Call own endpoint
-         response = await client.post(api_url, json=payload, headers=headers)
-         response.raise_for_status() # Raise exception for bad status codes
-         data = response.json()
-
-         html_content = data.get('choices', [{}])[0].get('message', {}).get('content', '')
-
-         # Basic cleanup (remove potential markdown backticks if model adds them)
-         html_content = re.sub(r"^```html\s*", "", html_content, flags=re.IGNORECASE)
-         html_content = re.sub(r"\s*```$", "", html_content)
-
-         if not html_content.strip().lower().startswith("<!doctype html"):
-             logger.warning("Dynamo page generation might be incomplete or malformed.")
-             # Optionally return a fallback static page here
-
-         return HTMLResponse(content=html_content)

-     except httpx.HTTPStatusError as e:
-         logger.error(f"Error calling self API for /dynamo: {e.response.status_code} - {e.response.text}")
-         raise HTTPException(status_code=502, detail=f"Failed to generate dynamic content: Upstream API error {e.response.status_code}")
-     except Exception as e:
-         logger.error(f"Unexpected error in /dynamo: {e}", exc_info=True)
-         raise HTTPException(status_code=500, detail="Failed to generate dynamic content due to an internal error.")


- # Vetra Example (Fetching from GitHub)
- # HF Space Note: Ensure outbound requests to raw.githubusercontent.com are allowed.
  GITHUB_BASE = "https://raw.githubusercontent.com/Parthsadaria/Vetra/main"
- VETRA_FILES = {"html": "index.html", "css": "style.css", "js": "script.js"}

- async def get_github_file(filename: str) -> Optional[str]:
-     """Fetches a file from the Vetra GitHub repo."""
      url = f"{GITHUB_BASE}/{filename}"
-     try:
-         client = get_async_client()
          res = await client.get(url)
-         res.raise_for_status()
-         return res.text
-     except httpx.RequestError as e:
-         logger.error(f"Error fetching GitHub file {url}: {e}")
-         return None
-     except httpx.HTTPStatusError as e:
-         logger.error(f"GitHub file {url} returned status {e.response.status_code}")
-         return None

- @app.get("/vetra", response_class=HTMLResponse, tags=["Examples"])
  async def serve_vetra():
-     """Serves the Vetra application by fetching components from GitHub."""
-     logger.info("Fetching Vetra files from GitHub...")
-     # Fetch files concurrently
-     html_task = asyncio.create_task(get_github_file(VETRA_FILES["html"]))
-     css_task = asyncio.create_task(get_github_file(VETRA_FILES["css"]))
-     js_task = asyncio.create_task(get_github_file(VETRA_FILES["js"]))
-
-     html, css, js = await asyncio.gather(html_task, css_task, js_task)

      if not html:
-         logger.error("Failed to fetch Vetra index.html")
-         return HTMLResponse(content="<h1>Error: Could not load Vetra application (HTML missing)</h1>", status_code=502)
-
-     # Inject CSS and JS into HTML
-     css_content = f"<style>{css or '/* CSS failed to load */'}</style>"
-     js_content = f"<script>{js or '// JS failed to load'}</script>"

-     # Inject carefully before closing tags
-     final_html = html.replace("</head>", f"{css_content}\n</head>", 1)
-     final_html = final_html.replace("</body>", f"{js_content}\n</body>", 1)

-     logger.info("Successfully served Vetra application.")
      return HTMLResponse(content=final_html)


- # Model Info Endpoint
- @app.get("/api/v1/models", tags=["Models"])
- @app.get("/models", tags=["Models"])
- async def return_models():
-     """Returns the list of available models loaded from models.json."""
-     # HF Space Note: This endpoint now relies on models.json being present.
-     # It no longer dynamically adds models defined only in the script's sets.
-     # Ensure models.json is comprehensive or adjust startup logic if needed.
-     return await get_models()

- # Search Endpoint (using cloudscraper)
- # HF Space Note: This uses cloudscraper which might be blocked or require updates.
- # Consider replacing with a more stable search API if possible.
- async def generate_search_async(query: str, systemprompt: Optional[str] = None) -> asyncio.Queue:
-     """Performs search using the configured backend and streams results."""
-     queue = asyncio.Queue()
-     env_vars = get_env_vars()
-     search_endpoint = env_vars.get('secret_api_endpoint_3')
-
-     async def _fetch_search_data():
-         if not search_endpoint:
-             await queue.put({"error": "Search API endpoint (SECRET_API_ENDPOINT_3) not configured"})
-             await queue.put(None) # Signal end
-             return

-         try:
-             scraper = get_scraper() # Get a scraper instance from the pool
-             loop = asyncio.get_running_loop()

-             system_message = systemprompt or "You are a helpful search assistant."
-             messages = [
-                 {"role": "system", "content": system_message},
-                 {"role": "user", "content": query},
-             ]
-             payload = {
-                 "model": "searchgpt", # Assuming the endpoint expects this model name
-                 "messages": messages,
-                 "stream": True # Explicitly request streaming from backend
-             }
-             headers = {"User-Agent": "Mozilla/5.0"} # Standard user agent
-
-             # HF Space Note: Run synchronous scraper call in executor thread
-             response = await loop.run_in_executor(
-                 executor,
-                 scraper.post,
-                 search_endpoint,
-                 json=payload,
-                 headers=headers,
-                 stream=True # Request streaming from requests library perspective
-             )
-
-             response.raise_for_status()
-
-             # Process SSE stream
-             # HF Space Note: Iterating lines on the response directly can be blocking if not handled carefully.
-             # Using iter_lines with decode_unicode=True is generally safe.
-             for line in response.iter_lines(decode_unicode=True):
-                 if line.startswith("data: "):
-                     try:
-                         data_str = line[6:]
-                         if data_str.strip() == "[DONE]": # Check for OpenAI style completion
-                             break
-                         json_data = json.loads(data_str)
-                         # Assuming OpenAI compatible streaming format
-                         delta = json_data.get("choices", [{}])[0].get("delta", {})
-                         content = delta.get("content")
-                         if content:
-                             # Reconstruct OpenAI-like SSE chunk
-                             chunk = {
-                                 "id": json_data.get("id"),
-                                 "object": "chat.completion.chunk",
-                                 "created": json_data.get("created", int(time.time())),
-                                 "model": "searchgpt",
-                                 "choices": [{"index": 0, "delta": {"content": content}, "finish_reason": None}]
-                             }
-                             await queue.put({"data": f"data: {json.dumps(chunk)}\n\n", "text": content})
-                         # Check for finish reason
-                         finish_reason = json_data.get("choices", [{}])[0].get("finish_reason")
-                         if finish_reason:
-                             chunk = {
-                                 "id": json_data.get("id"),
-                                 "object": "chat.completion.chunk",
-                                 "created": json_data.get("created", int(time.time())),
-                                 "model": "searchgpt",
-                                 "choices": [{"index": 0, "delta": {}, "finish_reason": finish_reason}]
-                             }
-                             await queue.put({"data": f"data: {json.dumps(chunk)}\n\n", "text": ""})
-                             break # Stop processing after finish reason
-
-                     except json.JSONDecodeError:
-                         logger.warning(f"Failed to decode JSON from search stream: {line}")
-                         continue
-                     except Exception as e:
-                         logger.error(f"Error processing search stream chunk: {e}", exc_info=True)
-                         await queue.put({"error": f"Error processing stream: {e}"})
-                         break # Stop on processing error
-
-         except requests.exceptions.RequestException as e:
-             logger.error(f"Search request failed: {e}")
-             await queue.put({"error": f"Search request failed: {e}"})
-         except Exception as e:
-             logger.error(f"Unexpected error during search: {e}", exc_info=True)
-             await queue.put({"error": f"An unexpected error occurred during search: {e}"})
-         finally:
-             await queue.put(None) # Signal completion
-
-
-     asyncio.create_task(_fetch_search_data())
-     return queue

- @app.get("/searchgpt", tags=["Search"])
- async def search_gpt(q: str, stream: bool = True, systemprompt: Optional[str] = None):
-     """
-     Performs a search using the backend search model and streams results.
-     Pass `stream=false` to get the full response at once.
-     """
      if not q:
          raise HTTPException(status_code=400, detail="Query parameter 'q' is required")

-     # HF Space Note: Ensure usage_tracker is thread-safe if used across async/sync boundaries.
-     # The dummy tracker used when the module isn't found is safe.
      usage_tracker.record_request(endpoint="/searchgpt")

-     queue = await generate_search_async(q, systemprompt=systemprompt)

      if stream:
          async def stream_generator():
-             full_response_text = "" # Keep track for non-streaming case if needed
              while True:
                  item = await queue.get()
-                 if item is None: # End of stream signal
                      break
                  if "error" in item:
-                     # HF Space Note: Log errors server-side, return generic error to client for security.
-                     logger.error(f"Search stream error: {item['error']}")
-                     # Send an error event in the stream
-                     error_event = {"error": {"message": "Search failed.", "code": 500}}
-                     yield f"data: {json.dumps(error_event)}\n\n"
                      break
                  if "data" in item:
                      yield item["data"]
-                     full_response_text += item.get("text", "")
-             # Optionally yield a [DONE] message if backend doesn't guarantee it
-             # yield "data: [DONE]\n\n"

          return StreamingResponse(
              stream_generator(),
-             media_type="text/event-stream",
-             headers={
-                 "Content-Type": "text/event-stream",
-                 "Cache-Control": "no-cache",
-                 "Connection": "keep-alive",
-                 "X-Accel-Buffering": "no" # Crucial for Nginx/proxies in HF Spaces
-             }
          )
      else:
-         # Collect full response for non-streaming request
-         full_response_text = ""
          while True:
              item = await queue.get()
              if item is None:
                  break
              if "error" in item:
-                 logger.error(f"Search non-stream error: {item['error']}")
-                 raise HTTPException(status_code=502, detail=f"Search failed: {item['error']}")
-             full_response_text += item.get("text", "")
-
-         # Mimic OpenAI non-streaming response structure
-         return JSONResponse(content={
-             "id": f"search-{int(time.time())}",
-             "object": "chat.completion",
-             "created": int(time.time()),
-             "model": "searchgpt",
-             "choices": [{
-                 "index": 0,
-                 "message": {
-                     "role": "assistant",
-                     "content": full_response_text,
-                 },
-                 "finish_reason": "stop",
-             }],
-             "usage": { # Note: Token usage is unknown here
-                 "prompt_tokens": None,
-                 "completion_tokens": None,
-                 "total_tokens": None,
-             }
-         })


- # Main Chat Completions Proxy
- @app.post("/api/v1/chat/completions", tags=["Chat Completions"])
- @app.post("/chat/completions", tags=["Chat Completions"])
- async def get_completion(
-     payload: Payload,
-     request: Request,
-     authenticated: bool = Depends(verify_api_key) # Apply authentication
- ):
-     """
-     Proxies chat completion requests to the appropriate backend API based on the model.
-     Supports streaming (SSE).
-     """
-     if not server_status:
-         raise HTTPException(status_code=503, detail="Server is under maintenance.")

-     model_to_use = payload.model or "gpt-4o-mini" # Default model

-     # HF Space Note: Check against models loaded at startup.
-     if available_model_ids and model_to_use not in available_model_ids:
-         logger.warning(f"Requested model '{model_to_use}' not in available list.")
-         # Check if it's a known category even if not explicitly in models.json
-         known_categories = mistral_models | pollinations_models | alternate_models | claude_3_models
-         if model_to_use not in known_categories:
-             raise HTTPException(
-                 status_code=400,
-                 detail=f"Model '{model_to_use}' is not available or recognized. Check /models."
-             )
-         else:
-             logger.info(f"Allowing known category model '{model_to_use}' despite not being in models.json.")

-     # Log request asynchronously
      asyncio.create_task(log_request(request, model_to_use))
      usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")

-     # Prepare payload for the target API
-     payload_dict = payload.dict(exclude_none=True) # Exclude None values
-     payload_dict["model"] = model_to_use # Ensure model is set

-     env_vars = get_env_vars()
-     hf_space_url = env_vars.get('hf_space_url', '') # Needed for Referer/Origin

-     # Determine target endpoint and headers
-     endpoint = None
-     custom_headers = {}

      if model_to_use in mistral_models:
-         endpoint = env_vars.get('mistral_api')
-         api_key = env_vars.get('mistral_key')
-         if not endpoint or not api_key:
-             raise HTTPException(status_code=500, detail="Mistral API endpoint or key not configured.")
-         custom_headers = {"Authorization": f"Bearer {api_key}", "Content-Type": "application/json", "Accept": "application/json"}
-         # Mistral specific adjustments if needed
-         # payload_dict.pop('system', None) # Example: if Mistral doesn't use 'system' role
-
      elif model_to_use in pollinations_models:
-         endpoint = env_vars.get('secret_api_endpoint_4')
-         if not endpoint:
-             raise HTTPException(status_code=500, detail="Pollinations API endpoint (SECRET_API_ENDPOINT_4) not configured.")
-         # Pollinations might need specific headers? Add them here.
-         custom_headers = {"Content-Type": "application/json"}
-
      elif model_to_use in alternate_models:
-         endpoint = env_vars.get('secret_api_endpoint_2')
-         if not endpoint:
-             raise HTTPException(status_code=500, detail="Alternate API endpoint (SECRET_API_ENDPOINT_2) not configured.")
-         custom_headers = {"Content-Type": "application/json"}
-
-     elif model_to_use in claude_3_models:
-         endpoint = env_vars.get('secret_api_endpoint_5')
-         if not endpoint:
-             raise HTTPException(status_code=500, detail="Claude 3 API endpoint (SECRET_API_ENDPOINT_5) not configured.")
-         custom_headers = {"Content-Type": "application/json"}
-         # Claude specific headers (like anthropic-version) might be needed
-         # custom_headers["anthropic-version"] = "2023-06-01"
-
-     else: # Default endpoint
-         endpoint = env_vars.get('secret_api_endpoint')
-         if not endpoint:
-             raise HTTPException(status_code=500, detail="Default API endpoint (SECRET_API_ENDPOINT) not configured.")
-         # Default endpoint might need Origin/Referer
-         if hf_space_url:
-             custom_headers = {
-                 "Origin": hf_space_url,
-                 "Referer": hf_space_url,
-                 "Content-Type": "application/json"
-             }
-         else:
-             custom_headers = {"Content-Type": "application/json"}
-
-
-     target_url = f"{endpoint.rstrip('/')}/v1/chat/completions" # Assume OpenAI compatible path
-     logger.info(f"Proxying request for model '{model_to_use}' to endpoint: {endpoint}")

-     client = get_async_client()

-     async def stream_generator():
-         """Generator for streaming the response."""
-         nonlocal target_url # Allow modification if needed
          try:
-             async with client.stream("POST", target_url, json=payload_dict, headers=custom_headers) as response:
-                 # Check for initial errors before streaming
-                 if response.status_code >= 400:
-                     error_body = await response.aread()
-                     logger.error(f"Upstream API error: {response.status_code} - {error_body.decode()}")
-                     # Try to parse error detail from upstream
-                     detail = f"Upstream API error: {response.status_code}"
-                     try:
-                         error_json = json.loads(error_body)
-                         detail = error_json.get('error', {}).get('message', detail)
-                     except json.JSONDecodeError:
-                         pass
-                     # Send error as SSE event
-                     error_event = {"error": {"message": detail, "code": response.status_code}}
-                     yield f"data: {json.dumps(error_event)}\n\n"
-                     return # Stop generation
-
-                 # Stream the response line by line
-                 async for line in response.aiter_lines():
-                     if line:
-                         # Pass through the data directly
-                         yield line + "\n"
-             # Ensure stream is properly closed, yield [DONE] if backend doesn't
-             # Some backends might not send [DONE], uncomment if needed
-             # yield "data: [DONE]\n\n"
-
          except httpx.TimeoutException:
-             logger.error(f"Request to {target_url} timed out.")
-             error_event = {"error": {"message": "Request timed out", "code": 504}}
-             yield f"data: {json.dumps(error_event)}\n\n"
          except httpx.RequestError as e:
-             logger.error(f"Failed to connect to upstream API {target_url}: {e}")
-             error_event = {"error": {"message": f"Upstream connection error: {e}", "code": 502}}
-             yield f"data: {json.dumps(error_event)}\n\n"
          except Exception as e:
-             logger.error(f"An unexpected error occurred during streaming proxy: {e}", exc_info=True)
-             error_event = {"error": {"message": f"Internal server error: {e}", "code": 500}}
-             yield f"data: {json.dumps(error_event)}\n\n"

-     if payload.stream:
          return StreamingResponse(
-             stream_generator(),
              media_type="text/event-stream",
              headers={
                  "Content-Type": "text/event-stream",
                  "Cache-Control": "no-cache",
                  "Connection": "keep-alive",
-                 "X-Accel-Buffering": "no" # Essential for HF Spaces proxying SSE
              }
          )
      else:
-         # Handle non-streaming request by collecting the streamed chunks
-         full_response_content = ""
-         final_json_response = None
-         async for line in stream_generator():
-             if line.startswith("data: "):
-                 data_str = line[6:].strip()
-                 if data_str == "[DONE]":
-                     break
-                 try:
-                     chunk = json.loads(data_str)
-                     # Check for error chunk
-                     if "error" in chunk:
-                         logger.error(f"Received error during non-stream collection: {chunk['error']}")
-                         raise HTTPException(status_code=chunk['error'].get('code', 502), detail=chunk['error'].get('message', 'Upstream API error'))
-
-                     # Accumulate content from delta
-                     delta = chunk.get("choices", [{}])[0].get("delta", {})
-                     content = delta.get("content")
-                     if content:
-                         full_response_content += content
-
-                     # Store the last chunk structure to reconstruct the final response
-                     # We assume the last chunk contains necessary info like id, model, etc.
-                     # but we overwrite the choices/message part.
-                     final_json_response = chunk # Keep the structure
-                     # Check for finish reason
-                     finish_reason = chunk.get("choices", [{}])[0].get("finish_reason")
-                     if finish_reason:
-                         break # Stop collecting
-
-                 except json.JSONDecodeError:
-                     logger.warning(f"Could not decode JSON chunk in non-stream mode: {data_str}")
-                 except Exception as e:
-                     logger.error(f"Error processing chunk in non-stream mode: {e}")
-                     raise HTTPException(status_code=500, detail="Error processing response stream.")
-
-         if final_json_response is None:
-             # Handle cases where no valid data chunks were received
-             logger.error("No valid response chunks received for non-streaming request.")
-             raise HTTPException(status_code=502, detail="Failed to get valid response from upstream API.")
-
-         # Reconstruct OpenAI-like non-streaming response
-         final_response_obj = {
-             "id": final_json_response.get("id", f"chatcmpl-{int(time.time())}"),
-             "object": "chat.completion",
-             "created": final_json_response.get("created", int(time.time())),
-             "model": model_to_use, # Use the requested model
-             "choices": [{
-                 "index": 0,
-                 "message": {
-                     "role": "assistant",
-                     "content": full_response_content,
-                 },
-                 "finish_reason": final_json_response.get("choices", [{}])[0].get("finish_reason", "stop"), # Get finish reason from last chunk
-             }],
-             "usage": { # Token usage might be in the last chunk for some APIs, otherwise unknown
-                 "prompt_tokens": None,
-                 "completion_tokens": None,
-                 "total_tokens": None,
-             }
-         }
-         # Attempt to extract usage if present in the (potentially non-standard) final chunk
-         usage_data = final_json_response.get("usage")
-         if isinstance(usage_data, dict):
-             final_response_obj["usage"].update(usage_data)

-         return JSONResponse(content=final_response_obj)


- # Image Generation Endpoint
- @app.post("/images/generations", tags=["Image Generation"])
- async def create_image(
-     payload: ImageGenerationPayload,
-     authenticated: bool = Depends(verify_api_key)
- ):
      """
-     Generates images based on a text prompt using the configured backend.
      """
      if not server_status:
-         raise HTTPException(status_code=503, detail="Server is under maintenance.")

      if payload.model not in supported_image_models:
          raise HTTPException(
              status_code=400,
-             detail=f"Model '{payload.model}' is not supported for image generation. Supported: {', '.join(supported_image_models)}"
          )

      usage_tracker.record_request(model=payload.model, endpoint="/images/generations")

-     env_vars = get_env_vars()
-     target_api_url = env_vars.get('new_img_endpoint')
-     if not target_api_url:
-         raise HTTPException(status_code=500, detail="Image generation endpoint (NEW_IMG) not configured.")
-
-     # Prepare payload for the target API (adjust keys if needed)
-     # HF Space Note: Ensure the keys match the actual API expected by NEW_IMG endpoint.
-     # Assuming it's OpenAI compatible here.
      api_payload = {
          "model": payload.model,
          "prompt": payload.prompt,
-         "n": payload.n,
-         "size": payload.size
      }
-     # Remove None values the target API might not like
-     api_payload = {k: v for k, v in api_payload.items() if v is not None}

-     logger.info(f"Requesting image generation for model '{payload.model}' from {target_api_url}")
-     client = get_async_client()

      try:
-         # HF Space Note: Image generation can take time, use a longer timeout if needed.
-         # Consider making this truly async if the backend supports webhooks or polling.
-         response = await client.post(target_api_url, json=api_payload, timeout=120.0) # 2 min timeout
-         response.raise_for_status() # Raise HTTP errors

-         # Return the exact response from the backend
          return JSONResponse(content=response.json())

      except httpx.TimeoutException:
-         logger.error(f"Image generation request to {target_api_url} timed out.")
          raise HTTPException(status_code=504, detail="Image generation request timed out.")
-     except httpx.HTTPStatusError as e:
-         logger.error(f"Image generation API error: {e.response.status_code} - {e.response.text}")
-         detail = f"Image generation failed: Upstream API error {e.response.status_code}"
-         try:
-             err_json = e.response.json()
-             detail = err_json.get('error', {}).get('message', detail)
-         except json.JSONDecodeError:
-             pass
-         raise HTTPException(status_code=e.response.status_code, detail=detail)
      except httpx.RequestError as e:
-         logger.error(f"Error connecting to image generation service {target_api_url}: {e}")
          raise HTTPException(status_code=502, detail=f"Error connecting to image generation service: {e}")
      except Exception as e:
-         logger.error(f"Unexpected error during image generation: {e}", exc_info=True)
          raise HTTPException(status_code=500, detail=f"An unexpected error occurred during image generation: {e}")


- # --- Utility & Admin Endpoints ---

- async def log_request(request: Request, model: Optional[str] = None):
-     """Logs basic request information asynchronously."""
-     # HF Space Note: Avoid logging sensitive info like full IP or headers unless necessary.
-     # Hashing IP provides some privacy.
-     client_host = request.client.host if request.client else "unknown"
-     ip_hash = hash(client_host) % 10000
-     timestamp = datetime.datetime.now(datetime.timezone.utc).strftime("%Y-%m-%d %H:%M:%S %Z")
-     log_message = f"Timestamp: {timestamp}, IP Hash: {ip_hash}, Method: {request.method}, Path: {request.url.path}"
-     if model:
-         log_message += f", Model: {model}"
-     logger.info(log_message)

- @app.get("/usage", tags=["Admin"])
  async def get_usage(days: int = 7):
-     """Retrieves aggregated usage statistics."""
-     # HF Space Note: Ensure usage_tracker methods are efficient, especially get_usage_summary.
-     # Caching might be needed if it becomes slow.
-     if days <= 0:
-         raise HTTPException(status_code=400, detail="Number of days must be positive.")
-     try:
-         # Run potentially CPU-bound summary generation in executor
-         loop = asyncio.get_running_loop()
-         summary = await loop.run_in_executor(executor, usage_tracker.get_usage_summary, days)
-         return summary
-     except Exception as e:
-         logger.error(f"Error retrieving usage statistics: {e}", exc_info=True)
-         raise HTTPException(status_code=500, detail="Failed to retrieve usage statistics.")
-
- # HF Space Note: Generating HTML dynamically can be resource-intensive.
- # Consider caching the generated HTML or serving a static page updated periodically.
- def generate_usage_html(usage_data: Dict) -> str:
-     """Generates an HTML report from usage data."""
-     # (Keep the HTML generation logic as provided in the original file)
-     # ... (rest of the HTML generation code from the original file) ...
-     # Ensure this function handles potentially missing keys gracefully
-     models_usage = usage_data.get('models', {})
-     endpoints_usage = usage_data.get('api_endpoints', {})
-     daily_usage = usage_data.get('recent_daily_usage', {})
-     total_requests = usage_data.get('total_requests', 0)

      model_usage_rows = "\n".join([
          f"""
          <tr>
              <td>{model}</td>
-             <td>{model_data.get('total_requests', 'N/A')}</td>
-             <td>{model_data.get('first_used', 'N/A')}</td>
-             <td>{model_data.get('last_used', 'N/A')}</td>
          </tr>
-         """ for model, model_data in models_usage.items()
-     ]) if models_usage else "<tr><td colspan='4'>No model usage data</td></tr>"

      api_usage_rows = "\n".join([
          f"""
          <tr>
              <td>{endpoint}</td>
-             <td>{endpoint_data.get('total_requests', 'N/A')}</td>
-             <td>{endpoint_data.get('first_used', 'N/A')}</td>
-             <td>{endpoint_data.get('last_used', 'N/A')}</td>
          </tr>
-         """ for endpoint, endpoint_data in endpoints_usage.items()
-     ]) if endpoints_usage else "<tr><td colspan='4'>No API endpoint usage data</td></tr>"

      daily_usage_rows = "\n".join([
-         f"""
-         <tr>
-             <td>{date}</td>
-             <td>{entity}</td>
-             <td>{requests}</td>
-         </tr>
-         """
-         for date, date_data in daily_usage.items()
-         for entity, requests in date_data.items()
-     ]) if daily_usage else "<tr><td colspan='3'>No daily usage data</td></tr>"

-     # HF Space Note: Using f-string for large HTML is okay, but consider template engines (Jinja2)
-     # for more complex pages. Ensure CSS/JS are either inline or served via separate endpoints.
      html_content = f"""
      <!DOCTYPE html>
      <html lang="en">
      <head>
          <meta charset="UTF-8">
-         <meta name="viewport" content="width=device-width, initial-scale=1.0">
          <title>Lokiai AI - Usage Statistics</title>
          <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
          <style>
-             /* (Keep the CSS styles as provided in the original file) */
              :root {{
-                 --bg-dark: #0f1011; --bg-darker: #070708; --text-primary: #e6e6e6;
-                 --text-secondary: #8c8c8c; --border-color: #2c2c2c; --accent-color: #3a6ee0;
                  --accent-hover: #4a7ef0;
              }}
-             body {{ font-family: 'Inter', sans-serif; background-color: var(--bg-dark); color: var(--text-primary); max-width: 1200px; margin: 0 auto; padding: 40px 20px; line-height: 1.6; }}
-             .logo {{ display: flex; align-items: center; justify-content: center; margin-bottom: 30px; }}
-             .logo h1 {{ font-weight: 600; font-size: 2.5em; color: var(--text-primary); margin-left: 15px; }}
-             .logo img {{ width: 60px; height: 60px; border-radius: 10px; }}
-             .container {{ background-color: var(--bg-darker); border-radius: 12px; padding: 30px; box-shadow: 0 15px 40px rgba(0,0,0,0.3); border: 1px solid var(--border-color); }}
-             h2, h3 {{ color: var(--text-primary); border-bottom: 2px solid var(--border-color); padding-bottom: 10px; font-weight: 500; }}
-             .total-requests {{ background-color: var(--accent-color); color: white; text-align: center; padding: 15px; border-radius: 8px; margin-bottom: 30px; font-weight: 600; letter-spacing: -0.5px; }}
-             table {{ width: 100%; border-collapse: separate; border-spacing: 0; margin-bottom: 30px; background-color: var(--bg-dark); border-radius: 8px; overflow: hidden; }}
-             th, td {{ border: 1px solid var(--border-color); padding: 12px; text-align: left; transition: background-color 0.3s ease; }}
-             th {{ background-color: #1e1e1e; color: var(--text-primary); font-weight: 600; text-transform: uppercase; font-size: 0.9em; }}
-             tr:nth-child(even) {{ background-color: rgba(255,255,255,0.05); }}
-             tr:hover {{ background-color: rgba(62,100,255,0.1); }}
-             @media (max-width: 768px) {{ .container {{ padding: 15px; }} table {{ font-size: 0.9em; }} }}
          </style>
      </head>
      <body>
          <div class="container">
              <div class="logo">
                  <img src="data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMTAwIDM1TDUwIDkwaDEwMHoiIGZpbGw9IiMzYTZlZTAiLz48Y2lyY2xlIGN4PSIxMDAiIGN5PSIxNDAiIHI9IjMwIiBmaWxsPSIjM2E2ZWUwIi8+PC9zdmc+" alt="Lokai AI Logo">
-                 <h1>Lokiai AI Usage</h1>
              </div>

              <div class="total-requests">
-                 Total API Requests Recorded: {total_requests}
              </div>

              <h2>Model Usage</h2>
              <table>
-                 <thead><tr><th>Model</th><th>Total Requests</th><th>First Used</th><th>Last Used</th></tr></thead>
-                 <tbody>{model_usage_rows}</tbody>
              </table>

              <h2>API Endpoint Usage</h2>
              <table>
-                 <thead><tr><th>Endpoint</th><th>Total Requests</th><th>First Used</th><th>Last Used</th></tr></thead>
-                 <tbody>{api_usage_rows}</tbody>
              </table>

-             <h2>Daily Usage (Last {usage_data.get('days_analyzed', 7)} Days)</h2>
              <table>
-                 <thead><tr><th>Date</th><th>Entity (Model/Endpoint)</th><th>Requests</th></tr></thead>
-                 <tbody>{daily_usage_rows}</tbody>
              </table>
          </div>
      </body>
@@ -1058,199 +888,166 @@ def generate_usage_html(usage_data: Dict) -> str:
      """
      return html_content

- # HF Space Note: Caching the generated HTML page can save resources.
- # Invalidate cache periodically or when usage data changes significantly.
- usage_html_cache = {"content": None, "timestamp": 0}
- CACHE_DURATION = 300 # Cache usage page for 5 minutes

- @app.get("/usage/page", response_class=HTMLResponse, tags=["Admin"])
  async def usage_page():
-     """Serves an HTML page showing usage statistics."""
-     now = time.monotonic()
-     if usage_html_cache["content"] and (now - usage_html_cache["timestamp"] < CACHE_DURATION):
-         logger.info("Serving cached usage page.")
-         return HTMLResponse(content=usage_html_cache["content"])
-
-     logger.info("Generating fresh usage page.")
-     try:
-         # Run potentially slow parts in executor
-         loop = asyncio.get_running_loop()
-         usage_data = await loop.run_in_executor(executor, usage_tracker.get_usage_summary, 7) # Get data for 7 days
-         html_content = await loop.run_in_executor(executor, generate_usage_html, usage_data)
-
-         # Update cache
-         usage_html_cache["content"] = html_content
-         usage_html_cache["timestamp"] = now
-
-         return HTMLResponse(content=html_content)
-     except Exception as e:
-         logger.error(f"Failed to generate usage page: {e}", exc_info=True)
-         # Serve stale cache if available, otherwise error
-         if usage_html_cache["content"]:
-             logger.warning("Serving stale usage page due to generation error.")
-             return HTMLResponse(content=usage_html_cache["content"])
-         else:
-             raise HTTPException(status_code=500, detail="Failed to generate usage statistics page.")
-

- # Meme Endpoint
- @app.get("/meme", tags=["Fun"])
  async def get_meme():
-     """Fetches a random meme and streams the image."""
-     # HF Space Note: Ensure meme-api.com is accessible from the HF Space network.
-     client = get_async_client()
-     meme_api_url = "https://meme-api.com/gimme"
      try:
-         logger.info("Fetching meme info...")
-         response = await client.get(meme_api_url)
-         response.raise_for_status()
          response_data = response.json()

          meme_url = response_data.get("url")
-         if not meme_url or not isinstance(meme_url, str):
-             logger.error(f"Invalid meme URL received from API: {meme_url}")
-             raise HTTPException(status_code=502, detail="Failed to get valid meme URL from API.")
-
-         logger.info(f"Fetching meme image: {meme_url}")
-         # Use streaming request for the image itself
-         async with client.stream("GET", meme_url) as image_response:
-             image_response.raise_for_status() # Check if image URL is valid
-
-             # Get content type, default to image/png
-             media_type = image_response.headers.get("content-type", "image/png")
-             if not media_type.startswith("image/"):
-                 logger.warning(f"Unexpected content type '{media_type}' for meme URL: {meme_url}")
-                 # You might want to reject non-image types
-                 # raise HTTPException(status_code=502, detail="Meme URL did not return an image.")
-
-             # Stream the image content directly
-             return StreamingResponse(
-                 image_response.aiter_bytes(),
-                 media_type=media_type,
-                 headers={'Cache-Control': 'no-cache'} # Don't cache the meme itself heavily
-             )
-
-     except httpx.HTTPStatusError as e:
-         logger.error(f"HTTP error fetching meme ({e.request.url}): {e.response.status_code}")
-         raise HTTPException(status_code=502, detail=f"Failed to fetch meme (HTTP {e.response.status_code})")
-     except httpx.RequestError as e:
-         logger.error(f"Network error fetching meme ({e.request.url}): {e}")
-         raise HTTPException(status_code=502, detail="Failed to fetch meme (Network Error)")
-     except Exception as e:
-         logger.error(f"Unexpected error fetching meme: {e}", exc_info=True)
-         raise HTTPException(status_code=500, detail="Failed to retrieve meme due to an internal error.")


- # Health Check Endpoint
- @app.get("/health", tags=["Utility"])
- async def health_check():
-     """Provides a health check status, including missing critical configurations."""
-     env_vars = get_env_vars()
-     missing_critical_vars = []

-     # Define critical vars needed for core functionality
-     critical_vars = [
-         'api_keys', 'secret_api_endpoint', 'secret_api_endpoint_2',
-         'secret_api_endpoint_3', 'secret_api_endpoint_4', 'secret_api_endpoint_5',
-         'new_img_endpoint', 'hf_space_url'
-     ]
-     # Conditionally critical vars
-     if any(model in mistral_models for model in available_model_ids):
-         critical_vars.extend(['mistral_api', 'mistral_key'])
-
-     for var_name in critical_vars:
-         value = env_vars.get(var_name)
-         # Check for None or empty strings/lists/sets
-         if value is None or (isinstance(value, (str, list, set)) and not value):
-             missing_critical_vars.append(var_name)
-
-     is_healthy = not missing_critical_vars and server_status
-     status_code = 200 if is_healthy else 503 # Service Unavailable if unhealthy

-     health_status = {
-         "status": "healthy" if is_healthy else "unhealthy",
-         "server_mode": "online" if server_status else "maintenance",
-         "missing_critical_env_vars": missing_critical_vars,
-         "details": "All critical configurations seem okay. Ready to roll! 🚀" if is_healthy else "Service issues detected. Check missing env vars or server status. 🛠️"
-     }
-     return JSONResponse(content=health_status, status_code=status_code)

- # --- Startup and Shutdown Events ---

  @app.on_event("startup")
  async def startup_event():
-     """Tasks to run when the application starts."""
      global available_model_ids
-     logger.info("Application startup sequence initiated...")
-
-     # Load models from JSON
-     models_from_file = load_models_data()
-     model_ids_from_file = {model['id'] for model in models_from_file if 'id' in model}
-
-     # Combine models from file and predefined sets
-     predefined_model_sets = mistral_models | pollinations_models | alternate_models | claude_3_models
-     all_model_ids = model_ids_from_file.union(predefined_model_sets)
-     available_model_ids = sorted(list(all_model_ids)) # Keep as sorted list
-
-     logger.info(f"Loaded {len(model_ids_from_file)} models from models.json.")
-     logger.info(f"Total {len(available_model_ids)} unique models available.")
-
-     # Initialize scraper pool (can take time)
-     # HF Space Note: Run potentially blocking I/O in executor during startup
-     loop = asyncio.get_running_loop()
-     await loop.run_in_executor(executor, get_scraper) # This initializes the pool
-
-     # Validate critical environment variables and log warnings
      env_vars = get_env_vars()
-     logger.info("Checking critical environment variables (Secrets)...")
-     await health_check() # Run health check logic to log warnings
-
-     # Pre-connect async client? Optional, httpx handles connections on demand.
-     # client = get_async_client()
-     # await client.get("https://www.google.com") # Example warm-up call
-
-     logger.info("Startup complete. Server is ready to accept requests.")
-

  @app.on_event("shutdown")
  async def shutdown_event():
-     """Tasks to run when the application shuts down."""
-     logger.info("Application shutdown sequence initiated...")
-
-     # Close the httpx client gracefully
      client = get_async_client()
      await client.aclose()
-     logger.info("HTTP client closed.")
-
-     # Shutdown the thread pool executor
-     executor.shutdown(wait=True)
-     logger.info("Thread pool executor shut down.")

-     # Clear scraper pool (optional, resources will be reclaimed anyway)
      scraper_pool.clear()
-     logger.info("Scraper pool cleared.")

      # Persist usage data
-     # HF Space Note: Ensure file system is writable if saving locally.
-     # Consider using HF Datasets or external DB for persistent storage.
-     try:
-         logger.info("Saving usage data...")
-         usage_tracker.save_data()
-         logger.info("Usage data saved.")
-     except Exception as e:
-         logger.error(f"Failed to save usage data during shutdown: {e}")

-     logger.info("Shutdown complete.")

- # --- Main Execution Block ---
- # HF Space Note: This block is mainly for local testing.
- # HF Spaces usually run the app using `uvicorn main:app --host 0.0.0.0 --port 7860` (or similar)
- # defined in the README metadata or a Procfile.
  if __name__ == "__main__":
      import uvicorn
-     logger.info("Starting server locally with uvicorn...")
-     # HF Space Note: Port 7860 is the default for HF Spaces. Host 0.0.0.0 is required.
-     uvicorn.run(app, host="0.0.0.0", port=7860, log_level="info")
 
  import os
  import re
  from dotenv import load_dotenv
+ from fastapi import FastAPI, HTTPException, Request, Depends, Security
  from fastapi.responses import StreamingResponse, HTMLResponse, JSONResponse, FileResponse
  from fastapi.security import APIKeyHeader
  from pydantic import BaseModel
  import httpx
+ from functools import lru_cache
+ from pathlib import Path
+ import json
+ import datetime
+ import time
+ import threading
+ from typing import Optional, Dict, List, Any, Generator
+ import asyncio
+ from starlette.status import HTTP_403_FORBIDDEN
+ import cloudscraper
+ from concurrent.futures import ThreadPoolExecutor
+ import uvloop
  from fastapi.middleware.gzip import GZipMiddleware
  from starlette.middleware.cors import CORSMiddleware
+ import contextlib
+ import requests
 
+ # Enable uvloop for a faster event loop
  asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())
 
+ # Thread pool for CPU-bound operations
+ executor = ThreadPoolExecutor(max_workers=16)  # Increased thread count for better parallelism
 
+ # Load environment variables once at startup
  load_dotenv()
 
+ # API key security scheme
  api_key_header = APIKeyHeader(name="Authorization", auto_error=False)
 
+ # Initialize usage tracker
+ from usage_tracker import UsageTracker
+ usage_tracker = UsageTracker()
 
+ app = FastAPI()
 
+ # Add middleware for compression and CORS
+ app.add_middleware(GZipMiddleware, minimum_size=1000)
  app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
      allow_credentials=True,
      allow_methods=["*"],
      allow_headers=["*"],
  )
 
+ # Environment variables (cached)
+ @lru_cache(maxsize=1)
+ def get_env_vars():
      return {
+         'api_keys': os.getenv('API_KEYS', '').split(','),
          'secret_api_endpoint': os.getenv('SECRET_API_ENDPOINT'),
          'secret_api_endpoint_2': os.getenv('SECRET_API_ENDPOINT_2'),
+         'secret_api_endpoint_3': os.getenv('SECRET_API_ENDPOINT_3'),
+         'secret_api_endpoint_4': "https://text.pollinations.ai/openai",
+         'secret_api_endpoint_5': os.getenv('SECRET_API_ENDPOINT_5'),  # Added new endpoint
+         'mistral_api': "https://api.mistral.ai",
          'mistral_key': os.getenv('MISTRAL_KEY'),
+         'endpoint_origin': os.getenv('ENDPOINT_ORIGIN')
      }
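
All of these lookups resolve against process environment variables (on a Space, from Secrets). For local runs, a `.env` along these lines would satisfy `get_env_vars()`; every value below is a placeholder, not a real endpoint or key:

# Illustrative .env sketch; all values are placeholders
API_KEYS=key-one,key-two
SECRET_API_ENDPOINT=https://example.com/upstream
SECRET_API_ENDPOINT_2=https://example.com/upstream-alt
SECRET_API_ENDPOINT_3=https://example.com/search
SECRET_API_ENDPOINT_5=https://example.com/claude
MISTRAL_KEY=placeholder-key
ENDPOINT_ORIGIN=https://example.com
HEADER_URL=https://example.com
NEW_IMG=https://example.com/images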
 
+ # Configuration for models - use sets for faster lookups
+ mistral_models = {
+     "mistral-large-latest",
+     "pixtral-large-latest",
+     "mistral-moderation-latest",
+     "ministral-3b-latest",
+     "ministral-8b-latest",
+     "open-mistral-nemo",
+     "mistral-small-latest",
+     "mistral-saba-latest",
+     "codestral-latest"
  }
 
+ pollinations_models = {
+     "openai",
+     "openai-large",
+     "openai-xlarge",
+     "openai-reasoning",
+     "qwen-coder",
+     "llama",
+     "mistral",
+     "searchgpt",
+     "deepseek",
+     "claude-hybridspace",
+     "deepseek-r1",
+     "deepseek-reasoner",
+     "llamalight",
+     "gemini",
+     "gemini-thinking",
+     "hormoz",
+     "phi",
+     "phi-mini",
+     "openai-audio",
+     "llama-scaleway"
  }
 
+ alternate_models = {  # heh, should work now
+     "gpt-4o",
+     "deepseek-v3",
+     "llama-3.1-8b-instruct",
+     "llama-3.1-sonar-small-128k-online",
+     "deepseek-r1-uncensored",
+     "tinyswallow1.5b",
+     "andy-3.5",
+     "o3-mini-low",
+     "hermes-3-llama-3.2-3b",
+     "creitin-r1",
+     "fluffy.1-chat",
+     "plutotext-1-text",
+     "command-a",
+     "claude-3-7-sonnet-20250219",
+     "plutogpt-3.5-turbo"
  }
 
+ claude_3_models = {  # Models for the new endpoint
+     "claude-3-7-sonnet",
+     "claude-3-7-sonnet-thinking",
+     "claude 3.5 haiku",
+     "claude 3.5 sonnet",
+     "o3-mini-medium",
+     "o3-mini-high",
+     "grok-3",
+     "grok-3-thinking",
+     "grok 2"
  }
 
+ # Supported image generation models
+ supported_image_models = {
+     "Flux Pro Ultra",
+     "grok-2-aurora",
+     "Flux Pro",
+     "Flux Pro Ultra Raw",
+     "Flux Dev",
+     "Flux Schnell",
+     "stable-diffusion-3-large-turbo",
+     "Flux Realism",
+     "stable-diffusion-ultra",
+     "dall-e-3",
+     "sdxl-lightning-4step"
  }
 
+ # Request payload model
  class Payload(BaseModel):
      model: str
+     messages: list
      stream: bool = False
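
On the wire this model accepts the familiar OpenAI-style body; a minimal example (model id illustrative):

{
    "model": "gpt-4o",
    "messages": [{"role": "user", "content": "Hello!"}],
    "stream": true
}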
 
 
 
 
 
+ # Image generation payload model
  class ImageGenerationPayload(BaseModel):
      model: str
      prompt: str
+     size: int
+     number: int
 
+ # Server status global variable
+ server_status = True
+ available_model_ids: List[str] = []
+ 
+ # Create a reusable httpx client with connection pooling
  @lru_cache(maxsize=1)
+ def get_async_client():
+     return httpx.AsyncClient(
+         timeout=60.0,
+         limits=httpx.Limits(max_keepalive_connections=50, max_connections=200)  # Increased limits
+     )
+ 
+ # Create a cloudscraper pool
+ scraper_pool = []
+ MAX_SCRAPERS = 20  # Increased pool size
 
+ def get_scraper():
      if not scraper_pool:
          for _ in range(MAX_SCRAPERS):
              scraper_pool.append(cloudscraper.create_scraper())
+     return scraper_pool[int(time.time() * 1000) % MAX_SCRAPERS]  # Simple time-based round-robin
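
The time-based index can hand two near-simultaneous callers the same scraper. If that ever matters, a locked itertools.cycle is a simple alternative; a sketch only, not part of this commit (get_scraper_cycled is a hypothetical name):

import itertools
import threading

_scraper_cycle = None
_cycle_lock = threading.Lock()

def get_scraper_cycled():
    # Deterministic round-robin over the same module-level scraper_pool.
    global _scraper_cycle
    with _cycle_lock:
        if _scraper_cycle is None:
            if not scraper_pool:
                for _ in range(MAX_SCRAPERS):
                    scraper_pool.append(cloudscraper.create_scraper())
            _scraper_cycle = itertools.cycle(scraper_pool)
        return next(_scraper_cycle)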
 
+ # API key validation - optimized to avoid string operations when possible
  async def verify_api_key(
      request: Request,
+     api_key: str = Security(api_key_header)
  ) -> bool:
+     # Allow bypass if the referer is from /playground or /image-playground
      referer = request.headers.get("referer", "")
+     if referer.startswith(("https://parthsadaria-lokiai.hf.space/playground",
+                            "https://parthsadaria-lokiai.hf.space/image-playground")):
          return True
+ 
      if not api_key:
+         raise HTTPException(
+             status_code=HTTP_403_FORBIDDEN,
+             detail="No API key provided"
+         )
+ 
+     # Only clean if needed
      if api_key.startswith('Bearer '):
+         api_key = api_key[7:]  # Remove 'Bearer ' prefix
+ 
+     # Get API keys from environment
+     valid_api_keys = get_env_vars().get('api_keys', [])
+     if not valid_api_keys or valid_api_keys == ['']:
+         raise HTTPException(
+             status_code=HTTP_403_FORBIDDEN,
+             detail="API keys not configured on server"
+         )
+ 
+     # Fast check with set operation
+     if api_key not in set(valid_api_keys):
+         raise HTTPException(
+             status_code=HTTP_403_FORBIDDEN,
+             detail="Invalid API key"
+         )
+ 
      return True
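
From the outside, the dependency simply means requests need a Bearer token unless they come from the playground pages. A quick check with httpx (the key shown is a placeholder):

import httpx

BASE = "https://parthsadaria-lokiai.hf.space"
body = {"model": "gpt-4o", "messages": [{"role": "user", "content": "hi"}]}

# Without a key the server should answer 403 ("No API key provided")
print(httpx.post(f"{BASE}/chat/completions", json=body).status_code)

# With a configured key the request passes verification
resp = httpx.post(f"{BASE}/chat/completions", json=body,
                  headers={"Authorization": "Bearer my-key"})  # placeholder key
print(resp.status_code)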
 
+ # Pre-load and cache models.json
  @lru_cache(maxsize=1)
+ def load_models_data():
      try:
+         file_path = Path(__file__).parent / 'models.json'
+         with open(file_path, 'r') as f:
              return json.load(f)
      except (FileNotFoundError, json.JSONDecodeError) as e:
+         print(f"Error loading models.json: {str(e)}")
          return []
 
+ # Async wrapper for models data
+ async def get_models():
      models_data = load_models_data()
      if not models_data:
          raise HTTPException(status_code=500, detail="Error loading available models")
      return models_data
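
The loader only assumes each entry carries an `id` field; a models.json shaped like this (entries illustrative) would satisfy it:

[
    {"id": "gpt-4o", "object": "model", "owned_by": "openai"},
    {"id": "mistral-small-latest", "object": "model", "owned_by": "mistralai"}
]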
 
+ # Enhanced async streaming - now with real-time SSE support
+ async def generate_search_async(query: str, systemprompt: Optional[str] = None, stream: bool = True):
+     # Create a streaming response channel using asyncio.Queue
+     queue = asyncio.Queue()
+ 
+     async def _fetch_search_data():
+         try:
+             headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
+ 
+             # Use the provided system prompt, or default to "Be Helpful and Friendly"
+             system_message = systemprompt or "Be Helpful and Friendly"
+ 
+             # Create the prompt history
+             prompt = [
+                 {"role": "user", "content": query},
+             ]
+             prompt.insert(0, {"content": system_message, "role": "system"})
+ 
+             # Prepare the payload for the API request
+             payload = {
+                 "is_vscode_extension": True,
+                 "message_history": prompt,
+                 "requested_model": "searchgpt",
+                 "user_input": prompt[-1]["content"],
+             }
+ 
+             # Get endpoint from environment
+             secret_api_endpoint_3 = get_env_vars()['secret_api_endpoint_3']
+             if not secret_api_endpoint_3:
+                 await queue.put({"error": "Search API endpoint not configured"})
+                 return
+ 
+             # Use AsyncClient for better performance
+             async with httpx.AsyncClient(timeout=30.0) as client:
+                 async with client.stream("POST", secret_api_endpoint_3, json=payload, headers=headers) as response:
+                     if response.status_code != 200:
+                         await queue.put({"error": f"Search API returned status code {response.status_code}"})
+                         return
+ 
+                     # Process the streaming response in real time
+                     async for line in response.aiter_lines():
+                         if line.startswith("data: "):
+                             try:
+                                 json_data = json.loads(line[6:])
+                                 content = json_data.get("choices", [{}])[0].get("delta", {}).get("content", "")
+                                 if content.strip():
+                                     cleaned_response = {
+                                         "created": json_data.get("created"),
+                                         "id": json_data.get("id"),
+                                         "model": "searchgpt",
+                                         "object": "chat.completion",
+                                         "choices": [{"message": {"content": content}}]
+                                     }
+                                     # Send to queue immediately for streaming
+                                     await queue.put({"data": f"data: {json.dumps(cleaned_response)}\n\n", "text": content})
+                             except json.JSONDecodeError:
+                                 continue
+ 
+             # Signal completion
+             await queue.put(None)
+ 
+         except Exception as e:
+             await queue.put({"error": str(e)})
+             await queue.put(None)
+ 
+     # Start the fetch process
+     asyncio.create_task(_fetch_search_data())
+ 
+     # Return the queue for consumption
+     return queue
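
Because the producer task signals completion with a None sentinel, any consumer just drains the queue until it sees it; a minimal collector, mirroring what the /searchgpt route does below:

async def collect_search(query: str) -> str:
    # Drain the queue filled by generate_search_async until the None sentinel.
    queue = await generate_search_async(query)
    parts = []
    while True:
        item = await queue.get()
        if item is None:      # producer signals completion
            break
        if "error" in item:
            raise RuntimeError(item["error"])
        parts.append(item.get("text", ""))
    return "".join(parts)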
 
+ # Cache for frequently accessed static files
  @lru_cache(maxsize=10)
+ def read_html_file(file_path):
      try:
+         with open(file_path, "r") as file:
              return file.read()
+     except FileNotFoundError:
          return None
 
+ # Basic routes
+ @app.get("/favicon.ico")
  async def favicon():
      favicon_path = Path(__file__).parent / "favicon.ico"
+     return FileResponse(favicon_path, media_type="image/x-icon")
 
+ @app.get("/banner.jpg")
+ async def banner():
+     banner_path = Path(__file__).parent / "banner.jpg"
+     return FileResponse(banner_path, media_type="image/jpeg")
 
+ @app.get("/ping")
+ async def ping():
+     # response_time here is a static placeholder, not a measured value
+     return {"message": "pong", "response_time": "0.000000 seconds"}
 
+ @app.get("/", response_class=HTMLResponse)
+ async def serve_index():
+     html_content = read_html_file("index.html")
+     if html_content is None:
+         return HTMLResponse(content="<h1>File not found</h1>", status_code=404)
+     return HTMLResponse(content=html_content)
+ 
+ @app.get("/script.js", response_class=HTMLResponse)
+ async def serve_script():
+     html_content = read_html_file("script.js")
+     if html_content is None:
+         return HTMLResponse(content="<h1>File not found</h1>", status_code=404)
+     return HTMLResponse(content=html_content)
+ 
+ @app.get("/style.css", response_class=HTMLResponse)
+ async def serve_style():
+     html_content = read_html_file("style.css")
+     if html_content is None:
+         return HTMLResponse(content="<h1>File not found</h1>", status_code=404)
+     return HTMLResponse(content=html_content)
+ @app.get("/dynamo", response_class=HTMLResponse)
371
  async def dynamic_ai_page(request: Request):
372
+ user_agent = request.headers.get('user-agent', 'Unknown User')
373
+ client_ip = request.client.host
374
+ location = f"IP: {client_ip}"
375
+
 
 
 
 
 
 
 
 
376
  prompt = f"""
377
+ Generate a dynamic HTML page for a user with the following details: with name "LOKI.AI"
 
378
  - User-Agent: {user_agent}
379
+ - Location: {location}
380
+ - Style: Cyberpunk, minimalist, or retro
381
+
382
+ Make sure the HTML is clean and includes a heading, also have cool animations a motivational message, and a cool background.
383
+ Wrap the generated HTML in triple backticks (```).
384
  """
385
+
386
  payload = {
387
+ "model": "mistral-small-latest",
388
+ "messages": [{"role": "user", "content": prompt}]
 
 
389
  }
390
+
391
  headers = {
392
+ "Authorization": "Bearer playground"
 
 
393
  }
394
+
395
+ response = requests.post("https://parthsadaria-lokiai.hf.space/chat/completions", json=payload, headers=headers)
396
+ data = response.json()
397
+
398
+ # Extract HTML from ``` blocks
399
+ html_content = re.search(r"```(.*?)```", data['choices'][0]['message']['content'], re.DOTALL)
400
+ if html_content:
401
+ html_content = html_content.group(1).strip()
402
+
403
+ # Remove the first word
404
+ if html_content:
405
+ html_content = ' '.join(html_content.split(' ')[1:])
406
+
407
+ return HTMLResponse(content=html_content)
408
+
409
+ @app.get("/playground", response_class=HTMLResponse)
410
+ async def playground():
411
+ html_content = read_html_file("playground.html")
412
+ if html_content is None:
413
+ return HTMLResponse(content="<h1>playground.html not found</h1>", status_code=404)
414
+ return HTMLResponse(content=html_content)
415
+
416
+ @app.get("/image-playground", response_class=HTMLResponse)
417
+ async def playground():
418
+ html_content = read_html_file("image-playground.html")
419
+ if html_content is None:
420
+ return HTMLResponse(content="<h1>image-playground.html not found</h1>", status_code=404)
421
+ return HTMLResponse(content=html_content)
422
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
+ # VETRA
  GITHUB_BASE = "https://raw.githubusercontent.com/Parthsadaria/Vetra/main"
 
+ FILES = {
+     "html": "index.html",
+     "css": "style.css",
+     "js": "script.js"
+ }
+ 
+ async def get_github_file(filename: str) -> Optional[str]:
      url = f"{GITHUB_BASE}/{filename}"
+     async with httpx.AsyncClient() as client:
          res = await client.get(url)
+     return res.text if res.status_code == 200 else None
 
+ @app.get("/vetra", response_class=HTMLResponse)
  async def serve_vetra():
+     html = await get_github_file(FILES["html"])
+     css = await get_github_file(FILES["css"])
+     js = await get_github_file(FILES["js"])
 
      if not html:
+         return HTMLResponse(content="<h1>index.html not found on GitHub</h1>", status_code=404)
 
+     final_html = html.replace(
+         "</head>",
+         f"<style>{css or '/* CSS not found */'}</style></head>"
+     ).replace(
+         "</body>",
+         f"<script>{js or '// JS not found'}</script></body>"
+     )
 
      return HTMLResponse(content=final_html)
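
The three GitHub fetches above run one after another; since they are independent, an asyncio.gather variant would overlap them. A sketch only, not part of this commit:

import asyncio

async def fetch_vetra_assets():
    # Fetch the three assets concurrently instead of sequentially.
    return await asyncio.gather(
        get_github_file(FILES["html"]),
        get_github_file(FILES["css"]),
        get_github_file(FILES["js"]),
    )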
 
+ # Model routes
+ @app.get("/api/v1/models")
+ @app.get("/models")
+ async def return_models():
+     return await get_models()
 
+ # Search routes with enhanced real-time streaming
+ @app.get("/searchgpt")
+ async def search_gpt(q: str, stream: Optional[bool] = False, systemprompt: Optional[str] = None):
      if not q:
          raise HTTPException(status_code=400, detail="Query parameter 'q' is required")
 
      usage_tracker.record_request(endpoint="/searchgpt")
 
+     queue = await generate_search_async(q, systemprompt=systemprompt, stream=True)
 
      if stream:
          async def stream_generator():
+             collected_text = ""
              while True:
                  item = await queue.get()
+                 if item is None:
                      break
+                 if "error" in item:
+                     yield f"data: {json.dumps({'error': item['error']})}\n\n"
                      break
+                 if "data" in item:
                      yield item["data"]
+                     collected_text += item.get("text", "")
 
          return StreamingResponse(
              stream_generator(),
+             media_type="text/event-stream"
          )
      else:
+         # For non-streaming, collect all text and return at once
+         collected_text = ""
          while True:
              item = await queue.get()
              if item is None:
                  break
+             if "error" in item:
+                 raise HTTPException(status_code=500, detail=item["error"])
+             collected_text += item.get("text", "")
 
+         return JSONResponse(content={"response": collected_text})
 
+ # Enhanced streaming with direct SSE pass-through for real-time responses
+ header_url = os.getenv('HEADER_URL')
+ @app.post("/chat/completions")
+ @app.post("/api/v1/chat/completions")
+ async def get_completion(payload: Payload, request: Request, authenticated: bool = Depends(verify_api_key)):
+     # Check server status
+     if not server_status:
+         return JSONResponse(
+             status_code=503,
+             content={"message": "Server is under maintenance. Please try again later."}
+         )
+ 
+     model_to_use = payload.model or "gpt-4o-mini"
 
+     # Validate model availability - fast lookup with set
+     if available_model_ids and model_to_use not in set(available_model_ids):
+         raise HTTPException(
+             status_code=400,
+             detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
+         )
+ 
+     # Log request without blocking
      asyncio.create_task(log_request(request, model_to_use))
      usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")
 
+     # Prepare payload
+     payload_dict = payload.dict()
+     payload_dict["model"] = model_to_use
 
+     # Whether to stream back to the client; the Payload model supplies stream=False when omitted
+     stream_enabled = payload_dict.get("stream", True)
 
+     # Get environment variables
+     env_vars = get_env_vars()
 
+     # Select the appropriate endpoint (fast lookup with sets)
      if model_to_use in mistral_models:
+         endpoint = env_vars['mistral_api']
+         custom_headers = {
+             "Authorization": f"Bearer {env_vars['mistral_key']}"
+         }
      elif model_to_use in pollinations_models:
+         endpoint = env_vars['secret_api_endpoint_4']
+         custom_headers = {}
      elif model_to_use in alternate_models:
+         endpoint = env_vars['secret_api_endpoint_2']
+         custom_headers = {}
+     elif model_to_use in claude_3_models:  # Use the new endpoint
+         endpoint = env_vars['secret_api_endpoint_5']
+         custom_headers = {}
+     else:
+         endpoint = env_vars['secret_api_endpoint']
+         custom_headers = {
+             "Origin": header_url,
+             "Priority": "u=1, i",
+             "Referer": header_url
+         }
 
+     print(f"Using endpoint: {endpoint} for model: {model_to_use}")
 
+     # Improved real-time streaming handler
+     async def real_time_stream_generator():
          try:
+             async with httpx.AsyncClient(timeout=60.0) as client:
+                 async with client.stream("POST", f"{endpoint}/v1/chat/completions", json=payload_dict, headers=custom_headers) as response:
+                     if response.status_code >= 400:
+                         error_messages = {
+                             422: "Unprocessable entity. Check your payload.",
+                             400: "Bad request. Verify input data.",
+                             403: "Forbidden. You do not have access to this resource.",
+                             404: "The requested resource was not found.",
+                         }
+                         detail = error_messages.get(response.status_code, f"Error code: {response.status_code}")
+                         raise HTTPException(status_code=response.status_code, detail=detail)
+ 
+                     # Stream the response in real time with minimal buffering
+                     async for line in response.aiter_lines():
+                         if line:
+                             # Yield immediately for faster streaming
+                             yield line + "\n"
          except httpx.TimeoutException:
+             raise HTTPException(status_code=504, detail="Request timed out")
          except httpx.RequestError as e:
+             raise HTTPException(status_code=502, detail=f"Failed to connect to upstream API: {str(e)}")
          except Exception as e:
+             if isinstance(e, HTTPException):
+                 raise e
+             raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")
 
+     # Return streaming response with proper headers
+     if stream_enabled:
          return StreamingResponse(
+             real_time_stream_generator(),
              media_type="text/event-stream",
              headers={
                  "Content-Type": "text/event-stream",
                  "Cache-Control": "no-cache",
                  "Connection": "keep-alive",
+                 "X-Accel-Buffering": "no"  # Disable proxy buffering for Nginx
              }
          )
      else:
+         # For non-streaming requests, collect the entire response body
+         # (assumes the upstream returns plain JSON when stream is disabled)
+         response_content = []
+         async for chunk in real_time_stream_generator():
+             response_content.append(chunk)
 
+         return JSONResponse(content=json.loads(''.join(response_content)))
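
Consuming the endpoint then looks like any OpenAI-compatible SSE stream; one way from Python (URL and key are placeholders):

import httpx

def stream_chat(prompt: str) -> None:
    payload = {
        "model": "gpt-4o",  # any id listed by /models
        "messages": [{"role": "user", "content": prompt}],
        "stream": True,
    }
    headers = {"Authorization": "Bearer my-key"}  # placeholder key
    with httpx.stream(
        "POST",
        "https://parthsadaria-lokiai.hf.space/chat/completions",
        json=payload,
        headers=headers,
        timeout=60.0,
    ) as response:
        for line in response.iter_lines():
            if line:
                print(line)  # raw SSE lines, e.g. 'data: {...}'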
 
+ # New image generation endpoint
+ @app.post("/images/generations")
+ async def create_image(payload: ImageGenerationPayload, authenticated: bool = Depends(verify_api_key)):
      """
+     Endpoint for generating images based on a text prompt.
      """
+     # Check server status
      if not server_status:
+         return JSONResponse(
+             status_code=503,
+             content={"message": "Server is under maintenance. Please try again later."}
+         )
 
+     # Validate model
      if payload.model not in supported_image_models:
          raise HTTPException(
              status_code=400,
+             detail=f"Model '{payload.model}' is not supported for image generation. Supported models are: {supported_image_models}"
          )
 
+     # Log the request
      usage_tracker.record_request(model=payload.model, endpoint="/images/generations")
 
+     # Prepare the payload for the external API
      api_payload = {
          "model": payload.model,
          "prompt": payload.prompt,
+         "size": payload.size,
+         "number": payload.number
      }
 
+     # Target API endpoint
+     target_api_url = os.getenv('NEW_IMG')
 
      try:
+         # Use a timeout for the image generation request
+         async with httpx.AsyncClient(timeout=60.0) as client:
+             response = await client.post(target_api_url, json=api_payload)
+ 
+             if response.status_code != 200:
+                 error_detail = response.json().get("detail", f"Image generation failed with status code: {response.status_code}")
+                 raise HTTPException(status_code=response.status_code, detail=error_detail)
 
+             # Return the response from the external API
              return JSONResponse(content=response.json())
 
+     except HTTPException:
+         # Let the explicit HTTP error above propagate instead of masking it as a 500
+         raise
      except httpx.TimeoutException:
          raise HTTPException(status_code=504, detail="Image generation request timed out.")
      except httpx.RequestError as e:
          raise HTTPException(status_code=502, detail=f"Error connecting to image generation service: {e}")
      except Exception as e:
          raise HTTPException(status_code=500, detail=f"An unexpected error occurred during image generation: {e}")
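
A request matching ImageGenerationPayload would look like this (model, prompt, and key are illustrative; size is an int per the model):

import httpx

resp = httpx.post(
    "https://parthsadaria-lokiai.hf.space/images/generations",
    headers={"Authorization": "Bearer my-key"},  # placeholder key
    json={
        "model": "Flux Pro",
        "prompt": "a lighthouse at dawn, oil painting",
        "size": 1024,   # int, per ImageGenerationPayload
        "number": 1,
    },
    timeout=60.0,
)
print(resp.json())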
 
+ # Asynchronous logging function
+ async def log_request(request, model):
+     # Get minimal data for logging (time shifted to IST, UTC+5:30)
+     current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
+     ip_hash = hash(request.client.host) % 10000  # Hash the IP for privacy
+     print(f"Time: {current_time}, IP Hash: {ip_hash}, Model: {model}")
 
+ # Cache usage statistics
+ @lru_cache(maxsize=10)
+ def get_usage_summary(days=7):
+     return usage_tracker.get_usage_summary(days)
 
+ @app.get("/usage")
  async def get_usage(days: int = 7):
+     """Retrieve usage statistics"""
+     return get_usage_summary(days)
 
+ # Generate HTML for usage page
+ def generate_usage_html(usage_data):
+     # Model usage table rows
      model_usage_rows = "\n".join([
          f"""
          <tr>
              <td>{model}</td>
+             <td>{model_data['total_requests']}</td>
+             <td>{model_data['first_used']}</td>
+             <td>{model_data['last_used']}</td>
          </tr>
+         """ for model, model_data in usage_data['models'].items()
+     ])
 
+     # API endpoint usage table rows
      api_usage_rows = "\n".join([
          f"""
          <tr>
              <td>{endpoint}</td>
+             <td>{endpoint_data['total_requests']}</td>
+             <td>{endpoint_data['first_used']}</td>
+             <td>{endpoint_data['last_used']}</td>
          </tr>
+         """ for endpoint, endpoint_data in usage_data['api_endpoints'].items()
+     ])
 
+     # Daily usage table rows
      daily_usage_rows = "\n".join([
+         "\n".join([
+             f"""
+             <tr>
+                 <td>{date}</td>
+                 <td>{entity}</td>
+                 <td>{requests}</td>
+             </tr>
+             """ for entity, requests in date_data.items()
+         ]) for date, date_data in usage_data['recent_daily_usage'].items()
+     ])
 
      html_content = f"""
      <!DOCTYPE html>
      <html lang="en">
      <head>
          <meta charset="UTF-8">
          <title>Lokiai AI - Usage Statistics</title>
          <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
          <style>
              :root {{
+                 --bg-dark: #0f1011;
+                 --bg-darker: #070708;
+                 --text-primary: #e6e6e6;
+                 --text-secondary: #8c8c8c;
+                 --border-color: #2c2c2c;
+                 --accent-color: #3a6ee0;
                  --accent-hover: #4a7ef0;
              }}
+             body {{
+                 font-family: 'Inter', sans-serif;
+                 background-color: var(--bg-dark);
+                 color: var(--text-primary);
+                 max-width: 1200px;
+                 margin: 0 auto;
+                 padding: 40px 20px;
+                 line-height: 1.6;
+             }}
+             .logo {{
+                 display: flex;
+                 align-items: center;
+                 justify-content: center;
+                 margin-bottom: 30px;
+             }}
+             .logo h1 {{
+                 font-weight: 600;
+                 font-size: 2.5em;
+                 color: var(--text-primary);
+                 margin-left: 15px;
+             }}
+             .logo img {{
+                 width: 60px;
+                 height: 60px;
+                 border-radius: 10px;
+             }}
+             .container {{
+                 background-color: var(--bg-darker);
+                 border-radius: 12px;
+                 padding: 30px;
+                 box-shadow: 0 15px 40px rgba(0,0,0,0.3);
+                 border: 1px solid var(--border-color);
+             }}
+             h2, h3 {{
+                 color: var(--text-primary);
+                 border-bottom: 2px solid var(--border-color);
+                 padding-bottom: 10px;
+                 font-weight: 500;
+             }}
+             .total-requests {{
+                 background-color: var(--accent-color);
+                 color: white;
+                 text-align: center;
+                 padding: 15px;
+                 border-radius: 8px;
+                 margin-bottom: 30px;
+                 font-weight: 600;
+                 letter-spacing: -0.5px;
+             }}
+             table {{
+                 width: 100%;
+                 border-collapse: separate;
+                 border-spacing: 0;
+                 margin-bottom: 30px;
+                 background-color: var(--bg-dark);
+                 border-radius: 8px;
+                 overflow: hidden;
+             }}
+             th, td {{
+                 border: 1px solid var(--border-color);
+                 padding: 12px;
+                 text-align: left;
+                 transition: background-color 0.3s ease;
+             }}
+             th {{
+                 background-color: #1e1e1e;
+                 color: var(--text-primary);
+                 font-weight: 600;
+                 text-transform: uppercase;
+                 font-size: 0.9em;
+             }}
+             tr:nth-child(even) {{
+                 background-color: rgba(255,255,255,0.05);
+             }}
+             tr:hover {{
+                 background-color: rgba(62,100,255,0.1);
+             }}
+             @media (max-width: 768px) {{
+                 .container {{
+                     padding: 15px;
+                 }}
+                 table {{
+                     font-size: 0.9em;
+                 }}
+             }}
          </style>
      </head>
      <body>
          <div class="container">
              <div class="logo">
                  <img src="data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMTAwIDM1TDUwIDkwaDEwMHoiIGZpbGw9IiMzYTZlZTAiLz48Y2lyY2xlIGN4PSIxMDAiIGN5PSIxNDAiIHI9IjMwIiBmaWxsPSIjM2E2ZWUwIi8+PC9zdmc+" alt="Lokai AI Logo">
+                 <h1>Lokiai AI</h1>
              </div>
 
              <div class="total-requests">
+                 Total API Requests: {usage_data['total_requests']}
              </div>
 
              <h2>Model Usage</h2>
              <table>
+                 <tr>
+                     <th>Model</th>
+                     <th>Total Requests</th>
+                     <th>First Used</th>
+                     <th>Last Used</th>
+                 </tr>
+                 {model_usage_rows}
              </table>
 
              <h2>API Endpoint Usage</h2>
              <table>
+                 <tr>
+                     <th>Endpoint</th>
+                     <th>Total Requests</th>
+                     <th>First Used</th>
+                     <th>Last Used</th>
+                 </tr>
+                 {api_usage_rows}
              </table>
 
+             <h2>Daily Usage (Last 7 Days)</h2>
              <table>
+                 <tr>
+                     <th>Date</th>
+                     <th>Entity</th>
+                     <th>Requests</th>
+                 </tr>
+                 {daily_usage_rows}
              </table>
          </div>
      </body>
      </html>
      """
      return html_content
 
+ # Cache the usage page HTML
+ @lru_cache(maxsize=1)
+ def get_usage_page_html():
+     usage_data = get_usage_summary()
+     return generate_usage_html(usage_data)
 
+ @app.get("/usage/page", response_class=HTMLResponse)
  async def usage_page():
+     """Serve an HTML page showing usage statistics"""
+     # Note: lru_cache means this HTML is generated once per process lifetime
+     html_content = get_usage_page_html()
+     return HTMLResponse(content=html_content)
 
+ # Meme endpoint with optimized networking
+ @app.get("/meme")
  async def get_meme():
      try:
+         # Use the shared client for connection pooling
+         client = get_async_client()
+         response = await client.get("https://meme-api.com/gimme")
          response_data = response.json()
 
          meme_url = response_data.get("url")
+         if not meme_url:
+             raise HTTPException(status_code=404, detail="No meme found")
 
+         image_response = await client.get(meme_url, follow_redirects=True)
 
+         # Coalesce small chunks into larger ones before yielding
+         async def stream_with_larger_chunks():
+             chunks = []
+             size = 0
+             async for chunk in image_response.aiter_bytes(chunk_size=16384):
+                 chunks.append(chunk)
+                 size += len(chunk)
+                 if size >= 65536:
+                     yield b''.join(chunks)
+                     chunks = []
+                     size = 0
 
+             if chunks:
+                 yield b''.join(chunks)
 
+         return StreamingResponse(
+             stream_with_larger_chunks(),
+             media_type=image_response.headers.get("content-type", "image/png"),
+             headers={'Cache-Control': 'max-age=3600'}  # Add caching
+         )
+     except HTTPException:
+         # Let the explicit 404 above propagate instead of masking it as a 500
+         raise
+     except Exception:
+         raise HTTPException(status_code=500, detail="Failed to retrieve meme")
 
+ # Utility function for loading model IDs - run once at startup
+ def load_model_ids(json_file_path):
+     try:
+         with open(json_file_path, 'r') as f:
+             models_data = json.load(f)
+         # Extract the 'id' from each model object (deduplicated later at startup)
+         return [model['id'] for model in models_data if 'id' in model]
+     except Exception as e:
+         print(f"Error loading model IDs: {str(e)}")
+         return []
 
  @app.on_event("startup")
  async def startup_event():
      global available_model_ids
+     available_model_ids = load_model_ids("models.json")
+     print(f"Loaded {len(available_model_ids)} model IDs")
+ 
+     # Add all pollinations models to available_model_ids
+     available_model_ids.extend(list(pollinations_models))
+     # Add alternate models to available_model_ids
+     available_model_ids.extend(list(alternate_models))
+     # Add mistral models to available_model_ids
+     available_model_ids.extend(list(mistral_models))
+     # Add claude models
+     available_model_ids.extend(list(claude_3_models))
+ 
+     available_model_ids = list(set(available_model_ids))  # Remove duplicates
+     print(f"Total available models: {len(available_model_ids)}")
+ 
+     # Preload scrapers
+     for _ in range(MAX_SCRAPERS):
+         scraper_pool.append(cloudscraper.create_scraper())
+ 
+     # Validate critical environment variables
      env_vars = get_env_vars()
+     missing_vars = []
+ 
+     if not env_vars['api_keys'] or env_vars['api_keys'] == ['']:
+         missing_vars.append('API_KEYS')
+     if not env_vars['secret_api_endpoint']:
+         missing_vars.append('SECRET_API_ENDPOINT')
+     if not env_vars['secret_api_endpoint_2']:
+         missing_vars.append('SECRET_API_ENDPOINT_2')
+     if not env_vars['secret_api_endpoint_3']:
+         missing_vars.append('SECRET_API_ENDPOINT_3')
+     if not env_vars['secret_api_endpoint_4']:
+         missing_vars.append('SECRET_API_ENDPOINT_4')
+     if not env_vars['secret_api_endpoint_5']:  # Check the new endpoint
+         missing_vars.append('SECRET_API_ENDPOINT_5')
+     if not env_vars['mistral_api'] and any(model in mistral_models for model in available_model_ids):
+         missing_vars.append('MISTRAL_API')
+     if not env_vars['mistral_key'] and any(model in mistral_models for model in available_model_ids):
+         missing_vars.append('MISTRAL_KEY')
+ 
+     if missing_vars:
+         print(f"WARNING: The following environment variables are missing: {', '.join(missing_vars)}")
+         print("Some functionality may be limited.")
+ 
+     print("Server started successfully!")
 
  @app.on_event("shutdown")
  async def shutdown_event():
+     # Close the shared httpx client
      client = get_async_client()
      await client.aclose()
 
+     # Clear scraper pool
      scraper_pool.clear()
 
      # Persist usage data
+     usage_tracker.save_data()
 
+     print("Server shutdown complete!")
 
+ # Health check endpoint
+ @app.get("/health")
+ async def health_check():
+     """Health check endpoint for monitoring"""
+     env_vars = get_env_vars()
+     missing_critical_vars = []
+ 
+     # Check critical environment variables
+     if not env_vars['api_keys'] or env_vars['api_keys'] == ['']:
+         missing_critical_vars.append('API_KEYS')
+     if not env_vars['secret_api_endpoint']:
+         missing_critical_vars.append('SECRET_API_ENDPOINT')
+     if not env_vars['secret_api_endpoint_2']:
+         missing_critical_vars.append('SECRET_API_ENDPOINT_2')
+     if not env_vars['secret_api_endpoint_3']:
+         missing_critical_vars.append('SECRET_API_ENDPOINT_3')
+     if not env_vars['secret_api_endpoint_4']:
+         missing_critical_vars.append('SECRET_API_ENDPOINT_4')
+     if not env_vars['secret_api_endpoint_5']:  # Check the new endpoint
+         missing_critical_vars.append('SECRET_API_ENDPOINT_5')
+     if not env_vars['mistral_api']:
+         missing_critical_vars.append('MISTRAL_API')
+     if not env_vars['mistral_key']:
+         missing_critical_vars.append('MISTRAL_KEY')
+ 
+     health_status = {
+         "status": "healthy" if not missing_critical_vars else "unhealthy",
+         "missing_env_vars": missing_critical_vars,
+         "server_status": server_status,
+         "message": "Everything's lit! 🚀" if not missing_critical_vars else "Uh oh, some env vars are missing. 😬"
+     }
+     return JSONResponse(content=health_status)
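
A monitor only needs to poll the route and inspect the `status` field; for example:

import httpx

health = httpx.get("https://parthsadaria-lokiai.hf.space/health").json()
assert health["status"] == "healthy", health["missing_env_vars"]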
 
  if __name__ == "__main__":
      import uvicorn
+     uvicorn.run(app, host="0.0.0.0", port=7860)