File size: 29,233 Bytes
4986fe4
d38c2eb
c3d5a54
 
 
378f2c3
8834a20
eefae44
c3d5a54
a111cf9
31eab42
aee7d80
8ef5ca7
c3d5a54
 
dc21031
c3d5a54
396b35b
 
 
 
4bcb2c2
396b35b
 
 
 
 
4bcb2c2
a06f1b3
c3d5a54
 
dc21031
c3d5a54
 
a06f1b3
c3d5a54
 
ea75284
4986fe4
378f2c3
c3d5a54
396b35b
 
 
 
 
 
 
 
 
 
c3d5a54
 
 
 
 
 
 
 
9b57c30
c3d5a54
 
 
 
 
396b35b
 
20b41cb
 
 
 
 
 
 
 
 
396b35b
20b41cb
9b57c30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cb01ce9
 
9b57c30
 
 
cb01ce9
c2e84c8
 
 
 
 
 
 
 
 
 
3fed513
 
c2e84c8
 
378f2c3
c3d5a54
378f2c3
 
 
16f4d5b
c3d5a54
 
 
 
 
396b35b
c3d5a54
 
396b35b
 
4bcb2c2
396b35b
 
 
 
4bcb2c2
c3d5a54
396b35b
 
 
 
 
 
 
 
b2ec9cb
 
 
 
 
 
 
 
 
c3d5a54
 
 
 
 
b2ec9cb
396b35b
c3d5a54
 
b2ec9cb
c3d5a54
b2ec9cb
c3d5a54
 
 
 
 
b2ec9cb
396b35b
 
c3d5a54
 
 
 
b2ec9cb
c3d5a54
 
396b35b
c3d5a54
396b35b
c3d5a54
 
 
 
 
 
396b35b
 
 
 
 
 
c3d5a54
396b35b
c3d5a54
4bcb2c2
396b35b
4bcb2c2
 
a111cf9
4bcb2c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396b35b
4bcb2c2
 
 
 
 
396b35b
4bcb2c2
 
 
 
 
 
 
 
 
a111cf9
4bcb2c2
 
396b35b
 
 
 
 
 
 
 
 
a01be99
c3d5a54
 
 
 
 
 
707dc28
 
396b35b
c3d5a54
2c1c62a
7ef5d89
396b35b
 
c613f2b
396b35b
7ef5d89
c3d5a54
 
396b35b
 
c3d5a54
396b35b
c3d5a54
 
 
ac4bad0
2ae9a8f
3109050
c3d5a54
4bcb2c2
c3d5a54
 
 
 
 
 
 
4bcb2c2
396b35b
c3d5a54
396b35b
4bcb2c2
 
 
 
 
 
 
 
 
 
 
 
 
396b35b
c3d5a54
396b35b
c3d5a54
 
 
4bcb2c2
 
 
 
 
 
 
 
 
 
 
 
 
c3d5a54
4bcb2c2
a0270ea
de98206
 
4466943
c3d5a54
 
 
 
 
 
396b35b
de98206
396b35b
 
9bfb2c6
4466943
9bfb2c6
 
de98206
396b35b
 
eefae44
de98206
eefae44
b955cc1
c5cedd6
4bcb2c2
 
 
 
c3d5a54
 
 
396b35b
de98206
c3d5a54
de98206
c3d5a54
de98206
9b57c30
 
 
de98206
c3d5a54
de98206
 
c3d5a54
de98206
7030cff
4bcb2c2
8ef5ca7
4bcb2c2
 
1d32d66
4bcb2c2
 
8ef5ca7
 
 
 
 
 
 
 
4bcb2c2
8ef5ca7
4bcb2c2
 
8ef5ca7
4bcb2c2
 
 
 
 
 
1d32d66
8ef5ca7
 
4bcb2c2
c3d5a54
4bcb2c2
 
8ef5ca7
4bcb2c2
 
 
 
 
 
 
 
8ef5ca7
4bcb2c2
 
 
 
 
 
 
 
396b35b
 
 
 
 
 
 
 
 
 
 
 
ea75284
 
 
396b35b
ea75284
396b35b
 
314966f
 
 
 
 
 
 
 
 
 
 
c3d5a54
314966f
 
 
 
 
 
 
 
 
 
 
c3d5a54
314966f
 
 
 
 
 
 
 
 
 
 
 
c20175b
ea75284
 
 
 
 
4f22928
c20175b
ea75284
c20175b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea75284
 
 
c20175b
 
 
4f22928
c20175b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea75284
 
 
396b35b
 
 
 
 
 
 
 
 
 
 
 
 
ea75284
2ab4453
4bcb2c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
396b35b
c3d5a54
 
 
 
396b35b
 
2ab4453
c3d5a54
 
 
7ef5d89
 
72b5133
 
c3d5a54
396b35b
9b57c30
 
4bcb2c2
 
 
 
 
9b57c30
4bcb2c2
9b57c30
396b35b
 
 
0ebc28b
c3d5a54
 
 
 
 
 
 
 
 
 
9b57c30
 
 
 
 
 
c3d5a54
 
9b57c30
 
 
 
 
 
 
 
 
 
 
 
 
396b35b
9b57c30
 
 
 
 
4bcb2c2
9b57c30
 
 
 
 
 
 
 
 
 
 
 
4bcb2c2
 
 
 
 
 
 
 
 
 
9b57c30
4bcb2c2
 
 
 
 
9b57c30
4bcb2c2
9b57c30
6a84e5c
 
4bcb2c2
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
import os
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request, Depends, Security
from fastapi.responses import StreamingResponse, HTMLResponse, JSONResponse, FileResponse
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
import httpx
from functools import lru_cache
from pathlib import Path
import json
import datetime
import time
import threading
from typing import Optional, Dict, List, Any, Generator
import asyncio
from starlette.status import HTTP_403_FORBIDDEN
import cloudscraper
from concurrent.futures import ThreadPoolExecutor
import uvloop
from fastapi.middleware.gzip import GZipMiddleware
from starlette.middleware.cors import CORSMiddleware
import contextlib

# Enable uvloop for faster event loop
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

# Thread pool for CPU-bound operations
executor = ThreadPoolExecutor(max_workers=16)  # Increased thread count for better parallelism

# Load environment variables once at startup
load_dotenv()

# API key security scheme
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)

# Initialize usage tracker
from usage_tracker import UsageTracker
usage_tracker = UsageTracker()

app = FastAPI()

# Add middleware for compression and CORS
app.add_middleware(GZipMiddleware, minimum_size=1000)
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Environment variables (cached)
@lru_cache(maxsize=1)
def get_env_vars():
    return {
        'api_keys': os.getenv('API_KEYS', '').split(','),
        'secret_api_endpoint': os.getenv('SECRET_API_ENDPOINT'),
        'secret_api_endpoint_2': os.getenv('SECRET_API_ENDPOINT_2'),
        'secret_api_endpoint_3': os.getenv('SECRET_API_ENDPOINT_3'),
        'secret_api_endpoint_4': "https://text.pollinations.ai/openai",
        'mistral_api': "https://api.mistral.ai",
        'mistral_key': os.getenv('MISTRAL_KEY'),
        'endpoint_origin': os.getenv('ENDPOINT_ORIGIN')
    }

# Configuration for models - use sets for faster lookups
mistral_models = {
    "mistral-large-latest",
    "pixtral-large-latest",
    "mistral-moderation-latest",
    "ministral-3b-latest",
    "ministral-8b-latest",
    "open-mistral-nemo",
    "mistral-small-latest",
    "mistral-saba-latest",
    "codestral-latest"
}

pollinations_models = {
    "openai",
    "openai-large",
    "openai-reasoning",
    "qwen-coder",
    "llama",
    "mistral",
    "searchgpt",
    "deepseek",
    "claude-hybridspace",
    "deepseek-r1",
    "deepseek-reasoner",
    "llamalight",
    "gemini",
    "gemini-thinking",
    "hormoz",
    "phi",
    "openai-audio",
    "llama-scaleway"
}

alternate_models = { #heh doesnt work
    "gpt-4o-mini",
    "deepseek-v3",
    "llama-3.1-8b-instruct",
    "searchgpt",
    "llama-3.1-sonar-small-128k-online",
    "sonar-reasoning",
    "deepseek-r1-uncensored",
    "tinyswallow1.5b",
    "andy-3.5",
    "o3-mini-low",
    "command-a",
    "claude-3-7-20250219",
    "hermes-3-llama-3.2-3b"
}

# Request payload model
class Payload(BaseModel):
    model: str
    messages: list
    stream: bool = False

# Server status global variable
server_status = True
available_model_ids: List[str] = []

# Create a reusable httpx client pool with connection pooling
@lru_cache(maxsize=1)
def get_async_client():
    return httpx.AsyncClient(
        timeout=60.0,
        limits=httpx.Limits(max_keepalive_connections=50, max_connections=200)  # Increased limits
    )

# Create a cloudscraper pool
scraper_pool = []
MAX_SCRAPERS = 20  # Increased pool size

def get_scraper():
    if not scraper_pool:
        for _ in range(MAX_SCRAPERS):
            scraper_pool.append(cloudscraper.create_scraper())
    
    return scraper_pool[int(time.time() * 1000) % MAX_SCRAPERS]  # Simple round-robin

# API key validation - optimized to avoid string operations when possible
async def verify_api_key(
    request: Request,
    api_key: str = Security(api_key_header)
) -> bool:
    # Allow bypass if the referer is from /playground
    referer = request.headers.get("referer", "")
    if referer.startswith("https://parthsadaria-lokiai.hf.space/playground"):
        return True

    if not api_key:
        raise HTTPException(
            status_code=HTTP_403_FORBIDDEN,
            detail="No API key provided"
        )

    # Only clean if needed    
    if api_key.startswith('Bearer '):
        api_key = api_key[7:]  # Remove 'Bearer ' prefix

    # Get API keys from environment
    valid_api_keys = get_env_vars().get('api_keys', [])
    if not valid_api_keys or valid_api_keys == ['']:
        raise HTTPException(
            status_code=HTTP_403_FORBIDDEN,
            detail="API keys not configured on server"
        )

    # Fast check with set operation
    if api_key not in set(valid_api_keys):
        raise HTTPException(
            status_code=HTTP_403_FORBIDDEN,
            detail="Invalid API key"
        )

    return True

# Pre-load and cache models.json
@lru_cache(maxsize=1)
def load_models_data():
    try:
        file_path = Path(__file__).parent / 'models.json'
        with open(file_path, 'r') as f:
            return json.load(f)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error loading models.json: {str(e)}")
        return []

# Async wrapper for models data
async def get_models():
    models_data = load_models_data()
    if not models_data:
        raise HTTPException(status_code=500, detail="Error loading available models")
    return models_data

# Enhanced async streaming - now with real-time SSE support
async def generate_search_async(query: str, systemprompt: Optional[str] = None, stream: bool = True):
    # Create a streaming response channel using asyncio.Queue
    queue = asyncio.Queue()
    
    async def _fetch_search_data():
        try:
            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36"}
            
            # Use the provided system prompt, or default to "Be Helpful and Friendly"
            system_message = systemprompt or "Be Helpful and Friendly"
            
            # Create the prompt history
            prompt = [
                {"role": "user", "content": query},
            ]
            
            prompt.insert(0, {"content": system_message, "role": "system"})
            
            # Prepare the payload for the API request
            payload = {
                "is_vscode_extension": True,
                "message_history": prompt,
                "requested_model": "Claude 3.7 Sonnet",
                "user_input": prompt[-1]["content"],
            }
            
            # Get endpoint from environment
            secret_api_endpoint_3 = get_env_vars()['secret_api_endpoint_3']
            if not secret_api_endpoint_3:
                await queue.put({"error": "Search API endpoint not configured"})
                return
            
            # Use AsyncClient for better performance
            async with httpx.AsyncClient(timeout=30.0) as client:
                async with client.stream("POST", secret_api_endpoint_3, json=payload, headers=headers) as response:
                    if response.status_code != 200:
                        await queue.put({"error": f"Search API returned status code {response.status_code}"})
                        return
                    
                    # Process the streaming response in real-time
                    buffer = ""
                    async for line in response.aiter_lines():
                        if line.startswith("data: "):
                            try:
                                json_data = json.loads(line[6:])
                                content = json_data.get("choices", [{}])[0].get("delta", {}).get("content", "")
                                
                                if content.strip():
                                    cleaned_response = {
                                        "created": json_data.get("created"),
                                        "id": json_data.get("id"),
                                        "model": "searchgpt",
                                        "object": "chat.completion",
                                        "choices": [
                                            {
                                                "message": {
                                                    "content": content
                                                }
                                            }
                                        ]
                                    }
                                    
                                    # Send to queue immediately for streaming
                                    await queue.put({"data": f"data: {json.dumps(cleaned_response)}\n\n", "text": content})
                            except json.JSONDecodeError:
                                continue
            
            # Signal completion
            await queue.put(None)
                
        except Exception as e:
            await queue.put({"error": str(e)})
            await queue.put(None)
    
    # Start the fetch process
    asyncio.create_task(_fetch_search_data())
    
    # Return the queue for consumption
    return queue

# Cache for frequently accessed static files
@lru_cache(maxsize=10)
def read_html_file(file_path):
    try:
        with open(file_path, "r") as file:
            return file.read()
    except FileNotFoundError:
        return None

# Basic routes
@app.get("/favicon.ico")
async def favicon():
    favicon_path = Path(__file__).parent / "favicon.ico"
    return FileResponse(favicon_path, media_type="image/x-icon")

@app.get("/ping")
async def ping():
    return {"message": "pong", "response_time": "0.000000 seconds"}

@app.get("/", response_class=HTMLResponse)
async def root():
    html_content = read_html_file("index.html")
    if html_content is None:
        return HTMLResponse(content="<h1>File not found</h1>", status_code=404)
    return HTMLResponse(content=html_content)

@app.get("/playground", response_class=HTMLResponse)
async def playground():
    html_content = read_html_file("playground.html")
    if html_content is None:
        return HTMLResponse(content="<h1>playground.html not found</h1>", status_code=404)
    return HTMLResponse(content=html_content)

# Model routes
@app.get("/api/v1/models")
@app.get("/models")
async def return_models():
    return await get_models()

# Search routes with enhanced real-time streaming
@app.get("/searchgpt")
async def search_gpt(q: str, stream: Optional[bool] = False, systemprompt: Optional[str] = None):
    if not q:
        raise HTTPException(status_code=400, detail="Query parameter 'q' is required")
    
    usage_tracker.record_request(endpoint="/searchgpt")
    
    queue = await generate_search_async(q, systemprompt=systemprompt, stream=True)
    
    if stream:
        async def stream_generator():
            collected_text = ""
            while True:
                item = await queue.get()
                if item is None:
                    break
                
                if "error" in item:
                    yield f"data: {json.dumps({'error': item['error']})}\n\n"
                    break
                
                if "data" in item:
                    yield item["data"]
                    collected_text += item.get("text", "")
        
        return StreamingResponse(
            stream_generator(),
            media_type="text/event-stream"
        )
    else:
        # For non-streaming, collect all text and return at once
        collected_text = ""
        while True:
            item = await queue.get()
            if item is None:
                break
            
            if "error" in item:
                raise HTTPException(status_code=500, detail=item["error"])
            
            collected_text += item.get("text", "")
        
        return JSONResponse(content={"response": collected_text})

# Enhanced streaming with direct SSE pass-through for real-time responses
@app.post("/chat/completions")
@app.post("/api/v1/chat/completions")
async def get_completion(payload: Payload, request: Request, authenticated: bool = Depends(verify_api_key)):
    # Check server status
    if not server_status:
        return JSONResponse(
            status_code=503,
            content={"message": "Server is under maintenance. Please try again later."}
        )
    
    model_to_use = payload.model or "gpt-4o-mini"

    # Validate model availability - fast lookup with set
    if available_model_ids and model_to_use not in set(available_model_ids):
        raise HTTPException(
            status_code=400,
            detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
        )

    # Log request without blocking
    asyncio.create_task(log_request(request, model_to_use))
    usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")

    # Prepare payload
    payload_dict = payload.dict()
    payload_dict["model"] = model_to_use
    
    # Ensure stream is True for real-time streaming (can be overridden by client)
    stream_enabled = payload_dict.get("stream", True)
    
    # Get environment variables
    env_vars = get_env_vars()

    # Select the appropriate endpoint (fast lookup with sets)
    if model_to_use in mistral_models:
        endpoint = env_vars['mistral_api']
        custom_headers = {
            "Authorization": f"Bearer {env_vars['mistral_key']}"
        }
    elif model_to_use in pollinations_models:
        endpoint = env_vars['secret_api_endpoint_4']
        custom_headers = {}
    elif model_to_use in alternate_models:
        endpoint = env_vars['secret_api_endpoint_2']
        custom_headers = {}
    else:
        endpoint = env_vars['secret_api_endpoint']
        custom_headers = {}
    
    print(f"Using endpoint: {endpoint} for model: {model_to_use}")
    
    # Improved real-time streaming handler
    async def real_time_stream_generator():
        try:
            async with httpx.AsyncClient(timeout=60.0) as client:
                async with client.stream("POST", f"{endpoint}/v1/chat/completions", json=payload_dict, headers=custom_headers) as response:
                    if response.status_code >= 400:
                        error_messages = {
                            422: "Unprocessable entity. Check your payload.",
                            400: "Bad request. Verify input data.",
                            403: "Forbidden. You do not have access to this resource.",
                            404: "The requested resource was not found.",
                        }
                        detail = error_messages.get(response.status_code, f"Error code: {response.status_code}")
                        raise HTTPException(status_code=response.status_code, detail=detail)
                    
                    # Stream the response in real-time with minimal buffering
                    async for line in response.aiter_lines():
                        if line:
                            # Yield immediately for faster streaming
                            yield line + "\n"
        except httpx.TimeoutException:
            raise HTTPException(status_code=504, detail="Request timed out")
        except httpx.RequestError as e:
            raise HTTPException(status_code=502, detail=f"Failed to connect to upstream API: {str(e)}")
        except Exception as e:
            if isinstance(e, HTTPException):
                raise e
            raise HTTPException(status_code=500, detail=f"An error occurred: {str(e)}")

    # Return streaming response with proper headers
    if stream_enabled:
        return StreamingResponse(
            real_time_stream_generator(),
            media_type="text/event-stream",
            headers={
                "Content-Type": "text/event-stream",
                "Cache-Control": "no-cache",
                "Connection": "keep-alive",
                "X-Accel-Buffering": "no"  # Disable proxy buffering for Nginx
            }
        )
    else:
        # For non-streaming requests, collect the entire response
        response_content = []
        async for chunk in real_time_stream_generator():
            response_content.append(chunk)
        
        return JSONResponse(content=json.loads(''.join(response_content)))

# Asynchronous logging function
async def log_request(request, model):
    # Get minimal data for logging
    current_time = (datetime.datetime.utcnow() + datetime.timedelta(hours=5, minutes=30)).strftime("%Y-%m-%d %I:%M:%S %p")
    ip_hash = hash(request.client.host) % 10000  # Hash the IP for privacy
    print(f"Time: {current_time}, IP Hash: {ip_hash}, Model: {model}")

# Cache usage statistics
@lru_cache(maxsize=10)
def get_usage_summary(days=7):
    return usage_tracker.get_usage_summary(days)

@app.get("/usage")
async def get_usage(days: int = 7):
    """Retrieve usage statistics"""
    return get_usage_summary(days)

# Generate HTML for usage page
def generate_usage_html(usage_data):
    # Model Usage Table Rows
    model_usage_rows = "\n".join([
        f"""
        <tr>
            <td>{model}</td>
            <td>{model_data['total_requests']}</td>
            <td>{model_data['first_used']}</td>
            <td>{model_data['last_used']}</td>
        </tr>
        """ for model, model_data in usage_data['models'].items()
    ])
    
    # API Endpoint Usage Table Rows
    api_usage_rows = "\n".join([
        f"""
        <tr>
            <td>{endpoint}</td>
            <td>{endpoint_data['total_requests']}</td>
            <td>{endpoint_data['first_used']}</td>
            <td>{endpoint_data['last_used']}</td>
        </tr>
        """ for endpoint, endpoint_data in usage_data['api_endpoints'].items()
    ])
    
    # Daily Usage Table Rows
    daily_usage_rows = "\n".join([
        "\n".join([
            f"""
            <tr>
                <td>{date}</td>
                <td>{entity}</td>
                <td>{requests}</td>
            </tr>
            """ for entity, requests in date_data.items()
        ]) for date, date_data in usage_data['recent_daily_usage'].items()
    ])
    
    html_content = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Lokiai AI - Usage Statistics</title>
        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
        <style>
            :root {{
                --bg-dark: #0f1011;
                --bg-darker: #070708;
                --text-primary: #e6e6e6;
                --text-secondary: #8c8c8c;
                --border-color: #2c2c2c;
                --accent-color: #3a6ee0;
                --accent-hover: #4a7ef0;
            }}
            body {{
                font-family: 'Inter', sans-serif;
                background-color: var(--bg-dark);
                color: var(--text-primary);
                max-width: 1200px;
                margin: 0 auto;
                padding: 40px 20px;
                line-height: 1.6;
            }}
            .logo {{
                display: flex;
                align-items: center;
                justify-content: center;
                margin-bottom: 30px;
            }}
            .logo h1 {{
                font-weight: 600;
                font-size: 2.5em;
                color: var(--text-primary);
                margin-left: 15px;
            }}
            .logo img {{
                width: 60px;
                height: 60px;
                border-radius: 10px;
            }}
            .container {{
                background-color: var(--bg-darker);
                border-radius: 12px;
                padding: 30px;
                box-shadow: 0 15px 40px rgba(0,0,0,0.3);
                border: 1px solid var(--border-color);
            }}
            h2, h3 {{
                color: var(--text-primary);
                border-bottom: 2px solid var(--border-color);
                padding-bottom: 10px;
                font-weight: 500;
            }}
            .total-requests {{
                background-color: var(--accent-color);
                color: white;
                text-align: center;
                padding: 15px;
                border-radius: 8px;
                margin-bottom: 30px;
                font-weight: 600;
                letter-spacing: -0.5px;
            }}
            table {{
                width: 100%;
                border-collapse: separate;
                border-spacing: 0;
                margin-bottom: 30px;
                background-color: var(--bg-dark);
                border-radius: 8px;
                overflow: hidden;
            }}
            th, td {{
                border: 1px solid var(--border-color);
                padding: 12px;
                text-align: left;
                transition: background-color 0.3s ease;
            }}
            th {{
                background-color: #1e1e1e;
                color: var(--text-primary);
                font-weight: 600;
                text-transform: uppercase;
                font-size: 0.9em;
            }}
            tr:nth-child(even) {{
                background-color: rgba(255,255,255,0.05);
            }}
            tr:hover {{
                background-color: rgba(62,100,255,0.1);
            }}
            @media (max-width: 768px) {{
                .container {{
                    padding: 15px;
                }}
                table {{
                    font-size: 0.9em;
                }}
            }}
        </style>
    </head>
    <body>
        <div class="container">
            <div class="logo">
                <img src="data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMTAwIDM1TDUwIDkwaDEwMHoiIGZpbGw9IiMzYTZlZTAiLz48Y2lyY2xlIGN4PSIxMDAiIGN5PSIxNDAiIHI9IjMwIiBmaWxsPSIjM2E2ZWUwIi8+PC9zdmc+" alt="Lokai AI Logo">
                <h1>Lokiai AI</h1>
            </div>
            
            <div class="total-requests">
                Total API Requests: {usage_data['total_requests']}
            </div>
            
            <h2>Model Usage</h2>
            <table>
                <tr>
                    <th>Model</th>
                    <th>Total Requests</th>
                    <th>First Used</th>
                    <th>Last Used</th>
                </tr>
                {model_usage_rows}
            </table>
            
            <h2>API Endpoint Usage</h2>
            <table>
                <tr>
                    <th>Endpoint</th>
                    <th>Total Requests</th>
                    <th>First Used</th>
                    <th>Last Used</th>
                </tr>
                {api_usage_rows}
            </table>
            
            <h2>Daily Usage (Last 7 Days)</h2>
            <table>
                <tr>
                    <th>Date</th>
                    <th>Entity</th>
                    <th>Requests</th>
                </tr>
                {daily_usage_rows}
            </table>
        </div>
    </body>
    </html>
    """
    return html_content

# Cache the usage page HTML
@lru_cache(maxsize=1)
def get_usage_page_html():
    usage_data = get_usage_summary()
    return generate_usage_html(usage_data)

@app.get("/usage/page", response_class=HTMLResponse)
async def usage_page():
    """Serve an HTML page showing usage statistics"""
    # Use cached HTML if available, regenerate if not
    html_content = get_usage_page_html()
    return HTMLResponse(content=html_content)

# Meme endpoint with optimized networking
@app.get("/meme")
async def get_meme():
    try:
        # Use the shared client for connection pooling
        client = get_async_client()
        response = await client.get("https://meme-api.com/gimme")
        response_data = response.json()

        meme_url = response_data.get("url")
        if not meme_url:
            raise HTTPException(status_code=404, detail="No meme found")

        image_response = await client.get(meme_url, follow_redirects=True)
        
        # Use larger chunks for streaming
        async def stream_with_larger_chunks():
            chunks = []
            size = 0
            async for chunk in image_response.aiter_bytes(chunk_size=16384):
                chunks.append(chunk)
                size += len(chunk)
                
                if size >= 65536:
                    yield b''.join(chunks)
                    chunks = []
                    size = 0
            
            if chunks:
                yield b''.join(chunks)
                
        return StreamingResponse(
            stream_with_larger_chunks(), 
            media_type=image_response.headers.get("content-type", "image/png"),
            headers={'Cache-Control': 'max-age=3600'}  # Add caching
        )
    except Exception:
        raise HTTPException(status_code=500, detail="Failed to retrieve meme")

# Utility function for loading model IDs - optimized to run once at startup
def load_model_ids(json_file_path):
    try:
        with open(json_file_path, 'r') as f:
            models_data = json.load(f)
            # Extract 'id' from each model object and use a set for fast lookups
            return [model['id'] for model in models_data if 'id' in model]
    except Exception as e:
        print(f"Error loading model IDs: {str(e)}")
        return []

@app.on_event("startup")
async def startup_event():
    global available_model_ids
    available_model_ids = load_model_ids("models.json")
    print(f"Loaded {len(available_model_ids)} model IDs")
    
    # Add all pollinations models to available_model_ids
    available_model_ids.extend(list(pollinations_models))
    # Add alternate models to available_model_ids
    available_model_ids.extend(list(alternate_models))
    # Add mistral models to available_model_ids
    available_model_ids.extend(list(mistral_models))
    
    available_model_ids = list(set(available_model_ids))  # Remove duplicates
    print(f"Total available models: {len(available_model_ids)}")
    
    # Preload scrapers
    for _ in range(MAX_SCRAPERS):
        scraper_pool.append(cloudscraper.create_scraper())
    
    # Validate critical environment variables
    env_vars = get_env_vars()
    missing_vars = []
    
    if not env_vars['secret_api_endpoint']:
        missing_vars.append('SECRET_API_ENDPOINT')
    if not env_vars['secret_api_endpoint_2']:
        missing_vars.append('SECRET_API_ENDPOINT_2')
    if not env_vars['secret_api_endpoint_3']:
        missing_vars.append('SECRET_API_ENDPOINT_3')
    if not env_vars['secret_api_endpoint_4']:
        missing_vars.append('SECRET_API_ENDPOINT_4')
    if not env_vars['mistral_api'] and any(model in mistral_models for model in available_model_ids):
        missing_vars.append('MISTRAL_API')
    if not env_vars['mistral_key'] and any(model in mistral_models for model in available_model_ids):
        missing_vars.append('MISTRAL_KEY')
    
    if missing_vars:
        print(f"WARNING: The following environment variables are missing: {', '.join(missing_vars)}")
        print("Some functionality may be limited.")

    print("Server started successfully!")

@app.on_event("shutdown")
async def shutdown_event():
    # Close the httpx client
    client = get_async_client()
    await client.aclose()
    
    # Clear scraper pool
    scraper_pool.clear()
    
    # Persist usage data
    usage_tracker.save_data()
    
    print("Server shutdown complete!")

# Health check endpoint
# Health check endpoint
@app.get("/health")
async def health_check():
    """Health check endpoint for monitoring"""
    env_vars = get_env_vars()
    missing_critical_vars = []
    
    # Check critical environment variables
    if not env_vars['api_keys'] or env_vars['api_keys'] == ['']:
        missing_critical_vars.append('API_KEYS')
    if not env_vars['secret_api_endpoint']:
        missing_critical_vars.append('SECRET_API_ENDPOINT')
    if not env_vars['secret_api_endpoint_2']:
        missing_critical_vars.append('SECRET_API_ENDPOINT_2')
    if not env_vars['secret_api_endpoint_3']:
        missing_critical_vars.append('SECRET_API_ENDPOINT_3')
    if not env_vars['secret_api_endpoint_4']:
        missing_critical_vars.append('SECRET_API_ENDPOINT_4')
    if not env_vars['mistral_api']:
        missing_critical_vars.append('MISTRAL_API')
    if not env_vars['mistral_key']:
        missing_critical_vars.append('MISTRAL_KEY')
    
    health_status = {
        "status": "healthy" if not missing_critical_vars else "unhealthy",
        "missing_env_vars": missing_critical_vars,
        "server_status": server_status,
        "message": "Everything's lit! πŸš€" if not missing_critical_vars else "Uh oh, some env vars are missing. 😬"
    }
    return JSONResponse(content=health_status)

if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=7860)