File size: 28,548 Bytes
4986fe4
d38c2eb
c3d5a54
 
 
378f2c3
8834a20
eefae44
c3d5a54
a111cf9
31eab42
aee7d80
c3d5a54
 
dc21031
c3d5a54
396b35b
 
 
 
 
 
 
 
 
 
a06f1b3
c3d5a54
 
dc21031
c3d5a54
 
a06f1b3
c3d5a54
 
ea75284
4986fe4
378f2c3
c3d5a54
396b35b
 
 
 
 
 
 
 
 
 
c3d5a54
 
 
 
 
 
 
 
 
 
 
 
 
 
396b35b
 
20b41cb
 
 
 
 
 
 
 
 
396b35b
20b41cb
c2e84c8
 
 
 
 
 
 
 
 
 
 
 
 
378f2c3
c3d5a54
378f2c3
 
 
16f4d5b
c3d5a54
 
 
 
 
396b35b
c3d5a54
 
396b35b
 
 
 
 
 
 
 
c3d5a54
396b35b
 
 
 
 
 
 
 
c3d5a54
 
 
 
 
 
396b35b
 
c3d5a54
 
396b35b
c3d5a54
 
 
 
 
 
 
396b35b
 
 
c3d5a54
 
 
 
396b35b
c3d5a54
 
396b35b
c3d5a54
396b35b
c3d5a54
 
 
 
 
 
396b35b
 
 
 
 
 
c3d5a54
396b35b
c3d5a54
396b35b
 
 
a111cf9
396b35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1ba1f47
396b35b
 
 
 
 
 
 
 
 
 
 
 
 
 
a111cf9
396b35b
 
 
 
 
 
 
 
 
 
 
a01be99
c3d5a54
 
 
 
 
 
707dc28
 
396b35b
c3d5a54
2c1c62a
7ef5d89
396b35b
 
c613f2b
396b35b
7ef5d89
c3d5a54
 
396b35b
 
c3d5a54
396b35b
c3d5a54
 
 
ac4bad0
2ae9a8f
3109050
c3d5a54
 
 
 
 
 
 
 
 
396b35b
 
c3d5a54
396b35b
 
 
 
c3d5a54
396b35b
c3d5a54
 
 
396b35b
 
c3d5a54
 
a0270ea
de98206
 
4466943
c3d5a54
 
 
 
 
 
396b35b
de98206
396b35b
 
9bfb2c6
4466943
9bfb2c6
 
de98206
396b35b
 
eefae44
de98206
eefae44
b955cc1
c5cedd6
de98206
c3d5a54
 
 
396b35b
de98206
c3d5a54
de98206
c3d5a54
de98206
 
c3d5a54
de98206
 
c3d5a54
de98206
 
396b35b
 
de98206
3109050
1d32d66
de98206
d03f8cc
de98206
 
d03f8cc
 
 
de98206
 
c3d5a54
 
 
 
 
 
 
 
 
de98206
396b35b
 
 
 
 
1d32d66
 
396b35b
 
 
 
 
 
 
 
 
 
 
 
de98206
1d32d66
c3d5a54
 
de98206
a0270ea
c3d5a54
396b35b
 
 
 
 
 
 
 
c3d5a54
36f3cc3
757b439
c3d5a54
36f3cc3
 
 
 
 
 
c3d5a54
dc21031
36f3cc3
88cbc7b
c3d5a54
36f3cc3
 
c3d5a54
ea75284
c3d5a54
757b439
 
 
c3d5a54
757b439
 
 
 
 
 
 
 
 
 
88cbc7b
757b439
 
88cbc7b
c3d5a54
88cbc7b
757b439
88cbc7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757b439
36f3cc3
396b35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757b439
396b35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
757b439
396b35b
 
 
 
 
 
 
 
 
757b439
88cbc7b
 
c3d5a54
 
 
 
6a3f7b0
396b35b
c3d5a54
 
6a3f7b0
396b35b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3d5a54
 
 
396b35b
 
 
 
 
ea75284
 
 
396b35b
ea75284
396b35b
 
314966f
 
 
 
 
 
 
 
 
 
 
c3d5a54
314966f
 
 
 
 
 
 
 
 
 
 
c3d5a54
314966f
 
 
 
 
 
 
 
 
 
 
 
c20175b
ea75284
 
 
 
 
4f22928
c20175b
ea75284
c20175b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea75284
 
 
c20175b
 
 
4f22928
c20175b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
ea75284
 
 
396b35b
 
 
 
 
 
 
 
 
 
 
 
 
ea75284
2ab4453
396b35b
c3d5a54
 
 
 
396b35b
 
2ab4453
c3d5a54
 
 
7ef5d89
 
72b5133
 
c3d5a54
396b35b
 
 
 
0ebc28b
c3d5a54
 
 
 
 
 
 
 
 
 
 
 
 
396b35b
 
c3d5a54
6a84e5c
 
396b35b
 
 
ad8074f
 
396b35b
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
import os
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException, Request, Depends, Security
from fastapi.responses import StreamingResponse, HTMLResponse, JSONResponse, FileResponse
from fastapi.security import APIKeyHeader
from pydantic import BaseModel
import httpx
from functools import lru_cache
from pathlib import Path
import json
import datetime
import time
from typing import Optional, Dict, List, Any, Generator
import asyncio
from starlette.status import HTTP_403_FORBIDDEN
import cloudscraper
from concurrent.futures import ThreadPoolExecutor
import uvloop
from fastapi.middleware.gzip import GZipMiddleware
from starlette.middleware.cors import CORSMiddleware

# Install uvloop's event-loop policy so every event loop created afterwards
# in this process is a uvloop loop.
asyncio.set_event_loop_policy(uvloop.EventLoopPolicy())

# Thread pool for blocking calls that must not run on the event loop.
# In this file it runs the synchronous cloudscraper request made by
# generate_search_async() (via loop.run_in_executor).
executor = ThreadPoolExecutor(max_workers=8)

# Load variables from a .env file (if present) into the process environment
# once at import time; get_env_vars() reads them later through os.getenv().
load_dotenv()

# API key security scheme: the key is read from the "Authorization" header.
# auto_error=False leaves the missing-header case to verify_api_key(), which
# raises its own 403 with a specific message.
api_key_header = APIKeyHeader(name="Authorization", auto_error=False)

# Initialize the usage tracker (project-local module). The route handlers call
# usage_tracker.record_request(...) and the usage endpoints read
# usage_tracker.get_usage_summary(...).
from usage_tracker import UsageTracker
usage_tracker = UsageTracker()

# The ASGI application object; all routes below are registered on it.
app = FastAPI()

# Add middleware for compression and CORS.
# GZip is configured with minimum_size=1000 (bytes).
app.add_middleware(GZipMiddleware, minimum_size=1000)
# NOTE(review): every origin, method and header is allowed *and* credentials
# are enabled. Confirm that this permissive policy is intended for production.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Environment variables (cached)
@lru_cache(maxsize=1)
def get_env_vars():
    """Return the service configuration read from the environment.

    The result is computed once and cached for the life of the process, so
    changes to the environment after the first call are not picked up unless
    ``get_env_vars.cache_clear()`` is called.

    Returns:
        dict with the keys ``api_keys`` (list of non-empty, whitespace-trimmed
        keys parsed from the comma-separated ``API_KEYS`` variable; an empty
        list when the variable is unset or blank), ``secret_api_endpoint``,
        ``secret_api_endpoint_2``, ``secret_api_endpoint_3``, ``mistral_api``
        (fixed URL), ``mistral_key``, ``image_endpoint`` and
        ``endpoint_origin``. Values other than ``api_keys`` and
        ``mistral_api`` are ``None`` when the variable is not set.
    """
    # "a, b" must yield ["a", "b"], and an unset/blank variable must yield []
    # rather than [""], so that an empty string is never a configured key.
    raw_keys = os.getenv('API_KEYS', '')
    api_keys = [key.strip() for key in raw_keys.split(',') if key.strip()]

    return {
        'api_keys': api_keys,
        'secret_api_endpoint': os.getenv('SECRET_API_ENDPOINT'),
        'secret_api_endpoint_2': os.getenv('SECRET_API_ENDPOINT_2'),
        'secret_api_endpoint_3': os.getenv('SECRET_API_ENDPOINT_3'),
        'mistral_api': "https://api.mistral.ai",
        'mistral_key': os.getenv('MISTRAL_KEY'),
        'image_endpoint': os.getenv("IMAGE_ENDPOINT"),
        'endpoint_origin': os.getenv('ENDPOINT_ORIGIN')
    }

# Configuration for models - sets give constant-time membership tests.
#
# Models in this set are sent by get_completion() to the Mistral API
# (env_vars['mistral_api']) with a Bearer token built from MISTRAL_KEY.
mistral_models = {
    "mistral-large-latest",
    "pixtral-large-latest",
    "mistral-moderation-latest",
    "ministral-3b-latest",
    "ministral-8b-latest",
    "open-mistral-nemo",
    "mistral-small-latest",
    "mistral-saba-latest",
    "codestral-latest"
}

# Models in this set are sent by get_completion() to SECRET_API_ENDPOINT_2
# without extra headers. Any model in neither set goes to SECRET_API_ENDPOINT.
alternate_models = {
    "gpt-4o-mini",
    "deepseek-v3",
    "llama-3.1-8b-instruct",
    "searchgpt",
    "llama-3.1-sonar-small-128k-online",
    "sonar-reasoning",
    "deepseek-r1-uncensored",
    "tinyswallow1.5b",
    "andy-3.5",
    "o3-mini-low",
    "hermes-3-llama-3.2-3b"
}

# Request payload model for the chat-completion endpoints.
# get_completion() converts it to a dict and forwards it upstream as the
# JSON body of the request.
class Payload(BaseModel):
    # Model identifier; get_completion() falls back to "gpt-4o-mini" when
    # this is empty.
    model: str
    # Conversation messages, forwarded upstream as-is (element shape is not
    # validated here).
    messages: list
    # Streaming flag, forwarded upstream as part of the payload.
    stream: bool = False

# Server status global variable: while this is falsy, get_completion()
# answers 503 ("under maintenance") instead of proxying the request.
server_status = True
# Model IDs accepted by get_completion(); filled by startup_event() from
# models.json. While the list is empty, model validation is skipped.
available_model_ids: List[str] = []

# Shared httpx client (created lazily, exactly once per process)
@lru_cache(maxsize=1)
def get_async_client():
    """Return the process-wide ``httpx.AsyncClient``.

    The first call builds the client with a 60 second timeout and a pool
    capped at 100 connections, 20 of which may be kept alive; ``lru_cache``
    hands the same instance back on every later call.
    """
    pool_limits = httpx.Limits(max_keepalive_connections=20, max_connections=100)
    shared_client = httpx.AsyncClient(timeout=60.0, limits=pool_limits)
    return shared_client

# Create a cloudscraper pool
scraper_pool = []
MAX_SCRAPERS = 10
# Position of the next scraper to hand out; only ever increases.
_next_scraper_index = 0

def get_scraper():
    """Return the next scraper from the pool in round-robin order.

    The pool is filled with ``MAX_SCRAPERS`` scrapers on first use if nothing
    has populated it yet. Successive calls walk the pool in order and wrap
    around, so load is spread evenly even when many calls arrive within the
    same millisecond (the previous clock-based index handed all of those the
    same scraper).

    The index is taken modulo the *actual* pool length, so a pool holding
    more or fewer than ``MAX_SCRAPERS`` entries is still indexed safely.
    """
    global _next_scraper_index

    if not scraper_pool:
        for _ in range(MAX_SCRAPERS):
            scraper_pool.append(cloudscraper.create_scraper())

    # The read-then-increment is not atomic across threads; the worst case
    # is two concurrent callers sharing one scraper, which is harmless here.
    chosen = scraper_pool[_next_scraper_index % len(scraper_pool)]
    _next_scraper_index += 1
    return chosen

# API key validation
async def verify_api_key(api_key: str = Security(api_key_header)) -> bool:
    """Validate the caller's API key; used as a FastAPI dependency.

    The key is taken from the ``Authorization`` header and may be sent either
    bare or as ``Bearer <key>``.

    Args:
        api_key: Raw header value injected by FastAPI (``None`` when the
            header is absent, because the scheme uses ``auto_error=False``).

    Returns:
        ``True`` when the key matches one of the configured keys.

    Raises:
        HTTPException: 403 when no key is supplied, when the server has no
            keys configured, or when the key does not match.
    """
    import secrets

    if not api_key:
        raise HTTPException(
            status_code=HTTP_403_FORBIDDEN,
            detail="No API key provided"
        )

    # Accept both "Bearer <key>" and a bare "<key>".
    if api_key.startswith('Bearer '):
        api_key = api_key[7:]  # Remove 'Bearer ' prefix
    api_key = api_key.strip()

    # Ignore blank entries so that an empty string can never be a valid key
    # (e.g. API_KEYS unset, or a stray comma in the configured list).
    valid_api_keys = [
        key.strip() for key in get_env_vars()['api_keys'] if key and key.strip()
    ]
    if not valid_api_keys:
        raise HTTPException(
            status_code=HTTP_403_FORBIDDEN,
            detail="API keys not configured on server"
        )

    # Compare as bytes with a constant-time primitive so response timing does
    # not reveal how much of a key was guessed correctly. Encoding first also
    # lets compare_digest handle non-ASCII input.
    candidate = api_key.encode('utf-8')
    key_matches = bool(candidate) and any(
        secrets.compare_digest(candidate, key.encode('utf-8'))
        for key in valid_api_keys
    )
    if not key_matches:
        raise HTTPException(
            status_code=HTTP_403_FORBIDDEN,
            detail="Invalid API key"
        )

    return True

# models.json loader - the parsed result is memoised for the whole process
@lru_cache(maxsize=1)
def load_models_data():
    """Parse the ``models.json`` that sits next to this module.

    Returns the decoded JSON document. If the file is missing or is not
    valid JSON, the problem is printed and an empty list is returned
    instead. Either outcome is cached by ``lru_cache``.
    """
    models_path = Path(__file__).parent / 'models.json'
    try:
        with open(models_path, 'r') as handle:
            raw_text = handle.read()
        return json.loads(raw_text)
    except (FileNotFoundError, json.JSONDecodeError) as e:
        print(f"Error loading models.json: {str(e)}")
        return []

# Coroutine front-end for the cached models list
async def get_models():
    """Return the cached models data, or fail with HTTP 500 if it is empty.

    An empty result from ``load_models_data()`` (missing or invalid
    ``models.json``) is reported to the client as a server error.
    """
    models_data = load_models_data()
    if models_data:
        return models_data
    raise HTTPException(status_code=500, detail="Error loading available models")

# Searcher function - the blocking upstream call runs on a worker thread
async def generate_search_async(query: str, systemprompt: Optional[str] = None, stream: bool = True):
    """Query the upstream "searchgpt" model and return the collected output.

    The synchronous cloudscraper request runs on the module's thread pool so
    the event loop is not blocked. The upstream reply is read to completion
    before this coroutine returns; nothing is forwarded incrementally.

    Args:
        query: The user's question.
        systemprompt: Optional system message; defaults to
            "Be Helpful and Friendly".
        stream: When truthy, the result is a list of server-sent-event
            strings (``"data: {...}\\n\\n"``), one per non-blank content
            delta. Otherwise the result is a one-element list holding the
            concatenated text.

    Returns:
        list[str] as described for ``stream``.

    Raises:
        ValueError: If ``SECRET_API_ENDPOINT_3`` is not configured.
    """
    loop = asyncio.get_running_loop()

    def _generate_search():
        headers = {"User-Agent": ""}

        # Use the provided system prompt, or default to "Be Helpful and Friendly"
        system_message = systemprompt or "Be Helpful and Friendly"

        # Message history: system message first, then the user query
        prompt = [
            {"content": system_message, "role": "system"},
            {"role": "user", "content": query},
        ]

        # Prepare the payload for the API request
        payload = {
            "is_vscode_extension": True,
            "message_history": prompt,
            "requested_model": "searchgpt",
            "user_input": prompt[-1]["content"],
        }

        # Get endpoint from environment
        secret_api_endpoint_3 = get_env_vars()['secret_api_endpoint_3']
        if not secret_api_endpoint_3:
            raise ValueError("Search API endpoint not configured")

        # Send the request to the chat endpoint using a scraper from the pool
        response = get_scraper().post(
            secret_api_endpoint_3,
            headers=headers,
            json=payload,
            stream=True
        )

        result = []
        text_parts = []

        try:
            # Process the streaming response
            for value in response.iter_lines(decode_unicode=True):
                # Defensive: lines may still arrive as bytes when the
                # response declares no text encoding.
                if isinstance(value, bytes):
                    value = value.decode("utf-8", errors="replace")
                if not value.startswith("data: "):
                    continue

                try:
                    event = json.loads(value[6:])
                except json.JSONDecodeError:
                    # e.g. the terminating "data: [DONE]" marker
                    continue
                if not isinstance(event, dict):
                    continue

                # Tolerate "choices": [] / null and "content": null, which
                # previously raised IndexError / AttributeError.
                choices = event.get("choices") or [{}]
                delta = choices[0].get("delta") or {}
                content = delta.get("content") or ""

                if not content.strip():  # Only process non-empty content
                    continue

                if stream:
                    cleaned_response = {
                        "created": event.get("created"),
                        "id": event.get("id"),
                        "model": "searchgpt",
                        "object": "chat.completion",
                        "choices": [
                            {
                                "message": {
                                    "content": content
                                }
                            }
                        ]
                    }
                    result.append(f"data: {json.dumps(cleaned_response)}\n\n")

                text_parts.append(content)
        finally:
            # Always release the pooled connection, even on error.
            response.close()

        if not stream:
            result.append("".join(text_parts))

        return result

    # Run in thread pool to avoid blocking the event loop
    return await loop.run_in_executor(executor, _generate_search)

# Memoised reader for the static HTML pages
@lru_cache(maxsize=10)
def read_html_file(file_path):
    """Return the text of ``file_path``, or ``None`` if the file is missing.

    Results (including ``None``) are kept by ``lru_cache`` for up to ten
    distinct paths, so a page is read from disk only once per process.
    """
    try:
        handle = open(file_path, "r")
    except FileNotFoundError:
        return None
    with handle:
        return handle.read()

# Basic routes
@app.get("/favicon.ico")
async def favicon():
    """Serve ``favicon.ico`` from the directory that contains this module."""
    module_dir = Path(__file__).parent
    return FileResponse(module_dir / "favicon.ico", media_type="image/x-icon")

@app.get("/ping")
async def ping():
    """Liveness probe: always answers "pong" with a fixed response-time string."""
    pong = dict(message="pong", response_time="0.000000 seconds")
    return pong

@app.get("/", response_class=HTMLResponse)
async def root():
    """Serve ``index.html``; answer 404 with a short notice if it is missing."""
    page = read_html_file("index.html")
    if page is not None:
        return HTMLResponse(content=page)
    return HTMLResponse(content="<h1>File not found</h1>", status_code=404)

@app.get("/playground", response_class=HTMLResponse)
async def playground():
    """Serve ``playground.html``; answer 404 with a short notice if it is missing."""
    page = read_html_file("playground.html")
    if page is not None:
        return HTMLResponse(content=page)
    return HTMLResponse(content="<h1>playground.html not found</h1>", status_code=404)

# Model routes
@app.get("/api/v1/models")
@app.get("/models")
async def return_models():
    """Return the models list (served under both ``/models`` and ``/api/v1/models``)."""
    catalogue = await get_models()
    return catalogue

# Search routes
@app.get("/searchgpt")
async def search_gpt(q: str, stream: Optional[bool] = False, systemprompt: Optional[str] = None):
    """Answer a search query through the upstream "searchgpt" model.

    Query parameters:
        q: The question (required, must be non-empty).
        stream: When truthy, reply as ``text/event-stream``; otherwise reply
            with a JSON object ``{"response": <text>}``.
        systemprompt: Optional system message passed to the model.

    Raises:
        HTTPException: 400 when ``q`` is empty.
    """
    if not q:
        raise HTTPException(status_code=400, detail="Query parameter 'q' is required")

    usage_tracker.record_request(endpoint="/searchgpt")

    chunks = await generate_search_async(q, systemprompt=systemprompt, stream=stream)

    if not stream:
        # Non-streaming: the first (only) element is the collected text
        answer = chunks[0] if chunks else ""
        return JSONResponse(content={"response": answer})

    async def emit_events():
        # Replay the already-collected SSE strings one at a time
        for event in chunks:
            yield event

    return StreamingResponse(emit_events(), media_type="text/event-stream")

# Chat completion endpoint
@app.post("/chat/completions")
@app.post("/api/v1/chat/completions")
async def get_completion(payload: Payload, request: Request, authenticated: bool = Depends(verify_api_key)):
    """Proxy a chat-completion request to the upstream that serves the model.

    Routing: models in ``mistral_models`` go to the Mistral API with a Bearer
    token, models in ``alternate_models`` go to ``SECRET_API_ENDPOINT_2``, and
    everything else goes to ``SECRET_API_ENDPOINT``.

    The upstream request is sent *before* the response to the client starts,
    so an upstream error status is returned as a real HTTP error instead of
    being raised in the middle of an already-started 200 stream. All blocking
    work (the POST and reading the body) runs off the event loop.

    Args:
        payload: Validated request body (model, messages, stream).
        request: Incoming request; used only for logging.
        authenticated: Result of the ``verify_api_key`` dependency.

    Returns:
        A 503 ``JSONResponse`` while ``server_status`` is falsy, otherwise a
        ``StreamingResponse`` relaying the upstream body line by line.

    Raises:
        HTTPException: 400 for a model not in ``available_model_ids`` (when
            that list is populated); the upstream status code when upstream
            answers >= 400; 500 when the upstream request itself fails.
    """
    # Check server status
    if not server_status:
        return JSONResponse(
            status_code=503,
            content={"message": "Server is under maintenance. Please try again later."}
        )

    model_to_use = payload.model or "gpt-4o-mini"

    # Validate model availability (skipped while the list is empty)
    if available_model_ids and model_to_use not in available_model_ids:
        raise HTTPException(
            status_code=400,
            detail=f"Model '{model_to_use}' is not available. Check /models for the available model list."
        )

    # Log request without blocking
    asyncio.create_task(log_request(request, model_to_use))
    usage_tracker.record_request(model=model_to_use, endpoint="/chat/completions")

    # Prepare payload
    payload_dict = payload.dict()
    payload_dict["model"] = model_to_use

    # Get environment variables
    env_vars = get_env_vars()

    # Select the appropriate endpoint (fast lookup with sets)
    if model_to_use in mistral_models:
        endpoint = env_vars['mistral_api']
        custom_headers = {
            "Authorization": f"Bearer {env_vars['mistral_key']}"
        }
    elif model_to_use in alternate_models:
        endpoint = env_vars['secret_api_endpoint_2']
        custom_headers = {}
    else:
        endpoint = env_vars['secret_api_endpoint']
        custom_headers = {}

    # Get a scraper from the pool
    scraper = get_scraper()

    def _send_upstream():
        # Synchronous (blocking) call; returns once response headers arrive
        return scraper.post(
            f"{endpoint}/v1/chat/completions",
            json=payload_dict,
            headers=custom_headers,
            stream=True
        )

    try:
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(executor, _send_upstream)
    except Exception as exc:
        # Use a generic error message that doesn't expose internal details
        raise HTTPException(
            status_code=500,
            detail="An error occurred while processing your request"
        ) from exc

    # Handle upstream errors while a proper status can still be returned
    if response.status_code >= 400:
        error_messages = {
            422: "Unprocessable entity. Check your payload.",
            400: "Bad request. Verify input data.",
            403: "Forbidden. You do not have access to this resource.",
            404: "The requested resource was not found.",
        }
        upstream_status = response.status_code
        detail = error_messages.get(upstream_status, f"Error code: {upstream_status}")
        response.close()
        raise HTTPException(status_code=upstream_status, detail=detail)

    def stream_generator():
        # Deliberately a *synchronous* generator: StreamingResponse iterates
        # sync iterators in a worker thread, so the blocking iter_lines()
        # reads below do not stall the event loop.
        buffer = []
        buffer_size = 0
        max_buffer = 8192  # flush once roughly 8K characters are buffered

        try:
            for line in response.iter_lines():
                if line:
                    decoded = line.decode('utf-8') + "\n"
                    buffer.append(decoded)
                    buffer_size += len(decoded)

                    if buffer_size >= max_buffer:
                        yield ''.join(buffer)
                        buffer = []
                        buffer_size = 0

            # Flush remaining buffer
            if buffer:
                yield ''.join(buffer)
        finally:
            # Release the upstream connection on completion, error, or
            # client disconnect.
            response.close()

    return StreamingResponse(stream_generator(), media_type="application/json")

# Asynchronous logging function
async def log_request(request, model):
    """Print one log line (time, anonymised client, model) for a request.

    Args:
        request: Incoming request; only ``request.client.host`` is read.
            ``request.client`` may be ``None``, in which case the client is
            logged as "unknown".
        model: Name of the model that was requested.

    The timestamp is rendered at UTC+05:30. The client address is reduced to
    a number in 0..9999 derived from SHA-256, so the same address maps to the
    same value in every worker process and across restarts (the built-in
    ``hash()`` is randomised per process for strings and does not).
    """
    import hashlib

    # Get minimal data for logging
    local_zone = datetime.timezone(datetime.timedelta(hours=5, minutes=30))
    current_time = datetime.datetime.now(local_zone).strftime("%Y-%m-%d %I:%M:%S %p")

    client = getattr(request, "client", None)
    host = getattr(client, "host", None)
    if host:
        digest = hashlib.sha256(str(host).encode("utf-8")).digest()
        ip_hash = int.from_bytes(digest[:4], "big") % 10000  # Hash the IP for privacy
    else:
        ip_hash = "unknown"

    print(f"Time: {current_time}, IP Hash: {ip_hash}, Model: {model}")

# Image generation endpoint - uses the shared httpx client
@app.api_route("/images/generations", methods=["GET", "POST"])
async def generate_image(
    prompt: Optional[str] = None, 
    model: str = "flux",
    seed: Optional[int] = None,
    width: Optional[int] = None,
    height: Optional[int] = None,
    nologo: Optional[bool] = True,
    private: Optional[bool] = None,
    enhance: Optional[bool] = None,
    request: Request = None,
    authenticated: bool = Depends(verify_api_key)
):
    """Generate an image through the configured image service and relay it.

    The prompt comes from the ``prompt`` query parameter on GET and from the
    JSON body (``{"prompt": ...}``) on POST. It is percent-encoded and used
    as the final path segment of ``IMAGE_ENDPOINT``; the remaining options
    are forwarded as query parameters.

    Args:
        prompt: Text prompt (GET only; POST reads it from the body).
        model: Image model name forwarded upstream.
        seed: Optional seed forwarded upstream.
        width, height: Forwarded only when between 64 and 2048 inclusive.
        nologo, private, enhance: Forwarded as "true"/"false" when not None.
        request: Incoming request (injected by FastAPI).
        authenticated: Result of the ``verify_api_key`` dependency.

    Returns:
        ``StreamingResponse`` carrying the image with caching disabled.

    Raises:
        HTTPException: 400 for an empty prompt or an invalid JSON body; the
            upstream status code when the image service does not answer 200;
            504 on timeout; 500 when the endpoint is not configured, the
            service cannot be reached, or it returns a non-image body.
    """
    from urllib.parse import quote

    # Validate the image endpoint
    image_endpoint = get_env_vars()['image_endpoint']
    if not image_endpoint:
        raise HTTPException(status_code=500, detail="Image endpoint not configured in environment variables.")

    usage_tracker.record_request(endpoint="/images/generations")

    # Handle GET and POST prompts
    if request.method == "POST":
        # Only the body parsing is guarded, so the "empty prompt" error below
        # is no longer rewritten into "Invalid JSON payload".
        try:
            body = await request.json()
        except Exception as exc:
            raise HTTPException(status_code=400, detail="Invalid JSON payload") from exc

        raw_prompt = body.get("prompt", "") if isinstance(body, dict) else None
        if not isinstance(raw_prompt, str):
            raise HTTPException(status_code=400, detail="Invalid JSON payload")

        prompt = raw_prompt.strip()
        if not prompt:
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")
    elif request.method == "GET":
        if not prompt or not prompt.strip():
            raise HTTPException(status_code=400, detail="Prompt cannot be empty")
        prompt = prompt.strip()

    # Percent-encode the prompt as a single path segment. safe='' also
    # escapes '/', and '?' / '#' no longer cut the URL short.
    encoded_prompt = quote(prompt, safe='')

    # Construct the URL with the encoded prompt
    base_url = image_endpoint.rstrip('/')
    url = f"{base_url}/{encoded_prompt}"

    # Prepare query parameters with validation
    params = {}
    if model and isinstance(model, str):
        params['model'] = model
    if seed is not None and isinstance(seed, int):
        params['seed'] = seed
    if width is not None and isinstance(width, int) and 64 <= width <= 2048:
        params['width'] = width
    if height is not None and isinstance(height, int) and 64 <= height <= 2048:
        params['height'] = height
    if nologo is not None:
        params['nologo'] = str(nologo).lower()
    if private is not None:
        params['private'] = str(private).lower()
    if enhance is not None:
        params['enhance'] = str(enhance).lower()

    try:
        # Use the shared httpx client for connection pooling
        client = get_async_client()
        response = await client.get(url, params=params, follow_redirects=True)

        # Check for various error conditions
        if response.status_code != 200:
            error_messages = {
                404: "Image generation service not found",
                400: "Invalid parameters provided to image service",
                429: "Too many requests to image service",
            }
            detail = error_messages.get(response.status_code, f"Image generation failed with status code {response.status_code}")
            raise HTTPException(status_code=response.status_code, detail=detail)

        # Verify content type
        content_type = response.headers.get('content-type', '')
        if not content_type.startswith('image/'):
            raise HTTPException(
                status_code=500,
                detail="Unexpected content type received from image service"
            )

        # Re-chunk the body into pieces of at least 64KB before sending
        async def stream_with_larger_chunks():
            chunks = []
            size = 0
            async for chunk in response.aiter_bytes(chunk_size=16384):  # Use 16KB chunks
                chunks.append(chunk)
                size += len(chunk)

                if size >= 65536:  # Yield every 64KB
                    yield b''.join(chunks)
                    chunks = []
                    size = 0

            if chunks:
                yield b''.join(chunks)

        return StreamingResponse(
            stream_with_larger_chunks(),
            media_type=content_type,
            headers={
                'Cache-Control': 'no-cache, no-store, must-revalidate',
                'Pragma': 'no-cache',
                'Expires': '0'
            }
        )

    except HTTPException:
        # Keep the specific status/detail chosen above instead of letting the
        # catch-all below turn it into a generic 500.
        raise
    except httpx.TimeoutException:
        raise HTTPException(status_code=504, detail="Image generation request timed out")
    except httpx.RequestError:
        raise HTTPException(status_code=500, detail="Failed to contact image service")
    except Exception:
        raise HTTPException(status_code=500, detail="Unexpected error during image generation")

# Meme endpoint - uses the shared httpx client
@app.get("/meme")
async def get_meme():
    """Fetch a random meme from meme-api.com and relay the image.

    Returns:
        ``StreamingResponse`` with the image bytes, the upstream content type
        (defaulting to ``image/png``) and a one-hour ``Cache-Control``.

    Raises:
        HTTPException: 404 when the meme API reply contains no ``url``;
            500 for any other failure.
    """
    try:
        # Use the shared client for connection pooling
        client = get_async_client()
        response = await client.get("https://meme-api.com/gimme")
        response_data = response.json()

        meme_url = response_data.get("url")
        if not meme_url:
            raise HTTPException(status_code=404, detail="No meme found")

        image_response = await client.get(meme_url, follow_redirects=True)

        # Re-chunk the body into pieces of at least 64KB before sending
        async def stream_with_larger_chunks():
            chunks = []
            size = 0
            async for chunk in image_response.aiter_bytes(chunk_size=16384):
                chunks.append(chunk)
                size += len(chunk)

                if size >= 65536:
                    yield b''.join(chunks)
                    chunks = []
                    size = 0

            if chunks:
                yield b''.join(chunks)

        return StreamingResponse(
            stream_with_larger_chunks(), 
            media_type=image_response.headers.get("content-type", "image/png"),
            headers={'Cache-Control': 'max-age=3600'}  # Add caching
        )
    except HTTPException:
        # Preserve the 404 raised above; without this the catch-all below
        # reports it as a 500.
        raise
    except Exception as exc:
        raise HTTPException(status_code=500, detail="Failed to retrieve meme") from exc

# Cache usage statistics for a short time only, so the numbers stay current
_USAGE_SUMMARY_TTL_SECONDS = 60.0
_USAGE_SUMMARY_MAX_ENTRIES = 10
_usage_summary_cache = {}  # days -> (monotonic timestamp, summary)

def get_usage_summary(days=7):
    """Return the usage summary for the last ``days`` days.

    Results are cached per ``days`` value for ``_USAGE_SUMMARY_TTL_SECONDS``
    seconds. (An unbounded-lifetime cache would return the first computed
    summary for the rest of the process's life, hiding all later requests.)

    Args:
        days: Window passed through to ``usage_tracker.get_usage_summary``.

    Returns:
        Whatever ``usage_tracker.get_usage_summary(days)`` returns.
    """
    now = time.monotonic()
    cached = _usage_summary_cache.get(days)
    if cached is not None and now - cached[0] < _USAGE_SUMMARY_TTL_SECONDS:
        return cached[1]

    summary = usage_tracker.get_usage_summary(days)

    # `days` comes from a query parameter, so bound the number of entries.
    if len(_usage_summary_cache) >= _USAGE_SUMMARY_MAX_ENTRIES:
        _usage_summary_cache.clear()
    _usage_summary_cache[days] = (now, summary)
    return summary

# Kept so callers written against the previous lru_cache wrapper still work.
get_usage_summary.cache_clear = _usage_summary_cache.clear

@app.get("/usage")
async def get_usage(days: int = 7):
    """Return usage statistics for the last ``days`` days (default 7)."""
    summary = get_usage_summary(days)
    return summary

# Generate HTML for usage page
def generate_usage_html(usage_data):
    """Render the usage-statistics page as an HTML string.

    Args:
        usage_data: Mapping with the keys
            ``total_requests`` (value shown in the banner),
            ``models`` and ``api_endpoints`` (name -> mapping with
            ``total_requests``, ``first_used``, ``last_used``), and
            ``recent_daily_usage`` (date -> mapping of entity -> request
            count).

    Returns:
        A complete HTML document.

    Every interpolated value is HTML-escaped: model and endpoint names are
    recorded from request data, so they must not be able to inject markup
    into the page.
    """
    from html import escape

    def _cell(value):
        # Text-safe rendering of any value for use inside an element
        return escape(str(value))

    # Model Usage Table Rows
    model_usage_rows = "\n".join([
        f"""
        <tr>
            <td>{_cell(model)}</td>
            <td>{_cell(model_data['total_requests'])}</td>
            <td>{_cell(model_data['first_used'])}</td>
            <td>{_cell(model_data['last_used'])}</td>
        </tr>
        """ for model, model_data in usage_data['models'].items()
    ])
    
    # API Endpoint Usage Table Rows
    api_usage_rows = "\n".join([
        f"""
        <tr>
            <td>{_cell(endpoint)}</td>
            <td>{_cell(endpoint_data['total_requests'])}</td>
            <td>{_cell(endpoint_data['first_used'])}</td>
            <td>{_cell(endpoint_data['last_used'])}</td>
        </tr>
        """ for endpoint, endpoint_data in usage_data['api_endpoints'].items()
    ])
    
    # Daily Usage Table Rows
    daily_usage_rows = "\n".join([
        "\n".join([
            f"""
            <tr>
                <td>{_cell(date)}</td>
                <td>{_cell(entity)}</td>
                <td>{_cell(requests)}</td>
            </tr>
            """ for entity, requests in date_data.items()
        ]) for date, date_data in usage_data['recent_daily_usage'].items()
    ])
    
    html_content = f"""
    <!DOCTYPE html>
    <html lang="en">
    <head>
        <meta charset="UTF-8">
        <title>Lokiai AI - Usage Statistics</title>
        <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;600&display=swap" rel="stylesheet">
        <style>
            :root {{
                --bg-dark: #0f1011;
                --bg-darker: #070708;
                --text-primary: #e6e6e6;
                --text-secondary: #8c8c8c;
                --border-color: #2c2c2c;
                --accent-color: #3a6ee0;
                --accent-hover: #4a7ef0;
            }}
            body {{
                font-family: 'Inter', sans-serif;
                background-color: var(--bg-dark);
                color: var(--text-primary);
                max-width: 1200px;
                margin: 0 auto;
                padding: 40px 20px;
                line-height: 1.6;
            }}
            .logo {{
                display: flex;
                align-items: center;
                justify-content: center;
                margin-bottom: 30px;
            }}
            .logo h1 {{
                font-weight: 600;
                font-size: 2.5em;
                color: var(--text-primary);
                margin-left: 15px;
            }}
            .logo img {{
                width: 60px;
                height: 60px;
                border-radius: 10px;
            }}
            .container {{
                background-color: var(--bg-darker);
                border-radius: 12px;
                padding: 30px;
                box-shadow: 0 15px 40px rgba(0,0,0,0.3);
                border: 1px solid var(--border-color);
            }}
            h2, h3 {{
                color: var(--text-primary);
                border-bottom: 2px solid var(--border-color);
                padding-bottom: 10px;
                font-weight: 500;
            }}
            .total-requests {{
                background-color: var(--accent-color);
                color: white;
                text-align: center;
                padding: 15px;
                border-radius: 8px;
                margin-bottom: 30px;
                font-weight: 600;
                letter-spacing: -0.5px;
            }}
            table {{
                width: 100%;
                border-collapse: separate;
                border-spacing: 0;
                margin-bottom: 30px;
                background-color: var(--bg-dark);
                border-radius: 8px;
                overflow: hidden;
            }}
            th, td {{
                border: 1px solid var(--border-color);
                padding: 12px;
                text-align: left;
                transition: background-color 0.3s ease;
            }}
            th {{
                background-color: #1e1e1e;
                color: var(--text-primary);
                font-weight: 600;
                text-transform: uppercase;
                font-size: 0.9em;
            }}
            tr:nth-child(even) {{
                background-color: rgba(255,255,255,0.05);
            }}
            tr:hover {{
                background-color: rgba(62,100,255,0.1);
            }}
            @media (max-width: 768px) {{
                .container {{
                    padding: 15px;
                }}
                table {{
                    font-size: 0.9em;
                }}
            }}
        </style>
    </head>
    <body>
        <div class="container">
            <div class="logo">
                <img src="data:image/svg+xml;base64,PHN2ZyB3aWR0aD0iMjAwIiBoZWlnaHQ9IjIwMCIgeG1sbnM9Imh0dHA6Ly93d3cudzMub3JnLzIwMDAvc3ZnIj48cGF0aCBkPSJNMTAwIDM1TDUwIDkwaDEwMHoiIGZpbGw9IiMzYTZlZTAiLz48Y2lyY2xlIGN4PSIxMDAiIGN5PSIxNDAiIHI9IjMwIiBmaWxsPSIjM2E2ZWUwIi8+PC9zdmc+" alt="Lokai AI Logo">
                <h1>Lokiai AI</h1>
            </div>
            
            <div class="total-requests">
                Total API Requests: {_cell(usage_data['total_requests'])}
            </div>
            
            <h2>Model Usage</h2>
            <table>
                <tr>
                    <th>Model</th>
                    <th>Total Requests</th>
                    <th>First Used</th>
                    <th>Last Used</th>
                </tr>
                {model_usage_rows}
            </table>
            
            <h2>API Endpoint Usage</h2>
            <table>
                <tr>
                    <th>Endpoint</th>
                    <th>Total Requests</th>
                    <th>First Used</th>
                    <th>Last Used</th>
                </tr>
                {api_usage_rows}
            </table>
            
            <h2>Daily Usage (Last 7 Days)</h2>
            <table>
                <tr>
                    <th>Date</th>
                    <th>Entity</th>
                    <th>Requests</th>
                </tr>
                {daily_usage_rows}
            </table>
        </div>
    </body>
    </html>
    """
    return html_content

# Cache the rendered usage page for a short time only
_USAGE_PAGE_TTL_SECONDS = 60.0
_usage_page_cache = {}  # "page" -> (monotonic timestamp, html)

def get_usage_page_html():
    """Return the usage page HTML, re-rendering at most once per TTL.

    The page is rendered from a 7-day summary read straight from
    ``usage_tracker`` and then reused for ``_USAGE_PAGE_TTL_SECONDS``
    seconds. (A cache without expiry would serve the first rendered page
    for the rest of the process's life.)
    """
    now = time.monotonic()
    cached = _usage_page_cache.get("page")
    if cached is not None and now - cached[0] < _USAGE_PAGE_TTL_SECONDS:
        return cached[1]

    # Read from the tracker directly so that the page's freshness is governed
    # by this TTL alone and not by any other cache layer.
    usage_data = usage_tracker.get_usage_summary(7)
    html_content = generate_usage_html(usage_data)
    _usage_page_cache["page"] = (now, html_content)
    return html_content

# Kept so callers written against the previous lru_cache wrapper still work.
get_usage_page_html.cache_clear = _usage_page_cache.clear

@app.get("/usage/page", response_class=HTMLResponse)
async def usage_page():
    """Return the usage-statistics page as an HTML response."""
    # get_usage_page_html() decides whether a cached copy can be reused
    return HTMLResponse(content=get_usage_page_html())

# Utility function for loading model IDs - meant to run once at startup
def load_model_ids(json_file_path):
    """Return the list of model IDs found in a models JSON file.

    Args:
        json_file_path: Path to a JSON file expected to hold a list of
            objects, each optionally carrying an ``"id"`` key.

    Returns:
        The ``id`` values in file order. Entries that are not objects, or
        that have no ``"id"``, are skipped individually, so one malformed
        entry no longer empties the whole list (an empty list switches off
        model validation in the completion endpoint). Returns ``[]`` and
        prints the error if the file cannot be read or parsed.
    """
    try:
        with open(json_file_path, 'r', encoding='utf-8') as f:
            models_data = json.load(f)
        return [
            model['id']
            for model in models_data
            if isinstance(model, dict) and 'id' in model
        ]
    except (OSError, ValueError, TypeError) as e:
        # OSError: missing/unreadable file; ValueError: invalid JSON or
        # encoding; TypeError: top-level JSON value is not iterable.
        print(f"Error loading model IDs: {str(e)}")
        return []

@app.on_event("startup")
async def startup_event():
    """Initialise process-wide state when the application starts.

    Loads the accepted model IDs into ``available_model_ids``, makes sure the
    scraper pool holds ``MAX_SCRAPERS`` scrapers, and prints a warning naming
    any upstream endpoint variables that are not set.
    """
    global available_model_ids

    # Prefer the models.json next to this module (the same file that
    # load_models_data() serves); fall back to the working directory.
    module_models = Path(__file__).parent / "models.json"
    models_path = module_models if module_models.exists() else "models.json"
    available_model_ids = load_model_ids(models_path)
    print(f"Loaded {len(available_model_ids)} model IDs")

    # Preload scrapers: top the pool up instead of appending blindly, so a
    # pool that was already filled does not grow past MAX_SCRAPERS.
    while len(scraper_pool) < MAX_SCRAPERS:
        scraper_pool.append(cloudscraper.create_scraper())

    # Validate critical environment variables
    env_vars = get_env_vars()
    required = (
        ('secret_api_endpoint', 'SECRET_API_ENDPOINT'),
        ('secret_api_endpoint_2', 'SECRET_API_ENDPOINT_2'),
        ('secret_api_endpoint_3', 'SECRET_API_ENDPOINT_3'),
    )
    missing_vars = [name for key, name in required if not env_vars[key]]

    if missing_vars:
        print(f"WARNING: The following required environment variables are missing: {', '.join(missing_vars)}")

    print("API started successfully with high-performance optimizations")

if __name__ == "__main__":
    import uvicorn

    # uvicorn can only start multiple workers when the application is given
    # as an import string ("module:attribute"); given the app object together
    # with workers > 1 it refuses to start. The module name is derived from
    # this file so the entry point keeps working if the file is renamed.
    uvicorn.run(
        f"{Path(__file__).stem}:app",
        host="0.0.0.0", 
        port=7860,
        workers=4,  # Multiple worker processes
        loop="uvloop",  # Use the uvloop event loop
        http="httptools",  # Use the httptools HTTP parser
        log_level="warning",  # Only log warnings and above
        limit_concurrency=100,  # Limit concurrent connections
        timeout_keep_alive=5  # Close idle keep-alive connections after 5s
    )