Niansuh committed
Commit ebc0716 · verified · 1 Parent(s): 53aa71f

Update main.py

Files changed (1)
  1. main.py +0 -367
main.py CHANGED
@@ -386,370 +386,3 @@ class Blackbox:
                 logger.error(f"Unexpected error: {e}. Retrying attempt {attempt + 1}/{retry_attempts}")
                 if attempt == retry_attempts - 1:
                     raise HTTPException(status_code=500, detail=str(e))
-
-# FastAPI app setup
-app = FastAPI()
-
-# Add the cleanup task when the app starts
-@app.on_event("startup")
-async def startup_event():
-    asyncio.create_task(cleanup_rate_limit_stores())
-    logger.info("Started rate limit store cleanup task.")
-
-# Middleware to enhance security and enforce Content-Type for specific endpoints
-@app.middleware("http")
-async def security_middleware(request: Request, call_next):
-    client_ip = request.client.host
-    # Enforce that POST requests to /v1/chat/completions must have Content-Type: application/json
-    if request.method == "POST" and request.url.path == "/v1/chat/completions":
-        content_type = request.headers.get("Content-Type")
-        if content_type != "application/json":
-            logger.warning(f"Invalid Content-Type from IP: {client_ip} for path: {request.url.path}")
-            return JSONResponse(
-                status_code=400,
-                content={
-                    "error": {
-                        "message": "Content-Type must be application/json",
-                        "type": "invalid_request_error",
-                        "param": None,
-                        "code": None
-                    }
-                },
-            )
-    response = await call_next(request)
-    return response
-
-# Request Models
-class Message(BaseModel):
-    role: str
-    content: Union[str, List[Any]]  # content can be a string or a list (for images)
-
-class ChatRequest(BaseModel):
-    model: str
-    messages: List[Message]
-    temperature: Optional[float] = 1.0
-    top_p: Optional[float] = 1.0
-    n: Optional[int] = 1
-    stream: Optional[bool] = False
-    stop: Optional[Union[str, List[str]]] = None
-    max_tokens: Optional[int] = None
-    presence_penalty: Optional[float] = 0.0
-    frequency_penalty: Optional[float] = 0.0
-    logit_bias: Optional[Dict[str, float]] = None
-    user: Optional[str] = None
-    webSearchMode: Optional[bool] = False  # Custom parameter
-    image: Optional[str] = None  # Base64-encoded image
-
-class TokenizerRequest(BaseModel):
-    text: str
-
-def calculate_estimated_cost(prompt_tokens: int, completion_tokens: int) -> float:
-    """
-    Calculate the estimated cost based on the number of tokens.
-    Replace the pricing below with your actual pricing model.
-    """
-    # Example pricing: $0.00000268 per token
-    cost_per_token = 0.00000268
-    return round((prompt_tokens + completion_tokens) * cost_per_token, 8)
-
-def create_response(content: str, model: str, finish_reason: Optional[str] = None) -> Dict[str, Any]:
-    return {
-        "id": f"chatcmpl-{uuid.uuid4()}",
-        "object": "chat.completion",
-        "created": int(datetime.now().timestamp()),
-        "model": model,
-        "choices": [
-            {
-                "index": 0,
-                "message": {
-                    "role": "assistant",
-                    "content": content
-                },
-                "finish_reason": finish_reason
-            }
-        ],
-        "usage": None,  # To be filled in non-streaming responses
-    }
-
-@app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
-async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
-    client_ip = req.client.host
-    # Redact user messages only for logging purposes
-    redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
-
-    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
-
-    analysis_result = None
-    if request.image:
-        try:
-            image = decode_base64_image(request.image)
-            analysis_result = analyze_image(image)
-            logger.info("Image analysis completed successfully.")
-        except HTTPException as he:
-            logger.error(f"Image analysis failed: {he.detail}")
-            raise he
-        except Exception as e:
-            logger.exception("Unexpected error during image analysis.")
-            raise HTTPException(status_code=500, detail="Image analysis failed.") from e
-
-    # Prepare messages to send to the external API, excluding image data
-    processed_messages = []
-    for msg in request.messages:
-        if isinstance(msg.content, list) and len(msg.content) == 2:
-            # Assume the second item is image data, skip it
-            processed_messages.append({
-                "role": msg.role,
-                "content": msg.content[0]["text"]  # Only include the text part
-            })
-        else:
-            processed_messages.append({
-                "role": msg.role,
-                "content": msg.content
-            })
-
-    # Create a modified ChatRequest without the image
-    modified_request = ChatRequest(
-        model=request.model,
-        messages=[msg for msg in processed_messages],
-        stream=request.stream,
-        temperature=request.temperature,
-        top_p=request.top_p,
-        max_tokens=request.max_tokens,
-        presence_penalty=request.presence_penalty,
-        frequency_penalty=request.frequency_penalty,
-        logit_bias=request.logit_bias,
-        user=request.user,
-        webSearchMode=request.webSearchMode,
-        image=None  # Exclude image from external API
-    )
-
-    try:
-        if request.stream:
-            logger.info("Streaming response")
-            streaming_response = await Blackbox.create_async_generator(
-                model=modified_request.model,
-                messages=[{"role": msg["role"], "content": msg["content"]} for msg in modified_request.messages],
-                proxy=None,
-                image=None,
-                image_name=None,
-                webSearchMode=modified_request.webSearchMode
-            )
-
-            # Wrap the streaming generator to include image analysis at the end
-            async def generate_with_analysis():
-                assistant_content = ""
-                try:
-                    async for chunk in streaming_response:
-                        if isinstance(chunk, ImageResponse):
-                            # Handle image responses if necessary
-                            image_markdown = f"![image]({chunk.url})\n"
-                            assistant_content += image_markdown
-                            response_chunk = create_response(image_markdown, modified_request.model, finish_reason=None)
-                        else:
-                            assistant_content += chunk
-                            # Yield the chunk as a partial choice
-                            response_chunk = {
-                                "id": f"chatcmpl-{uuid.uuid4()}",
-                                "object": "chat.completion.chunk",
-                                "created": int(datetime.now().timestamp()),
-                                "model": modified_request.model,
-                                "choices": [
-                                    {
-                                        "index": 0,
-                                        "delta": {"content": chunk, "role": "assistant"},
-                                        "finish_reason": None,
-                                    }
-                                ],
-                                "usage": None,  # Usage can be updated if you track tokens in real-time
-                            }
-                        yield f"data: {json.dumps(response_chunk)}\n\n"
-
-                    # After all chunks are sent, send the final message with finish_reason
-                    prompt_tokens = sum(len(msg["content"].split()) for msg in modified_request.messages)
-                    completion_tokens = len(assistant_content.split())
-                    total_tokens = prompt_tokens + completion_tokens
-                    estimated_cost = calculate_estimated_cost(prompt_tokens, completion_tokens)
-
-                    final_response = {
-                        "id": f"chatcmpl-{uuid.uuid4()}",
-                        "object": "chat.completion",
-                        "created": int(datetime.now().timestamp()),
-                        "model": modified_request.model,
-                        "choices": [
-                            {
-                                "message": {
-                                    "role": "assistant",
-                                    "content": assistant_content
-                                },
-                                "finish_reason": "stop",
-                                "index": 0
-                            }
-                        ],
-                        "usage": {
-                            "prompt_tokens": prompt_tokens,
-                            "completion_tokens": completion_tokens,
-                            "total_tokens": total_tokens,
-                            "estimated_cost": estimated_cost
-                        },
-                    }
-
-                    if analysis_result:
-                        final_response["choices"][0]["message"]["content"] += f"\n\n**Image Analysis:** {analysis_result}"
-
-                    yield f"data: {json.dumps(final_response)}\n\n"
-                    yield "data: [DONE]\n\n"
-                except HTTPException as he:
-                    error_response = {"error": he.detail}
-                    yield f"data: {json.dumps(error_response)}\n\n"
-                except Exception as e:
-                    logger.exception(f"Error during streaming response generation from IP: {client_ip}.")
-                    error_response = {"error": str(e)}
-                    yield f"data: {json.dumps(error_response)}\n\n"
-
-            return StreamingResponse(generate_with_analysis(), media_type="text/event-stream")
-        else:
-            logger.info("Non-streaming response")
-            streaming_response = await Blackbox.create_async_generator(
-                model=modified_request.model,
-                messages=[{"role": msg["role"], "content": msg["content"]} for msg in modified_request.messages],
-                proxy=None,
-                image=None,
-                image_name=None,
-                webSearchMode=modified_request.webSearchMode
-            )
-
-            response_content = ""
-            async for chunk in streaming_response:
-                if isinstance(chunk, ImageResponse):
-                    response_content += f"![image]({chunk.url})\n"
-                else:
-                    response_content += chunk
-
-            prompt_tokens = sum(len(msg["content"].split()) for msg in modified_request.messages)
-            completion_tokens = len(response_content.split())
-            total_tokens = prompt_tokens + completion_tokens
-            estimated_cost = calculate_estimated_cost(prompt_tokens, completion_tokens)
-
-            logger.info(f"Completed non-streaming response generation for API key: {api_key} | IP: {client_ip}")
-
-            response = {
-                "id": f"chatcmpl-{uuid.uuid4()}",
-                "object": "chat.completion",
-                "created": int(datetime.now().timestamp()),
-                "model": modified_request.model,
-                "choices": [
-                    {
-                        "message": {
-                            "role": "assistant",
-                            "content": response_content
-                        },
-                        "finish_reason": "stop",
-                        "index": 0
-                    }
-                ],
-                "usage": {
-                    "prompt_tokens": prompt_tokens,
-                    "completion_tokens": completion_tokens,
-                    "total_tokens": total_tokens,
-                    "estimated_cost": estimated_cost
-                },
-            }
-
-            if analysis_result:
-                response["choices"][0]["message"]["content"] += f"\n\n**Image Analysis:** {analysis_result}"
-
-            return response
-    except ModelNotWorkingException as e:
-        logger.warning(f"Model not working: {e} | IP: {client_ip}")
-        raise HTTPException(status_code=503, detail=str(e))
-    except HTTPException as he:
-        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
-        raise he
-    except Exception as e:
-        logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
-        raise HTTPException(status_code=500, detail=str(e))
-
-# Endpoint: POST /v1/tokenizer
-@app.post("/v1/tokenizer", dependencies=[Depends(rate_limiter_per_ip)])
-async def tokenizer(request: TokenizerRequest, req: Request):
-    client_ip = req.client.host
-    text = request.text
-    token_count = len(text.split())
-    logger.info(f"Tokenizer requested from IP: {client_ip} | Text length: {len(text)}")
-    return {"text": text, "tokens": token_count}
-
-# Endpoint: GET /v1/models
-@app.get("/v1/models", dependencies=[Depends(rate_limiter_per_ip)])
-async def get_models(req: Request):
-    client_ip = req.client.host
-    logger.info(f"Fetching available models from IP: {client_ip}")
-    return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
-
-# Endpoint: GET /v1/models/{model}/status
-@app.get("/v1/models/{model}/status", dependencies=[Depends(rate_limiter_per_ip)])
-async def model_status(model: str, req: Request):
-    client_ip = req.client.host
-    logger.info(f"Model status requested for '{model}' from IP: {client_ip}")
-    if model in Blackbox.models:
-        return {"model": model, "status": "available"}
-    elif model in Blackbox.model_aliases and Blackbox.model_aliases[model] in Blackbox.models:
-        actual_model = Blackbox.model_aliases[model]
-        return {"model": actual_model, "status": "available via alias"}
-    else:
-        logger.warning(f"Model not found: {model} from IP: {client_ip}")
-        raise HTTPException(status_code=404, detail="Model not found")
-
-# Endpoint: GET /v1/health
-@app.get("/v1/health", dependencies=[Depends(rate_limiter_per_ip)])
-async def health_check(req: Request):
-    client_ip = req.client.host
-    logger.info(f"Health check requested from IP: {client_ip}")
-    return {"status": "ok"}
-
-# Endpoint: GET /v1/chat/completions (GET method)
-@app.get("/v1/chat/completions")
-async def chat_completions_get(req: Request):
-    client_ip = req.client.host
-    logger.info(f"GET request made to /v1/chat/completions from IP: {client_ip}, redirecting to 'about:blank'")
-    return RedirectResponse(url='about:blank')
-
-# Custom exception handler to match OpenAI's error format
-@app.exception_handler(HTTPException)
-async def http_exception_handler(request: Request, exc: HTTPException):
-    client_ip = request.client.host
-    logger.error(f"HTTPException: {exc.detail} | Path: {request.url.path} | IP: {client_ip}")
-    return JSONResponse(
-        status_code=exc.status_code,
-        content={
-            "error": {
-                "message": exc.detail,
-                "type": "invalid_request_error",
-                "param": None,
-                "code": None
-            }
-        },
-    )
-
-# Image Processing Utilities
-def decode_base64_image(base64_str: str) -> Image.Image:
-    try:
-        image_data = base64.b64decode(base64_str)
-        image = Image.open(BytesIO(image_data))
-        return image
-    except Exception as e:
-        logger.error("Failed to decode base64 image.")
-        raise HTTPException(status_code=400, detail="Invalid base64 image data.") from e
-
-def analyze_image(image: Image.Image) -> str:
-    """
-    Placeholder for image analysis.
-    Replace this with actual image analysis logic.
-    """
-    # Example: Return image size as analysis
-    width, height = image.size
-    return f"Image analyzed successfully. Width: {width}px, Height: {height}px."
-
-# Run the application
-if __name__ == "__main__":
-    import uvicorn
-    uvicorn.run(app, host="0.0.0.0", port=8000)
 