Update main.py
Browse files
main.py
CHANGED
@@ -10,7 +10,7 @@ import logging
|
|
10 |
import asyncio
|
11 |
import time
|
12 |
from collections import defaultdict
|
13 |
-
from typing import List, Dict, Any, Optional,
|
14 |
|
15 |
from datetime import datetime
|
16 |
|
@@ -453,272 +453,8 @@ async def security_middleware(request: Request, call_next):
|
|
453 |
response = await call_next(request)
|
454 |
return response
|
455 |
|
456 |
-
#
|
457 |
-
|
458 |
-
class TextContent(BaseModel):
    # Plain-text entry of a message's content list.
    # `type` is a discriminator and must be the literal "text".
    type: str = "text"
    # The text payload itself.
    text: str

    @validator('type')
    def type_must_be_text(cls, v):
        # Reject any discriminator other than "text".
        if v != "text":
            raise ValueError("Type must be 'text'")
        return v
|
466 |
-
|
467 |
-
class ImageContent(BaseModel):
    # Image entry of a message's content list.
    # `type` is a discriminator and must be the literal "image_url".
    type: str = "image_url"
    # String-to-string mapping describing the image; other code in this
    # file reads its 'url' and optional 'alt' keys.
    image_url: Dict[str, str]

    @validator('type')
    def type_must_be_image_url(cls, v):
        # Accept only the expected discriminator value.
        if v == "image_url":
            return v
        raise ValueError("Type must be 'image_url'")
|
476 |
-
|
477 |
-
ContentItem = Union[TextContent, ImageContent]
|
478 |
-
|
479 |
-
class Message(BaseModel):
    # One chat message: who is speaking plus an ordered list of content parts.
    role: str
    content: List[ContentItem]

    @validator('role')
    def role_must_be_valid(cls, v):
        # Only these three roles are accepted.
        if v in {"system", "user", "assistant"}:
            return v
        raise ValueError("Role must be 'system', 'user', or 'assistant'")
|
488 |
-
|
489 |
-
class ChatRequest(BaseModel):
    # Request body for POST /v1/chat/completions.

    # Required fields.
    model: str
    messages: List[Message]

    # Optional sampling / generation settings with their defaults.
    temperature: Optional[float] = 1.0
    top_p: Optional[float] = 1.0
    n: Optional[int] = 1
    stream: Optional[bool] = False  # when true the endpoint answers as an event stream
    stop: Optional[Union[str, List[str]]] = None
    max_tokens: Optional[int] = None
    presence_penalty: Optional[float] = 0.0
    frequency_penalty: Optional[float] = 0.0
    logit_bias: Optional[Dict[str, float]] = None
    user: Optional[str] = None

    # Custom parameter
    webSearchMode: Optional[bool] = False
|
503 |
-
|
504 |
-
class TokenizerRequest(BaseModel):
    # Request body for POST /v1/tokenizer: the text whose tokens are counted.
    text: str
|
506 |
-
|
507 |
-
def calculate_estimated_cost(
    prompt_tokens: int,
    completion_tokens: int,
    cost_per_token: float = 0.00000268,
) -> float:
    """
    Calculate the estimated cost of a request from its token counts.

    Args:
        prompt_tokens: Number of tokens in the prompt.
        completion_tokens: Number of tokens in the generated completion.
        cost_per_token: Price charged per token. Defaults to the example
            rate of $0.00000268; pass your actual pricing here.

    Returns:
        The total cost for all tokens, rounded to 8 decimal places.
    """
    total_tokens = prompt_tokens + completion_tokens
    return round(total_tokens * cost_per_token, 8)
|
515 |
-
|
516 |
-
def create_response(content: str, model: str, finish_reason: Optional[str] = None) -> Dict[str, Any]:
    """
    Build a chat-completion response payload with a single assistant choice.

    Args:
        content: Text of the assistant message.
        model: Model name echoed back in the payload.
        finish_reason: Why generation ended (e.g. "stop"), or None.

    Returns:
        A dict with a fresh "chatcmpl-<uuid4>" id, the creation time in
        whole seconds since the epoch, one choice at index 0, and
        "usage" set to None so the caller can fill it in.
    """
    return {
        "id": f"chatcmpl-{uuid.uuid4()}",
        "object": "chat.completion",
        # time.time() yields the epoch directly, avoiding a round trip
        # through a naive local datetime.
        "created": int(time.time()),
        "model": model,
        "choices": [
            {
                "index": 0,
                "message": {
                    "role": "assistant",
                    "content": content,
                },
                "finish_reason": finish_reason,
            }
        ],
        "usage": None,  # To be filled in non-streaming responses
    }
|
534 |
-
|
535 |
-
def extract_all_images_from_content(content_items: List[ContentItem]) -> List[Tuple[str, str]]:
    """
    Collect every image found in a message's content list.

    Items that are not ImageContent are ignored, as are images whose
    'url' entry is missing or empty. A missing 'alt' entry yields ''.

    Returns:
        A list of (alt_text, image_data_uri) tuples in input order.
    """
    return [
        (entry.image_url.get('alt', ''), entry.image_url.get('url', ''))
        for entry in content_items
        if isinstance(entry, ImageContent) and entry.image_url.get('url', '')
    ]
|
548 |
-
|
549 |
-
# Endpoint: POST /v1/chat/completions
|
550 |
-
@app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
async def chat_completions(request: ChatRequest, req: Request, api_key: str = Depends(get_api_key)):
    """
    Handle a chat-completion request.

    Validates the requested model, analyzes every image attached to user
    messages, and returns the combined analysis either as a single JSON
    payload or, when ``request.stream`` is true, as a server-sent event
    stream (one chunk per image, then the full response, then ``[DONE]``).

    Raises:
        HTTPException: 400 when the model is unknown, 503 when the model
            raises ModelNotWorkingException, 500 on any other failure.
    """
    # The ASGI server may not provide client information.
    client_ip = req.client.host if req.client else "unknown"
    # Never write the full credential to the logs.
    masked_key = f"{api_key[:4]}****" if api_key and len(api_key) > 8 else "****"
    # Redact user messages only for logging purposes
    redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]

    logger.info(f"Received chat completions request from API key: {masked_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")

    try:
        # Validate that the requested model is available
        if request.model not in Blackbox.models and request.model not in Blackbox.model_aliases:
            logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
            raise HTTPException(status_code=400, detail="Requested model is not available.")

        # Analyze each user-supplied image exactly once; the results feed
        # both the streaming chunks and the final message.
        analysis_results = []
        for msg in request.messages:
            if msg.role != "user":
                continue
            for _alt_text, image_data_uri in extract_all_images_from_content(msg.content):
                analysis_results.append(await analyze_image(image_data_uri))

        assistant_content = "".join(result + "\n" for result in analysis_results)
        # Example response content
        assistant_content += "Based on the image you provided, here are the insights..."

        # Calculate token usage (simple whitespace-based approximation)
        prompt_tokens = 0
        for msg in request.messages:
            parts = [
                item.text if isinstance(item, TextContent) else item.image_url.get('url', '')
                for item in msg.content
            ]
            prompt_tokens += len(" ".join(parts).split())
        completion_tokens = len(assistant_content.split())
        total_tokens = prompt_tokens + completion_tokens
        estimated_cost = calculate_estimated_cost(prompt_tokens, completion_tokens)

        logger.info(f"Completed response generation for API key: {masked_key} | IP: {client_ip}")

        # The complete response; returned directly, or sent as the last
        # data event of a stream.
        final_response = create_response(assistant_content.strip(), request.model, "stop")
        final_response["usage"] = {
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": total_tokens,
            "estimated_cost": estimated_cost,
        }

        if not request.stream:
            return final_response

        async def generate():
            try:
                for analysis_result in analysis_results:
                    response_chunk = {
                        "id": f"chatcmpl-{uuid.uuid4()}",
                        "object": "chat.completion.chunk",
                        "created": int(datetime.now().timestamp()),
                        "model": request.model,
                        "choices": [
                            {
                                "index": 0,
                                "delta": {"content": analysis_result + "\n", "role": "assistant"},
                                "finish_reason": None,
                            }
                        ],
                        "usage": None,
                    }
                    yield f"data: {json.dumps(response_chunk)}\n\n"

                # Final message
                yield f"data: {json.dumps(final_response)}\n\n"
                yield "data: [DONE]\n\n"
            except Exception as e:
                # Headers are already sent, so report the failure in-band.
                logger.exception(f"Error during streaming response generation from IP: {client_ip}.")
                error_response = {"error": str(e)}
                yield f"data: {json.dumps(error_response)}\n\n"

        return StreamingResponse(generate(), media_type="text/event-stream")
    except ModelNotWorkingException as e:
        logger.warning(f"Model not working: {e} | IP: {client_ip}")
        raise HTTPException(status_code=503, detail=str(e)) from e
    except HTTPException as he:
        logger.warning(f"HTTPException: {he.detail} | IP: {client_ip}")
        raise
    except Exception as e:
        logger.exception(f"An unexpected error occurred while processing the chat completions request from IP: {client_ip}.")
        # NOTE(review): str(e) is returned to the client and may expose
        # internal details; consider a generic message.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
678 |
-
|
679 |
-
# Endpoint: POST /v1/tokenizer
|
680 |
-
@app.post("/v1/tokenizer", dependencies=[Depends(rate_limiter_per_ip)])
async def tokenizer(request: TokenizerRequest, req: Request):
    """
    Return the submitted text together with an approximate token count.

    The count is the number of whitespace-separated words in the text.
    """
    # The ASGI server may not provide client information.
    client_ip = req.client.host if req.client else "unknown"
    text = request.text
    logger.info(f"Tokenizer requested from IP: {client_ip} | Text length: {len(text)}")
    return {"text": text, "tokens": len(text.split())}
|
687 |
-
|
688 |
-
# Endpoint: GET /v1/models
|
689 |
-
@app.get("/v1/models", dependencies=[Depends(rate_limiter_per_ip)])
async def get_models(req: Request):
    """List every model in ``Blackbox.models`` as ``{"id", "object"}`` entries."""
    # The ASGI server may not provide client information.
    client_ip = req.client.host if req.client else "unknown"
    logger.info(f"Fetching available models from IP: {client_ip}")
    return {"data": [{"id": model, "object": "model"} for model in Blackbox.models]}
|
694 |
-
|
695 |
-
# Endpoint: GET /v1/models/{model}/status
|
696 |
-
@app.get("/v1/models/{model}/status", dependencies=[Depends(rate_limiter_per_ip)])
async def model_status(model: str, req: Request):
    """
    Report whether a model is available, directly or through an alias.

    Raises:
        HTTPException: 404 when the name is neither a known model nor an
            alias that resolves to a known model.
    """
    # The ASGI server may not provide client information.
    client_ip = req.client.host if req.client else "unknown"
    logger.info(f"Model status requested for '{model}' from IP: {client_ip}")

    if model in Blackbox.models:
        return {"model": model, "status": "available"}

    # An alias only counts when the model it points at actually exists.
    if model in Blackbox.model_aliases and Blackbox.model_aliases[model] in Blackbox.models:
        actual_model = Blackbox.model_aliases[model]
        return {"model": actual_model, "status": "available via alias"}

    logger.warning(f"Model not found: {model} from IP: {client_ip}")
    raise HTTPException(status_code=404, detail="Model not found")
|
708 |
-
|
709 |
-
# Endpoint: GET /v1/health
|
710 |
-
@app.get("/v1/health", dependencies=[Depends(rate_limiter_per_ip)])
async def health_check(req: Request):
    """Liveness probe: log the caller and answer ``{"status": "ok"}``."""
    # The ASGI server may not provide client information.
    client_ip = req.client.host if req.client else "unknown"
    logger.info(f"Health check requested from IP: {client_ip}")
    return {"status": "ok"}
|
715 |
-
|
716 |
-
# Endpoint: GET /v1/chat/completions (GET method)
|
717 |
-
@app.get("/v1/chat/completions")
async def chat_completions_get(req: Request):
    """Answer GET requests to the completions path with a redirect to 'about:blank'."""
    # The ASGI server may not provide client information.
    client_ip = req.client.host if req.client else "unknown"
    logger.info(f"GET request made to /v1/chat/completions from IP: {client_ip}, redirecting to 'about:blank'")
    return RedirectResponse(url='about:blank')
|
722 |
|
723 |
# Custom exception handler to match OpenAI's error format
|
724 |
@app.exception_handler(HTTPException)
|
|
|
10 |
import asyncio
|
11 |
import time
|
12 |
from collections import defaultdict
|
13 |
+
from typing import List, Dict, Any, Optional, Union, Tuple
|
14 |
|
15 |
from datetime import datetime
|
16 |
|
|
|
453 |
response = await call_next(request)
|
454 |
return response
|
455 |
|
456 |
+
# Define other endpoints as needed
|
457 |
+
# ... [Insert other endpoints here] ...
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
458 |
|
459 |
# Custom exception handler to match OpenAI's error format
|
460 |
@app.exception_handler(HTTPException)
|