Update main.py
main.py (CHANGED)
@@ -41,7 +41,7 @@ CLEANUP_INTERVAL = 60 # seconds
 RATE_LIMIT_WINDOW = 60 # seconds
 
 # Define the ImageResponse model
-class …
+class ImageResponseModel(BaseModel):
     images: str
     alt: str
 
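For context, a minimal sketch of the renamed model in use; the sample values are invented, BaseModel is pydantic's as used elsewhere in main.py, and the markdown rendering mirrors what the endpoint emits further down.

from pydantic import BaseModel

class ImageResponseModel(BaseModel):
    images: str  # URL of the generated image
    alt: str     # alt text used when rendering markdown

img = ImageResponseModel(images="https://example.com/cat.png", alt="Generated Image")
print(f"![{img.alt}]({img.images})")  # ![Generated Image](https://example.com/cat.png)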
@@ -203,7 +203,7 @@ class Blackbox:
         proxy: Optional[str] = None,
         websearch: bool = False,
         **kwargs
-    ) -> AsyncGenerator[Union[str, …
+    ) -> AsyncGenerator[Union[str, ImageResponseModel], None]:
         """
         Creates an asynchronous generator for streaming responses from Blackbox AI.
 
@@ -215,7 +215,7 @@ class Blackbox:
             **kwargs: Additional keyword arguments.
 
         Yields:
-            Union[str, …
+            Union[str, ImageResponseModel]: Segments of the generated response or ImageResponseModel objects.
         """
         model = cls.get_model(model)
 
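A hedged usage sketch of the updated signature; the model id and message are invented, and the import line assumes Blackbox and ImageResponseModel live in main.py.

import asyncio
from main import Blackbox, ImageResponseModel  # assumed module layout

async def demo():
    async for chunk in Blackbox.create_async_generator(
        model="blackbox",  # illustrative model id only
        messages=[{"role": "user", "content": "Hello"}],
    ):
        if isinstance(chunk, ImageResponseModel):
            print("image:", chunk.images)
        else:
            print("text:", chunk)

asyncio.run(demo())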
@@ -323,7 +323,7 @@ class Blackbox:
                 match = re.search(r'!\[.*?\]\((https?://[^\)]+)\)', cleaned_response)
                 if match:
                     image_url = match.group(1)
-                    image_response = …
+                    image_response = ImageResponseModel(images=image_url, alt="Generated Image")
                     yield image_response
                 else:
                     yield cleaned_response
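The regex above pulls the URL out of markdown image syntax before it is wrapped in the model; a standalone check with an invented sample string:

import re

cleaned_response = "Result: ![Generated Image](https://example.com/out.png)"
match = re.search(r'!\[.*?\]\((https?://[^\)]+)\)', cleaned_response)
if match:
    print(match.group(1))  # https://example.com/out.png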
@@ -485,6 +485,7 @@ class ChatRequest(BaseModel):
     frequency_penalty: Optional[float] = 0.0
     logit_bias: Optional[Dict[str, float]] = None
     user: Optional[str] = None
+    stream: Optional[bool] = False  # Added stream parameter
 
 @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
 async def chat_completions(
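With the new stream field, clients can opt into server-sent events. A hedged request example; the URL, auth header, and model id are assumptions about the deployment, not part of this change:

import requests

resp = requests.post(
    "http://localhost:8000/v1/chat/completions",  # assumed local deployment
    headers={"Authorization": "Bearer YOUR_API_KEY"},  # auth scheme assumed
    json={
        "model": "blackbox",  # illustrative model id
        "messages": [{"role": "user", "content": "Hi"}],
        "stream": True,
    },
    stream=True,
)
for line in resp.iter_lines():
    if line:
        print(line.decode())  # each event arrives as: data: {...}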
@@ -496,7 +497,7 @@ async def chat_completions(
     # Redact user messages only for logging purposes
     redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
 
-    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
+    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages} | Stream: {request.stream}")
 
     try:
         # Validate that the requested model is available
@@ -504,43 +505,99 @@ async def chat_completions(
             logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
             raise HTTPException(status_code=400, detail="Requested model is not available.")
 
-        [previous response-generation block, old lines 507-524; content truncated in the diff view]
+        if request.stream:
+            # Create the asynchronous generator for streaming responses
+            async_generator = Blackbox.create_async_generator(
+                model=request.model,
+                messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
+                temperature=request.temperature,
+                max_tokens=request.max_tokens
+            )
+
+            logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
+
+            # Define a generator function to yield the streamed data in the OpenAI format
+            async def stream_response():
+                try:
+                    async for chunk in async_generator:
+                        if isinstance(chunk, ImageResponseModel):
+                            # Handle image responses by sending markdown image syntax
+                            data = json.dumps({
+                                "choices": [{
+                                    "delta": {
+                                        "content": f"![{chunk.alt}]({chunk.images})"
+                                    }
+                                }]
+                            })
+                        else:
+                            # Assuming chunk is a string
+                            data = json.dumps({
+                                "choices": [{
+                                    "delta": {
+                                        "content": chunk
+                                    }
+                                }]
+                            })
+                        yield f"data: {data}\n\n"
+                except Exception as e:
+                    # If an error occurs during streaming, send it as an SSE error event
+                    error_data = json.dumps({
                         "choices": [{
                             "delta": {
-                                "content": f"…
+                                "content": f"Error: {str(e)}"
                             }
                         }]
                     })
+                    yield f"data: {error_data}\n\n"
+
+            return StreamingResponse(stream_response(), media_type="text/event-stream")
+        else:
+            # Non-streaming: Collect all chunks and assemble the final response
+            all_chunks = []
+            async for chunk in Blackbox.create_async_generator(
+                model=request.model,
+                messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
+                temperature=request.temperature,
+                max_tokens=request.max_tokens
+            ):
+                if isinstance(chunk, ImageResponseModel):
+                    # Convert ImageResponseModel to markdown image syntax
+                    content = f"![{chunk.alt}]({chunk.images})"
                 else:
+                    content = chunk
+                all_chunks.append(content)
+
+            # Assemble the full response content
+            full_content = "\n".join(all_chunks)
+
+            # Calculate token usage (approximation)
+            prompt_tokens = sum(len(msg.content.split()) for msg in request.messages)
+            completion_tokens = len(full_content.split())
+            total_tokens = prompt_tokens + completion_tokens
 
+            logger.info(f"Completed non-stream response generation for API key: {api_key} | IP: {client_ip}")
 
+            return {
+                "id": f"chatcmpl-{uuid.uuid4()}",
+                "object": "chat.completion",
+                "created": int(datetime.now().timestamp()),
+                "model": request.model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": full_content
+                        },
+                        "finish_reason": "stop"
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": total_tokens
+                },
+            }
     except ModelNotWorkingException as e:
         logger.warning(f"Model not working: {e} | IP: {client_ip}")
         raise HTTPException(status_code=503, detail=str(e))
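Each streamed event above is a data: line carrying a partial OpenAI-style chunk. A small client-side parser sketch matching that format (the sample line is invented):

import json

def delta_content(sse_line: str) -> str:
    # Extract the delta text from one "data: {...}" SSE line.
    if sse_line.startswith("data: "):
        payload = json.loads(sse_line[len("data: "):])
        return payload["choices"][0]["delta"].get("content", "")
    return ""

print(delta_content('data: {"choices": [{"delta": {"content": "Hello"}}]}'))  # Hello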