Niansuh committed on
Commit fe69885 · verified · 1 Parent(s): 44f4452

Update main.py

Files changed (1)
  1. main.py +91 -34
main.py CHANGED
@@ -41,7 +41,7 @@ CLEANUP_INTERVAL = 60 # seconds
 RATE_LIMIT_WINDOW = 60 # seconds
 
 # Define the ImageResponse model
-class ImageResponse(BaseModel):
+class ImageResponseModel(BaseModel):
     images: str
     alt: str
 
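The only change in this hunk is the rename from ImageResponse to ImageResponseModel, presumably to avoid a name collision elsewhere in main.py. A minimal sketch of the model as it stands after the commit (the demo values are invented):

from pydantic import BaseModel

class ImageResponseModel(BaseModel):
    images: str  # URL of the generated image
    alt: str     # alt text to attach to the image

# Pydantic validates the field types at construction time
resp = ImageResponseModel(images="https://example.com/image.png", alt="Generated Image")
print(resp.images)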
@@ -203,7 +203,7 @@ class Blackbox:
         proxy: Optional[str] = None,
         websearch: bool = False,
         **kwargs
-    ) -> AsyncGenerator[Union[str, ImageResponse], None]:
+    ) -> AsyncGenerator[Union[str, ImageResponseModel], None]:
         """
         Creates an asynchronous generator for streaming responses from Blackbox AI.
 
@@ -215,7 +215,7 @@ class Blackbox:
         **kwargs: Additional keyword arguments.
 
         Yields:
-            Union[str, ImageResponse]: Segments of the generated response or ImageResponse objects.
+            Union[str, ImageResponseModel]: Segments of the generated response or ImageResponseModel objects.
         """
         model = cls.get_model(model)
 
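These two hunks update the return annotation and docstring to match the rename: create_async_generator yields either plain text segments or ImageResponseModel objects, so consumers have to branch on type. A hedged consumption sketch (the model id and message are placeholders; assumes main.py is importable as a module):

import asyncio
from main import Blackbox, ImageResponseModel  # assumption: main.py on the import path

async def consume():
    async for item in Blackbox.create_async_generator(
        model="blackbox",  # placeholder model id
        messages=[{"role": "user", "content": "Draw me a cat"}],
    ):
        if isinstance(item, ImageResponseModel):
            print("image:", item.images)
        else:
            print("text:", item)

asyncio.run(consume())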
@@ -323,7 +323,7 @@ class Blackbox:
             match = re.search(r'!\[.*?\]\((https?://[^\)]+)\)', cleaned_response)
             if match:
                 image_url = match.group(1)
-                image_response = ImageResponse(images=image_url, alt="Generated Image")
+                image_response = ImageResponseModel(images=image_url, alt="Generated Image")
                 yield image_response
             else:
                 yield cleaned_response
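This call site now constructs the renamed model from whatever Markdown image link the regex finds. The pattern itself is easy to sanity-check in isolation (the sample response string is made up):

import re

cleaned_response = "Here is your picture: ![a cat](https://example.com/cat.png)"
match = re.search(r'!\[.*?\]\((https?://[^\)]+)\)', cleaned_response)
if match:
    print(match.group(1))  # -> https://example.com/cat.png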
@@ -485,6 +485,7 @@ class ChatRequest(BaseModel):
     frequency_penalty: Optional[float] = 0.0
     logit_bias: Optional[Dict[str, float]] = None
     user: Optional[str] = None
+    stream: Optional[bool] = False # Added stream parameter
 
 @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
 async def chat_completions(
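The new stream field defaults to False, so clients that omit it keep the old behaviour and the request schema stays backward compatible. A hypothetical non-streaming call (host, port, model id, and auth header are all placeholders, not taken from the diff):

import requests

payload = {
    "model": "blackbox",
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": False,  # omitting the field entirely is equivalent
}
r = requests.post(
    "http://localhost:8000/v1/chat/completions",
    headers={"Authorization": "Bearer <api-key>"},
    json=payload,
    timeout=60,
)
print(r.json()["choices"][0]["message"]["content"])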
@@ -496,7 +497,7 @@ async def chat_completions(
     # Redact user messages only for logging purposes
     redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
 
-    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
+    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages} | Stream: {request.stream}")
 
     try:
         # Validate that the requested model is available
@@ -504,43 +505,99 @@
             logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
             raise HTTPException(status_code=400, detail="Requested model is not available.")
 
-        # Create the asynchronous generator for streaming responses
-        async_generator = Blackbox.create_async_generator(
-            model=request.model,
-            messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
-            temperature=request.temperature,
-            max_tokens=request.max_tokens
-        )
-
-        logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
-
-        # Define a generator function to yield the streamed data in the OpenAI format
-        async def stream_response():
-            async for chunk in async_generator:
-                if isinstance(chunk, ImageResponse):
-                    # Handle image responses if necessary
-                    # For simplicity, converting ImageResponse to a string representation
-                    # You might want to handle it differently based on your requirements
-                    data = json.dumps({
-                        "choices": [{
-                            "delta": {
-                                "content": f"![Image]({chunk.images})"
-                            }
-                        }]
-                    })
-                else:
-                    # Assuming chunk is a string
-                    data = json.dumps({
-                        "choices": [{
-                            "delta": {
-                                "content": chunk
-                            }
-                        }]
-                    })
-                yield f"data: {data}\n\n"
-
-        return StreamingResponse(stream_response(), media_type="text/event-stream")
-
+        if request.stream:
+            # Create the asynchronous generator for streaming responses
+            async_generator = Blackbox.create_async_generator(
+                model=request.model,
+                messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
+                temperature=request.temperature,
+                max_tokens=request.max_tokens
+            )
+
+            logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
+
+            # Define a generator function to yield the streamed data in the OpenAI format
+            async def stream_response():
+                try:
+                    async for chunk in async_generator:
+                        if isinstance(chunk, ImageResponseModel):
+                            # Handle image responses by sending markdown image syntax
+                            data = json.dumps({
+                                "choices": [{
+                                    "delta": {
+                                        "content": f"![Image]({chunk.images})"
+                                    }
+                                }]
+                            })
+                        else:
+                            # Assuming chunk is a string
+                            data = json.dumps({
+                                "choices": [{
+                                    "delta": {
+                                        "content": chunk
+                                    }
+                                }]
+                            })
+                        yield f"data: {data}\n\n"
+                except Exception as e:
+                    # If an error occurs during streaming, send it as an SSE error event
+                    error_data = json.dumps({
+                        "choices": [{
+                            "delta": {
+                                "content": f"Error: {str(e)}"
+                            }
+                        }]
+                    })
+                    yield f"data: {error_data}\n\n"
+
+            return StreamingResponse(stream_response(), media_type="text/event-stream")
+        else:
+            # Non-streaming: Collect all chunks and assemble the final response
+            all_chunks = []
+            async for chunk in Blackbox.create_async_generator(
+                model=request.model,
+                messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
+                temperature=request.temperature,
+                max_tokens=request.max_tokens
+            ):
+                if isinstance(chunk, ImageResponseModel):
+                    # Convert ImageResponseModel to markdown image syntax
+                    content = f"![Image]({chunk.images})"
+                else:
+                    content = chunk
+                all_chunks.append(content)
+
+            # Assemble the full response content
+            full_content = "\n".join(all_chunks)
+
+            # Calculate token usage (approximation)
+            prompt_tokens = sum(len(msg.content.split()) for msg in request.messages)
+            completion_tokens = len(full_content.split())
+            total_tokens = prompt_tokens + completion_tokens
+
+            logger.info(f"Completed non-stream response generation for API key: {api_key} | IP: {client_ip}")
+
+            return {
+                "id": f"chatcmpl-{uuid.uuid4()}",
+                "object": "chat.completion",
+                "created": int(datetime.now().timestamp()),
+                "model": request.model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": full_content
+                        },
+                        "finish_reason": "stop"
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": total_tokens
+                },
+            }
     except ModelNotWorkingException as e:
         logger.warning(f"Model not working: {e} | IP: {client_ip}")
         raise HTTPException(status_code=503, detail=str(e))
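When stream is true, each chunk leaves the server as a Server-Sent Events data: line carrying an OpenAI-style delta. A minimal client sketch for that path (httpx is an assumption, not a dependency of main.py; URL and key are placeholders). Note that the handler never emits a closing data: [DONE] event, so strictly OpenAI-compatible clients will only stop when the connection closes:

import json
import httpx  # assumption: any SSE-capable HTTP client works

payload = {
    "model": "blackbox",
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": True,
}
with httpx.stream(
    "POST",
    "http://localhost:8000/v1/chat/completions",
    headers={"Authorization": "Bearer <api-key>"},
    json=payload,
    timeout=None,
) as response:
    for line in response.iter_lines():
        if line.startswith("data: "):
            delta = json.loads(line[len("data: "):])["choices"][0]["delta"]
            print(delta.get("content", ""), end="", flush=True)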
 
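On the non-streaming path the usage block is a whitespace-split approximation, not a real tokenizer count. The gap is easy to see side by side (tiktoken here is purely illustrative and is not imported by main.py):

text = "Hello there, how are you today?"

# The commit's approximation: whitespace-delimited words
print(len(text.split()))  # 6

# A BPE tokenizer would usually report a different number, e.g.:
# import tiktoken
# enc = tiktoken.get_encoding("cl100k_base")
# print(len(enc.encode(text)))  # typically 8 for this string

Separately, the handler joins collected chunks with "\n", which inserts newlines the upstream generator never emitted; "".join(all_chunks) would reproduce the stream verbatim.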