Niansuh committed on
Commit fe69885 · verified · 1 Parent(s): 44f4452

Update main.py

Files changed (1)
  1. main.py +91 -34
main.py CHANGED
@@ -41,7 +41,7 @@ CLEANUP_INTERVAL = 60 # seconds
 RATE_LIMIT_WINDOW = 60 # seconds
 
 # Define the ImageResponse model
-class ImageResponse(BaseModel):
+class ImageResponseModel(BaseModel):
     images: str
     alt: str
 
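The only change in this hunk is the rename from ImageResponse to ImageResponseModel, presumably to avoid a name collision elsewhere in main.py. A minimal sketch of the model as it stands after the commit (the demo values are invented):

from pydantic import BaseModel

class ImageResponseModel(BaseModel):
    images: str  # URL of the generated image
    alt: str     # alt text to attach to the image

# Pydantic validates the field types at construction time
resp = ImageResponseModel(images="https://example.com/image.png", alt="Generated Image")
print(resp.images)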
@@ -203,7 +203,7 @@ class Blackbox:
         proxy: Optional[str] = None,
         websearch: bool = False,
         **kwargs
-    ) -> AsyncGenerator[Union[str, ImageResponse], None]:
+    ) -> AsyncGenerator[Union[str, ImageResponseModel], None]:
         """
         Creates an asynchronous generator for streaming responses from Blackbox AI.
 
@@ -215,7 +215,7 @@ class Blackbox:
         **kwargs: Additional keyword arguments.
 
         Yields:
-            Union[str, ImageResponse]: Segments of the generated response or ImageResponse objects.
+            Union[str, ImageResponseModel]: Segments of the generated response or ImageResponseModel objects.
         """
         model = cls.get_model(model)
 
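These two hunks update the return annotation and docstring to match the rename: create_async_generator yields either plain text segments or ImageResponseModel objects, so consumers have to branch on type. A hedged consumption sketch (the model id and message are placeholders; assumes main.py is importable as a module):

import asyncio
from main import Blackbox, ImageResponseModel  # assumption: main.py on the import path

async def consume():
    async for item in Blackbox.create_async_generator(
        model="blackbox",  # placeholder model id
        messages=[{"role": "user", "content": "Draw me a cat"}],
    ):
        if isinstance(item, ImageResponseModel):
            print("image:", item.images)
        else:
            print("text:", item)

asyncio.run(consume())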
@@ -323,7 +323,7 @@ class Blackbox:
             match = re.search(r'!\[.*?\]\((https?://[^\)]+)\)', cleaned_response)
             if match:
                 image_url = match.group(1)
-                image_response = ImageResponse(images=image_url, alt="Generated Image")
+                image_response = ImageResponseModel(images=image_url, alt="Generated Image")
                 yield image_response
             else:
                 yield cleaned_response
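This call site now constructs the renamed model from whatever Markdown image link the regex finds. The pattern itself is easy to sanity-check in isolation (the sample response string is made up):

import re

cleaned_response = "Here is your picture: ![a cat](https://example.com/cat.png)"
match = re.search(r'!\[.*?\]\((https?://[^\)]+)\)', cleaned_response)
if match:
    print(match.group(1))  # -> https://example.com/cat.png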
@@ -485,6 +485,7 @@ class ChatRequest(BaseModel):
     frequency_penalty: Optional[float] = 0.0
     logit_bias: Optional[Dict[str, float]] = None
     user: Optional[str] = None
+    stream: Optional[bool] = False # Added stream parameter
 
 @app.post("/v1/chat/completions", dependencies=[Depends(rate_limiter_per_ip)])
 async def chat_completions(
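The new stream field defaults to False, so clients that omit it keep the old behaviour and the request schema stays backward compatible. A hypothetical non-streaming call (host, port, model id, and auth header are all placeholders, not taken from the diff):

import requests

payload = {
    "model": "blackbox",
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": False,  # omitting the field entirely is equivalent
}
r = requests.post(
    "http://localhost:8000/v1/chat/completions",
    headers={"Authorization": "Bearer <api-key>"},
    json=payload,
    timeout=60,
)
print(r.json()["choices"][0]["message"]["content"])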
@@ -496,7 +497,7 @@ async def chat_completions(
     # Redact user messages only for logging purposes
     redacted_messages = [{"role": msg.role, "content": "[redacted]"} for msg in request.messages]
 
-    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages}")
+    logger.info(f"Received chat completions request from API key: {api_key} | IP: {client_ip} | Model: {request.model} | Messages: {redacted_messages} | Stream: {request.stream}")
 
     try:
         # Validate that the requested model is available
@@ -504,43 +505,99 @@
             logger.warning(f"Attempt to use unavailable model: {request.model} from IP: {client_ip}")
             raise HTTPException(status_code=400, detail="Requested model is not available.")
 
-        # Create the asynchronous generator for streaming responses
-        async_generator = Blackbox.create_async_generator(
-            model=request.model,
-            messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
-            temperature=request.temperature,
-            max_tokens=request.max_tokens
-        )
-
-        logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
-
-        # Define a generator function to yield the streamed data in the OpenAI format
-        async def stream_response():
-            async for chunk in async_generator:
-                if isinstance(chunk, ImageResponse):
-                    # Handle image responses if necessary
-                    # For simplicity, converting ImageResponse to a string representation
-                    # You might want to handle it differently based on your requirements
-                    data = json.dumps({
-                        "choices": [{
-                            "delta": {
-                                "content": f"![Image]({chunk.images})"
-                            }
-                        }]
-                    })
-                else:
-                    # Assuming chunk is a string
-                    data = json.dumps({
-                        "choices": [{
-                            "delta": {
-                                "content": chunk
-                            }
-                        }]
-                    })
-                yield f"data: {data}\n\n"
-
-        return StreamingResponse(stream_response(), media_type="text/event-stream")
-
+        if request.stream:
+            # Create the asynchronous generator for streaming responses
+            async_generator = Blackbox.create_async_generator(
+                model=request.model,
+                messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
+                temperature=request.temperature,
+                max_tokens=request.max_tokens
+            )
+
+            logger.info(f"Started streaming response for API key: {api_key} | IP: {client_ip}")
+
+            # Define a generator function to yield the streamed data in the OpenAI format
+            async def stream_response():
+                try:
+                    async for chunk in async_generator:
+                        if isinstance(chunk, ImageResponseModel):
+                            # Handle image responses by sending markdown image syntax
+                            data = json.dumps({
+                                "choices": [{
+                                    "delta": {
+                                        "content": f"![Image]({chunk.images})"
+                                    }
+                                }]
+                            })
+                        else:
+                            # Assuming chunk is a string
+                            data = json.dumps({
+                                "choices": [{
+                                    "delta": {
+                                        "content": chunk
+                                    }
+                                }]
+                            })
+                        yield f"data: {data}\n\n"
+                except Exception as e:
+                    # If an error occurs during streaming, send it as an SSE error event
+                    error_data = json.dumps({
+                        "choices": [{
+                            "delta": {
+                                "content": f"Error: {str(e)}"
+                            }
+                        }]
+                    })
+                    yield f"data: {error_data}\n\n"
+
+            return StreamingResponse(stream_response(), media_type="text/event-stream")
+        else:
+            # Non-streaming: Collect all chunks and assemble the final response
+            all_chunks = []
+            async for chunk in Blackbox.create_async_generator(
+                model=request.model,
+                messages=[{"role": msg.role, "content": msg.content} for msg in request.messages],
+                temperature=request.temperature,
+                max_tokens=request.max_tokens
+            ):
+                if isinstance(chunk, ImageResponseModel):
+                    # Convert ImageResponseModel to markdown image syntax
+                    content = f"![Image]({chunk.images})"
+                else:
+                    content = chunk
+                all_chunks.append(content)
+
+            # Assemble the full response content
+            full_content = "\n".join(all_chunks)
+
+            # Calculate token usage (approximation)
+            prompt_tokens = sum(len(msg.content.split()) for msg in request.messages)
+            completion_tokens = len(full_content.split())
+            total_tokens = prompt_tokens + completion_tokens
+
+            logger.info(f"Completed non-stream response generation for API key: {api_key} | IP: {client_ip}")
+
+            return {
+                "id": f"chatcmpl-{uuid.uuid4()}",
+                "object": "chat.completion",
+                "created": int(datetime.now().timestamp()),
+                "model": request.model,
+                "choices": [
+                    {
+                        "index": 0,
+                        "message": {
+                            "role": "assistant",
+                            "content": full_content
+                        },
+                        "finish_reason": "stop"
+                    }
+                ],
+                "usage": {
+                    "prompt_tokens": prompt_tokens,
+                    "completion_tokens": completion_tokens,
+                    "total_tokens": total_tokens
+                },
+            }
     except ModelNotWorkingException as e:
         logger.warning(f"Model not working: {e} | IP: {client_ip}")
         raise HTTPException(status_code=503, detail=str(e))
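When stream is true, each chunk leaves the server as a Server-Sent Events data: line carrying an OpenAI-style delta. A minimal client sketch for that path (httpx is an assumption, not a dependency of main.py; URL and key are placeholders). Note that the handler never emits a closing data: [DONE] event, so strictly OpenAI-compatible clients will only stop when the connection closes:

import json
import httpx  # assumption: any SSE-capable HTTP client works

payload = {
    "model": "blackbox",
    "messages": [{"role": "user", "content": "Hello"}],
    "stream": True,
}
with httpx.stream(
    "POST",
    "http://localhost:8000/v1/chat/completions",
    headers={"Authorization": "Bearer <api-key>"},
    json=payload,
    timeout=None,
) as response:
    for line in response.iter_lines():
        if line.startswith("data: "):
            delta = json.loads(line[len("data: "):])["choices"][0]["delta"]
            print(delta.get("content", ""), end="", flush=True)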
 
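On the non-streaming path the usage block is a whitespace-split approximation, not a real tokenizer count. The gap is easy to see side by side (tiktoken here is purely illustrative and is not imported by main.py):

text = "Hello there, how are you today?"

# The commit's approximation: whitespace-delimited words
print(len(text.split()))  # 6

# A BPE tokenizer would usually report a different number, e.g.:
# import tiktoken
# enc = tiktoken.get_encoding("cl100k_base")
# print(len(enc.encode(text)))  # typically 8 for this string

Separately, the handler joins collected chunks with "\n", which inserts newlines the upstream generator never emitted; "".join(all_chunks) would reproduce the stream verbatim.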