jbilcke-hf HF Staff commited on
Commit
59e0f6a
·
1 Parent(s): 49dcd35
DEPLOYMENT.md CHANGED
@@ -52,13 +52,18 @@ curl https://api.endpoints.huggingface.cloud/v2/endpoint/<YOUR_ACCOUNT_NAME> -X
52
  # load the environment
53
  # (if you haven't done it already for this shell session)
54
  source .python_venv/bin/activate
55
-
56
- HF_TOKEN="<USE YOUR OWN TOKEN>" \
57
- SECRET_TOKEN="<USE YOUR OWN TOKEN>" \
 
 
 
58
  VIDEO_ROUND_ROBIN_SERVER_1="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
59
  VIDEO_ROUND_ROBIN_SERVER_2="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
60
  VIDEO_ROUND_ROBIN_SERVER_3="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
61
  VIDEO_ROUND_ROBIN_SERVER_4="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
 
 
62
  HF_IMAGE_MODEL="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
63
  HF_TEXT_MODEL="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
64
  python3 api.py
 
52
  # load the environment
53
  # (if you haven't done it already for this shell session)
54
  source .python_venv/bin/activate
55
+
56
+ PRODUCT_NAME="AiTube" \
57
+ MAX_NODES="3" \
58
+ MAINTENANCE_MODE=false \
59
+ HF_TOKEN="<USE YOUR OWN HF TOKEN>" \
60
+ SECRET_TOKEN="<USE YOUR OWN AITUBE SECRET>" \
61
  VIDEO_ROUND_ROBIN_SERVER_1="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
62
  VIDEO_ROUND_ROBIN_SERVER_2="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
63
  VIDEO_ROUND_ROBIN_SERVER_3="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
64
  VIDEO_ROUND_ROBIN_SERVER_4="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
65
+ VIDEO_ROUND_ROBIN_SERVER_5="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
66
+ VIDEO_ROUND_ROBIN_SERVER_6="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
67
  HF_IMAGE_MODEL="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
68
  HF_TEXT_MODEL="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
69
  python3 api.py
PROMPT_CONTEXT.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ For some context: our app is an AI video generation platform, where people use the frontend app (written in Flutter) to virtually "search for videos" (the video synopses and their content are generated on the fly using AI). This uses a custom API written in Python, with WebSockets communication.
2
+
3
+ Task to perform: <fill your demand here>
README.md CHANGED
@@ -6,7 +6,7 @@ colorTo: red
6
  sdk: docker
7
  app_file: api.py
8
  pinned: true
9
- short_description: A Latent YouTube
10
  app_port: 8080
11
  disable_embedding: false
12
  hf_oauth: true
@@ -27,7 +27,7 @@ Stay hooked at @flngr on X!
27
 
28
  ## What is AiTube?
29
 
30
- AiTube 2 is a reboot of [AiTube 1](https://x.com/danielpikl/status/1737882643625078835), a project made in 2023 which generated AI videos in the background using LLM agents, to simulate an AI generated YouTube.
31
 
32
  In [AiTube 2](https://x.com/flngr/status/1864127796945011016), this concept is put upside down: now the content is generated on demand (when the user types something in the latent search input) and on the fly (video chunks are generated within a few seconds and streamed continuously).
33
 
 
6
  sdk: docker
7
  app_file: api.py
8
  pinned: true
9
+ short_description: The Latent Video Platform
10
  app_port: 8080
11
  disable_embedding: false
12
  hf_oauth: true
 
27
 
28
  ## What is AiTube?
29
 
30
+ AiTube 2 is a reboot of [AiTube 1](https://x.com/danielpikl/status/1737882643625078835), a project made in 2023 which generated AI videos in the background using LLM agents, to simulate an AI generated video platform.
31
 
32
  In [AiTube 2](https://x.com/flngr/status/1864127796945011016), this concept is put upside down: now the content is generated on demand (when the user types something in the latent search input) and on the fly (video chunks are generated within a few seconds and streamed continuously).
33
 
api.py CHANGED
@@ -3,6 +3,7 @@ import json
3
  import logging
4
  import os
5
  import pathlib
 
6
  from aiohttp import web, WSMsgType
7
  from typing import Dict, Any
8
  from api_core import VideoGenerationAPI
@@ -273,11 +274,26 @@ async def process_video_queue(queue: asyncio.Queue, ws: web.WebSocketResponse):
273
  async def status_handler(request: web.Request) -> web.Response:
274
  """Handler for API status endpoint"""
275
  api = request.app['api']
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  return web.json_response({
277
  'product': PRODUCT_NAME,
278
- 'version': '0.1.0',
279
  'maintenance_mode': MAINTENANCE_MODE,
280
- 'available_endpoints': len(VIDEO_ROUND_ROBIN_ENDPOINT_URLS)
 
 
281
  })
282
 
283
  async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
@@ -290,7 +306,7 @@ async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
290
  }, status=503) # 503 Service Unavailable
291
 
292
  ws = web.WebSocketResponse(
293
- max_msg_size=1024*1024*10, # 10MB max message size
294
  timeout=30.0 # we want to keep things tight and short
295
  )
296
 
@@ -361,7 +377,7 @@ async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
361
 
362
  async def init_app() -> web.Application:
363
  app = web.Application(
364
- client_max_size=1024**2*10 # 10MB max size
365
  )
366
 
367
  # Create API instance
 
3
  import logging
4
  import os
5
  import pathlib
6
+ import time
7
  from aiohttp import web, WSMsgType
8
  from typing import Dict, Any
9
  from api_core import VideoGenerationAPI
 
274
async def status_handler(request: web.Request) -> web.Response:
    """Handler for API status endpoint.

    Returns a JSON snapshot of product metadata plus per-endpoint health so
    operators can see which round-robin video servers are busy or in an
    error-backoff window.
    """
    api = request.app['api']

    # Sample the clock once so every endpoint is judged at the same instant.
    now = time.time()

    # Snapshot the current state of all round-robin endpoints.
    endpoint_statuses = [
        {
            'id': ep.id,
            'url': ep.url,
            'busy': ep.busy,
            'last_used': ep.last_used,
            'error_count': ep.error_count,
            'error_until': ep.error_until,
        }
        for ep in api.endpoint_manager.endpoints
    ]

    return web.json_response({
        'product': PRODUCT_NAME,
        'version': PRODUCT_VERSION,
        'maintenance_mode': MAINTENANCE_MODE,
        'available_endpoints': len(VIDEO_ROUND_ROBIN_ENDPOINT_URLS),
        'endpoint_status': endpoint_statuses,
        # "Active" = idle AND past any error backoff. The previous
        # `'error_until' not in ep` guard was dead code: every status dict
        # built above always carries that key.
        'active_endpoints': sum(
            1 for ep in endpoint_statuses
            if not ep['busy'] and ep['error_until'] < now
        ),
    })
298
 
299
  async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
 
306
  }, status=503) # 503 Service Unavailable
307
 
308
  ws = web.WebSocketResponse(
309
+ max_msg_size=1024*1024*20, # 20MB max message size
310
  timeout=30.0 # we want to keep things tight and short
311
  )
312
 
 
377
 
378
  async def init_app() -> web.Application:
379
  app = web.Application(
380
+ client_max_size=1024**2*20 # 20MB max size
381
  )
382
 
383
  # Create API instance
api_config.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
 
3
  PRODUCT_NAME = os.environ.get('PRODUCT_NAME', 'AiTube')
 
4
 
5
  TEXT_MODEL = os.environ.get('HF_TEXT_MODEL',
6
  #'HuggingFaceH4/zephyr-7b-beta'
@@ -12,6 +13,9 @@ IMAGE_MODEL = os.environ.get('HF_IMAGE_MODEL', '')
12
  # Environment variable to control maintenance mode
13
  MAINTENANCE_MODE = os.environ.get('MAINTENANCE_MODE', 'false').lower() in ('true', 'yes', '1', 't')
14
 
 
 
 
15
  ADMIN_ACCOUNTS = [
16
  "jbilcke-hf"
17
  ]
@@ -21,10 +25,17 @@ RAW_VIDEO_ROUND_ROBIN_ENDPOINT_URLS = [
21
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_2', ''),
22
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_3', ''),
23
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_4', ''),
 
 
 
 
24
  ]
25
 
26
  # Filter out empty strings from the endpoint list
27
- VIDEO_ROUND_ROBIN_ENDPOINT_URLS = [url for url in RAW_VIDEO_ROUND_ROBIN_ENDPOINT_URLS if url]
 
 
 
28
 
29
  HF_TOKEN = os.environ.get('HF_TOKEN')
30
 
 
1
  import os
2
 
3
  PRODUCT_NAME = os.environ.get('PRODUCT_NAME', 'AiTube')
4
+ PRODUCT_VERSION = "2.0.0"
5
 
6
  TEXT_MODEL = os.environ.get('HF_TEXT_MODEL',
7
  #'HuggingFaceH4/zephyr-7b-beta'
 
13
  # Environment variable to control maintenance mode
14
  MAINTENANCE_MODE = os.environ.get('MAINTENANCE_MODE', 'false').lower() in ('true', 'yes', '1', 't')
15
 
16
+ # Environment variable to control how many nodes to use
17
+ MAX_NODES = int(os.environ.get('MAX_NODES', '8'))
18
+
19
  ADMIN_ACCOUNTS = [
20
  "jbilcke-hf"
21
  ]
 
25
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_2', ''),
26
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_3', ''),
27
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_4', ''),
28
+ os.environ.get('VIDEO_ROUND_ROBIN_SERVER_5', ''),
29
+ os.environ.get('VIDEO_ROUND_ROBIN_SERVER_6', ''),
30
+ os.environ.get('VIDEO_ROUND_ROBIN_SERVER_7', ''),
31
+ os.environ.get('VIDEO_ROUND_ROBIN_SERVER_8', ''),
32
  ]
33
 
34
  # Filter out empty strings from the endpoint list
35
# Keep only the configured (non-empty) server URLs, preserving their order,
# then cap the pool at MAX_NODES entries.
filtered_urls = list(filter(None, RAW_VIDEO_ROUND_ROBIN_ENDPOINT_URLS))

VIDEO_ROUND_ROBIN_ENDPOINT_URLS = filtered_urls[:MAX_NODES]
39
 
40
  HF_TOKEN = os.environ.get('HF_TOKEN')
41
 
api_core.py CHANGED
@@ -111,20 +111,52 @@ class Endpoint:
111
  url: str
112
  busy: bool = False
113
  last_used: float = 0
 
 
114
 
115
  class EndpointManager:
116
  def __init__(self):
117
  self.endpoints: List[Endpoint] = []
118
  self.lock = Lock()
119
- self.endpoint_queue: Queue[Endpoint] = Queue()
120
  self.initialize_endpoints()
 
121
 
122
  def initialize_endpoints(self):
123
  """Initialize the list of endpoints"""
124
  for i, url in enumerate(VIDEO_ROUND_ROBIN_ENDPOINT_URLS):
125
  endpoint = Endpoint(id=i + 1, url=url)
126
  self.endpoints.append(endpoint)
127
- self.endpoint_queue.put_nowait(endpoint)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  @asynccontextmanager
130
  async def get_endpoint(self, max_wait_time: int = 10):
@@ -137,18 +169,15 @@ class EndpointManager:
137
  if time.time() - start_time > max_wait_time:
138
  raise TimeoutError(f"Could not acquire an endpoint within {max_wait_time} seconds")
139
 
140
- try:
141
- endpoint = self.endpoint_queue.get_nowait()
142
- async with self.lock:
143
- if not endpoint.busy:
144
- endpoint.busy = True
145
- endpoint.last_used = time.time()
146
- break
147
- else:
148
- await self.endpoint_queue.put(endpoint)
149
- except asyncio.QueueEmpty:
150
- await asyncio.sleep(0.5)
151
- continue
152
 
153
  yield endpoint
154
 
@@ -157,7 +186,7 @@ class EndpointManager:
157
  async with self.lock:
158
  endpoint.busy = False
159
  endpoint.last_used = time.time()
160
- await self.endpoint_queue.put(endpoint)
161
 
162
  class ChatRoom:
163
  def __init__(self):
@@ -626,32 +655,75 @@ Your caption:"""
626
  }
627
 
628
  async with self.endpoint_manager.get_endpoint() as endpoint:
629
- #logger.info(f"Using endpoint {endpoint.id} for video generation with prompt: {prompt}")
630
 
631
- async with ClientSession() as session:
632
- async with session.post(
633
- endpoint.url,
634
- headers={
635
- "Accept": "application/json",
636
- "Authorization": f"Bearer {HF_TOKEN}",
637
- "Content-Type": "application/json"
638
- },
639
- json=json_payload
640
- ) as response:
641
- if response.status != 200:
642
- error_text = await response.text()
643
- raise Exception(f"Video generation failed: HTTP {response.status} - {error_text}")
644
-
645
- result = await response.json()
646
-
647
- if "error" in result:
648
- raise Exception(f"Video generation failed: {result['error']}")
649
-
650
- video_data_uri = result.get("video")
651
- if not video_data_uri:
652
- raise Exception("No video data in response")
653
-
654
- return video_data_uri
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
 
656
 
657
  async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
 
111
  url: str
112
  busy: bool = False
113
  last_used: float = 0
114
+ error_count: int = 0
115
+ error_until: float = 0 # Timestamp until which this endpoint is considered in error state
116
 
117
  class EndpointManager:
118
  def __init__(self):
119
  self.endpoints: List[Endpoint] = []
120
  self.lock = Lock()
 
121
  self.initialize_endpoints()
122
+ self.last_used_index = -1 # Track the last used endpoint for round-robin
123
 
124
def initialize_endpoints(self):
    """Populate self.endpoints with one Endpoint per configured URL (ids start at 1)."""
    self.endpoints.extend(
        Endpoint(id=position + 1, url=server_url)
        for position, server_url in enumerate(VIDEO_ROUND_ROBIN_ENDPOINT_URLS)
    )
129
+
130
def _get_next_free_endpoint(self):
    """Pick the endpoint to use next.

    Selection order:
      1. idle endpoints not inside an error window — least recently used wins;
      2. otherwise round-robin over the remaining (busy) endpoints, skipping
         any that are still in an error window;
      3. otherwise (everything erroring) the endpoint whose error window
         expires soonest.
    """
    now = time.time()

    # First choice: anything idle and healthy right now.
    idle = [ep for ep in self.endpoints if not ep.busy and now > ep.error_until]
    if idle:
        return min(idle, key=lambda ep: ep.last_used)

    # All busy and/or erroring: advance the round-robin cursor, skipping
    # endpoints still serving out an error backoff.
    pool_size = len(self.endpoints)
    candidate = self.last_used_index
    for _ in range(pool_size):
        candidate = (candidate + 1) % pool_size
        if now > self.endpoints[candidate].error_until:
            self.last_used_index = candidate
            return self.endpoints[candidate]

    # Every endpoint is inside an error window; take the one recovering first.
    self.last_used_index = candidate
    return min(self.endpoints, key=lambda ep: ep.error_until)
160
 
161
  @asynccontextmanager
162
  async def get_endpoint(self, max_wait_time: int = 10):
 
169
  if time.time() - start_time > max_wait_time:
170
  raise TimeoutError(f"Could not acquire an endpoint within {max_wait_time} seconds")
171
 
172
+ async with self.lock:
173
+ # Get the next available endpoint using our selection strategy
174
+ endpoint = self._get_next_free_endpoint()
175
+
176
+ # Mark it as busy
177
+ endpoint.busy = True
178
+ endpoint.last_used = time.time()
179
+ logger.info(f"Using endpoint {endpoint.id} (busy: {endpoint.busy}, last used: {endpoint.last_used})")
180
+ break
 
 
 
181
 
182
  yield endpoint
183
 
 
186
  async with self.lock:
187
  endpoint.busy = False
188
  endpoint.last_used = time.time()
189
+ # We don't need to put back into queue - our strategy now picks directly from the list
190
 
191
  class ChatRoom:
192
  def __init__(self):
 
655
  }
656
 
657
  async with self.endpoint_manager.get_endpoint() as endpoint:
658
+ logger.info(f"Using endpoint {endpoint.id} for video generation")
659
 
660
+ try:
661
+ async with ClientSession() as session:
662
+ async with session.post(
663
+ endpoint.url,
664
+ headers={
665
+ "Accept": "application/json",
666
+ "Authorization": f"Bearer {HF_TOKEN}",
667
+ "Content-Type": "application/json"
668
+ },
669
+ json=json_payload,
670
+ timeout=10 # Fast generation should complete within 10 seconds
671
+ ) as response:
672
+ if response.status != 200:
673
+ error_text = await response.text()
674
+ # Mark endpoint as in error state
675
+ await self._mark_endpoint_error(endpoint)
676
+ raise Exception(f"Video generation failed: HTTP {response.status} - {error_text}")
677
+
678
+ result = await response.json()
679
+
680
+ if "error" in result:
681
+ # Mark endpoint as in error state
682
+ await self._mark_endpoint_error(endpoint)
683
+ raise Exception(f"Video generation failed: {result['error']}")
684
+
685
+ video_data_uri = result.get("video")
686
+ if not video_data_uri:
687
+ # Mark endpoint as in error state
688
+ await self._mark_endpoint_error(endpoint)
689
+ raise Exception("No video data in response")
690
+
691
+ # Reset error count on successful call
692
+ endpoint.error_count = 0
693
+ endpoint.error_until = 0
694
+
695
+ return video_data_uri
696
+
697
+ except asyncio.TimeoutError:
698
+ # Handle timeout specifically
699
+ await self._mark_endpoint_error(endpoint, is_timeout=True)
700
+ raise Exception(f"Endpoint {endpoint.id} timed out")
701
+ except Exception as e:
702
+ # Handle all other exceptions
703
+ if not isinstance(e, asyncio.TimeoutError): # Already handled above
704
+ await self._mark_endpoint_error(endpoint)
705
+ raise e
706
+
707
async def _mark_endpoint_error(self, endpoint: Endpoint, is_timeout: bool = False):
    """Mark an endpoint as being in error state with exponential backoff.

    The cooldown starts at 15 seconds and doubles per consecutive error,
    capped at 5 minutes. Timeouts double the penalty once more, since they
    usually indicate a more seriously unhealthy server.
    """
    async with self.endpoint_manager.lock:
        endpoint.error_count += 1

        # 15s, 30s, 60s, ... capped at 300s (left shift == doubling).
        backoff_seconds = min(15 << (endpoint.error_count - 1), 300)
        if is_timeout:
            backoff_seconds *= 2

        endpoint.error_until = time.time() + backoff_seconds

        logger.warning(
            f"Endpoint {endpoint.id} marked as in error state (count: {endpoint.error_count}, "
            f"unavailable until: {datetime.datetime.fromtimestamp(endpoint.error_until).strftime('%H:%M:%S')})"
        )
727
 
728
 
729
  async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
build/web/flutter_bootstrap.js CHANGED
@@ -39,6 +39,6 @@ _flutter.buildConfig = {"engineRevision":"382be0028d370607f76215a9be322e5514b263
39
 
40
  _flutter.loader.load({
41
  serviceWorkerSettings: {
42
- serviceWorkerVersion: "2193392314"
43
  }
44
  });
 
39
 
40
  _flutter.loader.load({
41
  serviceWorkerSettings: {
42
+ serviceWorkerVersion: "3416246217"
43
  }
44
  });
build/web/flutter_service_worker.js CHANGED
@@ -3,11 +3,11 @@ const MANIFEST = 'flutter-app-manifest';
3
  const TEMP = 'flutter-temp-cache';
4
  const CACHE_NAME = 'flutter-app-cache';
5
 
6
- const RESOURCES = {"flutter_bootstrap.js": "f915e2d221c0db2d3ffba348852bcac5",
7
  "version.json": "b5eaae4fc120710a3c35125322173615",
8
  "index.html": "f34c56fffc6b38f62412a5db2315dec8",
9
  "/": "f34c56fffc6b38f62412a5db2315dec8",
10
- "main.dart.js": "fc451217f225adcf35d6421bcaf1a36e",
11
  "flutter.js": "83d881c1dbb6d6bcd6b42e274605b69c",
12
  "favicon.png": "5dcef449791fa27946b3d35ad8803796",
13
  "icons/Icon-192.png": "ac9a721a12bbc803b44f645561ecb1e1",
 
3
  const TEMP = 'flutter-temp-cache';
4
  const CACHE_NAME = 'flutter-app-cache';
5
 
6
+ const RESOURCES = {"flutter_bootstrap.js": "ebc878297d861e3a1c9a095721c1547d",
7
  "version.json": "b5eaae4fc120710a3c35125322173615",
8
  "index.html": "f34c56fffc6b38f62412a5db2315dec8",
9
  "/": "f34c56fffc6b38f62412a5db2315dec8",
10
+ "main.dart.js": "cbdcb63cb16e4942066acf2c417ced32",
11
  "flutter.js": "83d881c1dbb6d6bcd6b42e274605b69c",
12
  "favicon.png": "5dcef449791fa27946b3d35ad8803796",
13
  "icons/Icon-192.png": "ac9a721a12bbc803b44f645561ecb1e1",
build/web/main.dart.js CHANGED
The diff for this file is too large to render. See raw diff
 
lib/widgets/maintenance_screen.dart CHANGED
@@ -25,7 +25,7 @@ class MaintenanceScreen extends StatelessWidget {
25
  ),
26
  const SizedBox(height: 24),
27
  const Text(
28
- 'AiTube is currently in maintenance',
29
  textAlign: TextAlign.center,
30
  style: TextStyle(
31
  color: Colors.grey,
@@ -35,7 +35,7 @@ class MaintenanceScreen extends StatelessWidget {
35
  ),
36
  const SizedBox(height: 16),
37
  const Text(
38
- 'Please ping @flngr on X for news',
39
  textAlign: TextAlign.center,
40
  style: TextStyle(
41
  color: Colors.grey,
 
25
  ),
26
  const SizedBox(height: 24),
27
  const Text(
28
+ 'AiTube2 is currently in maintenance',
29
  textAlign: TextAlign.center,
30
  style: TextStyle(
31
  color: Colors.grey,
 
35
  ),
36
  const SizedBox(height: 16),
37
  const Text(
38
+ 'Please follow @flngr on X for news',
39
  textAlign: TextAlign.center,
40
  style: TextStyle(
41
  color: Colors.grey,