jbilcke-hf HF Staff commited on
Commit
59e0f6a
·
1 Parent(s): 49dcd35
DEPLOYMENT.md CHANGED
@@ -52,13 +52,18 @@ curl https://api.endpoints.huggingface.cloud/v2/endpoint/<YOUR_ACCOUNT_NAME> -X
52
  # load the environment
53
  # (if you haven't done it already for this shell session)
54
  source .python_venv/bin/activate
55
-
56
- HF_TOKEN="<USE YOUR OWN TOKEN>" \
57
- SECRET_TOKEN="<USE YOUR OWN TOKEN>" \
 
 
 
58
  VIDEO_ROUND_ROBIN_SERVER_1="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
59
  VIDEO_ROUND_ROBIN_SERVER_2="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
60
  VIDEO_ROUND_ROBIN_SERVER_3="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
61
  VIDEO_ROUND_ROBIN_SERVER_4="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
 
 
62
  HF_IMAGE_MODEL="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
63
  HF_TEXT_MODEL="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
64
  python3 api.py
 
52
  # load the environment
53
  # (if you haven't done it already for this shell session)
54
  source .python_venv/bin/activate
55
+
56
+ PRODUCT_NAME="AiTube" \
57
+ MAX_NODES="3" \
58
+ MAINTENANCE_MODE=false \
59
+ HF_TOKEN="<USE YOUR OWN HF TOKEN>" \
60
+ SECRET_TOKEN="<USE YOUR OWN AITUBE SECRET>" \
61
  VIDEO_ROUND_ROBIN_SERVER_1="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
62
  VIDEO_ROUND_ROBIN_SERVER_2="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
63
  VIDEO_ROUND_ROBIN_SERVER_3="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
64
  VIDEO_ROUND_ROBIN_SERVER_4="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
65
+ VIDEO_ROUND_ROBIN_SERVER_5="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
66
+ VIDEO_ROUND_ROBIN_SERVER_6="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
67
  HF_IMAGE_MODEL="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
68
  HF_TEXT_MODEL="https://<USE YOUR OWN SERVER>.endpoints.huggingface.cloud" \
69
  python3 api.py
PROMPT_CONTEXT.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ For some context: our app is an AI video generation platform, where people use the frontend app (written in Flutter) to virtually "search for videos" (the video synopses and their content are generated on the fly using AI). This uses a custom API written in Python, with WebSockets communication.
2
+
3
+ Task to perform: <fill your demand here>
README.md CHANGED
@@ -6,7 +6,7 @@ colorTo: red
6
  sdk: docker
7
  app_file: api.py
8
  pinned: true
9
- short_description: A Latent YouTube
10
  app_port: 8080
11
  disable_embedding: false
12
  hf_oauth: true
@@ -27,7 +27,7 @@ Stay hooked at @flngr on X!
27
 
28
  ## What is AiTube?
29
 
30
- AiTube 2 is a reboot of [AiTube 1](https://x.com/danielpikl/status/1737882643625078835), a project made in 2023 which generated AI videos in the background using LLM agents, to simulate an AI generated YouTube.
31
 
32
  In [AiTube 2](https://x.com/flngr/status/1864127796945011016), this concept is put upside down: now the content is generated on demand (when the user types something in the latent search input) and on the fly (video chunks are generated within a few seconds and streamed continuously).
33
 
 
6
  sdk: docker
7
  app_file: api.py
8
  pinned: true
9
+ short_description: The Latent Video Platform
10
  app_port: 8080
11
  disable_embedding: false
12
  hf_oauth: true
 
27
 
28
  ## What is AiTube?
29
 
30
+ AiTube 2 is a reboot of [AiTube 1](https://x.com/danielpikl/status/1737882643625078835), a project made in 2023 which generated AI videos in the background using LLM agents, to simulate an AI generated video platform.
31
 
32
  In [AiTube 2](https://x.com/flngr/status/1864127796945011016), this concept is put upside down: now the content is generated on demand (when the user types something in the latent search input) and on the fly (video chunks are generated within a few seconds and streamed continuously).
33
 
api.py CHANGED
@@ -3,6 +3,7 @@ import json
3
  import logging
4
  import os
5
  import pathlib
 
6
  from aiohttp import web, WSMsgType
7
  from typing import Dict, Any
8
  from api_core import VideoGenerationAPI
@@ -273,11 +274,26 @@ async def process_video_queue(queue: asyncio.Queue, ws: web.WebSocketResponse):
273
  async def status_handler(request: web.Request) -> web.Response:
274
  """Handler for API status endpoint"""
275
  api = request.app['api']
 
 
 
 
 
 
 
 
 
 
 
 
 
276
  return web.json_response({
277
  'product': PRODUCT_NAME,
278
- 'version': '0.1.0',
279
  'maintenance_mode': MAINTENANCE_MODE,
280
- 'available_endpoints': len(VIDEO_ROUND_ROBIN_ENDPOINT_URLS)
 
 
281
  })
282
 
283
  async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
@@ -290,7 +306,7 @@ async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
290
  }, status=503) # 503 Service Unavailable
291
 
292
  ws = web.WebSocketResponse(
293
- max_msg_size=1024*1024*10, # 10MB max message size
294
  timeout=30.0 # we want to keep things tight and short
295
  )
296
 
@@ -361,7 +377,7 @@ async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
361
 
362
  async def init_app() -> web.Application:
363
  app = web.Application(
364
- client_max_size=1024**2*10 # 10MB max size
365
  )
366
 
367
  # Create API instance
 
3
  import logging
4
  import os
5
  import pathlib
6
+ import time
7
  from aiohttp import web, WSMsgType
8
  from typing import Dict, Any
9
  from api_core import VideoGenerationAPI
 
274
async def status_handler(request: web.Request) -> web.Response:
    """Handler for API status endpoint.

    Returns a JSON snapshot of product metadata plus per-endpoint health so
    operators can see which round-robin video servers are busy or in an
    error-backoff window.
    """
    api = request.app['api']

    # Sample the clock once so every endpoint is judged at the same instant.
    now = time.time()

    # Snapshot the current state of all round-robin endpoints.
    endpoint_statuses = [
        {
            'id': ep.id,
            'url': ep.url,
            'busy': ep.busy,
            'last_used': ep.last_used,
            'error_count': ep.error_count,
            'error_until': ep.error_until,
        }
        for ep in api.endpoint_manager.endpoints
    ]

    return web.json_response({
        'product': PRODUCT_NAME,
        'version': PRODUCT_VERSION,
        'maintenance_mode': MAINTENANCE_MODE,
        'available_endpoints': len(VIDEO_ROUND_ROBIN_ENDPOINT_URLS),
        'endpoint_status': endpoint_statuses,
        # "Active" = idle AND past any error backoff. The previous
        # `'error_until' not in ep` guard was dead code: every status dict
        # built above always carries that key.
        'active_endpoints': sum(
            1 for ep in endpoint_statuses
            if not ep['busy'] and ep['error_until'] < now
        ),
    })
298
 
299
  async def websocket_handler(request: web.Request) -> web.WebSocketResponse:
 
306
  }, status=503) # 503 Service Unavailable
307
 
308
  ws = web.WebSocketResponse(
309
+ max_msg_size=1024*1024*20, # 20MB max message size
310
  timeout=30.0 # we want to keep things tight and short
311
  )
312
 
 
377
 
378
  async def init_app() -> web.Application:
379
  app = web.Application(
380
+ client_max_size=1024**2*20 # 20MB max size
381
  )
382
 
383
  # Create API instance
api_config.py CHANGED
@@ -1,6 +1,7 @@
1
  import os
2
 
3
  PRODUCT_NAME = os.environ.get('PRODUCT_NAME', 'AiTube')
 
4
 
5
  TEXT_MODEL = os.environ.get('HF_TEXT_MODEL',
6
  #'HuggingFaceH4/zephyr-7b-beta'
@@ -12,6 +13,9 @@ IMAGE_MODEL = os.environ.get('HF_IMAGE_MODEL', '')
12
  # Environment variable to control maintenance mode
13
  MAINTENANCE_MODE = os.environ.get('MAINTENANCE_MODE', 'false').lower() in ('true', 'yes', '1', 't')
14
 
 
 
 
15
  ADMIN_ACCOUNTS = [
16
  "jbilcke-hf"
17
  ]
@@ -21,10 +25,17 @@ RAW_VIDEO_ROUND_ROBIN_ENDPOINT_URLS = [
21
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_2', ''),
22
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_3', ''),
23
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_4', ''),
 
 
 
 
24
  ]
25
 
26
  # Filter out empty strings from the endpoint list
27
- VIDEO_ROUND_ROBIN_ENDPOINT_URLS = [url for url in RAW_VIDEO_ROUND_ROBIN_ENDPOINT_URLS if url]
 
 
 
28
 
29
  HF_TOKEN = os.environ.get('HF_TOKEN')
30
 
 
1
  import os
2
 
3
  PRODUCT_NAME = os.environ.get('PRODUCT_NAME', 'AiTube')
4
+ PRODUCT_VERSION = "2.0.0"
5
 
6
  TEXT_MODEL = os.environ.get('HF_TEXT_MODEL',
7
  #'HuggingFaceH4/zephyr-7b-beta'
 
13
  # Environment variable to control maintenance mode
14
  MAINTENANCE_MODE = os.environ.get('MAINTENANCE_MODE', 'false').lower() in ('true', 'yes', '1', 't')
15
 
16
+ # Environment variable to control how many nodes to use
17
+ MAX_NODES = int(os.environ.get('MAX_NODES', '8'))
18
+
19
  ADMIN_ACCOUNTS = [
20
  "jbilcke-hf"
21
  ]
 
25
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_2', ''),
26
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_3', ''),
27
  os.environ.get('VIDEO_ROUND_ROBIN_SERVER_4', ''),
28
+ os.environ.get('VIDEO_ROUND_ROBIN_SERVER_5', ''),
29
+ os.environ.get('VIDEO_ROUND_ROBIN_SERVER_6', ''),
30
+ os.environ.get('VIDEO_ROUND_ROBIN_SERVER_7', ''),
31
+ os.environ.get('VIDEO_ROUND_ROBIN_SERVER_8', ''),
32
  ]
33
 
34
  # Filter out empty strings from the endpoint list
35
# Keep only the configured (non-empty) server URLs, preserving their order,
# then cap the pool at MAX_NODES entries.
filtered_urls = list(filter(None, RAW_VIDEO_ROUND_ROBIN_ENDPOINT_URLS))

VIDEO_ROUND_ROBIN_ENDPOINT_URLS = filtered_urls[:MAX_NODES]
39
 
40
  HF_TOKEN = os.environ.get('HF_TOKEN')
41
 
api_core.py CHANGED
@@ -111,20 +111,52 @@ class Endpoint:
111
  url: str
112
  busy: bool = False
113
  last_used: float = 0
 
 
114
 
115
  class EndpointManager:
116
  def __init__(self):
117
  self.endpoints: List[Endpoint] = []
118
  self.lock = Lock()
119
- self.endpoint_queue: Queue[Endpoint] = Queue()
120
  self.initialize_endpoints()
 
121
 
122
  def initialize_endpoints(self):
123
  """Initialize the list of endpoints"""
124
  for i, url in enumerate(VIDEO_ROUND_ROBIN_ENDPOINT_URLS):
125
  endpoint = Endpoint(id=i + 1, url=url)
126
  self.endpoints.append(endpoint)
127
- self.endpoint_queue.put_nowait(endpoint)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
  @asynccontextmanager
130
  async def get_endpoint(self, max_wait_time: int = 10):
@@ -137,18 +169,15 @@ class EndpointManager:
137
  if time.time() - start_time > max_wait_time:
138
  raise TimeoutError(f"Could not acquire an endpoint within {max_wait_time} seconds")
139
 
140
- try:
141
- endpoint = self.endpoint_queue.get_nowait()
142
- async with self.lock:
143
- if not endpoint.busy:
144
- endpoint.busy = True
145
- endpoint.last_used = time.time()
146
- break
147
- else:
148
- await self.endpoint_queue.put(endpoint)
149
- except asyncio.QueueEmpty:
150
- await asyncio.sleep(0.5)
151
- continue
152
 
153
  yield endpoint
154
 
@@ -157,7 +186,7 @@ class EndpointManager:
157
  async with self.lock:
158
  endpoint.busy = False
159
  endpoint.last_used = time.time()
160
- await self.endpoint_queue.put(endpoint)
161
 
162
  class ChatRoom:
163
  def __init__(self):
@@ -626,32 +655,75 @@ Your caption:"""
626
  }
627
 
628
  async with self.endpoint_manager.get_endpoint() as endpoint:
629
- #logger.info(f"Using endpoint {endpoint.id} for video generation with prompt: {prompt}")
630
 
631
- async with ClientSession() as session:
632
- async with session.post(
633
- endpoint.url,
634
- headers={
635
- "Accept": "application/json",
636
- "Authorization": f"Bearer {HF_TOKEN}",
637
- "Content-Type": "application/json"
638
- },
639
- json=json_payload
640
- ) as response:
641
- if response.status != 200:
642
- error_text = await response.text()
643
- raise Exception(f"Video generation failed: HTTP {response.status} - {error_text}")
644
-
645
- result = await response.json()
646
-
647
- if "error" in result:
648
- raise Exception(f"Video generation failed: {result['error']}")
649
-
650
- video_data_uri = result.get("video")
651
- if not video_data_uri:
652
- raise Exception("No video data in response")
653
-
654
- return video_data_uri
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
655
 
656
 
657
  async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
 
111
  url: str
112
  busy: bool = False
113
  last_used: float = 0
114
+ error_count: int = 0
115
+ error_until: float = 0 # Timestamp until which this endpoint is considered in error state
116
 
117
  class EndpointManager:
118
  def __init__(self):
119
  self.endpoints: List[Endpoint] = []
120
  self.lock = Lock()
 
121
  self.initialize_endpoints()
122
+ self.last_used_index = -1 # Track the last used endpoint for round-robin
123
 
124
def initialize_endpoints(self):
    """Populate self.endpoints with one Endpoint per configured URL (ids start at 1)."""
    self.endpoints.extend(
        Endpoint(id=position + 1, url=server_url)
        for position, server_url in enumerate(VIDEO_ROUND_ROBIN_ENDPOINT_URLS)
    )
129
+
130
def _get_next_free_endpoint(self):
    """Pick the endpoint to use next.

    Selection order:
      1. idle endpoints not inside an error window — least recently used wins;
      2. otherwise round-robin over the remaining (busy) endpoints, skipping
         any that are still in an error window;
      3. otherwise (everything erroring) the endpoint whose error window
         expires soonest.
    """
    now = time.time()

    # First choice: anything idle and healthy right now.
    idle = [ep for ep in self.endpoints if not ep.busy and now > ep.error_until]
    if idle:
        return min(idle, key=lambda ep: ep.last_used)

    # All busy and/or erroring: advance the round-robin cursor, skipping
    # endpoints still serving out an error backoff.
    pool_size = len(self.endpoints)
    candidate = self.last_used_index
    for _ in range(pool_size):
        candidate = (candidate + 1) % pool_size
        if now > self.endpoints[candidate].error_until:
            self.last_used_index = candidate
            return self.endpoints[candidate]

    # Every endpoint is inside an error window; take the one recovering first.
    self.last_used_index = candidate
    return min(self.endpoints, key=lambda ep: ep.error_until)
160
 
161
  @asynccontextmanager
162
  async def get_endpoint(self, max_wait_time: int = 10):
 
169
  if time.time() - start_time > max_wait_time:
170
  raise TimeoutError(f"Could not acquire an endpoint within {max_wait_time} seconds")
171
 
172
+ async with self.lock:
173
+ # Get the next available endpoint using our selection strategy
174
+ endpoint = self._get_next_free_endpoint()
175
+
176
+ # Mark it as busy
177
+ endpoint.busy = True
178
+ endpoint.last_used = time.time()
179
+ logger.info(f"Using endpoint {endpoint.id} (busy: {endpoint.busy}, last used: {endpoint.last_used})")
180
+ break
 
 
 
181
 
182
  yield endpoint
183
 
 
186
  async with self.lock:
187
  endpoint.busy = False
188
  endpoint.last_used = time.time()
189
+ # We don't need to put back into queue - our strategy now picks directly from the list
190
 
191
  class ChatRoom:
192
  def __init__(self):
 
655
  }
656
 
657
  async with self.endpoint_manager.get_endpoint() as endpoint:
658
+ logger.info(f"Using endpoint {endpoint.id} for video generation")
659
 
660
+ try:
661
+ async with ClientSession() as session:
662
+ async with session.post(
663
+ endpoint.url,
664
+ headers={
665
+ "Accept": "application/json",
666
+ "Authorization": f"Bearer {HF_TOKEN}",
667
+ "Content-Type": "application/json"
668
+ },
669
+ json=json_payload,
670
+ timeout=10 # Fast generation should complete within 10 seconds
671
+ ) as response:
672
+ if response.status != 200:
673
+ error_text = await response.text()
674
+ # Mark endpoint as in error state
675
+ await self._mark_endpoint_error(endpoint)
676
+ raise Exception(f"Video generation failed: HTTP {response.status} - {error_text}")
677
+
678
+ result = await response.json()
679
+
680
+ if "error" in result:
681
+ # Mark endpoint as in error state
682
+ await self._mark_endpoint_error(endpoint)
683
+ raise Exception(f"Video generation failed: {result['error']}")
684
+
685
+ video_data_uri = result.get("video")
686
+ if not video_data_uri:
687
+ # Mark endpoint as in error state
688
+ await self._mark_endpoint_error(endpoint)
689
+ raise Exception("No video data in response")
690
+
691
+ # Reset error count on successful call
692
+ endpoint.error_count = 0
693
+ endpoint.error_until = 0
694
+
695
+ return video_data_uri
696
+
697
+ except asyncio.TimeoutError:
698
+ # Handle timeout specifically
699
+ await self._mark_endpoint_error(endpoint, is_timeout=True)
700
+ raise Exception(f"Endpoint {endpoint.id} timed out")
701
+ except Exception as e:
702
+ # Handle all other exceptions
703
+ if not isinstance(e, asyncio.TimeoutError): # Already handled above
704
+ await self._mark_endpoint_error(endpoint)
705
+ raise e
706
+
707
async def _mark_endpoint_error(self, endpoint: Endpoint, is_timeout: bool = False):
    """Mark an endpoint as being in error state with exponential backoff.

    The cooldown starts at 15 seconds and doubles per consecutive error,
    capped at 5 minutes. Timeouts double the penalty once more, since they
    usually indicate a more seriously unhealthy server.
    """
    async with self.endpoint_manager.lock:
        endpoint.error_count += 1

        # 15s, 30s, 60s, ... capped at 300s (left shift == doubling).
        backoff_seconds = min(15 << (endpoint.error_count - 1), 300)
        if is_timeout:
            backoff_seconds *= 2

        endpoint.error_until = time.time() + backoff_seconds

        logger.warning(
            f"Endpoint {endpoint.id} marked as in error state (count: {endpoint.error_count}, "
            f"unavailable until: {datetime.datetime.fromtimestamp(endpoint.error_until).strftime('%H:%M:%S')})"
        )
727
 
728
 
729
  async def handle_chat_message(self, data: dict, ws: web.WebSocketResponse) -> dict:
build/web/flutter_bootstrap.js CHANGED
@@ -39,6 +39,6 @@ _flutter.buildConfig = {"engineRevision":"382be0028d370607f76215a9be322e5514b263
39
 
40
  _flutter.loader.load({
41
  serviceWorkerSettings: {
42
- serviceWorkerVersion: "2193392314"
43
  }
44
  });
 
39
 
40
  _flutter.loader.load({
41
  serviceWorkerSettings: {
42
+ serviceWorkerVersion: "3416246217"
43
  }
44
  });
build/web/flutter_service_worker.js CHANGED
@@ -3,11 +3,11 @@ const MANIFEST = 'flutter-app-manifest';
3
  const TEMP = 'flutter-temp-cache';
4
  const CACHE_NAME = 'flutter-app-cache';
5
 
6
- const RESOURCES = {"flutter_bootstrap.js": "f915e2d221c0db2d3ffba348852bcac5",
7
  "version.json": "b5eaae4fc120710a3c35125322173615",
8
  "index.html": "f34c56fffc6b38f62412a5db2315dec8",
9
  "/": "f34c56fffc6b38f62412a5db2315dec8",
10
- "main.dart.js": "fc451217f225adcf35d6421bcaf1a36e",
11
  "flutter.js": "83d881c1dbb6d6bcd6b42e274605b69c",
12
  "favicon.png": "5dcef449791fa27946b3d35ad8803796",
13
  "icons/Icon-192.png": "ac9a721a12bbc803b44f645561ecb1e1",
 
3
  const TEMP = 'flutter-temp-cache';
4
  const CACHE_NAME = 'flutter-app-cache';
5
 
6
+ const RESOURCES = {"flutter_bootstrap.js": "ebc878297d861e3a1c9a095721c1547d",
7
  "version.json": "b5eaae4fc120710a3c35125322173615",
8
  "index.html": "f34c56fffc6b38f62412a5db2315dec8",
9
  "/": "f34c56fffc6b38f62412a5db2315dec8",
10
+ "main.dart.js": "cbdcb63cb16e4942066acf2c417ced32",
11
  "flutter.js": "83d881c1dbb6d6bcd6b42e274605b69c",
12
  "favicon.png": "5dcef449791fa27946b3d35ad8803796",
13
  "icons/Icon-192.png": "ac9a721a12bbc803b44f645561ecb1e1",
build/web/main.dart.js CHANGED
The diff for this file is too large to render. See raw diff
 
lib/widgets/maintenance_screen.dart CHANGED
@@ -25,7 +25,7 @@ class MaintenanceScreen extends StatelessWidget {
25
  ),
26
  const SizedBox(height: 24),
27
  const Text(
28
- 'AiTube is currently in maintenance',
29
  textAlign: TextAlign.center,
30
  style: TextStyle(
31
  color: Colors.grey,
@@ -35,7 +35,7 @@ class MaintenanceScreen extends StatelessWidget {
35
  ),
36
  const SizedBox(height: 16),
37
  const Text(
38
- 'Please ping @flngr on X for news',
39
  textAlign: TextAlign.center,
40
  style: TextStyle(
41
  color: Colors.grey,
 
25
  ),
26
  const SizedBox(height: 24),
27
  const Text(
28
+ 'AiTube2 is currently in maintenance',
29
  textAlign: TextAlign.center,
30
  style: TextStyle(
31
  color: Colors.grey,
 
35
  ),
36
  const SizedBox(height: 16),
37
  const Text(
38
+ 'Please follow @flngr on X for news',
39
  textAlign: TextAlign.center,
40
  style: TextStyle(
41
  color: Colors.grey,