Update demo/infer.py
demo/infer.py (+6 -6)
```diff
@@ -56,7 +56,7 @@ class LiveCCDemoInfer:
         self,
         message: str,
         state: dict,
-        max_pixels: int =
+        max_pixels: int = 384 * 28 * 28,
         default_query: str = 'Please describe the video.',
         do_sample: bool = False,
         repetition_penalty: float = 1.05,
@@ -122,20 +122,20 @@ class LiveCCDemoInfer:
         # 5. make conversation and send to model
         for clip, timestamps in zip(interleave_clips, interleave_timestamps):
             start_timestamp, stop_timestamp = timestamps[0].item(), timestamps[-1].item() + self.frame_time_interval
-
+            conversation = [{
                 "role": "user",
                 "content": [
                     {"type": "text", "text": f'Time={start_timestamp:.1f}-{stop_timestamp:.1f}s'},
                     {"type": "video", "video": clip}
                 ]
-            }
+            }]
         if not message and not state.get('message', None):
             message = default_query
             logger.warning(f'No query provided, use default_query={default_query}')
         if message and state.get('message', None) != message:
-
+            conversation[0]['content'].append({"type": "text", "text": message})
             state['message'] = message
-        texts = self.processor.apply_chat_template(
+        texts = self.processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True, return_tensors='pt')
         past_ids = state.get('past_ids', None)
         if past_ids is not None:
             texts = '<|im_end|>\n' + texts[self.system_prompt_offset:]
@@ -146,7 +146,6 @@ class LiveCCDemoInfer:
             return_tensors="pt",
             return_attention_mask=False
         )
-        print(texts)
         inputs.to(self.model.device)
         if past_ids is not None:
             inputs['input_ids'] = torch.cat([past_ids, inputs.input_ids], dim=1)
@@ -159,6 +158,7 @@ class LiveCCDemoInfer:
             return_dict_in_generate=True, do_sample=do_sample,
             repetition_penalty=repetition_penalty,
             logits_processor=logits_processor,
+            max_new_tokens=16,
         )
         state['past_key_values'] = outputs.past_key_values
         state['past_ids'] = outputs.sequences[:, :-1]
```
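For context, the net effect of the second hunk is that each clip now gets its own single-turn conversation: the time range and the video clip are always included, and the user query is appended as an extra text item only when it changes, before the whole turn is rendered with the processor's chat template. The sketch below reproduces just that construction with a generic Qwen2-VL-style processor; the checkpoint name, frame paths, and timestamps are placeholders, not necessarily what this Space loads.

```python
# Minimal sketch of the conversation/template step introduced by this change.
# Assumptions: a Qwen2-VL-style AutoProcessor; the checkpoint name and frame
# paths below are placeholders, not taken from the demo.
from transformers import AutoProcessor

processor = AutoProcessor.from_pretrained("Qwen/Qwen2-VL-7B-Instruct")

start_timestamp, stop_timestamp = 0.0, 2.0     # illustrative clip boundaries
clip = ["frames/0001.jpg", "frames/0002.jpg"]  # stand-in for the decoded video clip
message = "Please describe the video."         # matches default_query in the diff

# One user turn per clip: time range + video, with the query appended as extra text.
conversation = [{
    "role": "user",
    "content": [
        {"type": "text", "text": f"Time={start_timestamp:.1f}-{stop_timestamp:.1f}s"},
        {"type": "video", "video": clip},
        {"type": "text", "text": message},
    ],
}]

# tokenize=False returns the rendered prompt string; add_generation_prompt=True
# appends the assistant header so generation starts a fresh reply.
texts = processor.apply_chat_template(conversation, tokenize=False, add_generation_prompt=True)
print(texts)
```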
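The remaining hunks touch the incremental-decoding path already visible in the context lines: previously generated ids are prepended to the new prompt, the KV cache from the last call is reused, a debug `print(texts)` is dropped, and the new `max_new_tokens=16` keeps each call short so the commentary comes out in small chunks. A rough sketch of that pattern, assuming a recent `transformers` release and using a small text-only model as a stand-in for the demo's video LM (model name and the `step` helper are illustrative, not from the demo):

```python
# Rough sketch of the incremental generate/caching pattern from the diff's
# context lines; "gpt2" is only a placeholder for the demo's own checkpoint.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tok = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

state = {}  # mirrors the demo's per-session state dict

def step(prompt: str) -> str:
    """Generate a short continuation, reusing ids and KV cache from earlier calls."""
    # As in the demo, skip the attention mask so the concatenated ids stay consistent.
    inputs = tok(prompt, return_tensors="pt", return_attention_mask=False).to(model.device)
    past_ids = state.get("past_ids", None)
    if past_ids is not None:
        inputs["input_ids"] = torch.cat([past_ids, inputs.input_ids], dim=1)

    gen_kwargs = dict(return_dict_in_generate=True, max_new_tokens=16)  # same cap the commit adds
    if state.get("past_key_values", None) is not None:
        gen_kwargs["past_key_values"] = state["past_key_values"]  # resume from cached prefix

    outputs = model.generate(**inputs, **gen_kwargs)
    state["past_key_values"] = outputs.past_key_values
    state["past_ids"] = outputs.sequences[:, :-1]  # drop trailing token so ids and cache stay aligned
    return tok.decode(outputs.sequences[0, inputs["input_ids"].shape[1]:])
```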