saadfarhad committed on
Commit
6d16e6e
·
verified ·
1 Parent(s): e18d158

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +51 -0
app.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from transformers import AutoProcessor, AutoModelForCausalLM
4
+
5
# Hugging Face Hub identifier shared by the processor and the model.
MODEL_ID = "lmms-lab/LLaVA-Video-7B-Qwen2"

# Fetch the input/output processor and the model weights for that identifier.
processor = AutoProcessor.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(MODEL_ID)

# Run on the GPU when CUDA reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
model = model.to(device)
12
+
13
def analyze_video(video_path):
    """Ask the model when the crowd in a concert video is most engaged.

    Args:
        video_path: Filesystem path of the uploaded video, as handed over by
            the Gradio video input. ``None`` or an empty string means that
            no video was supplied.

    Returns:
        The decoded model answer as a string, or a short instruction to
        upload a video when ``video_path`` is empty.
    """
    # The UI can be submitted without a file; answer with a hint instead of
    # letting the processor fail on a missing path.
    if not video_path:
        return "Please upload a video to analyze."

    # Fixed instruction sent to the model together with the video.
    prompt = "Analyze this video of a concert and determine the moment when the crowd is most engaged."

    # NOTE(review): this relies on the loaded processor accepting a file path
    # through the ``video`` keyword and extracting the frames itself --
    # confirm against the processor class that AutoProcessor resolves to.
    inputs = processor(text=prompt, video=video_path, return_tensors="pt")

    # Move tensors to the selected device; entries without a ``.to`` method
    # (plain lists, ints, ...) are passed through unchanged.
    inputs = {
        key: (value.to(device) if hasattr(value, "to") else value)
        for key, value in inputs.items()
    }

    # Generate the model's response.
    outputs = model.generate(**inputs, max_new_tokens=100)

    # For causal language models ``generate`` returns the prompt tokens
    # followed by the continuation; drop the prompt so that the answer does
    # not echo the question back to the user.
    generated = outputs[0]
    input_ids = inputs.get("input_ids")
    if input_ids is not None:
        generated = generated[input_ids.shape[-1]:]

    # Decode the remaining tokens to a human-readable string.
    answer = processor.decode(generated, skip_special_tokens=True)
    return answer.strip()
37
+
38
# Build the web UI: one video upload as input, one text box as output.
# gr.Video has no ``type`` argument (current Gradio releases reject unknown
# keyword arguments); the component already passes the uploaded video to the
# callback as a file path.
iface = gr.Interface(
    fn=analyze_video,
    inputs=gr.Video(label="Upload Concert/Event Video"),
    outputs=gr.Textbox(label="Engagement Analysis"),
    title="Crowd Engagement Analyzer",
    description=(
        "Upload a video of a concert or event and the model will analyze "
        "the video to identify the moment when the crowd is most engaged."
    ),
)

# Start the Gradio server only when this file is executed as a script.
if __name__ == "__main__":
    iface.launch()