Spaces:
Runtime error
Runtime error
Vincent Claes
committed on
Commit
·
5c823d7
1
Parent(s):
a861406
add examples
Browse files
app.py
CHANGED
@@ -21,6 +21,30 @@ examples = [
|
|
21 |
"movies/bathroom.mp4",
|
22 |
ROOMS,
|
23 |
],
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
24 |
]
|
25 |
|
26 |
|
@@ -45,28 +69,12 @@ def get_num_total_frames(file_path: str):
|
|
45 |
return len(videoreader)
|
46 |
|
47 |
|
48 |
-
# def convert_frames_to_gif(frames, save_path: str = "frames.gif"):
|
49 |
-
# converted_frames = frames.astype(np.uint8)
|
50 |
-
# Path(save_path).parent.mkdir(parents=True, exist_ok=True)
|
51 |
-
# imageio.mimsave(save_path, converted_frames, fps=8)
|
52 |
-
# return save_path
|
53 |
-
|
54 |
-
|
55 |
-
# def create_gif_from_video_file(
|
56 |
-
# file_path: str,
|
57 |
-
# num_frames: int = 16,
|
58 |
-
# frame_sampling_rate: int = 1,
|
59 |
-
# save_path: str = "frames.gif",
|
60 |
-
# ):
|
61 |
-
# frames = sample_frames_from_video_file(file_path, num_frames, frame_sampling_rate)
|
62 |
-
# return convert_frames_to_gif(frames, save_path)
|
63 |
-
|
64 |
-
|
65 |
def select_model(model_name):
|
66 |
global processor, model
|
67 |
processor = AutoProcessor.from_pretrained(model_name)
|
68 |
model = AutoModel.from_pretrained(model_name)
|
69 |
|
|
|
70 |
def get_frame_sampling_rate(video_path, num_model_input_frames):
|
71 |
# rearrange sampling rate based on video length and model input length
|
72 |
num_total_frames = get_num_total_frames(video_path)
|
@@ -76,6 +84,7 @@ def get_frame_sampling_rate(video_path, num_model_input_frames):
|
|
76 |
frame_sampling_rate = FRAME_SAMPLING_RATE
|
77 |
return frame_sampling_rate
|
78 |
|
|
|
79 |
def predict(video_path, labels_text):
|
80 |
labels = labels_text.split(",")
|
81 |
num_model_input_frames = model.config.vision_config.num_frames
|
@@ -83,8 +92,6 @@ def predict(video_path, labels_text):
|
|
83 |
frames = sample_frames_from_video_file(
|
84 |
video_path, num_model_input_frames, frame_sampling_rate
|
85 |
)
|
86 |
-
# gif_path = convert_frames_to_gif(frames, save_path="video.gif")
|
87 |
-
|
88 |
inputs = processor(
|
89 |
text=labels, videos=list(frames), return_tensors="pt", padding=True
|
90 |
)
|
@@ -103,49 +110,33 @@ def predict(video_path, labels_text):
|
|
103 |
|
104 |
app = gr.Blocks()
|
105 |
with app:
|
|
|
106 |
gr.Markdown(
|
107 |
-
"
|
108 |
-
)
|
109 |
-
gr.Markdown(
|
110 |
-
"### **<p align='center'>Upload a video of a room and provide a list of type of rooms the model should select from.</p>**"
|
111 |
-
|
112 |
)
|
113 |
|
114 |
with gr.Row():
|
115 |
with gr.Column():
|
116 |
video_file = gr.Video(label="Video File:", show_label=True)
|
117 |
-
local_video_labels_text = gr.Textbox(
|
118 |
-
label="Labels Text:", show_label=True
|
119 |
-
)
|
120 |
submit_button = gr.Button(value="Predict")
|
121 |
-
# with gr.Column():
|
122 |
-
# video_gif = gr.Image(
|
123 |
-
# label="Input Clip",
|
124 |
-
# show_label=True,
|
125 |
-
# )
|
126 |
with gr.Column():
|
127 |
predictions = gr.Label(label="Predictions:", show_label=True)
|
128 |
|
129 |
gr.Markdown("**Examples:**")
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
submit_button.click(
|
139 |
predict,
|
140 |
inputs=[video_file, local_video_labels_text],
|
141 |
-
# outputs=[predictions, video_gif],
|
142 |
outputs=predictions,
|
143 |
)
|
144 |
-
|
145 |
-
# """
|
146 |
-
# \n Created by: Vincent Claes, <a href=\"https://www.meet-drift.ai/\">Drift</a>.
|
147 |
-
# \n Inspired by: <a href=\"https://huggingface.co/spaces/fcakyon/zero-shot-video-classification\">fcakyon</a>.
|
148 |
-
# """
|
149 |
-
# )
|
150 |
|
151 |
app.launch()
|
|
|
21 |
"movies/bathroom.mp4",
|
22 |
ROOMS,
|
23 |
],
|
24 |
+
[
|
25 |
+
"movies/bedroom.mp4",
|
26 |
+
ROOMS,
|
27 |
+
],
|
28 |
+
[
|
29 |
+
"movies/dressing.mp4",
|
30 |
+
ROOMS,
|
31 |
+
],
|
32 |
+
[
|
33 |
+
"movies/home-office.mp4",
|
34 |
+
ROOMS,
|
35 |
+
],
|
36 |
+
[
|
37 |
+
"movies/kitchen.mp4",
|
38 |
+
ROOMS,
|
39 |
+
],
|
40 |
+
[
|
41 |
+
"movies/living-room.mp4",
|
42 |
+
ROOMS,
|
43 |
+
],
|
44 |
+
[
|
45 |
+
"movies/toilet.mp4",
|
46 |
+
ROOMS,
|
47 |
+
],
|
48 |
]
|
49 |
|
50 |
|
|
|
69 |
return len(videoreader)
|
70 |
|
71 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
def select_model(model_name):
|
73 |
global processor, model
|
74 |
processor = AutoProcessor.from_pretrained(model_name)
|
75 |
model = AutoModel.from_pretrained(model_name)
|
76 |
|
77 |
+
|
78 |
def get_frame_sampling_rate(video_path, num_model_input_frames):
|
79 |
# rearrange sampling rate based on video length and model input length
|
80 |
num_total_frames = get_num_total_frames(video_path)
|
|
|
84 |
frame_sampling_rate = FRAME_SAMPLING_RATE
|
85 |
return frame_sampling_rate
|
86 |
|
87 |
+
|
88 |
def predict(video_path, labels_text):
|
89 |
labels = labels_text.split(",")
|
90 |
num_model_input_frames = model.config.vision_config.num_frames
|
|
|
92 |
frames = sample_frames_from_video_file(
|
93 |
video_path, num_model_input_frames, frame_sampling_rate
|
94 |
)
|
|
|
|
|
95 |
inputs = processor(
|
96 |
text=labels, videos=list(frames), return_tensors="pt", padding=True
|
97 |
)
|
|
|
110 |
|
111 |
app = gr.Blocks()
|
112 |
with app:
|
113 |
+
gr.Markdown("# **<p align='center'>Classification of Rooms</p>**")
|
114 |
gr.Markdown(
|
115 |
+
"#### **<p align='center'>Upload a video (mp4) of a room and provide a list of type of rooms the model should select from.</p>**"
|
|
|
|
|
|
|
|
|
116 |
)
|
117 |
|
118 |
with gr.Row():
|
119 |
with gr.Column():
|
120 |
video_file = gr.Video(label="Video File:", show_label=True)
|
121 |
+
local_video_labels_text = gr.Textbox(label="Labels Text:", show_label=True)
|
|
|
|
|
122 |
submit_button = gr.Button(value="Predict")
|
|
|
|
|
|
|
|
|
|
|
123 |
with gr.Column():
|
124 |
predictions = gr.Label(label="Predictions:", show_label=True)
|
125 |
|
126 |
gr.Markdown("**Examples:**")
|
127 |
+
gr.Examples(
|
128 |
+
examples,
|
129 |
+
[video_file, local_video_labels_text],
|
130 |
+
predictions,
|
131 |
+
fn=predict,
|
132 |
+
cache_examples=True,
|
133 |
+
)
|
134 |
+
|
135 |
submit_button.click(
|
136 |
predict,
|
137 |
inputs=[video_file, local_video_labels_text],
|
|
|
138 |
outputs=predictions,
|
139 |
)
|
140 |
+
|
|
|
|
|
|
|
|
|
|
|
141 |
|
142 |
app.launch()
|