Spaces:
Paused
Paused
Update app.py
Browse files
app.py
CHANGED
@@ -10,13 +10,14 @@ from base64 import b64encode
|
|
10 |
from speech_recognition import AudioFile, Recognizer
|
11 |
import numpy as np
|
12 |
from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
|
|
|
13 |
|
14 |
model = YOLO('ultralyticsplus/yolov8s')
|
15 |
CLASS = model.model.names
|
16 |
defaul_bot_voice = "γγ―γγγγγγγΎγ"
|
17 |
area_thres = 0.3
|
18 |
|
19 |
-
def infer(image):
|
20 |
results = model.predict(image, show=False)[0]
|
21 |
masks, boxes = results.masks, results.boxes
|
22 |
area_image = image.width * image.height
|
@@ -33,6 +34,10 @@ def infer(image):
|
|
33 |
if area_rate >= most_close:
|
34 |
out_img = image.crop(tuple(box)).resize((128, 128))
|
35 |
most_close = area_rate
|
|
|
|
|
|
|
|
|
36 |
print(most_close, diff_value)
|
37 |
if most_close >= area_thres and diff_value >= 0.5:
|
38 |
voice_bot = tts(defaul_bot_voice, language="ja")
|
@@ -41,7 +46,7 @@ def infer(image):
|
|
41 |
iface = gr.Interface(
|
42 |
fn=infer,
|
43 |
title="aisatsu api",
|
44 |
-
inputs=[gr.Image(label="image", type="pil", shape=(960, 640))],
|
45 |
outputs=[gr.Image(label="output image"), gr.Textbox(label="output voice")],
|
46 |
article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>.",
|
47 |
).launch(enable_queue=True, debug=True)
|
|
|
10 |
from speech_recognition import AudioFile, Recognizer
|
11 |
import numpy as np
|
12 |
from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
|
13 |
+
from scipy.spatial import distance as dist
|
14 |
|
15 |
model = YOLO('ultralyticsplus/yolov8s')
|
16 |
CLASS = model.model.names
|
17 |
defaul_bot_voice = "γγ―γγγγγγγΎγ"
|
18 |
area_thres = 0.3
|
19 |
|
20 |
+
def infer(image, last_seen):
|
21 |
results = model.predict(image, show=False)[0]
|
22 |
masks, boxes = results.masks, results.boxes
|
23 |
area_image = image.width * image.height
|
|
|
34 |
if area_rate >= most_close:
|
35 |
out_img = image.crop(tuple(box)).resize((128, 128))
|
36 |
most_close = area_rate
|
37 |
+
if last_seen != "":
|
38 |
+
last_seen = base64_to_pil(last_seen)
|
39 |
+
if out_img is not None:
|
40 |
+
diff_value = dist.euclidean(get_hist(out_img), get_hist(last_seen))
|
41 |
print(most_close, diff_value)
|
42 |
if most_close >= area_thres and diff_value >= 0.5:
|
43 |
voice_bot = tts(defaul_bot_voice, language="ja")
|
|
|
46 |
iface = gr.Interface(
|
47 |
fn=infer,
|
48 |
title="aisatsu api",
|
49 |
+
inputs=[gr.Image(label="image", type="pil", shape=(960, 640)), gr.Textbox(label="last seen", value="")],
|
50 |
outputs=[gr.Image(label="output image"), gr.Textbox(label="output voice")],
|
51 |
article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>.",
|
52 |
).launch(enable_queue=True, debug=True)
|