vumichien commited on
Commit
2b9ea7f
·
1 Parent(s): 0ac7fe7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +7 -2
app.py CHANGED
@@ -10,13 +10,14 @@ from base64 import b64encode
10
  from speech_recognition import AudioFile, Recognizer
11
  import numpy as np
12
  from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
 
13
 
14
  model = YOLO('ultralyticsplus/yolov8s')
15
  CLASS = model.model.names
16
  defaul_bot_voice = "γŠγ―γ„γ‚ˆγ†γ”γ–γ„γΎγ™"
17
  area_thres = 0.3
18
 
19
- def infer(image):
20
  results = model.predict(image, show=False)[0]
21
  masks, boxes = results.masks, results.boxes
22
  area_image = image.width * image.height
@@ -33,6 +34,10 @@ def infer(image):
33
  if area_rate >= most_close:
34
  out_img = image.crop(tuple(box)).resize((128, 128))
35
  most_close = area_rate
 
 
 
 
36
  print(most_close, diff_value)
37
  if most_close >= area_thres and diff_value >= 0.5:
38
  voice_bot = tts(defaul_bot_voice, language="ja")
@@ -41,7 +46,7 @@ def infer(image):
41
  iface = gr.Interface(
42
  fn=infer,
43
  title="aisatsu api",
44
- inputs=[gr.Image(label="image", type="pil", shape=(960, 640))],
45
  outputs=[gr.Image(label="output image"), gr.Textbox(label="output voice")],
46
  article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>.",
47
  ).launch(enable_queue=True, debug=True)
 
10
  from speech_recognition import AudioFile, Recognizer
11
  import numpy as np
12
  from utils import tts, read_image_file, pil_to_base64, base64_to_pil, get_hist
13
+ from scipy.spatial import distance as dist
14
 
15
  model = YOLO('ultralyticsplus/yolov8s')
16
  CLASS = model.model.names
17
  defaul_bot_voice = "γŠγ―γ„γ‚ˆγ†γ”γ–γ„γΎγ™"
18
  area_thres = 0.3
19
 
20
+ def infer(image, last_seen):
21
  results = model.predict(image, show=False)[0]
22
  masks, boxes = results.masks, results.boxes
23
  area_image = image.width * image.height
 
34
  if area_rate >= most_close:
35
  out_img = image.crop(tuple(box)).resize((128, 128))
36
  most_close = area_rate
37
+ if last_seen != "":
38
+ last_seen = base64_to_pil(last_seen)
39
+ if out_img is not None:
40
+ diff_value = dist.euclidean(get_hist(out_img), get_hist(last_seen))
41
  print(most_close, diff_value)
42
  if most_close >= area_thres and diff_value >= 0.5:
43
  voice_bot = tts(defaul_bot_voice, language="ja")
 
46
  iface = gr.Interface(
47
  fn=infer,
48
  title="aisatsu api",
49
+ inputs=[gr.Image(label="image", type="pil", shape=(960, 640)), gr.Textbox(label="last seen", value="")],
50
  outputs=[gr.Image(label="output image"), gr.Textbox(label="output voice")],
51
  article = "Author: <a href=\"https://huggingface.co/vumichien\">Vu Minh Chien</a>.",
52
  ).launch(enable_queue=True, debug=True)