Fanny1366 committed on
Commit
67f1091
·
verified ·
1 Parent(s): 4f55026

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -28
app.py CHANGED
@@ -1,66 +1,61 @@
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
- from gtts import gTTS
5
- import os
 
6
 
7
  # function part
8
  # img2text
9
  def img2text(url):
10
- image_to_text_model = pipeline("image-to-text",
11
- model="Salesforce/blip-image-captioning-base")
12
  text = image_to_text_model(url)[0]["generated_text"]
13
  return text
14
 
15
  # text2story
16
  def text2story(text):
17
- story_text = pipeline("text-generation", model="perplexity-ai/r1-1776", trust_remote_code=True) # to be completed
18
- return story_text
 
19
 
20
  # text2audio
21
  def text2audio(story_text):
22
- # Convert text to audio using gTTS
23
- tts = gTTS(story_text, lang="en")
24
- audio_file = "story_audio.wav"
25
- tts.save(audio_file)
26
- return audio_file
 
 
 
 
27
 
28
  # main part
29
import os
import tempfile

# Page configuration has to be issued before any other Streamlit command.
st.set_page_config(page_title="Your Image to Audio Story",
                   page_icon="🦜")
st.header("Turn Your Image to Audio Story")

# Upload image
uploaded_file = st.file_uploader("Select an Image...")

# Nothing below runs until the user has supplied a file.
if uploaded_file is not None:
    # Store the upload in a private temporary file instead of writing to
    # ``uploaded_file.name`` in the working directory: that name is chosen by
    # the client, so it could overwrite an existing file next to the script.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_image:
        tmp_image.write(uploaded_file.getvalue())
        image_path = tmp_image.name

    st.image(uploaded_file, caption="Uploaded Image",
             use_column_width=True)

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    try:
        scenario = img2text(image_path)
    finally:
        # The temporary copy is only needed while the caption is computed.
        os.remove(image_path)
    st.write(scenario)

    # Stage 2: Text to Story
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write(story)

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    audio_file = text2audio(story)

    # Play button
    if st.button("Play Audio"):
        # text2audio returns the path of the saved file, so pass it to
        # st.audio directly; indexing it like a dict raises TypeError.
        st.audio(audio_file,
                 format="audio/wav",
                 start_time=0)
 
1
  # import part
2
  import streamlit as st
3
  from transformers import pipeline
4
+ import torch
5
+ import soundfile as sf
6
+ import numpy as np
7
 
8
  # function part
9
  # img2text
10
  def img2text(url):
11
+ image_to_text_model = pipeline("image-to-text", model="Salesforce/blip-image-captioning-base")
 
12
  text = image_to_text_model(url)[0]["generated_text"]
13
  return text
14
 
15
  # text2story
16
  def text2story(text):
17
+ story_text_model = pipeline("text-generation", model="Qwen/QwQ-32B")
18
+ story = story_text_model(text, max_length=150)[0]['generated_text']
19
+ return story
20
 
21
  # text2audio
22
  def text2audio(story_text):
23
+ # Here we will use a text-to-speech model from Hugging Face
24
+ tts_model = pipeline("text-to-speech", model="tts_models/en/ljspeech/tacotron2")
25
+ audio_data = tts_model(story_text)
26
+
27
+ # Save audio to a file
28
+ audio_filename = "story_audio.wav"
29
+ sf.write(audio_filename, audio_data['audio'], audio_data['sampling_rate'])
30
+
31
+ return audio_filename
32
 
33
  # main part
34
import os
import tempfile

# Page configuration has to be issued before any other Streamlit command.
st.set_page_config(page_title="Your Image to Audio Story", page_icon="🦜")
st.header("Turn Your Image to Audio Story")
uploaded_file = st.file_uploader("Select an Image...")

if uploaded_file is not None:
    # Store the upload in a private temporary file instead of writing to
    # ``uploaded_file.name`` in the working directory: that name is chosen by
    # the client, so it could overwrite an existing file next to the script.
    suffix = os.path.splitext(uploaded_file.name)[1]
    with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as tmp_image:
        tmp_image.write(uploaded_file.getvalue())
        image_path = tmp_image.name

    st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)

    # Stage 1: Image to Text
    st.text('Processing img2text...')
    try:
        scenario = img2text(image_path)
    finally:
        # The temporary copy is only needed while the caption is computed.
        os.remove(image_path)
    st.write(scenario)

    # Stage 2: Text to Story
    st.text('Generating a story...')
    story = text2story(scenario)
    st.write(story)

    # Stage 3: Story to Audio data
    st.text('Generating audio data...')
    audio_filename = text2audio(story)

    # Play button
    # NOTE(review): Streamlit reruns the script on each widget interaction,
    # so pressing the button presumably recomputes all three stages; consider
    # caching the results - confirm against the deployed behaviour.
    if st.button("Play Audio"):
        st.audio(audio_filename, format="audio/wav")