meraj12 committed on
Commit
7fdc019
verified · 1 Parent(s): 3ff4225

Update app.py

Files changed (1)
  1. app.py +94 -38
app.py CHANGED
@@ -1,42 +1,98 @@
  import streamlit as st
  from utils import transcribe_audio_segments, create_video_segments, generate_speech
- import tempfile
  import os
 
- st.set_page_config(page_title="Voice Visualizer", layout="centered")
- st.title("🎥 Voice-to-Image Video Generator")
- st.markdown("Upload an audio file and images. The app transcribes the speech, matches keywords with images, and creates a video with images, audio, and translated subtitles.")
-
- audio_file = st.file_uploader("Upload audio (MP3/WAV)", type=["mp3", "wav"])
- images = st.file_uploader("Upload images", type=["jpg", "png", "jpeg"], accept_multiple_files=True)
-
- voice_to_voice = st.checkbox("🔁 Voice-to-Voice Response (AI replies in speech)")
-
- if st.button("Generate Video") and audio_file and images:
-     with tempfile.TemporaryDirectory() as tmpdir:
-         audio_path = os.path.join(tmpdir, audio_file.name)
-         with open(audio_path, "wb") as f:
-             f.write(audio_file.read())
-
-         image_paths = []
-         for img in images:
-             img_path = os.path.join(tmpdir, img.name)
-             with open(img_path, "wb") as f:
-                 f.write(img.read())
-             image_paths.append(img_path)
-
-         st.info("Transcribing audio and splitting segments...")
-         segments = transcribe_audio_segments(audio_path)
-
-         st.info("Creating video...")
-         final_video = os.path.join(tmpdir, "video_output.mp4")
-         output = create_video_segments(segments, audio_path, image_paths, final_video)
-         st.success("🎉 Video created!")
-         st.video(output)
-
-         if voice_to_voice:
-             full_text = " ".join([s['text'] for s in segments])
-             st.info("Generating voice response...")
-             ai_response = f"Hello! Here's your visual story based on your audio: {full_text}"
-             speech_path = generate_speech(ai_response)
-             st.audio(speech_path)
 
  import streamlit as st
  from utils import transcribe_audio_segments, create_video_segments, generate_speech
+ from PIL import Image
  import os
+ import cv2
+ import numpy as np
+ from io import BytesIO
+ import tempfile
+ from moviepy.editor import ImageSequenceClip
+ from rembg import remove
+ from basicsr.archs.rrdbnet_arch import RRDBNet
+ from realesrgan import RealESRGANer
+
+ # Helper function to apply a cartoon effect (edge mask over smoothed colour)
+ def cartoonize(img):
+     gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
+     gray = cv2.medianBlur(gray, 5)
+     edges = cv2.adaptiveThreshold(
+         gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C,
+         cv2.THRESH_BINARY, 9, 9
+     )
+     color = cv2.bilateralFilter(img, 9, 300, 300)
+     cartoon = cv2.bitwise_and(color, color, mask=edges)
+     return cartoon
+
+ # Helper function to apply Canny edge detection (returns a single-channel image)
+ def edge_detect(img):
+     return cv2.Canny(img, 100, 200)
+
+ # Helper function to upscale an image 4x with Real-ESRGAN
+ # (expects the pretrained weights file 'realesrgan-x4plus.pth' on disk)
+ def enhance_image(img):
+     model = RRDBNet(num_in_ch=3, num_out_ch=3, num_feat=64,
+                     num_block=23, num_grow_ch=32, scale=4)
+     upsampler = RealESRGANer(
+         scale=4, model_path='realesrgan-x4plus.pth', model=model,
+         tile=0, tile_pad=10, pre_pad=0, half=False
+     )
+     # RealESRGANer.enhance returns (output, img_mode), not a 3-tuple
+     output, _ = upsampler.enhance(img, outscale=4)
+     return output
+
+ # Save a processed (BGR) image to disk
+ def save_image(img, filename):
+     cv2.imwrite(filename, img)
+
+ # Convert a list of same-sized RGB frames into a slideshow video
+ def create_slideshow(images, out_path="slideshow.mp4"):
+     clip = ImageSequenceClip(images, fps=1)
+     clip.write_videofile(out_path, codec='libx264')
+
+ # Streamlit UI
+ st.set_page_config(layout="wide")
+ st.title("🎨 Interactive Image Effects Studio")
+
+ uploaded_files = st.file_uploader("Upload images", type=["jpg", "jpeg", "png"], accept_multiple_files=True)
+ effects = st.multiselect("Select effects to apply", ["Cartoonize", "Edge Detection", "Background Removal", "Enhance / Upscale"])
+
+ effect_images = []
+
+ if uploaded_files:
+     for uploaded_file in uploaded_files:
+         image = Image.open(uploaded_file).convert("RGB")
+         img_array = np.array(image)
+         original_img = cv2.cvtColor(img_array, cv2.COLOR_RGB2BGR)
+
+         processed_img = original_img.copy()
+         if "Cartoonize" in effects:
+             processed_img = cartoonize(processed_img)
+         if "Edge Detection" in effects:
+             processed_img = cv2.cvtColor(edge_detect(processed_img), cv2.COLOR_GRAY2BGR)
+         if "Background Removal" in effects:
+             processed_img = remove(Image.fromarray(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB)))
+             # rembg returns an RGBA image; flatten to RGB before going back to BGR
+             processed_img = cv2.cvtColor(np.array(processed_img.convert("RGB")), cv2.COLOR_RGB2BGR)
+         if "Enhance / Upscale" in effects:
+             processed_img = enhance_image(processed_img)
+
+         st.image(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB), caption="Processed Image", use_column_width=True)
+
+         # Keep an RGB copy for the slideshow and offer the processed frame for download
+         effect_images.append(cv2.cvtColor(processed_img, cv2.COLOR_BGR2RGB))
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".jpg") as tmp_file:
+             save_image(processed_img, tmp_file.name)
+         with open(tmp_file.name, "rb") as file:
+             st.download_button(
+                 label="Download Processed Image",
+                 data=file,
+                 file_name=os.path.basename(uploaded_file.name),
+                 mime="image/jpeg",
+                 key=f"download_{uploaded_file.name}"  # unique key per uploaded file
+             )
+
+     if st.button("Create Slideshow from Images"):
+         with tempfile.NamedTemporaryFile(delete=False, suffix=".mp4") as tmp_video:
+             create_slideshow(effect_images, tmp_video.name)
+         # Read the rendered video once; replaying an exhausted file handle
+         # would hand st.download_button empty data
+         with open(tmp_video.name, "rb") as file:
+             video_bytes = file.read()
+         st.video(video_bytes)
+         st.download_button("Download Slideshow Video", video_bytes, file_name="slideshow.mp4")
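
A setup note for anyone checking out this revision: enhance_image hard-codes model_path='realesrgan-x4plus.pth', so the pretrained weights have to sit next to app.py before the "Enhance / Upscale" effect will run. A minimal fetch sketch, assuming the commonly referenced upstream release URL (verify it before relying on it):

    # One-off setup sketch (not part of this commit). The URL below is the
    # widely cited Real-ESRGAN v0.1.0 release asset; double-check it, and note
    # the file is saved under the exact name app.py expects.
    import urllib.request

    WEIGHTS_URL = ("https://github.com/xinntao/Real-ESRGAN/releases/"
                   "download/v0.1.0/RealESRGAN_x4plus.pth")
    urllib.request.urlretrieve(WEIGHTS_URL, "realesrgan-x4plus.pth")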
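
The upload loop converts every frame between PIL's RGB channel order and OpenCV's BGR order and back, which is easy to get backwards. A standalone sketch (illustration only, not part of the commit) of the round-trip the loop relies on:

    # Demonstrates that cv2.cvtColor swaps channels losslessly in both directions.
    import cv2
    import numpy as np

    rgb = np.zeros((4, 4, 3), dtype=np.uint8)
    rgb[..., 0] = 255                                 # pure red in RGB order
    bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
    assert bgr[0, 0].tolist() == [0, 0, 255]          # red is the last channel in BGR
    assert np.array_equal(cv2.cvtColor(bgr, cv2.COLOR_BGR2RGB), rgb)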
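
One caveat on the slideshow path: ImageSequenceClip needs all frames to share the same dimensions, and "Enhance / Upscale" quadruples a frame's width and height, so a mixed batch can fail. A hedged sketch (the normalize_frames helper is hypothetical, not in the commit) of resizing frames to a common size before calling create_slideshow:

    # Hypothetical helper: resize every RGB frame to a common (width, height)
    # so ImageSequenceClip accepts the batch.
    import cv2

    def normalize_frames(frames, size=(640, 480)):
        return [cv2.resize(frame, size) for frame in frames]

    # Usage: create_slideshow(normalize_frames(effect_images), tmp_video.name)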