Spaces:

meraj12
/

imagine

Sleeping

App Files Files Community

imagine / app.py

meraj12

Update app.py

fa34db2 verified 28 days ago

raw

history blame contribute delete

2.78 kB

	import streamlit as st
	from diffusers import StableDiffusionPipeline
	import torch
	from PIL import Image
	import tempfile
	import whisper
	import os
	from realesrgan import RealESRGAN
	from huggingface_hub import hf_hub_download

	# Load Whisper for voice-to-text
	@st.cache_resource
	def load_whisper():
	    """Load the Whisper "base" checkpoint used for voice-to-text.

	    Decorated with ``st.cache_resource`` so the model object is created
	    once and reused instead of being reloaded on every script rerun.
	    """
	    speech_model = whisper.load_model("base")
	    return speech_model

	# Load Stable Diffusion models
	@st.cache_resource
	def load_pipelines():
	    """Load one Stable Diffusion pipeline per selectable art style.

	    Returns:
	        dict: style label ("Realistic", "Anime", "Ghibli") mapped to a
	        ``StableDiffusionPipeline`` loaded in float32 and moved to the CPU.
	    """
	    # Style label shown in the UI -> Hugging Face model repository.
	    checkpoints = {
	        "Realistic": "runwayml/stable-diffusion-v1-5",
	        "Anime": "andite/anything-v4.0",
	        "Ghibli": "nitrosocke/Ghibli-Diffusion",
	    }
	    # Load every checkpoint first, then place each pipeline on the CPU.
	    loaded = {
	        label: StableDiffusionPipeline.from_pretrained(repo_id, torch_dtype=torch.float32)
	        for label, repo_id in checkpoints.items()
	    }
	    return {label: pipe.to("cpu") for label, pipe in loaded.items()}

	# Load Real-ESRGAN for enhancement
	@st.cache_resource
	def load_enhancer():
	    """Create the 4x Real-ESRGAN upscaler on the CPU and load its weights.

	    Returns:
	        RealESRGAN: an enhancer whose ``predict(image)`` is used to upscale
	        the generated picture.

	    The previous version returned the result of ``load_weights(...)`` and
	    passed it an ``RRDBNet`` instance. NOTE(review): this assumes the
	    ai-forever ``RealESRGAN`` API (the one exposing ``predict``), where
	    ``load_weights`` takes a weights *file path*, loads in place and returns
	    ``None`` -- confirm against the installed package, including the
	    module name used by the file-level import.
	    """
	    enhancer = RealESRGAN(torch.device("cpu"), scale=4)
	    # download=True fetches the checkpoint when the local file is missing.
	    enhancer.load_weights("weights/RealESRGAN_x4.pth", download=True)
	    # Return the enhancer itself, not the return value of load_weights().
	    return enhancer

	# Page configuration goes first: Streamlit documents set_page_config() as
	# needing to be the first Streamlit command, and the cached loaders below
	# are Streamlit-decorated calls.
	st.set_page_config(page_title="Love Text to Image", layout="centered")

	# Heavy models are created through the @st.cache_resource loaders.
	whisper_model = load_whisper()
	pipes = load_pipelines()
	enhancer = load_enhancer()

	st.title("💖 Love Text to Image Generator")

	st.markdown("Write or speak a romantic poem and turn it into beautiful art ✨")

	style = st.selectbox("Choose art style", ["Realistic", "Anime", "Ghibli"])

	tab1, tab2 = st.tabs(["📝 Text Input", "🎤 Voice Input"])

	prompt = ""

	with tab1:
	    prompt = st.text_area("Enter your romantic poem or message")

	with tab2:
	    audio_file = st.file_uploader("Upload your audio file (mp3, wav)", type=["mp3", "wav"])
	    if audio_file is not None:
	        # Keep the upload's real extension rather than labelling mp3 data ".wav".
	        suffix = os.path.splitext(audio_file.name)[1] or ".wav"
	        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
	            tmp.write(audio_file.read())
	            tmp_path = tmp.name
	        try:
	            with st.spinner("Transcribing..."):
	                result = whisper_model.transcribe(tmp_path)
	        finally:
	            # Remove the temp file even when transcription raises.
	            os.remove(tmp_path)
	        # A transcription, when present, replaces the typed text as the prompt.
	        prompt = result["text"]
	        st.success("Transcription Complete!")
	        st.markdown(f"Transcribed Text: {prompt}")

	if st.button("Generate Image"):
	    if not prompt.strip():
	        st.warning("Please enter or upload a love message.")
	    else:
	        with st.spinner("Generating image..."):
	            # Run the pipeline matching the selected art style.
	            image = pipes[style](prompt).images[0]
	        st.image(image, caption="Original Generated Image", use_column_width=True)

	        with st.spinner("Enhancing image..."):
	            enhanced = enhancer.predict(image)
	        st.image(enhanced, caption="Enhanced Image", use_column_width=True)