##########################################
# Step 0: Essential imports
##########################################
import streamlit as st                     # Web interface
from transformers import (                 # AI components
    pipeline,
    SpeechT5Processor,
    SpeechT5ForTextToSpeech,
    SpeechT5HifiGan,
    AutoModelForCausalLM,
    AutoTokenizer
)
from datasets import load_dataset          # Voice data
import torch                               # Tensor operations
import soundfile as sf                     # Audio processing

##########################################
# Initial configuration (MUST BE FIRST)
##########################################
st.set_page_config(                        # Set page config first
    page_title="Just Comment",
    page_icon="💬",
    layout="centered"
)

##########################################
# Optimized model loader with caching
##########################################
@st.cache_resource  # Cache so the models are loaded only once per session
def _load_components():
    """Load and cache all models with hardware optimization"""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    # Emotion classifier (fast)
    emotion_pipe = pipeline(
        "text-classification",
        model="Thea231/jhartmann_emotion_finetuning",
        device=device,
        truncation=True
    )
    # Text generator (optimized)
    text_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-0.5B")
    text_model = AutoModelForCausalLM.from_pretrained(
        "Qwen/Qwen1.5-0.5B",
        torch_dtype=torch.float16,
        device_map="auto"
    )
    # TTS system (accelerated)
    tts_processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
    tts_model = SpeechT5ForTextToSpeech.from_pretrained(
        "microsoft/speecht5_tts",
        torch_dtype=torch.float16
    ).to(device)
    tts_vocoder = SpeechT5HifiGan.from_pretrained(
        "microsoft/speecht5_hifigan",
        torch_dtype=torch.float16
    ).to(device)
    # Preloaded voice profile, cast to the TTS model's dtype so the float16
    # models do not receive float32 speaker embeddings during synthesis
    speaker_emb = torch.tensor(
        load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")[7306]["xvector"]
    ).unsqueeze(0).to(device=device, dtype=tts_model.dtype)
    return {
        "emotion": emotion_pipe,
        "text_model": text_model,
        "text_tokenizer": text_tokenizer,
        "tts_processor": tts_processor,
        "tts_model": tts_model,
        "tts_vocoder": tts_vocoder,
        "speaker_emb": speaker_emb,
        "device": device
    }

##########################################
# User interface components
##########################################
def _show_interface():
    """Render input interface"""
    st.title("Just Comment")
    st.markdown("### I'm listening to you, my friend~")
    return st.text_area(                   # Input field
        "📝 Enter your comment:",
        placeholder="Share your thoughts...",
        height=150,
        key="input"
    )

##########################################
# Core processing functions
##########################################
def _fast_emotion(text, analyzer):
    """Rapid emotion detection with input limits"""
    result = analyzer(text[:256], return_all_scores=True)[0]  # Limit input length
    emotions = ['sadness', 'joy', 'love', 'anger', 'fear', 'surprise']
    return max(
        (e for e in result if e['label'].lower() in emotions),
        key=lambda x: x['score'],
        default={'label': 'neutral', 'score': 0}
    )
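
# Illustrative note on _fast_emotion (an assumption, since the exact label set
# depends on the fine-tuned checkpoint): with return_all_scores=True the classifier
# reports a {'label': ..., 'score': ...} entry for every class; the [0] above picks
# the result for the single input string, and max() then keeps the best-scoring
# emotion of interest, e.g. {'label': 'joy', 'score': 0.93}.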

def _build_prompt(text, emotion):
    """Template-based prompt engineering for response generation"""
    return f"{emotion.capitalize()} detected: {text}\nRespond with a coherent and supportive response."

def _generate_response(text, models):
    """Optimized text generation pipeline"""
    # Emotion detection
    emotion = _fast_emotion(text, models["emotion"])
    # Prompt construction
    prompt = _build_prompt(text, emotion["label"])
    # Generate text
    inputs = models["text_tokenizer"](
        prompt,
        return_tensors="pt",
        max_length=100,
        truncation=True
    ).to(models["device"])
    output = models["text_model"].generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=100,                # Balanced length for response
        temperature=0.7,
        top_p=0.9,
        do_sample=True,
        pad_token_id=models["text_tokenizer"].eos_token_id
    )
    # Decode only the newly generated tokens so the prompt is not echoed back
    generated_ids = output[0][inputs.input_ids.shape[1]:]
    response = models["text_tokenizer"].decode(generated_ids, skip_special_tokens=True)
    return response.strip()[:200] or "Thank you for your feedback."

def _text_to_speech(text, models):
    """High-speed audio synthesis"""
    inputs = models["tts_processor"](text=text[:150], return_tensors="pt").to(models["device"])
    with torch.inference_mode():           # Accelerated inference
        spectrogram = models["tts_model"].generate_speech(inputs["input_ids"], models["speaker_emb"])
        audio = models["tts_vocoder"](spectrogram)
    # Convert to float32 before writing; soundfile does not accept float16 arrays
    sf.write("output.wav", audio.float().cpu().numpy(), 16000)
    return "output.wav"
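
# Optional sketch (not used by the app flow above, and the helper name
# _text_to_speech_direct is hypothetical): generate_speech() can also take the
# vocoder directly and return the waveform in one call, which skips handling
# the spectrogram explicitly.
def _text_to_speech_direct(text, models, path="output.wav"):
    """Synthesize speech by passing the vocoder straight to generate_speech()."""
    inputs = models["tts_processor"](text=text[:150], return_tensors="pt").to(models["device"])
    with torch.inference_mode():
        waveform = models["tts_model"].generate_speech(
            inputs["input_ids"],
            models["speaker_emb"],
            vocoder=models["tts_vocoder"]  # Vocoder applied inside generate_speech
        )
    sf.write(path, waveform.float().cpu().numpy(), 16000)  # float32 for soundfile
    return path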

##########################################
# Main application flow
##########################################
def main():
    """Primary execution controller"""
    # Load components (cached after the first run)
    components = _load_components()
    # Show interface
    user_input = _show_interface()
    if user_input:
        # Text generation
        with st.spinner("🔍 Analyzing..."):
            response = _generate_response(user_input, components)
        # Display result
        st.subheader("📄 Response")
        st.markdown(f"```\n{response}\n```")   # Show the reply in a code block
        # Audio generation
        with st.spinner("🔊 Synthesizing..."):
            audio_path = _text_to_speech(response, components)
        st.audio(audio_path, format="audio/wav")


if __name__ == "__main__":
    main()  # Execute the main function
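
# Usage sketch (assumptions: this file is saved as app.py, the usual entry point
# for a Streamlit Space, and the environment provides the packages below):
#   pip install streamlit transformers accelerate datasets torch soundfile sentencepiece
#   streamlit run app.py
# The first run downloads all model checkpoints, so expect a delay before the UI responds.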