Update app.py
app.py CHANGED
@@ -3,61 +3,62 @@
 ##########################################
 import streamlit as st  # For web interface
 from transformers import (
-    pipeline,
-    SpeechT5Processor,
-    SpeechT5ForTextToSpeech,
-    SpeechT5HifiGan,
-    AutoModelForCausalLM,
-    AutoTokenizer
+    pipeline,  # For loading pre-trained models
+    SpeechT5Processor,  # For text-to-speech processing
+    SpeechT5ForTextToSpeech,  # TTS model
+    SpeechT5HifiGan,  # Vocoder for generating audio waveforms
+    AutoModelForCausalLM,  # For text generation
+    AutoTokenizer  # For tokenizing input text
 )  # AI model components
-
-import
-import
-import
+
+from datasets import load_dataset  # To load voice embeddings
+import torch  # For tensor computations
+import soundfile as sf  # For handling audio files
+import re  # For regular expressions in text processing
 
 ##########################################
 # Initial configuration (MUST be first)
 ##########################################
 st.set_page_config(
-    page_title="Just Comment",
-    page_icon="💬",
-    layout="centered",
-    initial_sidebar_state="collapsed"
+    page_title="Just Comment",  # Title of the web app
+    page_icon="💬",  # Icon displayed in the browser tab
+    layout="centered",  # Center the layout of the app
+    initial_sidebar_state="collapsed"  # Start with sidebar collapsed
 )
 
 ##########################################
 # Global model loading with caching
 ##########################################
-@st.cache_resource(show_spinner=False)
+@st.cache_resource(show_spinner=False)  # Cache the models for performance
 def _load_models():
     """Load and cache all ML models with optimized settings"""
     return {
         # Emotion classification pipeline
         'emotion': pipeline(
-            "text-classification",
-            model="Thea231/jhartmann_emotion_finetuning",
+            "text-classification",  # Specify task type
+            model="Thea231/jhartmann_emotion_finetuning",  # Load the model
             truncation=True  # Enable text truncation for long inputs
         ),
 
         # Text generation components
         'textgen_tokenizer': AutoTokenizer.from_pretrained(
-            "Qwen/Qwen1.5-0.5B",
+            "Qwen/Qwen1.5-0.5B",  # Load tokenizer
             use_fast=True  # Enable fast tokenization
         ),
         'textgen_model': AutoModelForCausalLM.from_pretrained(
-            "Qwen/Qwen1.5-0.5B",
+            "Qwen/Qwen1.5-0.5B",  # Load text generation model
             torch_dtype=torch.float16  # Use half-precision for faster inference
         ),
 
         # Text-to-speech components
-        'tts_processor': SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),
-        'tts_model': SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),
-        'tts_vocoder': SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan"),
+        'tts_processor': SpeechT5Processor.from_pretrained("microsoft/speecht5_tts"),  # Load TTS processor
+        'tts_model': SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts"),  # Load TTS model
+        'tts_vocoder': SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan"),  # Load vocoder
 
         # Preloaded speaker embeddings
         'speaker_embeddings': torch.tensor(
-            load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")[7306]["xvector"]
-        ).unsqueeze(0)
+            load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")[7306]["xvector"]  # Load speaker embeddings
+        ).unsqueeze(0)  # Add an additional dimension for batch processing
     }
 
 ##########################################
@@ -65,14 +66,14 @@ def _load_models():
 ##########################################
 def _display_interface():
     """Render user interface elements"""
-    st.title("Just Comment")
-    st.markdown("### I'm listening to you, my friend～")
+    st.title("Just Comment")  # Set the main title of the app
+    st.markdown("### I'm listening to you, my friend～")  # Subheading for user interaction
 
     return st.text_area(
-        "📝 Enter your comment:",
-        placeholder="Type your message here...",
-        height=150,
-        key="user_input"
+        "📝 Enter your comment:",  # Label for the text area
+        placeholder="Type your message here...",  # Placeholder text
+        height=150,  # Height of the text area
+        key="user_input"  # Unique key for the text area
     )
 
 ##########################################
@@ -80,10 +81,10 @@ def _display_interface():
 ##########################################
 def _analyze_emotion(text, classifier):
     """Identify dominant emotion with confidence threshold"""
-    results = classifier(text, return_all_scores=True)[0]
-    valid_emotions = {'sadness', 'joy', 'love', 'anger', 'fear', 'surprise'}
-    filtered = [e for e in results if e['label'].lower() in valid_emotions]
-    return max(filtered, key=lambda x: x['score'])
+    results = classifier(text, return_all_scores=True)[0]  # Get emotion scores
+    valid_emotions = {'sadness', 'joy', 'love', 'anger', 'fear', 'surprise'}  # Define valid emotions
+    filtered = [e for e in results if e['label'].lower() in valid_emotions]  # Filter results by valid emotions
+    return max(filtered, key=lambda x: x['score'])  # Return the emotion with the highest score
 
 def _generate_prompt(text, emotion):
     """Create structured prompts for all emotion types"""
@@ -125,16 +126,16 @@ def _generate_prompt(text, emotion):
         "Response:"
     )
     }
-    return prompt_templates.get(emotion.lower(), "").format(input=text)
+    return prompt_templates.get(emotion.lower(), "").format(input=text)  # Format and return the appropriate prompt
 
 def _process_response(raw_text):
-    """Clean and format generated response"""
+    """Clean and format the generated response"""
     # Extract text after last "Response:" marker
     processed = raw_text.split("Response:")[-1].strip()
 
     # Remove incomplete sentences
     if '.' in processed:
-        processed = processed.rsplit('.', 1)[0] + '.'
+        processed = processed.rsplit('.', 1)[0] + '.'  # Ensure the response ends with a period
 
     # Ensure length between 50-200 characters
     return processed[:200].strip() if len(processed) > 50 else "Thank you for your feedback. We value your input and will respond shortly."
@@ -142,44 +143,44 @@ def _process_response(raw_text):
 def _generate_text_response(input_text, models):
     """Generate optimized text response with timing controls"""
     # Emotion analysis
-    emotion = _analyze_emotion(input_text, models['emotion'])
+    emotion = _analyze_emotion(input_text, models['emotion'])  # Analyze the emotion of user input
 
     # Prompt engineering
-    prompt = _generate_prompt(input_text, emotion['label'])
+    prompt = _generate_prompt(input_text, emotion['label'])  # Generate prompt based on detected emotion
 
     # Text generation with optimized parameters
-    inputs = models['textgen_tokenizer'](prompt, return_tensors="pt").to('cpu')
+    inputs = models['textgen_tokenizer'](prompt, return_tensors="pt").to('cpu')  # Tokenize the prompt
     outputs = models['textgen_model'].generate(
-        inputs.input_ids,
-        max_new_tokens=100,  # Strict token limit
-        temperature=0.7,
-        top_p=0.9,
-        do_sample=True,
-        pad_token_id=models['textgen_tokenizer'].eos_token_id
+        inputs.input_ids,  # Input token IDs
+        max_new_tokens=100,  # Strict token limit for response length
+        temperature=0.7,  # Control randomness in text generation
+        top_p=0.9,  # Control diversity in sampling
+        do_sample=True,  # Enable sampling to generate varied responses
+        pad_token_id=models['textgen_tokenizer'].eos_token_id  # Use end-of-sequence token for padding
    )
 
     return _process_response(
-        models['textgen_tokenizer'].decode(outputs[0], skip_special_tokens=True)
+        models['textgen_tokenizer'].decode(outputs[0], skip_special_tokens=True)  # Decode and process the response
    )
 
 def _generate_audio_response(text, models):
     """Convert text to speech with performance optimizations"""
-    # Process text input
-    inputs = models['tts_processor'](text=text, return_tensors="pt")
+    # Process text input for TTS
+    inputs = models['tts_processor'](text=text, return_tensors="pt")  # Tokenize input text for TTS
 
     # Generate spectrogram
     spectrogram = models['tts_model'].generate_speech(
-        inputs["input_ids"],
-        models['speaker_embeddings']
+        inputs["input_ids"],  # Input token IDs for TTS
+        models['speaker_embeddings']  # Use preloaded speaker embeddings
    )
 
     # Generate waveform with optimizations
-    with torch.no_grad():  # Disable gradient calculation
-        waveform = models['tts_vocoder'](spectrogram)
+    with torch.no_grad():  # Disable gradient calculation for inference
+        waveform = models['tts_vocoder'](spectrogram)  # Generate audio waveform from spectrogram
 
     # Save audio file
-    sf.write("response.wav", waveform.numpy(), samplerate=16000)
-    return "response.wav"
+    sf.write("response.wav", waveform.numpy(), samplerate=16000)  # Save waveform as a WAV file
+    return "response.wav"  # Return the path to the saved audio file
 
 ##########################################
 # Main Application Flow
@@ -187,24 +188,24 @@ def _generate_audio_response(text, models):
 def main():
     """Primary execution flow"""
     # Load models once
-    ml_models = _load_models()
+    ml_models = _load_models()  # Load all models and cache them
 
     # Display interface
-    user_input = _display_interface()
+    user_input = _display_interface()  # Show the user input interface
 
-    if user_input:
+    if user_input:  # Check if user has entered input
         # Text generation stage
-        with st.spinner("🔄 Analyzing emotions and generating response..."):
-            text_response = _generate_text_response(user_input, ml_models)
+        with st.spinner("🔄 Analyzing emotions and generating response..."):  # Show loading spinner
+            text_response = _generate_text_response(user_input, ml_models)  # Generate text response
 
         # Display results
-        st.subheader("📄 Generated Response")
-        st.markdown(f"```\n{text_response}\n```")  #
+        st.subheader("📄 Generated Response")  # Subheader for response section
+        st.markdown(f"```\n{text_response}\n```")  # Display generated response in markdown format
 
         # Audio generation stage
-        with st.spinner("🔊 Converting to speech..."):
-            audio_file = _generate_audio_response(text_response, ml_models)
-            st.audio(audio_file, format="audio/wav")
+        with st.spinner("🔊 Converting to speech..."):  # Show loading spinner
+            audio_file = _generate_audio_response(text_response, ml_models)  # Generate audio response
+            st.audio(audio_file, format="audio/wav")  # Play audio file in the app
 
 if __name__ == "__main__":
-    main()
+    main()  # Execute the main function when the script is run
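For reviewers who want to sanity-check the import fix in isolation, here is a minimal standalone sketch of the same SpeechT5 chain this commit wires up. The model IDs and the x-vector index 7306 are taken from the diff above; the sample sentence and the output filename check.wav are illustrative only.

# Minimal sketch, assuming the same models as app.py: text -> spectrogram -> waveform -> WAV.
import torch                # tensor ops (newly imported in this commit)
import soundfile as sf      # WAV output (newly imported in this commit)
from datasets import load_dataset
from transformers import SpeechT5Processor, SpeechT5ForTextToSpeech, SpeechT5HifiGan

processor = SpeechT5Processor.from_pretrained("microsoft/speecht5_tts")
tts_model = SpeechT5ForTextToSpeech.from_pretrained("microsoft/speecht5_tts")
vocoder = SpeechT5HifiGan.from_pretrained("microsoft/speecht5_hifigan")

# A single CMU ARCTIC x-vector provides the speaker identity (index 7306, as in the app)
xvectors = load_dataset("Matthijs/cmu-arctic-xvectors", split="validation")
speaker = torch.tensor(xvectors[7306]["xvector"]).unsqueeze(0)  # add batch dimension

inputs = processor(text="Thank you for your feedback.", return_tensors="pt")
with torch.no_grad():  # inference only, no gradients needed
    spectrogram = tts_model.generate_speech(inputs["input_ids"], speaker)
    waveform = vocoder(spectrogram)

sf.write("check.wav", waveform.squeeze().numpy(), samplerate=16000)  # SpeechT5 emits 16 kHz audio

If this runs end to end, the from datasets import load_dataset, import torch, and import soundfile as sf lines added by the commit are sufficient for the audio path; note that import re is added but not yet used anywhere in the diff as shown.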