Spaces:

mac9087
/

metanice

Build error

App Files Community

mac9087 committed 8 days ago

Commit

c310c35

verified ·

1 Parent(s): b3b50b5

Update app.py

Browse files

Files changed (1) hide show

app.py +73 -25

app.py CHANGED Viewed

@@ -23,12 +23,27 @@ llm = pipeline(
 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 def process_response(input_text, generated_text):
     # Remove the input text from the beginning of the response
-    if generated_text.startswith(input_text):
         clean_response = generated_text[len(input_text):].strip()
     else:
         clean_response = generated_text.strip()
     # Split into sentences and take only the first 1-2 meaningful sentences
     sentences = re.split(r'(?<=[.!?])\s+', clean_response)
@@ -42,8 +57,13 @@ def process_response(input_text, generated_text):
         else:
             result = " ".join(meaningful_sentences)
     else:
-        # Fallback if no good sentences were found
-        result = "I'm not sure what to say about that."
     # Remove any repetitive phrases
     result = remove_repetitions(result)
@@ -86,35 +106,63 @@ def talk():
         audio_path = tmp.name
         audio_file.save(audio_path)
-    # Transcribe
-    segments, _ = whisper_model.transcribe(audio_path)
-    transcription = "".join([seg.text for seg in segments])
-    # Generate response
-    raw_response = llm(transcription)[0]["generated_text"]
-    # Process to get clean, short response
-    final_response = process_response(transcription, raw_response)
-    # Synthesize speech
-    tts_audio_path = audio_path.replace(".wav", "_reply.wav")
-    tts.tts_to_file(text=final_response, file_path=tts_audio_path)
-    return send_file(tts_audio_path, mimetype="audio/wav")
 @app.route("/chat", methods=["POST"])
 def chat():
     data = request.get_json()
     if not data or "text" not in data:
         return jsonify({"error": "Missing 'text' in request body"}), 400
-    user_input = data["text"]
-    raw_response = llm(user_input)[0]["generated_text"]
-    # Process to get clean, short response
-    final_response = process_response(user_input, raw_response)
-    return jsonify({"response": final_response})
 @app.route("/")
 def index():

 tts = TTS(model_name="tts_models/en/ljspeech/tacotron2-DDC", progress_bar=False, gpu=False)
 def process_response(input_text, generated_text):
+    # Handle the case where generated_text might be None
+    if not generated_text:
+        return "I'm not sure what to say about that."
+    # Make sure both are strings
+    input_text = str(input_text).strip()
+    generated_text = str(generated_text).strip()
+    # Skip empty input
+    if not input_text:
+        clean_response = generated_text
     # Remove the input text from the beginning of the response
+    elif generated_text.startswith(input_text):
         clean_response = generated_text[len(input_text):].strip()
     else:
         clean_response = generated_text.strip()
+    # If we ended up with nothing, provide a default response
+    if not clean_response:
+        return "I'm listening."
     # Split into sentences and take only the first 1-2 meaningful sentences
     sentences = re.split(r'(?<=[.!?])\s+', clean_response)
         else:
             result = " ".join(meaningful_sentences)
     else:
+        # If no meaningful sentences, but we have short sentences, use those
+        if sentences and any(s.strip() for s in sentences):
+            short_sentences = [s for s in sentences if s.strip()]
+            result = " ".join(short_sentences[:2])
+        else:
+            # Fallback if no good sentences were found
+            result = "I'm not sure what to say about that."
     # Remove any repetitive phrases
     result = remove_repetitions(result)
         audio_path = tmp.name
         audio_file.save(audio_path)
+    try:
+        # Transcribe
+        segments, _ = whisper_model.transcribe(audio_path)
+        transcription = "".join([seg.text for seg in segments])
+        print(f"Transcription: {transcription}")  # Debugging
+        if not transcription.strip():
+            # Handle empty transcription
+            final_response = "I didn't catch that. Could you please speak again?"
+        else:
+            # Generate response
+            raw_response = llm(transcription)[0]["generated_text"]
+            # Process to get clean, short response
+            final_response = process_response(transcription, raw_response)
+        print(f"Response: {final_response}")  # Debugging
+        # Synthesize speech
+        tts_audio_path = audio_path.replace(".wav", "_reply.wav")
+        tts.tts_to_file(text=final_response, file_path=tts_audio_path)
+        # Return both the audio file and the text response
+        response = send_file(tts_audio_path, mimetype="audio/wav")
+        response.headers["X-Response-Text"] = final_response
+        return response
+    except Exception as e:
+        print(f"Error in talk endpoint: {str(e)}")
+        return jsonify({"error": str(e)}), 500
 @app.route("/chat", methods=["POST"])
 def chat():
     data = request.get_json()
     if not data or "text" not in data:
         return jsonify({"error": "Missing 'text' in request body"}), 400
+    try:
+        user_input = data["text"]
+        print(f"Text input: {user_input}")  # Debugging
+        # Handle empty or too short input
+        if not user_input or len(user_input.strip()) < 2:
+            return jsonify({"response": "I'm listening. Please say more."})
+        # Generate response
+        raw_response = llm(user_input)[0]["generated_text"]
+        # Process to get clean, short response
+        final_response = process_response(user_input, raw_response)
+        print(f"Text response: {final_response}")  # Debugging
+        return jsonify({"response": final_response})
+    except Exception as e:
+        print(f"Error in chat endpoint: {str(e)}")
+        return jsonify({"response": "I'm having trouble processing that. Could you try again?", "error": str(e)})
 @app.route("/")
 def index():