Docfile committed on
Commit
aa815df
·
verified ·
1 Parent(s): 5ccca07

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +198 -227
  2. main.py +1 -1
app.py CHANGED
@@ -1,277 +1,248 @@
1
  import os
 
 
 
2
  import logging
3
- import base64
4
  import json
5
- import uuid
6
- import google.generativeai as genai
7
- from datetime import datetime
8
- from functools import wraps
9
- from flask import Flask, render_template, request, jsonify, session, redirect
10
- from dotenv import load_dotenv
11
- from werkzeug.utils import secure_filename
12
 
13
  # Configure logging
14
  logging.basicConfig(level=logging.DEBUG)
15
  logger = logging.getLogger(__name__)
16
 
17
- # Load environment variables
18
- load_dotenv()
19
 
20
- # Configure Google Gemini API
21
  api_key = os.environ.get("GEMINI_API_KEY")
22
  if not api_key:
23
- logger.warning("GEMINI_API_KEY not found in environment variables")
24
- else:
25
- logger.info("GEMINI_API_KEY found. API configured successfully.")
26
-
27
- genai.configure(api_key=api_key)
28
 
29
- # Initialize Flask app
30
- app = Flask(__name__)
31
- app.secret_key = os.environ.get("SESSION_SECRET", "default-dev-secret-key")
32
- app.config['UPLOAD_FOLDER'] = 'static/uploads'
33
- app.config['MAX_CONTENT_LENGTH'] = 10 * 1024 * 1024 # 10 MB max
 
34
 
35
- # Middleware to ensure user has a session_id
36
- def session_required(f):
37
- @wraps(f)
38
- def decorated_function(*args, **kwargs):
39
- if 'session_id' not in session:
40
- session['session_id'] = str(uuid.uuid4())
41
- logger.info(f"Created new session: {session['session_id']}")
42
- return f(*args, **kwargs)
43
- return decorated_function
44
 
45
- # Ensure upload directory exists
46
- os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
47
-
48
- # Configure Gemini model with specific parameters for better responses
49
- model = genai.GenerativeModel(
50
- model_name='gemini-2.0-flash',
51
- generation_config={
52
- 'temperature': 0.7, # Slightly creative but still focused
53
- 'top_p': 0.9, # Diverse output but not too random
54
- 'top_k': 40, # Reasonable range of tokens to consider
55
- 'max_output_tokens': 2048 # Allow longer responses
56
  }
57
- )
 
 
 
 
 
 
 
 
58
 
59
- # Configure Gemini vision model for image processing
60
- vision_model = genai.GenerativeModel('gemini-2.0-vision-flash')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  @app.route('/')
63
- @session_required
64
  def index():
65
- """Render the chat interface."""
66
- return render_template('index.html')
67
 
68
- @app.route('/api/chat', methods=['POST'])
69
- @session_required
70
- def chat():
71
- """Process chat messages and get responses from Gemini API."""
72
  try:
73
  data = request.json
74
- user_message = data.get('message', '')
75
- chat_history = data.get('history', [])
76
- image_data = data.get('image', None)
77
 
78
- if not user_message and not image_data:
79
- return jsonify({'error': 'Veuillez entrer un message ou joindre une image.'}), 400
80
 
81
- # Log the incoming request (but not full chat history for privacy)
82
- session_id = session.get('session_id')
83
- logger.info(f"Received chat request from session {session_id}. Message length: {len(user_message)}")
84
 
85
- # Format conversation history for context
86
- formatted_history = []
87
- for msg in chat_history[-15:]: # Use the last 15 messages for more context
88
- role = "user" if msg['sender'] == 'user' else "model"
89
- formatted_history.append({"role": role, "parts": [msg['text']]})
90
 
91
- # Handle image processing if images are included
92
- if image_data:
93
- try:
94
- parts = []
95
-
96
- # Process single image or multiple images
97
- if isinstance(image_data, list):
98
- # Handle multiple images
99
- for img in image_data:
100
- if isinstance(img, str) and ',' in img:
101
- # Extract the base64 part after the comma
102
- image_base64 = img.split(',')[1]
103
- # Create image part
104
- image = genai.types.Part.from_data(
105
- data=base64.b64decode(image_base64),
106
- mime_type="image/jpeg"
107
- )
108
- parts.append(image)
109
-
110
- # Save each image
111
- session_id = session.get('session_id')
112
- session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
113
- os.makedirs(session_dir, exist_ok=True)
114
-
115
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
116
- filename = secure_filename(f"image_{timestamp}_{len(parts)}.jpg")
117
- filepath = os.path.join(session_dir, filename)
118
- with open(filepath, "wb") as f:
119
- f.write(base64.b64decode(image_base64))
120
- else:
121
- # Handle single image
122
- image_base64 = image_data.split(',')[1]
123
- image = genai.types.Part.from_data(
124
- data=base64.b64decode(image_base64),
125
- mime_type="image/jpeg"
126
- )
127
- parts.append(image)
128
-
129
- # Save image with timestamp in the session directory
130
- session_id = session.get('session_id')
131
- session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
132
- os.makedirs(session_dir, exist_ok=True)
133
-
134
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
135
- filename = secure_filename(f"image_{timestamp}.jpg")
136
- filepath = os.path.join(session_dir, filename)
137
- with open(filepath, "wb") as f:
138
- f.write(base64.b64decode(image_base64))
139
-
140
- # Add text message if provided
141
- if user_message:
142
- parts.append(user_message)
143
-
144
- # Generate response using vision model
145
- response = vision_model.generate_content(parts)
146
- return jsonify({'response': response.text})
147
-
148
- except Exception as img_error:
149
- logger.error(f"Error processing image: {str(img_error)}")
150
- return jsonify({
151
- 'error': 'Désolé, une erreur est survenue lors du traitement de l\'image. Veuillez réessayer.'
152
- }), 500
153
- else:
154
- # Text-only processing
155
- # Create a chat session with history
156
- chat = model.start_chat(history=formatted_history)
157
-
158
- # Generate response
159
- response = chat.send_message(user_message)
160
-
161
- # Log successful response
162
- logger.info(f"Generated response successfully. Response length: {len(response.text)}")
163
-
164
- # Return the response
165
- return jsonify({'response': response.text})
166
-
167
- except genai.types.generation_types.BlockedPromptException as be:
168
- logger.warning(f"Content blocked: {str(be)}")
169
  return jsonify({
170
- 'error': 'Votre message ou la conversation ne peut pas être traitée car elle contient du contenu potentiellement inapproprié.'
171
- }), 400
172
-
 
 
173
  except Exception as e:
174
- logger.error(f"Error in chat endpoint: {str(e)}")
175
- return jsonify({
176
- 'error': 'Désolé, j\'ai rencontré une erreur lors du traitement de votre demande. Veuillez réessayer.'
177
- }), 500
178
 
179
- @app.route('/api/save-chat', methods=['POST'])
180
- @session_required
181
- def save_chat():
182
- """Save the current chat history."""
183
  try:
184
- session_id = session.get('session_id')
185
-
186
- # Create session-specific directory
187
- session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
188
- os.makedirs(session_dir, exist_ok=True)
189
-
190
- data = request.json
191
- chat_history = data.get('history', [])
192
-
193
- if not chat_history:
194
- return jsonify({'error': 'Aucune conversation à sauvegarder.'}), 400
195
-
196
- # Generate filename with timestamp
197
- timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
198
- filename = f"chat_{timestamp}.json"
199
- filepath = os.path.join(session_dir, filename)
200
 
201
- # Save chat history to file
202
- with open(filepath, 'w', encoding='utf-8') as f:
203
- json.dump(chat_history, f, ensure_ascii=False, indent=2)
204
 
205
- return jsonify({'success': True, 'filename': filename})
206
 
207
  except Exception as e:
208
- logger.error(f"Error saving chat: {str(e)}")
209
- return jsonify({
210
- 'error': 'Désolé, une erreur est survenue lors de la sauvegarde de la conversation.'
211
- }), 500
212
 
213
- @app.route('/api/load-chats', methods=['GET'])
214
- @session_required
215
- def load_chats():
216
- """Get a list of saved chat files for current session."""
217
  try:
218
- session_id = session.get('session_id')
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
220
- # Get session-specific directory
221
- session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
222
 
223
- # If the directory doesn't exist yet, return empty list
224
- if not os.path.exists(session_dir):
225
- return jsonify({'chats': []})
 
 
226
 
227
- chat_files = []
228
- for filename in os.listdir(session_dir):
229
- if filename.startswith('chat_') and filename.endswith('.json'):
230
- # Extract timestamp from filename
231
- timestamp = filename[5:-5] # Remove 'chat_' and '.json'
232
- # Add to list
233
- chat_files.append({
234
- 'filename': filename,
235
- 'timestamp': timestamp
236
- })
237
 
238
- # Sort by timestamp (newest first)
239
- chat_files.sort(key=lambda x: x['timestamp'], reverse=True)
240
 
241
- logger.info(f"Loaded {len(chat_files)} chats for session {session_id}")
242
- return jsonify({'chats': chat_files})
 
 
 
243
 
244
  except Exception as e:
245
- logger.error(f"Error loading chat list: {str(e)}")
246
- return jsonify({
247
- 'error': 'Désolé, une erreur est survenue lors du chargement des conversations.'
248
- }), 500
249
 
250
- @app.route('/api/load-chat/<filename>', methods=['GET'])
251
- @session_required
252
- def load_chat(filename):
253
- """Load a specific chat history file."""
 
 
 
 
254
  try:
255
- session_id = session.get('session_id')
256
-
257
- # Load from session-specific directory
258
- session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
259
- filepath = os.path.join(session_dir, secure_filename(filename))
260
 
261
- if not os.path.exists(filepath):
262
- return jsonify({'error': 'Conversation introuvable.'}), 404
263
 
264
- with open(filepath, 'r', encoding='utf-8') as f:
265
- chat_history = json.load(f)
266
 
267
- logger.info(f"Loaded chat {filename} for session {session_id}")
268
- return jsonify({'history': chat_history})
269
 
270
  except Exception as e:
271
- logger.error(f"Error loading chat file: {str(e)}")
272
- return jsonify({
273
- 'error': 'Désolé, une erreur est survenue lors du chargement de la conversation.'
274
- }), 500
275
-
276
- if __name__ == '__main__':
277
- app.run(host='0.0.0.0', port=5000, debug=True)
 
1
  import os
2
+ import asyncio
3
+ import wave
4
+ import tempfile
5
  import logging
 
6
  import json
7
+ import time
8
+ from flask import Flask, render_template, request, jsonify, send_file, stream_with_context, Response
9
+ from google import genai
10
+ import aiohttp
11
+ from pydub import AudioSegment
 
 
12
 
13
  # Configure logging
14
  logging.basicConfig(level=logging.DEBUG)
15
  logger = logging.getLogger(__name__)
16
 
17
+ app = Flask(__name__)
18
+ app.secret_key = os.environ.get("SESSION_SECRET", "default-secret-key")
19
 
20
+ # Configure Gemini API
21
  api_key = os.environ.get("GEMINI_API_KEY")
22
  if not api_key:
23
+ logger.warning("GEMINI_API_KEY not found in environment variables. Using default value for development.")
24
+ api_key = "YOUR_API_KEY" # This will be replaced with env var in production
 
 
 
25
 
26
+ # Define available voices
27
+ AVAILABLE_VOICES = [
28
+ "Puck", "Charon", "Kore", "Fenrir",
29
+ "Aoede", "Leda", "Orus", "Zephyr"
30
+ ]
31
+ language_code="fr-FR"
32
 
33
+ # Global variable to track generation progress
34
+ generation_progress = {
35
+ "status": "idle",
36
+ "current": 0,
37
+ "total": 0,
38
+ "message": ""
39
+ }
 
 
40
 
41
def update_progress(current, total, message, status=None):
    """Replace the global progress tracker with a fresh snapshot.

    Args:
        current: Value stored under the ``"current"`` key.
        total: Value stored under the ``"total"`` key.
        message: Text stored under the ``"message"`` key.
        status: Optional explicit status string. When omitted, the status is
            derived from the counters: ``"in_progress"`` while ``current`` is
            below ``total``, otherwise ``"complete"``. Pass an explicit value
            (for example ``"error"``) when the counters alone would be
            misleading, such as ``current == total == 0`` after a failure.

    Returns:
        The dict that was stored in ``generation_progress``.
    """
    global generation_progress
    if status is None:
        # Historical behaviour: anything that is not strictly below the
        # total counts as finished (this includes the 0/0 case).
        status = "in_progress" if current < total else "complete"
    generation_progress = {
        "status": status,
        "current": current,
        "total": total,
        "message": message,
    }
    return generation_progress
50
async def create_async_enumerate(async_iterator, start=0):
    """Asynchronously yield ``(index, item)`` pairs from an async iterable.

    This is the ``async for`` counterpart of the built-in ``enumerate``.
    Calling the function returns an async generator, so it is used as
    ``async for i, item in create_async_enumerate(source): ...``.

    Args:
        async_iterator: Any object usable in an ``async for`` statement.
        start: Index assigned to the first item (defaults to 0).

    Yields:
        Tuples ``(index, item)`` with ``index`` increasing by one per item.
    """
    index = start
    async for item in async_iterator:
        yield index, item
        index += 1
59
 
60
async def generate_speech(text, selected_voice):
    """Synthesize ``text`` through the Gemini live API and save it as a WAV file.

    Args:
        text: Text sent to the model as a single, completed user turn.
        selected_voice: Prebuilt voice name placed in the voice configuration.

    Returns:
        Path of a temporary ``.wav`` file containing the received audio. The
        file is created with ``delete=False``, so the caller is responsible
        for removing it once it is no longer needed.

    Raises:
        Exception: Any error raised while configuring the client, connecting,
            streaming or writing is logged and re-raised unchanged. A
            partially written temp file is removed before re-raising.
    """
    temp_filename = None
    try:
        client = genai.Client(api_key=api_key)
        model = "gemini-2.0-flash-live-001"

        # Configure the voice settings
        speech_config = genai.types.SpeechConfig(
            language_code=language_code,
            voice_config=genai.types.VoiceConfig(
                prebuilt_voice_config=genai.types.PrebuiltVoiceConfig(
                    voice_name=selected_voice
                )
            )
        )

        config = genai.types.LiveConnectConfig(
            response_modalities=["AUDIO"],
            speech_config=speech_config
        )

        # Reserve a temp file name; the handle is closed immediately so the
        # wave module can reopen the path for writing.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            temp_filename = tmp_file.name

        async with client.aio.live.connect(model=model, config=config) as session:
            # The context manager guarantees the WAV header is finalized and
            # the handle released even if streaming fails midway.
            with wave.open(temp_filename, "wb") as wf:
                # Mono, 2-byte samples at 24000 Hz.
                # NOTE(review): presumably matches the PCM format returned by
                # the live API - confirm against the API documentation.
                wf.setnchannels(1)
                wf.setsampwidth(2)
                wf.setframerate(24000)

                # Send the text to Gemini
                await session.send_client_content(
                    turns={"role": "user", "parts": [{"text": text}]},
                    turn_complete=True
                )

                # Append every audio chunk received for this turn
                async for response in session.receive():
                    if response.data is not None:
                        wf.writeframes(response.data)

        return temp_filename

    except Exception as e:
        logger.error(f"Error generating speech: {str(e)}")
        # Do not leave a half-written file behind in the temp directory.
        if temp_filename is not None:
            try:
                os.remove(temp_filename)
            except OSError:
                pass
        raise
110
 
111
  @app.route('/')
 
112
  def index():
113
+ """Render the main page."""
114
+ return render_template('index.html', voices=AVAILABLE_VOICES)
115
 
116
@app.route('/generate', methods=['POST'])
async def generate():
    """Generate speech for the posted text and return the audio URL.

    Expects a JSON object with a ``text`` string and an optional ``voice``
    (defaults to ``'Kore'``).

    Returns:
        200 with ``status``, ``message`` and ``audioUrl`` on success;
        400 when the body is not a JSON object, the text is missing/blank,
        or the voice is not in ``AVAILABLE_VOICES``;
        500 with the error message when generation fails.
    """
    try:
        # silent=True yields None for a missing or malformed JSON body instead
        # of raising, so bad requests are answered with 400 rather than 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            return jsonify({"error": "Text is required"}), 400

        text = data.get('text', '')
        voice = data.get('voice', 'Kore')  # Default voice

        if not isinstance(text, str) or not text.strip():
            return jsonify({"error": "Text is required"}), 400

        if voice not in AVAILABLE_VOICES:
            return jsonify({"error": "Invalid voice selection"}), 400

        # Generate the speech
        audio_file = await generate_speech(text, voice)

        return jsonify({
            "status": "success",
            "message": "Audio generated successfully",
            "audioUrl": f"/audio/{os.path.basename(audio_file)}"
        })

    except Exception as e:
        logger.error(f"Error in generate endpoint: {str(e)}")
        return jsonify({"error": str(e)}), 500
 
 
142
 
143
@app.route('/audio/<filename>')
def get_audio(filename):
    """Serve a generated WAV file from the system temp directory.

    Args:
        filename: File name taken from the URL.

    Returns:
        The file as ``audio/wav`` (inline); 404 when the name does not
        resolve to an existing ``.wav`` file located directly inside the temp
        directory; 500 with the error message on unexpected failures.
    """
    try:
        temp_dir = os.path.realpath(tempfile.gettempdir())
        file_path = os.path.realpath(os.path.join(temp_dir, filename))

        # The name comes straight from the URL: only serve regular .wav files
        # that really live in the temp directory (rejects traversal, symlinks
        # pointing elsewhere, and unrelated temp files).
        is_allowed = (
            os.path.dirname(file_path) == temp_dir
            and file_path.lower().endswith(".wav")
            and os.path.isfile(file_path)
        )
        if not is_allowed:
            return jsonify({"error": "Audio file not found"}), 404

        return send_file(file_path, mimetype="audio/wav", as_attachment=False)

    except Exception as e:
        logger.error(f"Error serving audio file: {str(e)}")
        return jsonify({"error": str(e)}), 500
 
 
158
 
159
@app.route('/generate-podcast', methods=['POST'])
async def generate_podcast_route():
    """Generate one combined podcast WAV from a posted scenario.

    Expects a JSON object with a non-empty ``characters`` list; each entry is
    read for ``name``, ``voice`` and ``text``. Speech is generated per entry,
    the clips are concatenated with 500 ms of silence after each one, and the
    result is exported to a temp ``.wav`` file.

    Returns:
        200 with ``status``, ``message`` and ``audioUrl`` on success;
        400 when the body has no usable ``characters`` list;
        500 with the error message when generation fails.
    """
    global generation_progress
    audio_segments = []  # temp files produced per character; removed in finally

    try:
        scenario = request.get_json(silent=True)
        characters = scenario.get('characters') if isinstance(scenario, dict) else None
        if not isinstance(characters, list) or not characters:
            return jsonify({"error": "Scenario must contain a non-empty 'characters' list"}), 400

        total_characters = len(characters)
        update_progress(0, total_characters, f"Préparation du podcast avec {total_characters} personnages...")

        # Generate audio for each character
        for idx, character in enumerate(characters):
            character_name = character.get('name', 'Unknown')
            voice = character.get('voice', 'Kore')
            text = character.get('text', '')

            update_progress(idx, total_characters, f"Génération de l'audio pour {character_name} ({idx+1}/{total_characters})...")

            if voice not in AVAILABLE_VOICES:
                logger.warning(f"Voice {voice} not available. Using default voice Kore for {character_name}.")
                voice = 'Kore'

            # Generate speech for this character
            audio_segments.append(await generate_speech(text, voice))

        update_progress(total_characters, total_characters, "Assemblage des segments audio...")

        # Combine all audio segments into one file
        combined = AudioSegment.empty()
        for audio_file in audio_segments:
            combined += AudioSegment.from_wav(audio_file)
            # Add a short silence between segments (500ms)
            combined += AudioSegment.silent(duration=500)

        # Reserve an output name, then export the combined audio to it
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
            output_filename = output_file.name
        combined.export(output_filename, format="wav")

        update_progress(total_characters + 1, total_characters + 1, "Podcast généré avec succès!")

        return jsonify({
            "status": "success",
            "message": "Podcast generated successfully",
            "audioUrl": f"/audio/{os.path.basename(output_filename)}"
        })

    except Exception as e:
        logger.error(f"Error in generate-podcast endpoint: {str(e)}")
        # Set the status explicitly: deriving it from 0/0 counters would
        # report a failed run as "complete".
        generation_progress = {
            "status": "error",
            "current": 0,
            "total": 0,
            "message": f"Erreur: {str(e)}"
        }
        return jsonify({"error": str(e)}), 500

    finally:
        # The per-character clips are only intermediates; drop them whether
        # or not the run succeeded so the temp directory does not fill up.
        for segment_path in audio_segments:
            try:
                os.remove(segment_path)
            except OSError:
                pass
 
224
 
225
@app.route('/generation-progress')
def get_generation_progress():
    """Return the current podcast-generation progress tracker as JSON."""
    progress_snapshot = generation_progress
    return jsonify(progress_snapshot)
229
+
230
@app.route('/download/<filename>')
def download_audio(filename):
    """Send a generated WAV file from the temp directory as a download.

    Args:
        filename: File name taken from the URL.

    Returns:
        The file as an ``audio/wav`` attachment named ``gemini_podcast.wav``;
        404 when the name does not resolve to an existing ``.wav`` file
        located directly inside the temp directory; 500 with the error
        message on unexpected failures.
    """
    try:
        temp_dir = os.path.realpath(tempfile.gettempdir())
        file_path = os.path.realpath(os.path.join(temp_dir, filename))

        # The name comes straight from the URL: only hand out regular .wav
        # files that really live in the temp directory.
        is_allowed = (
            os.path.dirname(file_path) == temp_dir
            and file_path.lower().endswith(".wav")
            and os.path.isfile(file_path)
        )
        if not is_allowed:
            return jsonify({"error": "Audio file not found"}), 404

        # The same download name is used for every file, podcast or not.
        download_name = "gemini_podcast.wav"

        return send_file(file_path, mimetype="audio/wav", as_attachment=True,
                         download_name=download_name)

    except Exception as e:
        logger.error(f"Error downloading audio file: {str(e)}")
        return jsonify({"error": str(e)}), 500
 
 
 
 
 
main.py CHANGED
@@ -1,4 +1,4 @@
1
- from app import app # noqa: F401
2
 
3
  if __name__ == "__main__":
4
  app.run(host="0.0.0.0", port=5000, debug=True)
 
1
import os

from app import app

if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows arbitrary code execution for anyone who
    # can reach the port. Opt in locally with FLASK_DEBUG=1.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes", "on"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)