Upload 2 files
Browse files
app.py
CHANGED
@@ -1,277 +1,248 @@
|
|
1 |
import os
|
|
|
|
|
|
|
2 |
import logging
|
3 |
-
import base64
|
4 |
import json
|
5 |
-
import
|
6 |
-
import
|
7 |
-
from
|
8 |
-
|
9 |
-
from
|
10 |
-
from dotenv import load_dotenv
|
11 |
-
from werkzeug.utils import secure_filename
|
12 |
|
13 |
# Configure logging
|
14 |
logging.basicConfig(level=logging.DEBUG)
|
15 |
logger = logging.getLogger(__name__)
|
16 |
|
17 |
-
|
18 |
-
|
19 |
|
20 |
-
# Configure
|
21 |
api_key = os.environ.get("GEMINI_API_KEY")
|
22 |
if not api_key:
|
23 |
-
logger.warning("GEMINI_API_KEY not found in environment variables")
|
24 |
-
|
25 |
-
logger.info("GEMINI_API_KEY found. API configured successfully.")
|
26 |
-
|
27 |
-
genai.configure(api_key=api_key)
|
28 |
|
29 |
-
#
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
34 |
|
35 |
-
#
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
return f(*args, **kwargs)
|
43 |
-
return decorated_function
|
44 |
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
'top_p': 0.9, # Diverse output but not too random
|
54 |
-
'top_k': 40, # Reasonable range of tokens to consider
|
55 |
-
'max_output_tokens': 2048 # Allow longer responses
|
56 |
}
|
57 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
58 |
|
59 |
-
|
60 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
|
62 |
@app.route('/')
|
63 |
-
@session_required
|
64 |
def index():
|
65 |
-
"""Render the
|
66 |
-
return render_template('index.html')
|
67 |
|
68 |
-
@app.route('/
|
69 |
-
|
70 |
-
|
71 |
-
"""Process chat messages and get responses from Gemini API."""
|
72 |
try:
|
73 |
data = request.json
|
74 |
-
|
75 |
-
|
76 |
-
image_data = data.get('image', None)
|
77 |
|
78 |
-
if not
|
79 |
-
return jsonify({
|
80 |
|
81 |
-
|
82 |
-
|
83 |
-
logger.info(f"Received chat request from session {session_id}. Message length: {len(user_message)}")
|
84 |
|
85 |
-
#
|
86 |
-
|
87 |
-
for msg in chat_history[-15:]: # Use the last 15 messages for more context
|
88 |
-
role = "user" if msg['sender'] == 'user' else "model"
|
89 |
-
formatted_history.append({"role": role, "parts": [msg['text']]})
|
90 |
|
91 |
-
# Handle image processing if images are included
|
92 |
-
if image_data:
|
93 |
-
try:
|
94 |
-
parts = []
|
95 |
-
|
96 |
-
# Process single image or multiple images
|
97 |
-
if isinstance(image_data, list):
|
98 |
-
# Handle multiple images
|
99 |
-
for img in image_data:
|
100 |
-
if isinstance(img, str) and ',' in img:
|
101 |
-
# Extract the base64 part after the comma
|
102 |
-
image_base64 = img.split(',')[1]
|
103 |
-
# Create image part
|
104 |
-
image = genai.types.Part.from_data(
|
105 |
-
data=base64.b64decode(image_base64),
|
106 |
-
mime_type="image/jpeg"
|
107 |
-
)
|
108 |
-
parts.append(image)
|
109 |
-
|
110 |
-
# Save each image
|
111 |
-
session_id = session.get('session_id')
|
112 |
-
session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
|
113 |
-
os.makedirs(session_dir, exist_ok=True)
|
114 |
-
|
115 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
116 |
-
filename = secure_filename(f"image_{timestamp}_{len(parts)}.jpg")
|
117 |
-
filepath = os.path.join(session_dir, filename)
|
118 |
-
with open(filepath, "wb") as f:
|
119 |
-
f.write(base64.b64decode(image_base64))
|
120 |
-
else:
|
121 |
-
# Handle single image
|
122 |
-
image_base64 = image_data.split(',')[1]
|
123 |
-
image = genai.types.Part.from_data(
|
124 |
-
data=base64.b64decode(image_base64),
|
125 |
-
mime_type="image/jpeg"
|
126 |
-
)
|
127 |
-
parts.append(image)
|
128 |
-
|
129 |
-
# Save image with timestamp in the session directory
|
130 |
-
session_id = session.get('session_id')
|
131 |
-
session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
|
132 |
-
os.makedirs(session_dir, exist_ok=True)
|
133 |
-
|
134 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
135 |
-
filename = secure_filename(f"image_{timestamp}.jpg")
|
136 |
-
filepath = os.path.join(session_dir, filename)
|
137 |
-
with open(filepath, "wb") as f:
|
138 |
-
f.write(base64.b64decode(image_base64))
|
139 |
-
|
140 |
-
# Add text message if provided
|
141 |
-
if user_message:
|
142 |
-
parts.append(user_message)
|
143 |
-
|
144 |
-
# Generate response using vision model
|
145 |
-
response = vision_model.generate_content(parts)
|
146 |
-
return jsonify({'response': response.text})
|
147 |
-
|
148 |
-
except Exception as img_error:
|
149 |
-
logger.error(f"Error processing image: {str(img_error)}")
|
150 |
-
return jsonify({
|
151 |
-
'error': 'Désolé, une erreur est survenue lors du traitement de l\'image. Veuillez réessayer.'
|
152 |
-
}), 500
|
153 |
-
else:
|
154 |
-
# Text-only processing
|
155 |
-
# Create a chat session with history
|
156 |
-
chat = model.start_chat(history=formatted_history)
|
157 |
-
|
158 |
-
# Generate response
|
159 |
-
response = chat.send_message(user_message)
|
160 |
-
|
161 |
-
# Log successful response
|
162 |
-
logger.info(f"Generated response successfully. Response length: {len(response.text)}")
|
163 |
-
|
164 |
-
# Return the response
|
165 |
-
return jsonify({'response': response.text})
|
166 |
-
|
167 |
-
except genai.types.generation_types.BlockedPromptException as be:
|
168 |
-
logger.warning(f"Content blocked: {str(be)}")
|
169 |
return jsonify({
|
170 |
-
|
171 |
-
|
172 |
-
|
|
|
|
|
173 |
except Exception as e:
|
174 |
-
logger.error(f"Error in
|
175 |
-
return jsonify({
|
176 |
-
'error': 'Désolé, j\'ai rencontré une erreur lors du traitement de votre demande. Veuillez réessayer.'
|
177 |
-
}), 500
|
178 |
|
179 |
-
@app.route('/
|
180 |
-
|
181 |
-
|
182 |
-
"""Save the current chat history."""
|
183 |
try:
|
184 |
-
|
185 |
-
|
186 |
-
# Create session-specific directory
|
187 |
-
session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
|
188 |
-
os.makedirs(session_dir, exist_ok=True)
|
189 |
-
|
190 |
-
data = request.json
|
191 |
-
chat_history = data.get('history', [])
|
192 |
-
|
193 |
-
if not chat_history:
|
194 |
-
return jsonify({'error': 'Aucune conversation à sauvegarder.'}), 400
|
195 |
-
|
196 |
-
# Generate filename with timestamp
|
197 |
-
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
|
198 |
-
filename = f"chat_{timestamp}.json"
|
199 |
-
filepath = os.path.join(session_dir, filename)
|
200 |
|
201 |
-
|
202 |
-
|
203 |
-
json.dump(chat_history, f, ensure_ascii=False, indent=2)
|
204 |
|
205 |
-
return
|
206 |
|
207 |
except Exception as e:
|
208 |
-
logger.error(f"Error
|
209 |
-
return jsonify({
|
210 |
-
'error': 'Désolé, une erreur est survenue lors de la sauvegarde de la conversation.'
|
211 |
-
}), 500
|
212 |
|
213 |
-
@app.route('/
|
214 |
-
|
215 |
-
|
216 |
-
"""Get a list of saved chat files for current session."""
|
217 |
try:
|
218 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
219 |
|
220 |
-
#
|
221 |
-
|
222 |
|
223 |
-
|
224 |
-
|
225 |
-
|
|
|
|
|
226 |
|
227 |
-
|
228 |
-
|
229 |
-
|
230 |
-
|
231 |
-
timestamp = filename[5:-5] # Remove 'chat_' and '.json'
|
232 |
-
# Add to list
|
233 |
-
chat_files.append({
|
234 |
-
'filename': filename,
|
235 |
-
'timestamp': timestamp
|
236 |
-
})
|
237 |
|
238 |
-
|
239 |
-
chat_files.sort(key=lambda x: x['timestamp'], reverse=True)
|
240 |
|
241 |
-
|
242 |
-
|
|
|
|
|
|
|
243 |
|
244 |
except Exception as e:
|
245 |
-
logger.error(f"Error
|
246 |
-
|
247 |
-
|
248 |
-
}), 500
|
249 |
|
250 |
-
@app.route('/
|
251 |
-
|
252 |
-
|
253 |
-
|
|
|
|
|
|
|
|
|
254 |
try:
|
255 |
-
|
256 |
-
|
257 |
-
# Load from session-specific directory
|
258 |
-
session_dir = os.path.join(app.config['UPLOAD_FOLDER'], session_id)
|
259 |
-
filepath = os.path.join(session_dir, secure_filename(filename))
|
260 |
|
261 |
-
if not os.path.exists(
|
262 |
-
return jsonify({
|
263 |
|
264 |
-
|
265 |
-
|
266 |
|
267 |
-
|
268 |
-
|
269 |
|
270 |
except Exception as e:
|
271 |
-
logger.error(f"Error
|
272 |
-
return jsonify({
|
273 |
-
'error': 'Désolé, une erreur est survenue lors du chargement de la conversation.'
|
274 |
-
}), 500
|
275 |
-
|
276 |
-
if __name__ == '__main__':
|
277 |
-
app.run(host='0.0.0.0', port=5000, debug=True)
|
|
|
1 |
import os
|
2 |
+
import asyncio
|
3 |
+
import wave
|
4 |
+
import tempfile
|
5 |
import logging
|
|
|
6 |
import json
|
7 |
+
import time
|
8 |
+
from flask import Flask, render_template, request, jsonify, send_file, stream_with_context, Response
|
9 |
+
from google import genai
|
10 |
+
import aiohttp
|
11 |
+
from pydub import AudioSegment
|
|
|
|
|
12 |
|
13 |
# Configure logging
|
14 |
logging.basicConfig(level=logging.DEBUG)
|
15 |
logger = logging.getLogger(__name__)
|
16 |
|
17 |
+
# Flask application object shared by every route in this module.
app = Flask(__name__)
# NOTE(review): the fallback below is a fixed, publicly visible value; set
# SESSION_SECRET in any real deployment.
app.secret_key = os.environ.get("SESSION_SECRET", "default-secret-key")

# Gemini API key: read from the environment, falling back to a placeholder.
api_key = os.environ.get("GEMINI_API_KEY")
if not api_key:
    logger.warning("GEMINI_API_KEY not found in environment variables. Using default value for development.")
    # NOTE(review): this placeholder is not a usable key - presumably calls
    # to Gemini will fail until the variable is set; confirm.
    api_key = "YOUR_API_KEY"

# Prebuilt voice names accepted by the endpoints in this module.
AVAILABLE_VOICES = [
    "Puck",
    "Charon",
    "Kore",
    "Fenrir",
    "Aoede",
    "Leda",
    "Orus",
    "Zephyr",
]
# Language code passed to the speech configuration.
language_code = "fr-FR"

# Module-level snapshot of the current generation progress.
generation_progress = dict(
    status="idle",
    current=0,
    total=0,
    message="",
)
|
|
|
|
|
40 |
|
41 |
+
def update_progress(current, total, message, status=None):
    """Replace the global progress tracker with a fresh snapshot.

    Args:
        current: Number of steps finished so far.
        total: Number of steps expected overall.
        message: Human-readable description of the current step.
        status: Optional explicit status string. When omitted, the status is
            derived from the counters: ``"in_progress"`` while
            ``current < total``, otherwise ``"complete"``.
    """
    global generation_progress
    if status is None:
        # With total == 0 the derived status is "complete"; callers that
        # report a failure can pass an explicit status instead.
        status = "in_progress" if current < total else "complete"
    generation_progress = {
        "status": status,
        "current": current,
        "total": total,
        "message": message,
    }
|
50 |
+
def create_async_enumerate(async_iterator, start=0):
    """Return an async generator yielding ``(index, item)`` pairs.

    This is the asynchronous counterpart of the built-in ``enumerate``,
    which cannot consume async iterators.

    Args:
        async_iterator: Any object usable in an ``async for`` loop.
        start: Index assigned to the first item (defaults to 0).

    Returns:
        An async generator of ``(index, item)`` tuples.
    """
    async def _numbered():
        # The counter lives inside the generator, so no closure state is
        # shared with the enclosing call.
        index = start
        async for item in async_iterator:
            yield index, item
            index += 1

    return _numbered()
|
59 |
|
60 |
+
async def generate_speech(text, selected_voice):
    """Generate speech from text using Gemini and store it as a WAV file.

    Args:
        text: Text sent to the model as a single user turn.
        selected_voice: Name of the prebuilt voice to request.

    Returns:
        Path of a temporary ``.wav`` file (header set to mono, 2-byte
        samples, 24000 Hz) containing the received audio data. The file is
        created with ``delete=False``, so the caller owns its cleanup.

    Raises:
        Exception: Any error raised while configuring the client, streaming
            or writing the file is logged and re-raised unchanged.
    """
    temp_filename = None
    try:
        client = genai.Client(api_key=api_key)
        model = "gemini-2.0-flash-live-001"

        # Configure the voice settings
        speech_config = genai.types.SpeechConfig(
            language_code=language_code,
            voice_config=genai.types.VoiceConfig(
                prebuilt_voice_config=genai.types.PrebuiltVoiceConfig(
                    voice_name=selected_voice
                )
            )
        )

        config = genai.types.LiveConnectConfig(
            response_modalities=["AUDIO"],
            speech_config=speech_config
        )

        # Reserve a temporary path; the file is reopened below through the
        # wave module, which writes the WAV header.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp_file:
            temp_filename = tmp_file.name

        async with client.aio.live.connect(model=model, config=config) as session:
            # The context manager closes the WAV writer even when sending or
            # receiving raises, so the file handle never leaks.
            with wave.open(temp_filename, "wb") as wf:
                wf.setnchannels(1)
                wf.setsampwidth(2)
                wf.setframerate(24000)

                # Send the text to Gemini
                await session.send_client_content(
                    turns={"role": "user", "parts": [{"text": text}]},
                    turn_complete=True
                )

                # Receive the audio data and write it to the file
                async for response in session.receive():
                    if response.data is not None:
                        wf.writeframes(response.data)

        return temp_filename

    except Exception as e:
        logger.error(f"Error generating speech: {str(e)}")
        # Best-effort removal of the partial file so failed requests do not
        # accumulate in the temp directory; a cleanup failure must not mask
        # the original error.
        if temp_filename is not None:
            try:
                os.remove(temp_filename)
            except OSError:
                pass
        raise
|
110 |
|
111 |
@app.route('/')
def index():
    """Render ``index.html`` for the root URL with the available voices."""
    page = render_template('index.html', voices=AVAILABLE_VOICES)
    return page
|
115 |
|
116 |
+
@app.route('/generate', methods=['POST'])
async def generate():
    """Generate speech for the text posted as JSON and return its audio URL.

    Reads ``text`` and an optional ``voice`` (default ``'Kore'``) from the
    JSON request body. Responds with 400 when the text is missing or blank,
    or when the voice is not in ``AVAILABLE_VOICES``; with 500 when
    generation fails; otherwise with a JSON payload whose ``audioUrl`` is
    ``/audio/<basename of the generated file>``.
    """
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body, so such requests get the 400 below rather
        # than falling into the generic 500 handler.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            data = {}

        text = data.get('text', '')
        voice = data.get('voice', 'Kore')  # Default voice

        if not isinstance(text, str) or not text.strip():
            return jsonify({"error": "Text is required"}), 400

        if voice not in AVAILABLE_VOICES:
            return jsonify({"error": "Invalid voice selection"}), 400

        # Generate the speech
        audio_file = await generate_speech(text, voice)

        return jsonify({
            "status": "success",
            "message": "Audio generated successfully",
            "audioUrl": f"/audio/{os.path.basename(audio_file)}"
        })

    except Exception as e:
        logger.error(f"Error in generate endpoint: {str(e)}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
142 |
|
143 |
+
@app.route('/audio/<filename>')
def get_audio(filename):
    """Serve a generated WAV file from the system temp directory inline.

    Args:
        filename: Name of the file taken from the URL.

    Returns:
        The file as ``audio/wav``, a 404 JSON error when the name is not a
        plain ``*.wav`` file name or no such file exists, or a 500 JSON
        error when serving fails.
    """
    try:
        # The name comes straight from the URL: accept only a bare *.wav
        # file name so the route cannot be used to read unrelated files
        # from the temp directory or to step outside it.
        is_plain_name = os.path.basename(filename) == filename
        if not is_plain_name or not filename.lower().endswith(".wav"):
            return jsonify({"error": "Audio file not found"}), 404

        temp_dir = tempfile.gettempdir()
        file_path = os.path.join(temp_dir, filename)

        # isfile (not exists) so a directory never reaches send_file.
        if not os.path.isfile(file_path):
            return jsonify({"error": "Audio file not found"}), 404

        return send_file(file_path, mimetype="audio/wav", as_attachment=False)

    except Exception as e:
        logger.error(f"Error serving audio file: {str(e)}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
158 |
|
159 |
+
@app.route('/generate-podcast', methods=['POST'])
async def generate_podcast_route():
    """Generate one combined podcast WAV from a posted scenario.

    Reads a ``characters`` list from the JSON request body; each entry may
    carry ``name``, ``voice`` and ``text``. Speech is generated for each
    character in order, the clips are concatenated with 500 ms of silence
    after each one, and the result is exported to a temporary ``.wav`` file.
    Progress is published through ``update_progress`` along the way.

    Returns:
        A JSON payload whose ``audioUrl`` is ``/audio/<basename of the
        combined file>``, or a 500 JSON error when any step fails.
    """
    # Declared outside the try block so the cleanup in ``finally`` always
    # sees the clips generated so far.
    audio_segments = []
    try:
        # silent=True yields None instead of raising on a missing or
        # malformed JSON body; treat anything that is not an object as an
        # empty scenario.
        scenario = request.get_json(silent=True)
        if not isinstance(scenario, dict):
            scenario = {}

        # Generate audio for each character ("or []" also covers an
        # explicit null value).
        characters = scenario.get('characters') or []
        total_characters = len(characters)
        update_progress(0, total_characters, f"Préparation du podcast avec {total_characters} personnages...")

        for idx, character in enumerate(characters):
            character_name = character.get('name', 'Unknown')
            voice = character.get('voice', 'Kore')
            text = character.get('text', '')

            update_progress(idx, total_characters, f"Génération de l'audio pour {character_name} ({idx+1}/{total_characters})...")

            if voice not in AVAILABLE_VOICES:
                logger.warning(f"Voice {voice} not available. Using default voice Kore for {character_name}.")
                voice = 'Kore'

            # Generate speech for this character
            audio_file = await generate_speech(text, voice)
            audio_segments.append(audio_file)

        update_progress(total_characters, total_characters, "Assemblage des segments audio...")

        # Combine all audio segments into one file
        combined = AudioSegment.empty()

        for audio_file in audio_segments:
            segment = AudioSegment.from_wav(audio_file)
            combined += segment
            # Add a short silence after each segment (500ms)
            combined += AudioSegment.silent(duration=500)

        # Export the combined audio
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as output_file:
            output_filename = output_file.name
        combined.export(output_filename, format="wav")

        update_progress(total_characters + 1, total_characters + 1, "Podcast généré avec succès!")

        return jsonify({
            "status": "success",
            "message": "Podcast generated successfully",
            "audioUrl": f"/audio/{os.path.basename(output_filename)}"
        })

    except Exception as e:
        logger.error(f"Error in generate-podcast endpoint: {str(e)}")
        # NOTE(review): with current == total == 0 the tracker derives the
        # status "complete", so a failure is only visible in the message.
        update_progress(0, 0, f"Erreur: {str(e)}")
        return jsonify({"error": str(e)}), 500

    finally:
        # The per-character clips are only inputs to the combined file and
        # their names are never returned to the client, so remove them
        # instead of letting them pile up in the temp directory.
        for segment_path in audio_segments:
            try:
                os.remove(segment_path)
            except OSError as cleanup_error:
                logger.warning(f"Could not remove temporary file {segment_path}: {cleanup_error}")
|
|
|
224 |
|
225 |
+
@app.route('/generation-progress')
def get_generation_progress():
    """Return the current generation progress snapshot as JSON."""
    snapshot = generation_progress
    return jsonify(snapshot)
|
229 |
+
|
230 |
+
@app.route('/download/<filename>')
def download_audio(filename):
    """Send a generated WAV file from the system temp directory as a download.

    Args:
        filename: Name of the file taken from the URL.

    Returns:
        The file as an ``audio/wav`` attachment named ``gemini_podcast.wav``,
        a 404 JSON error when the name is not a plain ``*.wav`` file name or
        no such file exists, or a 500 JSON error when sending fails.
    """
    try:
        # The name comes straight from the URL: accept only a bare *.wav
        # file name so the route cannot be used to read unrelated files
        # from the temp directory or to step outside it.
        is_plain_name = os.path.basename(filename) == filename
        if not is_plain_name or not filename.lower().endswith(".wav"):
            return jsonify({"error": "Audio file not found"}), 404

        temp_dir = tempfile.gettempdir()
        file_path = os.path.join(temp_dir, filename)

        # isfile (not exists) so a directory never reaches send_file.
        if not os.path.isfile(file_path):
            return jsonify({"error": "Audio file not found"}), 404

        # Every download gets the same name, whether the file is a podcast
        # or a single speech clip.
        download_name = "gemini_podcast.wav"

        return send_file(file_path, mimetype="audio/wav", as_attachment=True,
                         download_name=download_name)

    except Exception as e:
        logger.error(f"Error downloading audio file: {str(e)}")
        return jsonify({"error": str(e)}), 500
|
|
|
|
|
|
|
|
|
|
main.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
from app import app
|
2 |
|
3 |
if __name__ == "__main__":
|
4 |
app.run(host="0.0.0.0", port=5000, debug=True)
|
|
|
1 |
+
import os

from app import app

if __name__ == "__main__":
    # The server listens on all interfaces, so the interactive debugger must
    # not be on by default: it allows code execution from the browser.
    # Opt in explicitly with FLASK_DEBUG=1 for local development.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").lower() in {"1", "true", "yes"}
    app.run(host="0.0.0.0", port=5000, debug=debug_enabled)
|