PierreBrunelle committed on
Commit
0b74be0
·
verified ·
1 Parent(s): 5736427

Delete src

Browse files
Files changed (2) hide show
  1. src/interface.py +0 -215
  2. src/processor.py +0 -120
src/interface.py DELETED
@@ -1,215 +0,0 @@
1
- import gradio as gr
2
- from .processor import process_document
3
-
4
# Preset configuration for each audio output mode.  Each entry holds the
# UI-selectable style labels plus the default chunking / sampling parameters
# and the system prompt used for the LLM content-rewriting pass.
# NOTE: insertion order matters — the UI builds its Radio choices from
# list(SYNTHESIS_MODES.keys()).
SYNTHESIS_MODES = dict(
    narration=dict(
        description="Simple document narration with clear voice and natural pacing",
        styles=["Technical", "Narrative", "Instructional", "Descriptive"],
        default_temp=0.7,
        default_chunks=300,
        system_prompt="Convert this content into clear narration.",
    ),
    podcast=dict(
        description="Conversational style with engaging tone and dynamic pacing",
        styles=["Casual", "Interview", "Educational", "Commentary"],
        default_temp=0.8,
        default_chunks=400,
        system_prompt="Transform this content into engaging podcast-style speech.",
    ),
    presentation=dict(
        description="Professional presentation style with clear structure",
        styles=["Business", "Academic", "Sales", "Training"],
        default_temp=0.6,
        default_chunks=250,
        system_prompt="Convert this content into a presentation format.",
    ),
    storytelling=dict(
        description="Narrative style with emotional engagement",
        styles=["Dynamic", "Dramatic", "Calm", "Energetic"],
        default_temp=0.9,
        default_chunks=500,
        system_prompt="Transform this content into an engaging story.",
    ),
)
34
-
35
def create_interface():
    """Build and return the Gradio Blocks UI for document-to-audio synthesis.

    Layout, top to bottom: header banner, overview accordions, core settings
    (OpenAI key + PDF upload), output-mode selection, voice/style and
    processing-parameter controls, the process button with a status box, and
    tabbed outputs (per-segment content table + audio player).

    Two callbacks are wired: changing the mode updates the dependent controls,
    and the process button runs `process_document` from `.processor`.
    """
    with gr.Blocks(theme=gr.themes.Base()) as demo:
        # Header banner: logo + title.
        gr.HTML(
            """
            <div style="margin-bottom: 1rem;">
                <img src="https://raw.githubusercontent.com/pixeltable/pixeltable/main/docs/source/data/pixeltable-logo-large.png"
                     alt="Pixeltable" style="max-width: 150px;" />
                <h1>📄 Document to Audio Synthesis 🎧</h1>
            </div>
            """
        )

        # Overview Row: two side-by-side accordions describing the app.
        with gr.Row():
            with gr.Column():
                with gr.Accordion("🎯 What does it do?", open=True):
                    gr.Markdown("""
                    - 📄 Document processing - 🧠 Content transformation
                    - 🎧 Audio synthesis - ⚙️ Multiple output styles
                    """)
            with gr.Column():
                with gr.Accordion("⚡ How does it work?", open=True):
                    gr.Markdown("""
                    1. 📑 **Processing:** Token-based segmentation
                    2. 🔍 **Analysis:** LLM optimization & scripts
                    3. 🎵 **Synthesis:** Multiple voice options
                    """)

        # NOTE(review): this State is initialized once and never read or
        # written by the callbacks below — the mode config is re-derived from
        # mode_select on every event.  Looks vestigial; confirm before removing.
        synthesis_mode = gr.State(SYNTHESIS_MODES["narration"])

        # Main Settings Row
        with gr.Row():
            # Core Settings Column: API key + PDF upload.
            with gr.Column():
                with gr.Accordion("🔑 Core Settings", open=True):
                    with gr.Row():
                        api_key = gr.Textbox(
                            label="OpenAI API Key",
                            placeholder="sk-...",
                            type="password",
                            scale=2
                        )
                        file_input = gr.File(
                            label="PDF Document",
                            file_types=[".pdf"],
                            scale=1
                        )

            # Mode Selection Column: one radio per SYNTHESIS_MODES key.
            with gr.Column():
                with gr.Accordion("🎭 Output Mode", open=True):
                    mode_select = gr.Radio(
                        choices=list(SYNTHESIS_MODES.keys()),
                        value="narration",
                        label="Select Mode",
                        info="Choose output style"
                    )
                    # Description text kept in sync by the update_mode callback.
                    mode_description = gr.Markdown(
                        SYNTHESIS_MODES["narration"]["description"]
                    )

        # Voice and Processing Settings Row
        with gr.Row():
            # Voice Settings Column: TTS voice + mode-dependent style.
            with gr.Column():
                with gr.Accordion("🎛️ Voice & Style", open=True):
                    voice_select = gr.Radio(
                        choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"],
                        value="onyx",
                        label="🎙️ Voice",
                        interactive=True
                    )
                    # Choices are swapped out when the mode changes.
                    style_select = gr.Radio(
                        choices=SYNTHESIS_MODES["narration"]["styles"],
                        value=SYNTHESIS_MODES["narration"]["styles"][0],
                        label="💫 Style",
                        interactive=True
                    )

            # Processing Settings Column: chunking + sampling parameters.
            with gr.Column():
                with gr.Accordion("⚙️ Processing Parameters", open=True):
                    with gr.Row():
                        # Defaults track the selected mode via update_mode.
                        chunk_size = gr.Slider(
                            minimum=100, maximum=1000,
                            value=SYNTHESIS_MODES["narration"]["default_chunks"],
                            step=50,
                            label="📏 Chunk Size"
                        )
                        temperature = gr.Slider(
                            minimum=0, maximum=1,
                            value=SYNTHESIS_MODES["narration"]["default_temp"],
                            step=0.1,
                            label="🌡️ Temperature"
                        )
                        max_tokens = gr.Slider(
                            minimum=100, maximum=1000,
                            value=300,
                            step=50,
                            label="📊 Tokens"
                        )

        # Process Button Row
        with gr.Row():
            process_btn = gr.Button("🚀 Generate Audio", variant="primary", scale=2)
            status_output = gr.Textbox(label="📋 Status", scale=1)

        # Output Section: content table + audio player in tabs.
        with gr.Tabs():
            with gr.TabItem("📝 Content"):
                output_table = gr.Dataframe(
                    headers=["🔍 Segment", "📄 Content", "🎭 Script"],
                    wrap=True
                )
            with gr.TabItem("🎧 Audio"):
                with gr.Row():
                    with gr.Column(scale=2):
                        audio_output = gr.Audio(
                            label="🔊 Synthesized Audio",
                            type="filepath",
                            show_download_button=True
                        )
                    with gr.Column(scale=1):
                        with gr.Accordion("📚 Quick Tips", open=True):
                            gr.Markdown("""
                            - 🎯 Lower temperature = more consistent
                            - 📏 Smaller chunks = more precise
                            - 🎙️ Try different voices for best fit
                            - 💫 Match style to content type
                            """)

        # Footer with project links.
        gr.HTML(
            """
            <div style="text-align: center; margin-top: 1rem; padding-top: 1rem; border-top: 1px solid #ccc;">
                <p style="margin: 0; color: #666; font-size: 0.8em;">
                    🚀 Powered by <a href="https://github.com/pixeltable/pixeltable" target="_blank" style="color: #F25022; text-decoration: none;">Pixeltable</a>
                    | 📚 <a href="https://docs.pixeltable.io" target="_blank" style="color: #666;">Docs</a>
                    | 🤗 <a href="https://huggingface.co/spaces/Pixeltable/document-to-audio-synthesis" target="_blank" style="color: #666;">HF Space</a>
                </p>
            </div>
            """
        )

        def update_mode(mode_name: str):
            """Re-sync style choices, chunk/temperature defaults, and the
            description markdown when the output mode radio changes."""
            mode = SYNTHESIS_MODES[mode_name]
            return (
                gr.update(choices=mode["styles"], value=mode["styles"][0]),
                gr.update(value=mode["default_chunks"]),
                gr.update(value=mode["default_temp"]),
                mode["description"]
            )

        mode_select.change(
            update_mode,
            inputs=[mode_select],
            outputs=[style_select, chunk_size, temperature, mode_description]
        )

        def update_interface(pdf_file, api_key, mode_name, voice, style, chunk_size, temperature, max_tokens):
            """Resolve the selected mode's system prompt and delegate the
            whole pipeline to process_document."""
            mode = SYNTHESIS_MODES[mode_name]
            return process_document(
                pdf_file=pdf_file,
                api_key=api_key,
                voice_choice=voice,
                style_choice=style,
                chunk_size=chunk_size,
                temperature=temperature,
                max_tokens=max_tokens,
                system_prompt=mode["system_prompt"]
            )

        process_btn.click(
            update_interface,
            inputs=[
                file_input, api_key, mode_select, voice_select, style_select,
                chunk_size, temperature, max_tokens
            ],
            outputs=[output_table, audio_output, status_output]
        )

    return demo
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/processor.py DELETED
@@ -1,120 +0,0 @@
1
- import pixeltable as pxt
2
- from pixeltable.iterators import DocumentSplitter
3
- from pixeltable.functions import openai
4
- import os
5
- import requests
6
- import tempfile
7
- import gradio as gr
8
-
9
def process_document(pdf_file, api_key, voice_choice, style_choice, chunk_size, temperature, max_tokens, system_prompt, progress=gr.Progress()):
    """Run the PDF -> LLM content -> audio-script -> TTS pipeline.

    Args:
        pdf_file: uploaded file object (Gradio File); only ``.name`` (its path) is used.
        api_key: OpenAI API key; exported to the environment for pixeltable's
            openai functions and used directly for the TTS HTTP call.
        voice_choice: OpenAI TTS voice name (e.g. "onyx").
        style_choice: style label interpolated into the script-generation prompt.
        chunk_size: token limit per document chunk.
        temperature: sampling temperature for both LLM passes.
        max_tokens: completion-token cap for both LLM passes.
        system_prompt: mode-specific system prompt for the first LLM pass.
        progress: Gradio progress tracker (the default-argument form is the
            documented Gradio injection pattern, not an accidental mutable default).

    Returns:
        (display_rows, audio_path, status_message); on any failure returns
        (None, None, "Error: ...") so all three Gradio outputs stay populated.
    """
    try:
        # pixeltable's openai functions read the key from the environment.
        os.environ['OPENAI_API_KEY'] = api_key

        progress(0.1, desc="Initializing...")
        # Rebuild the workspace from scratch so repeated runs don't collide.
        pxt.drop_dir('document_audio', force=True)
        pxt.create_dir('document_audio')

        docs = pxt.create_table(
            'document_audio.documents',
            {
                'document': pxt.Document,
                'voice': pxt.String,
                'style': pxt.String,
                'mode_prompt': pxt.String
            }
        )

        progress(0.2, desc="Processing document...")
        docs.insert([{
            'document': pdf_file.name,
            'voice': voice_choice,
            'style': style_choice,
            'mode_prompt': system_prompt
        }])

        # Token-based segmentation of the document into processable chunks.
        chunks = pxt.create_view(
            'document_audio.chunks',
            docs,
            iterator=DocumentSplitter.create(
                document=docs.document,
                separators='token_limit',
                limit=chunk_size
            )
        )

        progress(0.4, desc="Text processing...")
        # Pass 1: rewrite each chunk with the mode-specific system prompt.
        chunks['content_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': docs.mode_prompt  # Use the mode-specific prompt
                },
                {'role': 'user', 'content': chunks.text}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )

        chunks['content'] = chunks.content_response['choices'][0]['message']['content']

        progress(0.6, desc="Script generation...")
        # Pass 2: turn the rewritten content into a speakable script.
        # NOTE(review): the f-string stringifies the docs.style expression at
        # definition time — verify this interpolates the per-row value as intended.
        chunks['script_response'] = openai.chat_completions(
            messages=[
                {
                    'role': 'system',
                    'content': f"""Convert content to audio script.
                    Style: {docs.style}
                    Format:
                    - Clear sentence structures
                    - Natural pauses (...)
                    - Term definitions when needed
                    - Proper transitions"""
                },
                {'role': 'user', 'content': chunks.content}
            ],
            model='gpt-4o-mini-2024-07-18',
            max_tokens=max_tokens,
            temperature=temperature
        )
        chunks['script'] = chunks.script_response['choices'][0]['message']['content']

        progress(0.8, desc="Audio synthesis...")
        @pxt.udf(return_type=pxt.Audio)
        def generate_audio(script: str, voice: str):
            """Synthesize one script chunk via the OpenAI TTS endpoint.

            Returns the path of a temp .mp3 file, or None on any failure
            (best-effort by design: one bad chunk must not abort the run).
            """
            if not script or not voice:
                return None
            try:
                response = requests.post(
                    "https://api.openai.com/v1/audio/speech",
                    headers={"Authorization": f"Bearer {api_key}"},
                    json={"model": "tts-1", "input": script, "voice": voice},
                    # fix: no timeout previously — one stalled request hung the whole run
                    timeout=120
                )
                if response.status_code == 200:
                    # fix: close the handle deterministically via the context manager
                    with tempfile.NamedTemporaryFile(delete=False, suffix='.mp3') as temp_file:
                        temp_file.write(response.content)
                    return temp_file.name
                # fix: non-200 responses previously failed silently
                print(f"Error in audio synthesis: HTTP {response.status_code}")
            except Exception as e:
                print(f"Error in audio synthesis: {e}")
            return None

        chunks['audio'] = generate_audio(chunks.script, docs.voice)

        # NOTE(review): only the last chunk's audio reaches the player —
        # confirm whether concatenating all chunks was intended.
        audio_path = chunks.select(chunks.audio).tail(1)['audio'][0]

        results = chunks.select(
            chunks.content,
            chunks.script
        ).collect()

        # Rows for the UI's content Dataframe: [label, content, script].
        display_data = [
            [f"Segment {idx + 1}", row['content'], row['script']]
            for idx, row in enumerate(results)
        ]

        progress(1.0, desc="Complete")
        return display_data, audio_path, "Processing complete"

    except Exception as e:
        # Surface the failure in the status textbox instead of crashing the UI.
        return None, None, f"Error: {str(e)}"