Update app.py
app.py
CHANGED
@@ -4,10 +4,15 @@ import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
import os
+import numpy as np
+from pydub import AudioSegment
+import tempfile
+import math

MODEL_NAME = "openai/whisper-large-v3-turbo"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
+CHUNK_LENGTH = 10 * 60  # split into 10-minute chunks

device = 0 if torch.cuda.is_available() else "cpu"

@@ -25,83 +30,140 @@ hf_client = InferenceClient(
    token=os.getenv("HF_TOKEN")
)

+def split_audio(audio_path, chunk_length=CHUNK_LENGTH):
+    """Split an audio file into chunks."""
+    audio = AudioSegment.from_file(audio_path)
+    duration = len(audio) / 1000  # convert to seconds
+    chunks = []

+    # compute the number of chunks
+    num_chunks = math.ceil(duration / chunk_length)
+
+    for i in range(num_chunks):
+        start_time = i * chunk_length * 1000  # milliseconds
+        end_time = min((i + 1) * chunk_length * 1000, len(audio))
+
+        chunk = audio[start_time:end_time]
+
+        # save the chunk to a temporary file
+        with tempfile.NamedTemporaryFile(suffix='.wav', delete=False) as temp_file:
+            chunk.export(temp_file.name, format='wav')
+            chunks.append(temp_file.name)
+
+    return chunks, num_chunks
+
+def process_chunk(chunk_path, task):
+    """Process a single chunk."""
    result = pipe(
+        chunk_path,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True
    )

+    # delete the temporary file
+    os.unlink(chunk_path)
+
+    return result["text"]

+def update_progress(progress):
+    """Update the progress status."""
+    return f"Processing... {progress}% complete"

+@spaces.GPU
+def transcribe_summarize(audio_input, task, progress=gr.Progress()):
+    if audio_input is None:
+        raise gr.Error("No audio file was submitted!")
+
+    try:
+        # split the audio file
+        chunks, num_chunks = split_audio(audio_input)
+        progress(0, desc="Audio file split complete")

+        # process each chunk
+        transcribed_texts = []
+        for i, chunk in enumerate(chunks):
+            chunk_text = process_chunk(chunk, task)
+            transcribed_texts.append(chunk_text)
+            progress((i + 1) / num_chunks, desc=f"Processing chunk {i+1}/{num_chunks}")

+        # combine the text from all chunks
+        transcribed_text = " ".join(transcribed_texts)
+        progress(0.9, desc="Text conversion complete")
+
+        # summarize the text
+        try:
+            # summarization prompt for long input
+            prompt = f"""Please briefly summarize the following long text, focusing on its main points:
+Text: {transcribed_text[:3000]}... # if the text is too long, only the beginning is summarized
+Summary:"""
+
+            response = hf_client.text_generation(
+                model="CohereForAI/c4ai-command-r-plus-08-2024",
+                prompt=prompt,
+                max_new_tokens=250,
+                temperature=0.3,
+                top_p=0.9,
+                repetition_penalty=1.2,
+                stop_sequences=["\n", "Text:", "Summary:"]
+            )
+
+            summary_text = str(response)
+            if "Summary:" in summary_text:
+                summary_text = summary_text.split("Summary:")[1].strip()
+
+        except Exception as e:
+            print(f"Error while generating the summary: {str(e)}")
+            summary_text = "Could not generate a summary. The text is too long or an error occurred during processing."

+        progress(1.0, desc="Processing complete")
+        return [transcribed_text, summary_text]

    except Exception as e:
+        error_msg = f"An error occurred while processing the audio: {str(e)}"
+        return ["", error_msg]

# CSS styles
css = """
footer { visibility: hidden; }
+.progress-bar { height: 15px; border-radius: 5px; }
+.container { max-width: 1200px; margin: auto; padding: 20px; }
"""

# File upload interface
file_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
+        gr.Audio(
+            sources="upload",
+            type="filepath",
+            label="Audio file"
+        ),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="Task",
            value="transcribe"
+        )
    ],
    outputs=[
+        gr.Textbox(
+            label="Transcribed text",
+            lines=10,
+            max_lines=30,
+            placeholder="The transcribed speech will appear here..."
+        ),
+        gr.Textbox(
+            label="Summary",
+            lines=5,
+            placeholder="A summary of the text will appear here..."
+        )
    ],
+    title="Dictation AI: Long-Form Speech Transcription and Summarization",
+    description="""
+    Long audio files (over an hour) can also be processed.
+    Processing time increases in proportion to the file length.
+    Progress is displayed during conversion.
+    """,
    flagging_mode="never"
)

@@ -109,24 +171,34 @@ file_transcribe = gr.Interface(
mic_transcribe = gr.Interface(
    fn=transcribe_summarize,
    inputs=[
+        gr.Audio(
+            sources="microphone",
+            type="filepath"
+        ),
        gr.Radio(
            choices=["transcribe", "translate"],
            label="Task",
            value="transcribe"
+        )
    ],
    outputs=[
+        gr.Textbox(
+            label="Transcribed text",
+            lines=10,
+            max_lines=30
+        ),
+        gr.Textbox(
+            label="Summary",
+            lines=5
+        )
    ],
+    title="Dictation AI: Voice Recording and Transcription",
    flagging_mode="never",
    css=css
)

# Main application
+demo = gr.Blocks(theme="gradio/soft", css=css)
with demo:
    gr.TabbedInterface(
        [file_transcribe, mic_transcribe],
@@ -134,4 +206,8 @@ with demo:
    )

# Run the application
+demo.queue(concurrency_count=1).launch(
+    share=False,
+    debug=True,
+    ssr_mode=False
+)
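
The hunks above reference spaces, torch, pipe, and hf_client, which are defined in unchanged lines that the diff does not show. The following is a minimal sketch of what that unchanged preamble presumably looks like; the pipeline task string and the exact InferenceClient arguments are assumptions, while MODEL_NAME, device, and the HF_TOKEN usage come from the diff context itself.

# Sketch of the unchanged preamble (not part of this commit);
# the pipeline and InferenceClient arguments are assumptions.
import spaces          # implied by the @spaces.GPU decorator
import torch           # implied by torch.cuda.is_available()
import gradio as gr
from transformers import pipeline
from huggingface_hub import InferenceClient
import os

MODEL_NAME = "openai/whisper-large-v3-turbo"
device = 0 if torch.cuda.is_available() else "cpu"

# Whisper ASR pipeline used by process_chunk()
pipe = pipeline(
    "automatic-speech-recognition",
    model=MODEL_NAME,
    device=device,
)

# Hosted text-generation client used for summarization
hf_client = InferenceClient(
    token=os.getenv("HF_TOKEN")
)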