|
import logging |
|
import os |
|
import tempfile |
|
|
|
# Module-level logger named after this module; process_summary uses it to
# report SpaCy preprocessing failures.
logger = logging.getLogger(__name__)
|
|
|
def process_transcription(audio_content: bytes, whisper_model):
    """Transcribe raw audio bytes with the supplied Whisper-style model.

    The bytes are written to a temporary ``.wav`` file because the model's
    ``transcribe`` method is called with a file path. The temporary file is
    removed before this function returns, whether transcription succeeded
    or raised.

    Args:
        audio_content: Raw audio data to transcribe.
        whisper_model: Object exposing ``transcribe(path, beam_size=...)``
            that returns a ``(segments, info)`` pair, where every segment
            has a ``text`` attribute.

    Returns:
        A ``(transcript, info)`` tuple. ``transcript`` is the stripped text
        of every non-empty segment joined by single spaces; ``info`` is the
        second value returned by ``whisper_model.transcribe``, unchanged.

    Raises:
        ValueError: If ``whisper_model`` is falsy (model not loaded).
    """
    if not whisper_model:
        raise ValueError("Whisper model not loaded.")

    temp_file_path = None
    try:
        # delete=False keeps the file on disk after the ``with`` block closes
        # it, so the model can reopen it by path.
        with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as temp_file:
            temp_file_path = temp_file.name
            temp_file.write(audio_content)

        segments, info = whisper_model.transcribe(temp_file_path, beam_size=5)

        # The segments are consumed here, inside the ``try``, so the audio
        # file still exists if the model produces them lazily (TODO confirm
        # against the model in use).
        stripped = (seg.text.strip() for seg in segments)
        # Skip segments that are empty after stripping; joining them would
        # leave doubled or leading/trailing spaces in the transcript.
        transcript = " ".join(piece for piece in stripped if piece)
        return transcript, info
    finally:
        if temp_file_path:
            try:
                os.remove(temp_file_path)
            except FileNotFoundError:
                # Already gone (e.g. removed by the model or another
                # process); nothing left to clean up.
                pass
|
|
|
def process_summary(text: str, summarizer_pipeline, nlp_spacy, config):
    """Summarize ``text`` with the supplied summarizer pipeline.

    When ``nlp_spacy`` is provided, the text is first split into sentences
    with it and re-joined with single spaces. If that step raises, the
    failure is logged and the original text is summarized instead.

    Args:
        text: Text to summarize.
        summarizer_pipeline: Callable invoked as
            ``summarizer_pipeline(text, max_length=..., min_length=...,
            do_sample=False)``; its result is indexed as
            ``result[0]['summary_text']``.
        nlp_spacy: Optional callable returning an object with a ``sents``
            iterable whose items have a ``text`` attribute. Skipped when
            falsy.
        config: Object providing ``SUMMARIZER_MAX_LENGTH`` and
            ``SUMMARIZER_MIN_LENGTH``.

    Returns:
        The ``summary_text`` of the first result, or ``""`` when ``text`` is
        empty or whitespace-only (the summarizer is not called in that case).

    Raises:
        ValueError: If ``summarizer_pipeline`` is falsy (model not loaded).
    """
    if not summarizer_pipeline:
        raise ValueError("Summarizer model not loaded.")

    # Nothing to summarize: avoid sending blank input to the model.
    if not text or not text.strip():
        return ""

    processed_text = text
    if nlp_spacy:
        try:
            doc = nlp_spacy(text)
            stripped = (sent.text.strip() for sent in doc.sents)
            # Drop empty sentences so the joined text has no doubled spaces.
            joined = " ".join(sentence for sentence in stripped if sentence)
            # Keep the raw text if sentence splitting produced nothing.
            processed_text = joined or text
        except Exception as e:
            # Best effort: sentence normalisation is optional, so log the
            # failure (with traceback) and fall back to the unprocessed text.
            logger.exception("SpaCy processing failed: %s", e)

    summary_output = summarizer_pipeline(
        processed_text,
        max_length=config.SUMMARIZER_MAX_LENGTH,
        min_length=config.SUMMARIZER_MIN_LENGTH,
        do_sample=False,
    )

    return summary_output[0]['summary_text']