import os

import gradio as gr
import librosa
import torch
from huggingface_hub import login
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    WhisperForConditionalGeneration,
    WhisperProcessor,
)

# Retrieve the token from the environment variable
hf_api_token = os.getenv("HF_API_TOKEN")
if hf_api_token is None:
    raise ValueError("HF_API_TOKEN environment variable is not set")

# Authenticate with Hugging Face (required for the gated Llama 2 weights)
login(token=hf_api_token, add_to_git_credential=True)

# Initialize the Whisper processor and model
whisper_processor = WhisperProcessor.from_pretrained("openai/whisper-base")
whisper_model = WhisperForConditionalGeneration.from_pretrained("openai/whisper-base")

# Load the Llama 2 7B chat model for summarization.
# device_map="auto" lets accelerate place the fp16 weights across the
# available devices; it replaces the manual init_empty_weights /
# load_checkpoint_and_dispatch combination, which requires a local
# checkpoint path rather than a Hub model id.
model_name = "meta-llama/Llama-2-7b-chat-hf"
summarization_model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
summarization_tokenizer = AutoTokenizer.from_pretrained(model_name)


# Function to transcribe audio
def transcribe_audio(audio_file):
    # Load the audio file and resample it to the 16 kHz Whisper expects
    speech, _ = librosa.load(audio_file, sr=16000)
    # The processor returns log-mel input features, not raw input values
    input_features = whisper_processor(
        speech, sampling_rate=16000, return_tensors="pt"
    ).input_features
    # Generate transcription
    transcription_ids = whisper_model.generate(input_features)
    transcription = whisper_processor.decode(
        transcription_ids[0], skip_special_tokens=True
    )
    return transcription


# Function to summarize text
def summarize_text(text):
    # Llama 2 is a causal LM, so it needs an explicit instruction to
    # summarize rather than simply continue the text
    prompt = f"Summarize the following conversation:\n\n{text}\n\nSummary:"
    inputs = summarization_tokenizer(
        prompt, return_tensors="pt", max_length=2048, truncation=True
    ).to(summarization_model.device)
    # max_new_tokens bounds the summary length without clashing with the
    # (possibly longer) prompt, unlike max_length
    summary_ids = summarization_model.generate(
        inputs.input_ids,
        max_new_tokens=150,
        num_beams=4,
        early_stopping=True,
    )
    # Decode only the newly generated tokens, skipping the echoed prompt
    summary = summarization_tokenizer.decode(
        summary_ids[0][inputs.input_ids.shape[1]:], skip_special_tokens=True
    )
    return summary


# Gradio interface
def process_audio(audio_file):
    transcription = transcribe_audio(audio_file)
    summary = summarize_text(transcription)
    return transcription, summary


# Gradio UI
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # "file" is not a valid Audio type in current Gradio
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="Summary"),
    ],
    title="Audio Transcription and Summarization",
    description="Upload an audio file to transcribe and summarize the conversation.",
)

# Launch the app
iface.launch()
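
# Example usage — a minimal sketch, assuming this file is saved as app.py
# and your Hugging Face account has been granted access to the gated
# meta-llama/Llama-2-7b-chat-hf repository:
#
#   pip install gradio torch transformers accelerate librosa huggingface_hub
#   export HF_API_TOKEN=hf_...   # your Hugging Face access token
#   python app.py                # then open the printed local URL and upload an audio file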