# demo
#
# File size: 2,014 Bytes

# -*- coding: utf-8 -*-
"""OpenAI Whisper from Hugging Face Transformers with Microsoft PHI 3 Integration"""

import gradio as gr
from transformers import pipeline
import torch
from huggingface_hub import InferenceClient
import os

# Initialize the InferenceClient for the Phi chat model.
# The access token is read from the HF_API_TOKEN environment variable (it can
# be configured through Hugging Face Secrets). An unset or empty variable is
# passed as None instead of "" so that a blank token is never sent and
# huggingface_hub can apply its own default token lookup.
client = InferenceClient(
    "microsoft/Phi-3.5-mini-instruct",
    token=os.getenv("HF_API_TOKEN") or None,
)

# Pick the compute device: CUDA when torch reports one, otherwise the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Build the Whisper speech-recognition pipeline. The pipeline is given a
# device index: 0 when running on CUDA, -1 when running on the CPU.
whisper = pipeline(
    task='automatic-speech-recognition',
    model='openai/whisper-tiny',
    device=-1 if device != 'cuda' else 0,
)

# Instruction text sent ahead of every prompt; read from the INST environment
# variable (e.g. a Hugging Face Secret) with a hardcoded fallback.
instructions = os.environ.get("INST", "Your default instructions here.")

def query_phi(prompt):
    """Send ``prompt`` to the Phi chat model and return its complete reply.

    The module-level ``instructions`` text is placed before ``prompt``
    (separated by a newline) and the result is sent as a single user message.
    The reply is requested as a stream, capped at 500 generated tokens, and
    the streamed text pieces are concatenated.

    Args:
        prompt: Text to send after the instructions.

    Returns:
        str: The concatenation of every text piece received from the stream;
        an empty string if the stream carried no text.
    """
    stream = client.chat_completion(
        messages=[{"role": "user", "content": f"{instructions}\n{prompt}"}],
        max_tokens=500,
        stream=True,
    )

    pieces = []
    for chunk in stream:
        # Not every streamed chunk carries text: ``choices`` can be empty and
        # ``delta.content`` can be None (for example on a role-only or final
        # chunk). Appending None to a str raises TypeError, so skip those.
        if not chunk.choices:
            continue
        text = chunk.choices[0].delta.content
        if text:
            pieces.append(text)
    return "".join(pieces)

def transcribe_and_query(audio):
    """Transcribe an audio file and ask the Phi model about the transcript.

    Args:
        audio: Path of the audio file to transcribe (the Gradio input is
            configured with ``type="filepath"``). May be ``None`` or empty
            when the form is submitted without any audio.

    Returns:
        tuple[str, str]: The transcription prefixed with ``"Prompt : "`` and
        the model's reply. When no audio is supplied, a short notice and a
        hint are returned instead, and neither model is called.
    """
    # Submitting the form without a recording hands us no file path; bail out
    # with a readable message rather than letting the pipeline fail on it.
    if not audio:
        return (
            "No audio was provided.",
            "Please upload or record a call and try again.",
        )

    # Transcribe the audio file
    transcription = "Prompt : " + whisper(audio)["text"]

    # Query Microsoft PHI 3 with the transcribed text
    phi_response = query_phi(transcription)

    return transcription, phi_response

# Create Gradio interface: one audio input (delivered to the callback as a
# file path) and two text outputs (the transcription and the model's reply).
iface = gr.Interface(
    fn=transcribe_and_query,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", "text"],
    title="Scam Call detector with BEEP",
    # The original text contained a literal "/n" where a line break was
    # intended; a blank line separates the two sentences into paragraphs.
    description=(
        "Upload your recorded call to see if it is a scam or not.\n\n"
        "Stay Safe, Stay Secure."
    ),
)

# Launch the interface
iface.launch(share=True)  # share=True is optional, it provides a public link