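"""Voice-driven RAG demo: transcribe speech, query Mistral-7B-Instruct,
augment the answer with a DuckDuckGo web search, and read it back via edge-tts."""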
from huggingface_hub import InferenceClient
import gradio as gr
import random
import tempfile
import asyncio
from streaming_stt_nemo import Model
import edge_tts
from langchain_community.tools import DuckDuckGoSearchRun
API_URL = "https://api-inference.huggingface.co/models/"  # base endpoint (unused below; InferenceClient resolves the model URL itself)
client = InferenceClient("mistralai/Mistral-7B-Instruct-v0.1")
# Initialize DuckDuckGo search tool
duckduckgo_search = DuckDuckGoSearchRun()
# Initialize ASR model
default_lang = "en"
engines = { default_lang: Model(default_lang) }
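# Only English is preloaded; add entries to this dict to support more languages.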
def transcribe(audio):
"""Transcribes the audio file to text."""
lang = "en"
model = engines[lang]
text = model.stt_file(audio)[0]
return text
def format_prompt(message, history):
"""Formats the prompt for the language model."""
prompt = "<s>"
for user_prompt, bot_response in history:
prompt += f"[INST] {user_prompt} [/INST]"
prompt += f" {bot_response}</s> "
prompt += f"[INST] {message} [/INST]"
return prompt
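# Example of the Mistral-instruct layout this produces (hypothetical history):
#   format_prompt("How's the weather?", [("Hi", "Hello!")])
#   -> "<s>[INST] Hi [/INST] Hello!</s> [INST] How's the weather? [/INST]"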
def generate(prompt, history, temperature=0.9, max_new_tokens=512, top_p=0.95, repetition_penalty=1.0):
"""Generates a response from the language model."""
temperature = float(temperature)
if temperature < 1e-2:
temperature = 1e-2
top_p = float(top_p)
generate_kwargs = dict(
temperature=temperature,
max_new_tokens=max_new_tokens,
top_p=top_p,
repetition_penalty=repetition_penalty,
do_sample=True,
seed=random.randint(0, 10**7),
)
formatted_prompt = format_prompt(prompt, history)
stream = client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
output = ""
for response in stream:
output += response.token.text
yield output
    # Augment with live web results, keeping the streamed model output in the final yield
    search_result = duckduckgo_search.run(prompt)
    if search_result:
        yield output + "\n\nWeb search results:\n" + search_result
    else:
        yield output + "\n\nSorry, I couldn't find any relevant web results."
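# Example (hypothetical prompt): drain the generator; the last chunk is the full answer
#   final = None
#   for chunk in generate("What's new in Python?", history=[]):
#       final = chunk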
async def respond(audio):
"""Handles the full pipeline: transcribe, generate response, and TTS."""
try:
# Transcribe audio to text
user_text = transcribe(audio)
# Generate response using the language model
history = []
response_generator = generate(user_text, history)
response_text = ""
for response in response_generator:
response_text = response
# Convert the text response to speech
        # edge-tts writes MP3 audio by default, so name the temp file accordingly
        communicate = edge_tts.Communicate(response_text)
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
            tmp_path = tmp_file.name
        await communicate.save(tmp_path)
return response_text, tmp_path
except Exception as e:
return str(e), None
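# Example (hypothetical recording): run the full pipeline outside Gradio
#   text, audio_path = asyncio.run(respond("sample.wav"))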
additional_inputs = [
gr.Slider(
label="Temperature",
value=0.9,
minimum=0.0,
maximum=1.0,
step=0.05,
interactive=True,
info="Higher values produce more diverse outputs",
),
gr.Slider(
label="Max new tokens",
value=512,
minimum=64,
maximum=1024,
step=64,
interactive=True,
info="The maximum number of new tokens",
),
gr.Slider(
label="Top-p (nucleus sampling)",
value=0.90,
minimum=0.0,
maximum=1.0,
step=0.05,
interactive=True,
info="Higher values sample more low-probability tokens",
),
gr.Slider(
label="Repetition penalty",
value=1.2,
minimum=1.0,
maximum=2.0,
step=0.05,
interactive=True,
info="Penalize repeated tokens",
)
]
customCSS = """
#component-7 { /* default element ID of the chat component */
    height: 800px; /* adjust the height as needed */
    flex-grow: 1;
}
"""
with gr.Blocks(css=customCSS) as demo:
gr.Markdown("# RAG_FRIDAY_4.0🤖 WELCOME TO OPEN-SOURCE FREEDOM🤗(like never before)")
gr.Markdown("Getting real-time updated results for prompts is still proprietary in the face of GPT-4, Co-Pilot etc. This app serves as an open-source alternative for this! UPDATE: Previous version of this app i.e. RAG_FRIDAY_mark_3 is also available, this is just a upgrade providing voice-based search comfort for users")
with gr.Row():
        input_audio = gr.Audio(label="Voice Chat (BETA)", sources="microphone", type="filepath")
output_text = gr.Textbox(label="Text Response")
output_audio = gr.Audio(label="JARVIS", type="filepath", interactive=False, autoplay=True, elem_classes="audio")
    # Wire the pipeline to the recorder directly rather than nesting a gr.Interface inside Blocks
    input_audio.stop_recording(fn=respond, inputs=[input_audio], outputs=[output_text, output_audio])
gr.Markdown("## Additional Parameters")
for slider in additional_inputs:
slider.render()
demo.queue().launch(debug=True)