File size: 2,752 Bytes
a40ce12 e6c4009 c827f71 e6c4009 a4cdc2d c827f71 e6c4009 c827f71 d8b117a 10a7c50 72a66f4 d8b117a c827f71 d8b117a c827f71 a40ce12 c827f71 ef5294d c827f71 a40ce12 ef5294d a40ce12 72a66f4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os
import subprocess

import requests

import openai
import gradio as gr

# SECURITY FIX: this file previously hardcoded a live OpenAI secret key and a
# Hugging Face API token directly in source.  Any credential committed to a
# repository must be considered compromised and revoked.  Read both secrets
# from the environment instead; deployment (e.g. a HF Space) should provide
# OPENAI_API_KEY and HF_API_TOKEN as secrets.
openai.api_key = os.environ.get("OPENAI_API_KEY", "")

# Hugging Face Inference API endpoint for the Swahili Whisper model.
# NOTE(review): `query()` below posts here, but nothing in this file calls
# `query()` — presumably kept as an alternative to the OpenAI transcriber.
API_URL = "https://api-inference.huggingface.co/models/lyimo/whisper-small-sw2"
headers = {"Authorization": f"Bearer {os.environ.get('HF_API_TOKEN', '')}"}
def query(filename):
    """POST the raw bytes of *filename* to the HF inference endpoint.

    Returns the endpoint's JSON payload parsed into Python objects
    (the transcription result from the whisper-small-sw2 model).
    """
    with open(filename, "rb") as audio_file:
        payload = audio_file.read()
    resp = requests.post(API_URL, headers=headers, data=payload)
    return resp.json()
def transcribe(audio):
    """Transcribe the audio file at path *audio* using OpenAI Whisper.

    Returns the recognized text as a string.
    """
    with open(audio, "rb") as fh:
        result = openai.Audio.transcribe("whisper-1", fh)
    return result["text"]
def generate_response(transcribed_text):
    """Answer *transcribed_text* in Swahili via gpt-3.5-turbo.

    The model is primed (system prompt + few-shot Q&A pairs) as an expert on
    "viazi lishe" (orange-fleshed, vitamin-A-rich sweet potatoes); the user's
    transcribed question is appended as the final message.
    """
    # Few-shot priming conversation — the assistant persona and example
    # exchanges about viazi lishe, all in Swahili.
    conversation = [
        {"role": "system", "content": "Wewe ni mtaalamu wa viazi lishe, utajibu maswali yote kwa kiswahili"},
        {"role": "user", "content": "Mambo vipi?"},
        {"role": "assistant", "content": """Salama je una swali lolote kuhusu viazi lishe?"""},
        {"role": "user", "content": "nini maana ya Viazi lishe?"},
        {"role": "assistant", "content": """ viazi lishe ni Viazi vitamu vyenye rangi ya karoti kwa ndani ambavyo vina vitamin A kwa wingi"""},
        {"role": "user", "content": "nini matumizi ya viazi lishe?"},
        {"role": "assistant", "content": """ viazi lishe vinaweza kutengenezea chakula kama Keki,
Maandazi, Kalimati na tambi: Ukisaga unga wa viazi lishe,
unaweza kutumika kupika vyakula ambavyo huwa watu
hutumia unga wa ngano kupika, unga wa viazi lishe una
virutubisho vingi zaidi kuliko unga wa ngano na
ukitumika kupikia vyakula tajwa hapo juu watumiaji
watakuwa wanakula vyakula vyenye virutubisho Zaidi."""},
        {"role": "user", "content": transcribed_text},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=conversation,
    )
    # Single-choice completion: return just the assistant's reply text.
    return completion['choices'][0]['message']['content']
def inference(text):
    """Synthesize *text* to speech with the `tts` command-line tool.

    Writes "tts_output.wav" in the working directory and returns that path.
    Raises CalledProcessError if the tts process exits non-zero.
    """
    out_path = "tts_output.wav"
    # List-form argv (shell=False) — the synthesized text cannot be used
    # for shell injection.
    subprocess.run(["tts", "--text", text, "--out_path", out_path], check=True)
    return out_path
def process_audio_and_respond(audio):
    """Full voice-assistant pipeline for one utterance.

    speech file -> Whisper transcription -> GPT Swahili answer -> TTS wav;
    returns the path of the synthesized answer audio.
    """
    question = transcribe(audio)
    answer = generate_response(question)
    return inference(answer)
# Wire the pipeline into a microphone-in / audio-out Gradio interface.
# Labels and description are in Swahili ("press to ask" / "press to listen").
demo = gr.Interface(
    fn=process_audio_and_respond,
    inputs=gr.inputs.Audio(source="microphone", type="filepath", label="Bofya Kuuliza Swali"),
    outputs=gr.outputs.Audio(type="filepath", label="Bofya Kusikiliza Jibu"),
    title="Uliza Mtaalamu wa Viazi Lishe",
    description="Je ungependa Kujua nini kuhusu viazi Lishe?",
    theme="compact",
    layout="vertical",
    allow_flagging=False,
    live=True,
)

demo.launch()
|