import io
import os
import re

import gradio as gr
import requests
from PIL import Image
from groq import Groq

# Read API keys from environment variables; never hard-code credentials.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
HF_API_KEY = os.getenv("HF_TOKEN")

if not GROQ_API_KEY or not HF_API_KEY:
    raise ValueError("GROQ_API_KEY and HF_TOKEN must be set in the environment.")

# Initialize the Groq API client
client = Groq(api_key=GROQ_API_KEY)

# Public Hugging Face image model served via the Inference API
HF_IMAGE_MODEL = "stabilityai/stable-diffusion-2-1"
# Function 1: Tamil audio to Tamil text (transcription)
def transcribe_audio(audio_path):
    if not audio_path:
        return "Error: Please upload an audio file."
    try:
        with open(audio_path, "rb") as file:
            transcription = client.audio.transcriptions.create(
                file=(os.path.basename(audio_path), file.read()),
                model="whisper-large-v3",
                language="ta",  # Tamil
                response_format="verbose_json",
            )
        return transcription.text.strip()
    except Exception as e:
        return f"Error in transcription: {str(e)}"
# Function 2: Tamil text to English translation
def translate_tamil_to_english(tamil_text):
    if not tamil_text:
        return "Error: Please enter Tamil text for translation."
    prompt = (
        f"Translate this Tamil text to English: {tamil_text}\n"
        "Give only the translated text as output."
    )
    try:
        response = client.chat.completions.create(
            model="llama3-8b-8192",  # Groq-supported model
            messages=[{"role": "user", "content": prompt}],
        )
        translated_text = response.choices[0].message.content.strip()
        # Strip any stray <think></think> tags from the model output
        translated_text = re.sub(r"</?think>", "", translated_text).strip()
        return translated_text
    except Exception as e:
        return f"Error in translation: {str(e)}"
# Function 3: English text to image generation (Hugging Face Inference API)
def generate_image(english_text):
    if not english_text:
        return "Error: Please enter a description for image generation."
    try:
        headers = {"Authorization": f"Bearer {HF_API_KEY}"}
        payload = {"inputs": english_text}
        response = requests.post(
            f"https://api-inference.huggingface.co/models/{HF_IMAGE_MODEL}",
            headers=headers,
            json=payload,
        )
        response.raise_for_status()
        image_bytes = response.content
        # Guard against an empty response body before decoding
        if not image_bytes:
            return "Error: Received empty response from API."
        return Image.open(io.BytesIO(image_bytes))
    except Exception as e:
        return f"Error in image generation: {str(e)}"
# Function 4: English text to AI-generated text
def generate_text(english_text):
    if not english_text:
        return "Error: Please enter a prompt."
    try:
        response = client.chat.completions.create(
            model="llama3-8b-8192",  # Groq-supported model
            messages=[{"role": "user", "content": english_text}],
        )
        generated_text = response.choices[0].message.content.strip()
        # Strip any stray <think></think> tags, as in the translation step
        return re.sub(r"</?think>", "", generated_text).strip()
    except Exception as e:
        return f"Error in text generation: {str(e)}"
# Combined function that chains all four steps
def process_audio(audio_path):
    # Step 1: Tamil audio → Tamil text
    tamil_text = transcribe_audio(audio_path)
    # startswith avoids false positives when a transcript happens to
    # contain the word "Error"
    if tamil_text.startswith("Error"):
        return tamil_text, None, None, None
    # Step 2: Tamil text → English text
    english_text = translate_tamil_to_english(tamil_text)
    if english_text.startswith("Error"):
        return tamil_text, english_text, None, None
    # Step 3: English text → image
    image = generate_image(english_text)
    if isinstance(image, str) and image.startswith("Error"):
        return tamil_text, english_text, None, None
    # Step 4: English text → AI-generated text
    generated_text = generate_text(english_text)
    return tamil_text, english_text, image, generated_text
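
# The full pipeline can be exercised without the UI, e.g. with a
# hypothetical recording "speech_ta.wav" (uncomment to try):
# tamil, english, image, generated = process_audio("speech_ta.wav")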
# Create the Gradio interface
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload Tamil Audio"),
    outputs=[
        gr.Textbox(label="Transcribed Tamil Text"),
        gr.Textbox(label="Translated English Text"),
        gr.Image(label="Generated Image"),
        gr.Textbox(label="Generated Text from English Prompt"),
    ],
    title="Tamil Audio to AI Processing Pipeline",
    description="Upload a Tamil audio file to get a transcription, an English translation, a generated image, and further generated text.",
)

# Launch the Gradio app
iface.launch()
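# launch() serves on http://127.0.0.1:7860 by default; iface.launch(share=True)
# would also create a temporary public URL for quick sharing.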