"""Gradio chat UI that streams Azure OpenAI completions over retrieved context.

Flow: the user's query goes to a remote Gradio retrieval endpoint
(``gradio_client``), which returns chat ``messages`` plus source ``urls``;
the answer is then streamed token-by-token from an Azure OpenAI deployment
into the interface.
"""
import logging
import os

import gradio as gr
from dotenv import load_dotenv
from openai import AzureOpenAI

from client import gradio_client

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

load_dotenv()

AZURE_OPENAI_ENDPOINT = os.getenv("AZURE_OPENAI_ENDPOINT")
AZURE_OPENAI_KEY = os.getenv("AZURE_OPENAI_KEY")
AZURE_API_VERSION = os.getenv("AZURE_API_VERSION")

client = AzureOpenAI(
    azure_endpoint=AZURE_OPENAI_ENDPOINT,
    api_key=AZURE_OPENAI_KEY,
    api_version=AZURE_API_VERSION,
)


def get_streaming_response(query):
    """Stream the model's answer, yielding ``(partial_answer, urls)`` tuples.

    Args:
        query: The user's question, forwarded to the retrieval endpoint.

    Yields:
        ``(accumulated_markdown, urls)`` on each new token.  On failure,
        yields ``(error_message, None)`` so BOTH Gradio output components
        still receive a value (the original code yielded a bare string
        here, mismatching the two declared outputs).
    """
    try:
        # Remote retrieval step: returns chat messages (with context) and
        # the source URLs backing them.  NOTE(review): exact return schema
        # comes from the remote "/predict" endpoint — confirm against it.
        messages, urls = gradio_client.predict(query=query, api_name="/predict")
        logger.info("Starting streaming response...")
        response = client.chat.completions.create(
            messages=messages,
            model="urdu-llama",
            temperature=0.5,
            stream=True,
        )
        output = ""
        for chunk in response:
            # Some stream events (e.g. the trailing usage chunk) carry no
            # choices; skip them rather than index into an empty list.
            if chunk.choices:
                token = chunk.choices[0].delta.content
                if token:
                    output += token
                    yield output, urls
    except Exception as e:
        # UI boundary: log the full traceback, then surface a readable
        # message as a 2-tuple matching the interface's two outputs.
        logger.exception("Streaming response failed")
        yield f"Error: {str(e)}", None


iface = gr.Interface(
    fn=get_streaming_response,
    inputs=gr.Textbox(placeholder="Ask me anything...", show_label=False),
    outputs=[
        gr.Markdown(label="AI Response"),
        gr.JSON(label="URLs"),
    ],
    live=False,
)

if __name__ == "__main__":
    iface.launch()