Spaces:

shukdevdatta123
/

openai-o4-mini-multi-modal-chat-app

Running

App Files Files Community

openai-o4-mini-multi-modal-chat-app / prompts.txt

shukdevdatta123

Add 2 files

feefa81 verified 13 days ago

raw

history blame contribute delete

35.5 kB

import gradio as gr import base64 import os from openai import OpenAI import tempfile def extract_text_from_response(response): """Extract text from OpenAI API response based on the actual structure""" try: # First check if response has an output attribute if hasattr(response, 'output'): output = response.output # Look for message items in the output list for item in output: if hasattr(item, 'role') and item.role == 'assistant': for content_item in item.content: if hasattr(content_item, 'type') and content_item.type == 'output_text': return content_item.text # If we couldn't find it in the expected structure, return an error return f"Couldn't extract text from response: {str(response)}" except Exception as e: return f"Error processing response: {str(e)}" def analyze_pdf(api_key, pdf_file, prompt, effort_level): """Analyze a PDF file using OpenAI API""" if not api_key: return "Please provide an OpenAI API key." if pdf_file is None: return "Please upload a PDF file." # Initialize OpenAI client with the provided API key client = OpenAI(api_key=api_key) # Read the PDF file and encode it in base64 with open(pdf_file.name, "rb") as file: pdf_bytes = file.read() pdf_base64 = base64.b64encode(pdf_bytes).decode("utf-8") try: # Create the request response = client.responses.create( model="o4-mini", input=[ { "role": "user", "content": [ { "type": "input_text", "text": prompt }, { "type": "input_file", "filename": os.path.basename(pdf_file.name), "file_data": f"data:application/pdf;base64,{pdf_base64}" } ] } ], text={ "format": { "type": "text" } }, reasoning={ "effort": effort_level }, tools=[], store=True ) return extract_text_from_response(response) except Exception as e: return f"Error: {str(e)}" def analyze_image(api_key, image, image_url, prompt, effort_level): """Analyze an image using OpenAI API""" if not api_key: return "Please provide an OpenAI API key." if image is None and not image_url: return "Please upload an image or provide an image URL." 
if not prompt: return "Please provide a prompt for the image analysis." # Initialize OpenAI client with the provided API key client = OpenAI(api_key=api_key) content = [] # Add text prompt content.append({ "type": "input_text", "text": prompt }) # Add image - either from uploaded file or URL try: if image is not None: # Convert image to base64 with open(image, "rb") as img_file: img_bytes = img_file.read() img_base64 = base64.b64encode(img_bytes).decode("utf-8") content.insert(0, { "type": "input_image", "image_url": f"data:image/png;base64,{img_base64}" }) elif image_url: content.insert(0, { "type": "input_image", "image_url": image_url }) # Create the request response = client.responses.create( model="o4-mini", input=[ { "role": "user", "content": content } ], text={ "format": { "type": "text" } }, reasoning={ "effort": effort_level }, tools=[], store=True ) return extract_text_from_response(response) except Exception as e: return f"Error: {str(e)}" # Create Gradio interface with gr.Blocks(title="OpenAI O4 Analysis App") as app: gr.Markdown("# OpenAI O4 Mini Analysis Application") # API Key input (used across both tabs) api_key_input = gr.Textbox( label="OpenAI API Key", placeholder="Enter your OpenAI API key here...", type="password" ) with gr.Tab("PDF Analysis"): gr.Markdown("## Upload a PDF file for analysis") with gr.Row(): pdf_input = gr.File(label="Upload PDF", file_types=[".pdf"]) pdf_prompt = gr.Textbox( label="Prompt", placeholder="Enter your question about the PDF...", value="What is this PDF about?" 
) pdf_effort = gr.Radio( ["low", "medium", "high"], label="Reasoning Effort", value="medium" ) pdf_button = gr.Button("Analyze PDF") pdf_output = gr.Textbox(label="Analysis Result", lines=10) pdf_button.click( analyze_pdf, inputs=[api_key_input, pdf_input, pdf_prompt, pdf_effort], outputs=pdf_output ) with gr.Tab("Image Analysis"): gr.Markdown("## Upload an image or provide an image URL for analysis") with gr.Row(): with gr.Column(): img_input = gr.Image(label="Upload Image", type="filepath") img_url = gr.Textbox( label="Or enter image URL", placeholder="https://example.com/image.jpg" ) img_prompt = gr.Textbox( label="Prompt", placeholder="Enter your question about the image...", value="What is this image about?" ) img_effort = gr.Radio( ["low", "medium", "high"], label="Reasoning Effort", value="medium" ) img_output = gr.Textbox(label="Analysis Result", lines=10) img_button = gr.Button("Analyze Image") img_button.click( analyze_image, inputs=[api_key_input, img_input, img_url, img_prompt, img_effort], outputs=img_output ) gr.Markdown(""" ### Note: - Your OpenAI API key is required to use this application - The API key is only used for requests and is not stored """) # Launch the app if __name__ == "__main__": app.launch() (please convert this code to a very very awesome looking website with amazing animations... make sure the website works exactly in the same way the above project code works)

<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>OpenAI O4 Analysis App</title>
  <script src="https://cdn.tailwindcss.com"></script>
  <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/animate.css/4.1.1/animate.min.css"/>
  <link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap" rel="stylesheet">
  <script src="https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js"></script>
  <style>
    body {
      font-family: 'Inter', sans-serif;
      background: linear-gradient(135deg, #f5f7fa 0%, #c3cfe2 100%);
      min-height: 100vh;
    }
    .tab-button { transition: all 0.3s ease; }
    .tab-button.active {
      transform: translateY(-2px);
      box-shadow: 0 4px 6px -1px rgba(0, 0, 0, 0.1), 0 2px 4px -1px rgba(0, 0, 0, 0.06);
    }
    .result-box { transition: all 0.3s ease; min-height: 200px; }
    .loading-dots { display: inline-block; }
    .loading-dots span {
      display: inline-block;
      width: 8px;
      height: 8px;
      border-radius: 50%;
      background-color: #4f46e5;
      margin: 0 2px;
      animation: bounce 1.4s infinite ease-in-out both;
    }
    .loading-dots span:nth-child(1) { animation-delay: -0.32s; }
    .loading-dots span:nth-child(2) { animation-delay: -0.16s; }
    @keyframes bounce {
      0%, 80%, 100% { transform: scale(0); }
      40% { transform: scale(1); }
    }
    .file-input-label { transition: all 0.3s ease; }
    .file-input-label:hover {
      transform: translateY(-2px);
      box-shadow: 0 4px 6px -1px rgba(79, 70, 229, 0.3), 0 2px 4px -1px rgba(79, 70, 229, 0.06);
    }
    .glow { animation: glow 2s infinite alternate; }
    @keyframes glow {
      from { box-shadow: 0 0 5px rgba(79, 70, 229, 0.5); }
      to { box-shadow: 0 0 20px rgba(79, 70, 229, 0.8); }
    }
  </style>
</head>
<body class="py-8 px-4 sm:px-6 lg:px-8">
  <div class="max-w-4xl mx-auto">
    <!-- Header -->
    <div class="text-center mb-10 animate__animated animate__fadeIn">
      <h1 class="text-4xl font-bold text-indigo-600 mb-2">OpenAI O4 Mini Analysis</h1>
      <p class="text-lg text-gray-600">Analyze PDFs and Images with AI-powered insights</p>
    </div>

    <!-- API key (shared by both tabs) -->
    <div class="bg-white rounded-xl shadow-lg p-6 mb-8 animate__animated animate__fadeInUp">
      <label for="api-key" class="block text-sm font-medium text-gray-700 mb-1">OpenAI API Key</label>
      <div class="relative">
        <input
          type="password"
          id="api-key"
          placeholder="Enter your OpenAI API key here..."
          class="w-full px-4 py-3 rounded-lg border border-gray-300 focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 transition duration-200"
        >
        <!-- The id lets the glow script skip this button, as it intends to. -->
        <button
          id="api-key-toggle"
          type="button"
          aria-label="Toggle API key visibility"
          onclick="toggleApiKeyVisibility()"
          class="absolute right-3 top-3 text-gray-500 hover:text-indigo-600 focus:outline-none"
        >
          👁️
        </button>
      </div>
      <p class="mt-2 text-sm text-gray-500">Your API key is only used for requests and is not stored.</p>
    </div>

    <!-- Tab buttons -->
    <div class="flex space-x-2 mb-6 animate__animated animate__fadeIn">
      <button
        id="pdf-tab"
        onclick="switchTab('pdf')"
        class="tab-button active px-6 py-3 rounded-lg font-medium bg-indigo-600 text-white"
      >
        PDF Analysis
      </button>
      <button
        id="image-tab"
        onclick="switchTab('image')"
        class="tab-button px-6 py-3 rounded-lg font-medium bg-gray-200 text-gray-700 hover:bg-gray-300"
      >
        Image Analysis
      </button>
    </div>

    <!-- PDF analysis tab -->
    <div id="pdf-content" class="animate__animated animate__fadeIn">
      <div class="bg-white rounded-xl shadow-lg overflow-hidden">
        <div class="p-6">
          <h2 class="text-2xl font-semibold text-gray-800 mb-4">Upload a PDF for Analysis</h2>
          <div class="mb-6">
            <label class="block text-sm font-medium text-gray-700 mb-2">Upload PDF</label>
            <label for="pdf-upload" class="file-input-label cursor-pointer flex flex-col items-center justify-center px-6 py-12 border-2 border-dashed border-gray-300 rounded-lg bg-gray-50 hover:bg-gray-100 transition duration-200">
              <svg xmlns="http://www.w3.org/2000/svg" class="h-12 w-12 text-gray-400 mb-2" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M7 21h10a2 2 0 002-2V9.414a1 1 0 00-.293-.707l-5.414-5.414A1 1 0 0012.586 3H7a2 2 0 00-2 2v14a2 2 0 002 2z" />
              </svg>
              <span class="text-sm font-medium text-gray-600">Click to upload PDF</span>
              <span class="text-xs text-gray-500 mt-1">.pdf files only</span>
              <input id="pdf-upload" type="file" accept=".pdf" class="hidden">
            </label>
            <p id="pdf-filename" class="mt-2 text-sm text-gray-500 hidden">Selected file: <span class="font-medium"></span></p>
          </div>
          <div class="mb-6">
            <label for="pdf-prompt" class="block text-sm font-medium text-gray-700 mb-2">Prompt</label>
            <textarea
              id="pdf-prompt"
              rows="3"
              class="w-full px-4 py-3 rounded-lg border border-gray-300 focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 transition duration-200"
              placeholder="Enter your question about the PDF..."
            >What is this PDF about?</textarea>
          </div>
          <div class="mb-6">
            <label class="block text-sm font-medium text-gray-700 mb-2">Reasoning Effort</label>
            <div class="flex space-x-4">
              <label class="inline-flex items-center">
                <input type="radio" name="pdf-effort" value="low" class="h-4 w-4 text-indigo-600 focus:ring-indigo-500">
                <span class="ml-2 text-gray-700">Low</span>
              </label>
              <label class="inline-flex items-center">
                <input type="radio" name="pdf-effort" value="medium" checked class="h-4 w-4 text-indigo-600 focus:ring-indigo-500">
                <span class="ml-2 text-gray-700">Medium</span>
              </label>
              <label class="inline-flex items-center">
                <input type="radio" name="pdf-effort" value="high" class="h-4 w-4 text-indigo-600 focus:ring-indigo-500">
                <span class="ml-2 text-gray-700">High</span>
              </label>
            </div>
          </div>
          <button
            id="analyze-pdf-btn"
            onclick="analyzePDF()"
            class="w-full py-3 px-6 bg-indigo-600 hover:bg-indigo-700 text-white font-medium rounded-lg transition duration-200 transform hover:scale-105 focus:outline-none focus:ring-2 focus:ring-indigo-500 focus:ring-offset-2"
          >
            Analyze PDF
          </button>
        </div>
        <div id="pdf-result" class="result-box border-t border-gray-200 p-6 bg-gray-50 hidden">
          <h3 class="text-lg font-medium text-gray-800 mb-3">Analysis Result</h3>
          <div id="pdf-result-content" class="min-h-40 p-4 bg-white rounded-lg border border-gray-200"></div>
        </div>
      </div>
    </div>

    <!-- Image analysis tab -->
    <div id="image-content" class="hidden animate__animated animate__fadeIn">
      <div class="bg-white rounded-xl shadow-lg overflow-hidden">
        <div class="p-6">
          <h2 class="text-2xl font-semibold text-gray-800 mb-4">Upload an Image for Analysis</h2>
          <div class="grid grid-cols-1 md:grid-cols-2 gap-6 mb-6">
            <div>
              <label class="block text-sm font-medium text-gray-700 mb-2">Upload Image</label>
              <label for="image-upload" class="file-input-label cursor-pointer flex flex-col items-center justify-center px-6 py-12 border-2 border-dashed border-gray-300 rounded-lg bg-gray-50 hover:bg-gray-100 transition duration-200">
                <svg xmlns="http://www.w3.org/2000/svg" class="h-12 w-12 text-gray-400 mb-2" fill="none" viewBox="0 0 24 24" stroke="currentColor">
                  <path stroke-linecap="round" stroke-linejoin="round" stroke-width="2" d="M4 16l4.586-4.586a2 2 0 012.828 0L16 16m-2-2l1.586-1.586a2 2 0 012.828 0L20 14m-6-6h.01M6 20h12a2 2 0 002-2V6a2 2 0 00-2-2H6a2 2 0 00-2 2v12a2 2 0 002 2z" />
                </svg>
                <span class="text-sm font-medium text-gray-600">Click to upload image</span>
                <span class="text-xs text-gray-500 mt-1">.jpg, .png, etc.</span>
                <input id="image-upload" type="file" accept="image/*" class="hidden">
              </label>
              <p id="image-filename" class="mt-2 text-sm text-gray-500 hidden">Selected file: <span class="font-medium"></span></p>
              <div class="mt-4">
                <label for="image-url" class="block text-sm font-medium text-gray-700 mb-2">Or enter image URL</label>
                <input
                  type="text"
                  id="image-url"
                  placeholder="https://example.com/image.jpg"
                  class="w-full px-4 py-3 rounded-lg border border-gray-300 focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 transition duration-200"
                >
              </div>
            </div>
            <div>
              <div class="mb-6">
                <label for="image-prompt" class="block text-sm font-medium text-gray-700 mb-2">Prompt</label>
                <textarea
                  id="image-prompt"
                  rows="3"
                  class="w-full px-4 py-3 rounded-lg border border-gray-300 focus:ring-2 focus:ring-indigo-500 focus:border-indigo-500 transition duration-200"
                  placeholder="Enter your question about the image..."
                >What is this image about?</textarea>
              </div>
              <div class="mb-6">
                <label class="block text-sm font-medium text-gray-700 mb-2">Reasoning Effort</label>
                <div class="flex space-x-4">
                  <label class="inline-flex items-center">
                    <input type="radio" name="image-effort" value="low" class="h-4 w-4 text-indigo-600 focus:ring-indigo-500">
                    <span class="ml-2 text-gray-700">Low</span>
                  </label>
                  <label class="inline-flex items-center">
                    <input type="radio" name="image-effort" value="medium" checked class="h-4 w-4 text-indigo-600 focus:ring-indigo-500">
                    <span class="ml-2 text-gray-700">Medium</span>
                  </label>
                  <label class="inline-flex items-center">
                    <input type="radio" name="image-effort" value="high" class="h-4 w-4 text-indigo-600 focus:ring-indigo-500">
                    <span class="ml-2 text-gray-700">High</span>
                  </label>
                </div>
              </div>
            </div>
          </div>
          <button
            id="analyze-image-btn"
            onclick="analyzeImage()"
            class="w-full py-3 px-6 bg-indigo-600 hover:bg-indigo-700 text-white font-medium rounded-lg transition duration-200 transform hover:scale-105 focus:outline-none focus:ring-2 focus:ring-indigo-500 focus:ring-offset-2"
          >
            Analyze Image
          </button>
        </div>
        <div id="image-result" class="result-box border-t border-gray-200 p-6 bg-gray-50 hidden">
          <h3 class="text-lg font-medium text-gray-800 mb-3">Analysis Result</h3>
          <div id="image-result-content" class="min-h-40 p-4 bg-white rounded-lg border border-gray-200"></div>
        </div>
      </div>
    </div>

    <!-- Footer -->
    <div class="mt-12 text-center text-gray-500 text-sm animate__animated animate__fadeIn">
      <p>OpenAI O4 Mini Analysis Application</p>
    </div>
  </div>

  <script>
    const RESPONSES_URL = 'https://api.openai.com/v1/responses';
    const MODEL_NAME = 'o4-mini';

    // Toggle API key visibility between masked and plain text.
    function toggleApiKeyVisibility() {
      const apiKeyInput = document.getElementById('api-key');
      apiKeyInput.type = apiKeyInput.type === 'password' ? 'text' : 'password';
    }

    // Switch between tabs. Any value other than 'pdf' selects the image tab.
    function switchTab(tab) {
      const selectedName = tab === 'pdf' ? 'pdf' : 'image';
      const panels = {
        pdf: {
          button: document.getElementById('pdf-tab'),
          content: document.getElementById('pdf-content')
        },
        image: {
          button: document.getElementById('image-tab'),
          content: document.getElementById('image-content')
        }
      };

      for (const [name, { button, content }] of Object.entries(panels)) {
        const selected = name === selectedName;
        // Move the "active" lift/shadow style along with the colours.
        button.classList.toggle('active', selected);
        button.classList.toggle('bg-indigo-600', selected);
        button.classList.toggle('text-white', selected);
        button.classList.toggle('bg-gray-200', !selected);
        button.classList.toggle('text-gray-700', !selected);
        content.classList.toggle('hidden', !selected);

        if (selected) {
          // Remove and re-add the class around a forced reflow so the
          // fade-in animation replays on every switch.
          content.classList.remove('animate__fadeIn');
          void content.offsetWidth;
          content.classList.add('animate__fadeIn');
        }
      }
    }

    // Show the chosen file's name under an upload control, or hide the label.
    function bindFilenameDisplay(inputId, displayId) {
      document.getElementById(inputId).addEventListener('change', function () {
        const filenameDisplay = document.getElementById(displayId);
        if (this.files.length > 0) {
          filenameDisplay.classList.remove('hidden');
          filenameDisplay.querySelector('span').textContent = this.files[0].name;
        } else {
          filenameDisplay.classList.add('hidden');
        }
      });
    }
    bindFilenameDisplay('pdf-upload', 'pdf-filename');
    bindFilenameDisplay('image-upload', 'image-filename');

    // Show loading state (static markup only, so innerHTML is safe here).
    function showLoading(element) {
      element.innerHTML = '<div class="loading-dots"><span></span><span></span><span></span></div>';
    }

    // Render text into a result container. textContent is used so that model
    // output or error text containing markup is displayed, not interpreted.
    function renderMessage(container, message, className) {
      const box = document.createElement('div');
      box.className = className;
      box.textContent = message;
      container.innerHTML = '';
      container.appendChild(box);
    }

    // Build the user-facing message for a failed request.
    function describeError(error, fallback) {
      if (error.response) {
        const apiMessage = error.response.data && error.response.data.error
          ? error.response.data.error.message
          : undefined;
        return `Error: ${apiMessage || error.response.statusText}`;
      }
      if (error.message) {
        return `Error: ${error.message}`;
      }
      return fallback;
    }

    // Read a File and resolve with its "data:<mime>;base64,<payload>" URL.
    function readFileAsDataURL(file) {
      return new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = () => resolve(reader.result);
        reader.onerror = reject;
        reader.readAsDataURL(file);
      });
    }

    // Return only the base64 payload of a File.
    async function readFileAsBase64(file) {
      const dataUrl = await readFileAsDataURL(file);
      return dataUrl.split(',')[1] || '';
    }

    // Send one user message to the Responses API and return the reply text.
    async function requestAnalysis(apiKey, content, effortLevel) {
      const payload = {
        model: MODEL_NAME,
        input: [{ role: 'user', content: content }],
        text: { format: { type: 'text' } },
        reasoning: { effort: effortLevel },
        tools: [],
        store: true
      };
      const response = await axios.post(RESPONSES_URL, payload, {
        headers: {
          'Authorization': `Bearer ${apiKey}`,
          'Content-Type': 'application/json'
        }
      });
      return extractTextFromResponse(response.data);
    }

    // Analyze PDF function
    async function analyzePDF() {
      const apiKey = document.getElementById('api-key').value;
      const pdfFile = document.getElementById('pdf-upload').files[0];
      const prompt = document.getElementById('pdf-prompt').value;
      const effortLevel = document.querySelector('input[name="pdf-effort"]:checked').value;
      const resultContent = document.getElementById('pdf-result-content');
      const resultBox = document.getElementById('pdf-result');

      if (!apiKey) {
        alert('Please provide an OpenAI API key.');
        return;
      }
      if (!pdfFile) {
        alert('Please upload a PDF file.');
        return;
      }

      // Show loading state
      resultBox.classList.remove('hidden');
      showLoading(resultContent);

      try {
        const pdfBase64 = await readFileAsBase64(pdfFile);
        const content = [
          { type: 'input_text', text: prompt },
          {
            type: 'input_file',
            filename: pdfFile.name,
            file_data: `data:application/pdf;base64,${pdfBase64}`
          }
        ];
        const result = await requestAnalysis(apiKey, content, effortLevel);
        renderMessage(resultContent, result, 'text-gray-700 whitespace-pre-wrap');
      } catch (error) {
        console.error('Error:', error);
        renderMessage(
          resultContent,
          describeError(error, 'An error occurred while analyzing the PDF.'),
          'text-red-600'
        );
      }
    }

    // Analyze Image function
    async function analyzeImage() {
      const apiKey = document.getElementById('api-key').value;
      const imageFile = document.getElementById('image-upload').files[0];
      const imageUrl = document.getElementById('image-url').value;
      const prompt = document.getElementById('image-prompt').value;
      const effortLevel = document.querySelector('input[name="image-effort"]:checked').value;
      const resultContent = document.getElementById('image-result-content');
      const resultBox = document.getElementById('image-result');

      if (!apiKey) {
        alert('Please provide an OpenAI API key.');
        return;
      }
      if (!imageFile && !imageUrl) {
        alert('Please upload an image or provide an image URL.');
        return;
      }
      if (!prompt) {
        alert('Please provide a prompt for the image analysis.');
        return;
      }

      // Show loading state
      resultBox.classList.remove('hidden');
      showLoading(resultContent);

      try {
        let imageRef = imageUrl;
        if (imageFile) {
          // An uploaded file takes precedence over the URL. Use the file's
          // own MIME type; fall back to PNG only when the browser reports none.
          const imageBase64 = await readFileAsBase64(imageFile);
          const mimeType = imageFile.type || 'image/png';
          imageRef = `data:${mimeType};base64,${imageBase64}`;
        }

        // The image goes first, followed by the text prompt.
        const content = [
          { type: 'input_image', image_url: imageRef },
          { type: 'input_text', text: prompt }
        ];
        const result = await requestAnalysis(apiKey, content, effortLevel);
        renderMessage(resultContent, result, 'text-gray-700 whitespace-pre-wrap');
      } catch (error) {
        console.error('Error:', error);
        renderMessage(
          resultContent,
          describeError(error, 'An error occurred while analyzing the image.'),
          'text-red-600'
        );
      }
    }

    // Extract the first assistant "output_text" from a Responses API result.
    // Always returns a string; problems are reported as text, not thrown.
    function extractTextFromResponse(response) {
      try {
        if (response.output) {
          for (const item of response.output) {
            if (item.role === 'assistant') {
              for (const contentItem of item.content) {
                if (contentItem.type === 'output_text') {
                  return contentItem.text;
                }
              }
            }
          }
          // If we couldn't find it in the expected structure, return an error
          return `Couldn't extract text from response: ${JSON.stringify(response)}`;
        }
        return `Unexpected response structure: ${JSON.stringify(response)}`;
      } catch (error) {
        return `Error processing response: ${error.message}`;
      }
    }

    // Add glow effect to buttons on hover (the API key toggle is excluded).
    document.querySelectorAll('button').forEach(button => {
      if (button.id !== 'api-key-toggle') {
        button.addEventListener('mouseenter', () => {
          button.classList.add('glow');
        });
        button.addEventListener('mouseleave', () => {
          button.classList.remove('glow');
        });
      }
    });
  </script>
</body>
</html>
(hey in this code once I select a pdf file after upload I cannot remove it so please give option for that similarly when I upload an image it get selected but I cannot remove it so please give option for that also.... 
In the case of image chat, the user will first have to choose between image-upload chat and image-URL chat. Please give me the full code.) (Include some HTML and CSS that will make the website even more attractive.)