Spaces:

Amarthya7
/

Image-Question-Answering-System

Runtime error

App Files Files Community

Amarthya7 committed on Mar 11

Commit

0757241

verified ·

1 Parent(s): 0ca949f

Upload 5 files

Browse files

Files changed (5) hide show

README.md +94 -13
__init__.py +9 -0
app.py +189 -0
requirements.txt +10 -0
run.py +90 -0

README.md CHANGED Viewed

@@ -1,13 +1,94 @@
----
-title: Image Question Answering System
-emoji: 🏃
-colorFrom: indigo
-colorTo: yellow
-sdk: streamlit
-sdk_version: 1.43.1
-app_file: app.py
-pinned: false
-short_description: multi-modal AI application
----
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

+---
+title: Visual Question Answering (VQA) System
+emoji: 🏞️
+colorFrom: blue
+colorTo: purple
+sdk: streamlit
+sdk_version: 1.43.1
+app_file: app.py
+pinned: false
+---
+# Visual Question Answering (VQA) System
+A multi-modal AI application that allows users to upload images and ask questions about them. This project uses pre-trained models from Hugging Face to analyze images and answer natural language questions.
+## Features
+- Upload images in common formats (jpg, png, etc.)
+- Ask questions about image content in natural language
+- Get AI-generated answers based on image content
+- User-friendly Streamlit interface
+- Support for various types of questions (objects, attributes, counting, etc.)
+## Technical Stack
+- **Python**: Main programming language
+- **PyTorch & Transformers**: Deep learning frameworks for running the models
+- **Streamlit**: Interactive web application framework
+- **HuggingFace Models**: Pre-trained visual question answering models
+- **PIL**: Image processing
+## Setup Instructions
+1. Clone this repository:
+   ```
+   git clone https://github.com/your-username/visual-question-answering.git
+   cd visual-question-answering
+   ```
+2. Create a virtual environment (recommended):
+   ```
+   python -m venv venv
+   # On Windows
+   venv\Scripts\activate
+   # On macOS/Linux
+   source venv/bin/activate
+   ```
+3. Install dependencies:
+   ```
+   pip install -r requirements.txt
+   ```
+4. Run the application:
+   ```
+   python run.py
+   ```
+   Or directly with Streamlit:
+   ```
+   streamlit run app.py
+   ```
+5. Open a web browser and go to `http://localhost:8501`
+## Usage
+1. Upload an image using the file upload area
+2. Type your question about the image in the text field
+3. Select a model from the sidebar (BLIP or ViLT)
+4. Click "Get Answer" to get an AI-generated response
+5. View the answer displayed on the right side of the screen
+## Models Used
+This application uses the following pre-trained models from Hugging Face:
+- **BLIP**: For general visual question answering with free-form answers
+- **ViLT**: For detailed understanding of image content and yes/no questions
+## Project Structure
+- `app.py`: Main Streamlit application
+- `models/`: Contains model handling code
+- `utils/`: Utility functions for image processing and more
+- `static/`: Static files including uploaded images
+- `run.py`: Script to run the application
+## License
+This project is licensed under the MIT License - see the LICENSE file for details.
+## Acknowledgments
+- Hugging Face for their excellent pre-trained models
+- The open-source community for various libraries used in this project

__init__.py ADDED Viewed

	@@ -0,0 +1,9 @@

+"""
+Visual Question Answering - Multi-Modal AI Application
+A Python application for answering questions about images using
+pre-trained Hugging Face models for multi-modal understanding.
+"""
+__version__ = "0.1.0"  # Package version string
+__author__ = "Multi-Modal AI Project"  # Author label exposed as package metadata

app.py ADDED Viewed

	@@ -0,0 +1,189 @@

+"""
+Visual Question Answering Streamlit Application
+"""
+import logging
+import os
+import sys
+import time
+from datetime import datetime
+import streamlit as st
+from PIL import Image
+# Configure path to include parent directory
+sys.path.append(os.path.dirname(os.path.abspath(__file__)))
+# Configure logging
+log_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "logs")
+os.makedirs(log_dir, exist_ok=True)
+log_file = os.path.join(
+    log_dir, f"vqa_app_{datetime.now().strftime('%Y%m%d_%H%M%S')}.log"
+)
+logging.basicConfig(
+    level=logging.INFO,
+    format="%(asctime)s - %(name)s - %(levelname)s - %(message)s",
+    handlers=[logging.FileHandler(log_file), logging.StreamHandler()],
+)
+logger = logging.getLogger("vqa_app")
+# Import modules
+from models import VQAInference
+from utils.image_utils import resize_image
+# Global variables
+MODEL_OPTIONS = {"BLIP": "blip", "ViLT": "vilt"}
+# Setup directories
+uploads_dir = os.path.join(
+    os.path.dirname(os.path.abspath(__file__)), "static", "uploads"
+)
+os.makedirs(uploads_dir, exist_ok=True)
+# Configure page
+st.set_page_config(
+    page_title="Visual Question Answering",
+    page_icon="🔍",
+    layout="wide",
+    initial_sidebar_state="expanded",
+)
+@st.cache_resource
+def load_model(model_name):
+    """Load the VQA model with caching for better performance"""
+    try:
+        logger.info(f"Loading model: {model_name}")
+        return VQAInference(model_name=model_name)
+    except Exception as e:
+        logger.error(f"Error loading model: {str(e)}")
+        st.error(f"Failed to load model: {str(e)}")
+        return None
+def process_image_and_question(image_file, question, model_name):
+    """Process the uploaded image and question to generate an answer"""
+    start_time = time.time()
+    try:
+        # Load image
+        image = Image.open(image_file).convert("RGB")
+        logger.info(f"Image loaded, size: {image.size}")
+        # Resize image
+        image = resize_image(image)
+        logger.info(f"Image resized to: {image.size}")
+        # Load model
+        model = load_model(model_name)
+        if model is None:
+            return None
+        # Generate answer
+        logger.info(f"Generating answer for question: '{question}'")
+        answer = model.predict(image, question)
+        logger.info(f"Answer generated: '{answer}'")
+        # Calculate processing time
+        processing_time = time.time() - start_time
+        return {"answer": answer, "processing_time": f"{processing_time:.2f} seconds"}
+    except Exception as e:
+        logger.error(f"Error processing request: {str(e)}", exc_info=True)
+        return None
+def main():
+    """Main function for Streamlit app"""
+    # Header
+    st.title("Visual Question Answering")
+    st.markdown("Upload an image, ask a question, and get AI-powered answers")
+    # Sidebar for model selection
+    st.sidebar.title("Model Options")
+    selected_model_name = st.sidebar.radio(
+        "Choose a model:", options=list(MODEL_OPTIONS.keys()), index=0
+    )
+    model_name = MODEL_OPTIONS[selected_model_name]
+    st.sidebar.markdown("---")
+    st.sidebar.markdown("## About the Models")
+    st.sidebar.markdown("**BLIP**: General purpose VQA with free-form answers")
+    st.sidebar.markdown("**ViLT**: Better for yes/no questions and specific categories")
+    # Main content - two columns
+    col1, col2 = st.columns([1, 1])
+    with col1:
+        st.markdown("### Upload & Ask")
+        uploaded_file = st.file_uploader(
+            "Upload an image:", type=["jpg", "jpeg", "png", "bmp", "gif"]
+        )
+        question = st.text_input(
+            "Your question about the image:", placeholder="E.g., What is in this image?"
+        )
+        submit_button = st.button(
+            "Get Answer", type="primary", use_container_width=True
+        )
+        # Preview uploaded image
+        if uploaded_file is not None:
+            st.markdown("### Image Preview")
+            st.image(uploaded_file, caption="Uploaded Image", use_column_width=True)
+    with col2:
+        st.markdown("### AI Answer")
+        # Process when submit button is clicked
+        if submit_button and uploaded_file is not None and question:
+            with st.spinner("Generating answer..."):
+                result = process_image_and_question(uploaded_file, question, model_name)
+                if result:
+                    st.success("Answer generated successfully!")
+                    # Display results
+                    st.markdown("#### Question:")
+                    st.write(question)
+                    st.markdown("#### Answer:")
+                    st.markdown(
+                        f"<div style='background-color: #f0f2f6; padding: 20px; border-radius: 5px;'>{result['answer']}</div>",
+                        unsafe_allow_html=True,
+                    )
+                    st.markdown("#### Processing Time:")
+                    st.text(result["processing_time"])
+                else:
+                    st.error(
+                        "Failed to generate an answer. Please check the image and question, and try again."
+                    )
+        elif not uploaded_file and submit_button:
+            st.warning("Please upload an image first.")
+        elif not question and submit_button:
+            st.warning("Please enter a question about the image.")
+        else:
+            st.info("AI answers will appear here after you submit your question")
+    # Information about the application
+    st.markdown("---")
+    st.markdown("### About Visual Question Answering")
+    st.markdown("""
+    This application uses multi-modal AI, combining computer vision and natural language processing
+    to answer questions about images. Here are some examples of questions you can ask:
+    - **Objects**: "What objects are in this image?"
+    - **Counting**: "How many people are in this image?"
+    - **Colors**: "What color is the car?"
+    - **Actions**: "What is the person doing?"
+    - **Spatial relations**: "What is to the left of the chair?"
+    - **Attributes**: "Is the cat sleeping?"
+    """)
+if __name__ == "__main__":
+    main()

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+torch>=2.0.0
+torchvision>=0.15.0
+transformers>=4.30.0
+Pillow>=9.0.0
+timm>=0.9.0
+numpy>=1.24.0
+tqdm>=4.65.0
+streamlit>=1.34.0
+watchdog>=3.0.0
+python-dotenv>=1.0.0

run.py ADDED Viewed

	@@ -0,0 +1,90 @@

+"""
+Visual Question Answering Application - Run Script for Streamlit
+"""
+import os
+import subprocess
+import sys
+# Configure minimal environment settings
+os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"  # Suppress TensorFlow logging
+def check_requirements_installed():
+    """Check if requirements are installed"""
+    try:
+        import streamlit
+        import torch
+        import transformers
+        from PIL import Image
+        return True
+    except ImportError as e:
+        print(f"Error: Required package not installed - {e}")
+        print("Please install requirements using: pip install -r requirements.txt")
+        return False
+def ensure_directories():
+    """Ensure all required directories exist"""
+    # Get the base directory
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    # Create uploads directory
+    uploads_dir = os.path.join(base_dir, "static", "uploads")
+    os.makedirs(uploads_dir, exist_ok=True)
+    print(f"Uploads directory: {uploads_dir}")
+    # Create logs directory
+    logs_dir = os.path.join(base_dir, "logs")
+    os.makedirs(logs_dir, exist_ok=True)
+def main():
+    """Main function to run the application"""
+    print("Visual Question Answering - Multi-Modal AI Application with Streamlit")
+    # Check requirements
+    if not check_requirements_installed():
+        sys.exit(1)
+    # Ensure directories exist
+    ensure_directories()
+    # Set environment variables
+    os.environ["VQA_MODEL"] = os.environ.get(
+        "VQA_MODEL", "blip"
+    )  # Default to 'blip' model
+    # Get the app.py path
+    app_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), "app.py")
+    if not os.path.exists(app_path):
+        print(f"Error: Streamlit app not found at {app_path}")
+        sys.exit(1)
+    # Print startup information
+    port = int(os.environ.get("PORT", 8501))  # Streamlit default port is 8501
+    print(f"Starting VQA application on http://localhost:{port}")
+    print(f"Using VQA model: {os.environ.get('VQA_MODEL', 'blip')}")
+    print("Press Ctrl+C to exit")
+    # Run the Streamlit app
+    cmd = [
+        "streamlit",
+        "run",
+        app_path,
+        "--server.port",
+        str(port),
+        "--server.address",
+        "0.0.0.0",
+    ]
+    try:
+        subprocess.run(cmd)
+    except KeyboardInterrupt:
+        print("\nShutting down the application...")
+    except Exception as e:
+        print(f"Error launching Streamlit: {e}")
+if __name__ == "__main__":
+    main()