Spaces:

Ansemin101
/

Markit_v2

Running on Zero

App Files Files Community

AnseMin committed on Mar 17

Commit

66f3c4d

1 Parent(s): 98f25ae

restore

Browse files

Files changed (8) hide show

app.py +0 -21
build.sh +5 -11
requirements.txt +10 -18
setup.sh +2 -8
src/parsers/__init__.py +1 -0
src/parsers/marker_parser.py +61 -0
src/services/docling_chat.py +29 -0
src/ui/ui.py +62 -35

app.py CHANGED Viewed

@@ -134,26 +134,5 @@ except ModuleNotFoundError:
 # Call setup function at import time
 setup_tesseract()
-# Add this near the top of app.py after imports
-# Handle potential import conflicts
-try:
-    import transformers
-    print(f"Transformers version: {transformers.__version__}")
-except ImportError:
-    print("Warning: Transformers not installed or not working")
-try:
-    import torch
-    print(f"Torch version: {torch.__version__}")
-    print(f"CUDA available: {torch.cuda.is_available()}")
-except ImportError:
-    print("Warning: PyTorch not installed or not working")
-try:
-    import docling
-    print(f"Docling version: {docling.__version__ if hasattr(docling, '__version__') else 'unknown'}")
-except ImportError:
-    print("Warning: Docling not installed or not working")
 if __name__ == "__main__":
     main()

 # Call setup function at import time
 setup_tesseract()
 if __name__ == "__main__":
     main()

build.sh CHANGED Viewed

@@ -78,20 +78,14 @@ echo "Installing Google Gemini API client..."
 pip install -q -U google-genai
 echo "Google Gemini API client installed successfully"
-# Install GOT-OCR dependencies first
 echo "Installing GOT-OCR dependencies..."
-pip install -q -U torch==2.0.1 torchvision==0.15.2 --no-deps
-pip install -q -U transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
-# Install docling separately with --no-deps to avoid conflicts
-echo "Installing docling..."
-pip install -q -U docling==2.25.0 --no-deps
-echo "Docling installed successfully"
-# Install remaining Python dependencies
-echo "Installing remaining Python dependencies..."
-pip install -e . --no-deps
 # Create .env file if it doesn't exist
 if [ ! -f .env ]; then

 pip install -q -U google-genai
 echo "Google Gemini API client installed successfully"
+# Install GOT-OCR dependencies
 echo "Installing GOT-OCR dependencies..."
+pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.47.0 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
+# Install Python dependencies
+echo "Installing Python dependencies..."
+pip install -e .
 # Create .env file if it doesn't exist
 if [ ! -f .env ]; then

requirements.txt CHANGED Viewed

@@ -1,7 +1,8 @@
-# Core dependencies
 gradio==5.14.0
 grpcio-status==1.70.0
 markdown==3.7
 multiprocess==0.70.16
 openai==1.61.1
 pipdeptree==2.25.0
@@ -9,35 +10,26 @@ pytesseract==0.3.13
 semchunk==2.2.2
 Pillow>=9.0.0
 numpy>=1.21.0
 # Tesseract dependencies
 tesseract==0.1.3
 tesserocr>=2.5.0; platform_system != "Windows"  # Only install on non-Windows systems
 # Additional dependencies for image processing
 opencv-python-headless>=4.5.0  # Headless version for server environments
 pdf2image>=1.16.0  # For PDF processing
 dill==0.3.8  # Downgraded to be compatible with datasets
 # Gemini API client
 google-genai>=0.1.0
 # Environment variables
 python-dotenv>=1.0.0
 # Pin pydantic to resolve compatibility issues with gradio
 pydantic==2.7.1
-# Common dependencies - not pinned to allow resolution
-packaging>=21.0  # For version comparison
 safetensors>=0.4.0
-# Note: The following packages will be installed separately in setup.sh and build.sh
-# to avoid dependency conflicts:
-# - docling
-# - transformers
-# - torch
-# - torchvision
-# - tiktoken
-# - verovio
-# - accelerate

+docling==2.25.0
 gradio==5.14.0
 grpcio-status==1.70.0
 markdown==3.7
+marker-pdf==1.3.5
 multiprocess==0.70.16
 openai==1.61.1
 pipdeptree==2.25.0
 semchunk==2.2.2
 Pillow>=9.0.0
 numpy>=1.21.0
 # Tesseract dependencies
 tesseract==0.1.3
 tesserocr>=2.5.0; platform_system != "Windows"  # Only install on non-Windows systems
 # Additional dependencies for image processing
 opencv-python-headless>=4.5.0  # Headless version for server environments
 pdf2image>=1.16.0  # For PDF processing
 dill==0.3.8  # Downgraded to be compatible with datasets
 # Gemini API client
 google-genai>=0.1.0
 # Environment variables
 python-dotenv>=1.0.0
 # Pin pydantic to resolve compatibility issues with gradio
 pydantic==2.7.1
+# GOT-OCR dependencies
+torch>=2.0.1
+torchvision>=0.15.2
+transformers>=4.37.2,<4.48.0  # Pin to a compatible version for GOT-OCR
+tiktoken>=0.6.0
+verovio>=4.3.1
+accelerate>=0.28.0
 safetensors>=0.4.0
+packaging>=21.0  # For version comparison

setup.sh CHANGED Viewed

@@ -18,17 +18,11 @@ pip install -q -U pytesseract pillow opencv-python-headless pdf2image
 pip install -q -U google-genai
 echo "Python dependencies installed successfully"
-# Install GOT-OCR dependencies first
 echo "Installing GOT-OCR dependencies..."
-pip install -q -U torch==2.0.1 torchvision==0.15.2 --no-deps
-pip install -q -U transformers==4.37.2 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
-# Install docling separately with --no-deps to avoid conflicts
-echo "Installing docling..."
-pip install -q -U docling==2.25.0 --no-deps
-echo "Docling installed successfully"
 # Install tesserocr with pip
 echo "Installing tesserocr..."
 pip install -q -U tesserocr || echo "Failed to install tesserocr with pip, trying with specific compiler flags..."

 pip install -q -U google-genai
 echo "Python dependencies installed successfully"
+# Install GOT-OCR dependencies
 echo "Installing GOT-OCR dependencies..."
+pip install -q -U torch==2.0.1 torchvision==0.15.2 transformers==4.47.0 tiktoken==0.6.0 verovio==4.3.1 accelerate==0.28.0 safetensors==0.4.0
 echo "GOT-OCR dependencies installed successfully"
 # Install tesserocr with pip
 echo "Installing tesserocr..."
 pip install -q -U tesserocr || echo "Failed to install tesserocr with pip, trying with specific compiler flags..."

src/parsers/__init__.py CHANGED Viewed

@@ -2,6 +2,7 @@
 # Import all parsers to ensure they're registered
 from src.parsers.docling_parser import DoclingParser
 from src.parsers.pypdfium_parser import PyPdfiumParser
 from src.parsers.gemini_flash_parser import GeminiFlashParser
 from src.parsers.got_ocr_parser import GotOcrParser

 # Import all parsers to ensure they're registered
 from src.parsers.docling_parser import DoclingParser
+from src.parsers.marker_parser import MarkerParser
 from src.parsers.pypdfium_parser import PyPdfiumParser
 from src.parsers.gemini_flash_parser import GeminiFlashParser
 from src.parsers.got_ocr_parser import GotOcrParser

src/parsers/marker_parser.py ADDED Viewed

	@@ -0,0 +1,61 @@

+from pathlib import Path
+from typing import Dict, List, Optional, Any, Union
+import subprocess
+import tempfile
+import os
+import json
+from src.parsers.parser_interface import DocumentParser
+from src.parsers.parser_registry import ParserRegistry
+from marker.converters.pdf import PdfConverter
+from marker.models import create_model_dict
+from marker.output import text_from_rendered
+class MarkerParser(DocumentParser):  # DocumentParser implementation that delegates conversion to marker's PdfConverter
+    """Parser implementation using Marker."""
+    @classmethod
+    def get_name(cls) -> str:  # returns the fixed name string "Marker" for this parser
+        return "Marker"
+    @classmethod
+    def get_supported_ocr_methods(cls) -> List[Dict[str, Any]]:  # two OCR modes, each described by id / name / default_params
+        return [
+            {
+                "id": "no_ocr",
+                "name": "No OCR",
+                "default_params": {}
+            },
+            {
+                "id": "force_ocr",  # parse() compares its ocr_method argument against this id
+                "name": "Force OCR",
+                "default_params": {}
+            }
+        ]
+    def parse(self, file_path: Union[str, Path], ocr_method: Optional[str] = None, **kwargs) -> str:  # of kwargs, only "output_format" is read here
+        """Parse a document using Marker."""
+        force_ocr = ocr_method == "force_ocr"  # any other value, including None, leaves force_ocr False
+        converter = PdfConverter(  # NOTE(review): the converter and create_model_dict() are rebuilt on every parse() call; consider caching if model loading is expensive - TODO confirm
+            artifact_dict=create_model_dict(),
+            config={"force_ocr": force_ocr}
+        )
+        rendered = converter(str(file_path))  # the path is always handed over as a plain string
+        content, _, _ = text_from_rendered(rendered)  # only the first of the three returned values is used
+        # Format the content based on the requested output format (defaults to "markdown"; comparison is case-insensitive)
+        output_format = kwargs.get("output_format", "markdown")
+        if output_format.lower() == "json":
+            return json.dumps({"content": content}, ensure_ascii=False, indent=2)
+        elif output_format.lower() == "text":
+            return content.replace("#", "").replace("*", "").replace("_", "")  # NOTE(review): strips every "#", "*" and "_", including ones that belong to the text itself (e.g. snake_case names)
+        elif output_format.lower() == "document_tags":
+            return f"<doc>\n{content}\n</doc>"
+        else:
+            return content  # "markdown" and any unrecognised format return the content unchanged
+# Register the parser with the registry (module-level call, so it runs when this module is imported)
+ParserRegistry.register(MarkerParser)

src/services/docling_chat.py ADDED Viewed

	@@ -0,0 +1,29 @@

+import openai
+import os
+# Load API key from environment variable (module-level, so this runs once when the module is imported)
+openai.api_key = os.getenv("OPENAI_API_KEY")
+# Check if API key is available and print a message if not; a missing key only warns here and does not stop the import
+if not openai.api_key:
+    print("Warning: OPENAI_API_KEY environment variable not found. Chat functionality may not work.")
+def chat_with_document(message, history, document_text_state):  # answers `message` about the document text; returns the updated history twice
+    history = history or []  # None or an empty history starts a fresh list
+    history.append({"role": "user", "content": message})  # NOTE(review): a non-empty list passed in by the caller is appended to in place
+    context = f"Document: {document_text_state}\n\nUser: {message}"
+    # Any exception from the API call is caught below and turned into an error reply instead of propagating
+    try:
+        response = openai.chat.completions.create(
+            model="gpt-4o-2024-08-06",
+            messages=[{"role": "system", "content": context}] + history  # NOTE(review): `message` is sent twice - inside the system context and as the last history entry
+        )
+        reply = response.choices[0].message.content
+    except Exception as e:
+        reply = f"Error: Could not generate response. Please check your OpenAI API key. Details: {str(e)}"  # NOTE(review): every failure is reported as a possible API-key problem, whatever the actual cause
+        print(f"OpenAI API error: {str(e)}")
+    history.append({"role": "assistant", "content": reply})  # the reply (or the error text) is recorded as the assistant turn
+    return history, history

src/ui/ui.py CHANGED Viewed

@@ -5,6 +5,7 @@ import time
 import logging
 from pathlib import Path
 from src.core.converter import convert_file, set_cancellation_flag, is_conversion_in_progress
 from src.parsers.parser_registry import ParserRegistry
 # Configure logging
@@ -168,44 +169,52 @@ def create_ui():
         # State to store the output format (fixed to Markdown)
         output_format_state = gr.State("Markdown")
-        # File input first
-        file_input = gr.File(label="Upload PDF", type="filepath")
-        # Provider and OCR options below the file input
-        with gr.Row(elem_classes=["provider-options-row"]):
-            with gr.Column(scale=1):
-                parser_names = ParserRegistry.get_parser_names()
-                default_parser = parser_names[0] if parser_names else "PyPdfium"
-                provider_dropdown = gr.Dropdown(
-                    label="Provider",
-                    choices=parser_names,
-                    value=default_parser,
-                    interactive=True
-                )
-            with gr.Column(scale=1):
-                default_ocr_options = ParserRegistry.get_ocr_options(default_parser)
-                default_ocr = default_ocr_options[0] if default_ocr_options else "No OCR"
-                ocr_dropdown = gr.Dropdown(
-                    label="OCR Options",
-                    choices=default_ocr_options,
-                    value=default_ocr,
-                    interactive=True
                 )
-        # Simple output container with just one scrollbar
-        file_display = gr.HTML(
-            value="<div class='output-container'></div>",
-            label="Converted Content"
-        )
-        file_download = gr.File(label="Download File")
-        # Processing controls row
-        with gr.Row(elem_classes=["processing-controls"]):
-            convert_button = gr.Button("Convert", variant="primary")
-            cancel_button = gr.Button("Cancel", variant="stop", visible=False)
         # Event handlers
         provider_dropdown.change(
@@ -260,6 +269,24 @@ def create_ui():
             queue=False  # Execute immediately
         )
     return demo

 import logging
 from pathlib import Path
 from src.core.converter import convert_file, set_cancellation_flag, is_conversion_in_progress
+from src.services.docling_chat import chat_with_document
 from src.parsers.parser_registry import ParserRegistry
 # Configure logging
         # State to store the output format (fixed to Markdown)
         output_format_state = gr.State("Markdown")
+        with gr.Tabs():
+            with gr.Tab("Upload and Convert"):
+                # File input first
+                file_input = gr.File(label="Upload PDF", type="filepath")
+                # Provider and OCR options below the file input
+                with gr.Row(elem_classes=["provider-options-row"]):
+                    with gr.Column(scale=1):
+                        parser_names = ParserRegistry.get_parser_names()
+                        default_parser = parser_names[0] if parser_names else "PyPdfium"
+                        provider_dropdown = gr.Dropdown(
+                            label="Provider",
+                            choices=parser_names,
+                            value=default_parser,
+                            interactive=True
+                        )
+                    with gr.Column(scale=1):
+                        default_ocr_options = ParserRegistry.get_ocr_options(default_parser)
+                        default_ocr = default_ocr_options[0] if default_ocr_options else "No OCR"
+                        ocr_dropdown = gr.Dropdown(
+                            label="OCR Options",
+                            choices=default_ocr_options,
+                            value=default_ocr,
+                            interactive=True
+                        )
+                # Simple output container with just one scrollbar
+                file_display = gr.HTML(
+                    value="<div class='output-container'></div>",
+                    label="Converted Content"
                 )
+                file_download = gr.File(label="Download File")
+                # Processing controls row
+                with gr.Row(elem_classes=["processing-controls"]):
+                    convert_button = gr.Button("Convert", variant="primary")
+                    cancel_button = gr.Button("Cancel", variant="stop", visible=False)
+            with gr.Tab("Chat with Document"):
+                document_text_state = gr.State("")
+                chatbot = gr.Chatbot(label="Chat", type="messages")
+                text_input = gr.Textbox(placeholder="Type here...")
+                clear_btn = gr.Button("Clear")
         # Event handlers
         provider_dropdown.change(
             queue=False  # Execute immediately
         )
+        file_display.change(
+            lambda text: text,
+            inputs=[file_display],
+            outputs=[document_text_state]
+        )
+        text_input.submit(
+            fn=chat_with_document,
+            inputs=[text_input, chatbot, document_text_state],
+            outputs=[chatbot, chatbot]
+        )
+        clear_btn.click(
+            lambda: ([], []),
+            None,
+            [chatbot, chatbot]
+        )
     return demo