AnseMin committed on
Commit
05abb4e
·
1 Parent(s): 46bc24b

fix: add Tesseract configuration and startup script

Browse files
Files changed (3) hide show
  1. README.md +1 -1
  2. app.py +7 -0
  3. setup.sh +35 -0
README.md CHANGED
@@ -1,4 +1,3 @@
1
-
2
  ---
3
  title: Doc2Md
4
  emoji: 📄
@@ -8,6 +7,7 @@ sdk: gradio
8
  sdk_version: 5.14.0
9
  app_file: app.py
10
  build_script: build.sh
 
11
  pinned: false
12
  ---
13
 
 
 
1
  ---
2
  title: Doc2Md
3
  emoji: 📄
 
7
  sdk_version: 5.14.0
8
  app_file: app.py
9
  build_script: build.sh
10
+ startup_script: setup.sh
11
  pinned: false
12
  ---
13
 
app.py CHANGED
@@ -1,6 +1,13 @@
1
  import sys
2
  import os
3
 
 
 
 
 
 
 
 
4
  # Get the current directory
5
  current_dir = os.path.dirname(os.path.abspath(__file__))
6
 
 
1
  import sys
2
  import os
3
 
4
+ # Set TESSDATA_PREFIX if not already set
5
+ if not os.environ.get('TESSDATA_PREFIX'):
6
+ tessdata_dir = "/usr/share/tesseract-ocr/4.00/tessdata"
7
+ if os.path.exists(tessdata_dir):
8
+ os.environ['TESSDATA_PREFIX'] = tessdata_dir
9
+ print(f"Set TESSDATA_PREFIX to {tessdata_dir}")
10
+
11
  # Get the current directory
12
  current_dir = os.path.dirname(os.path.abspath(__file__))
13
 
setup.sh ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # Exit on error
4
+ set -e
5
+
6
+ # Create tessdata directory if it doesn't exist
7
+ TESSDATA_DIR="/usr/share/tesseract-ocr/4.00/tessdata"
8
+ mkdir -p "$TESSDATA_DIR"
9
+
10
+ # Download traineddata files if they don't exist
11
+ if [ ! -f "$TESSDATA_DIR/eng.traineddata" ]; then
12
+ echo "Downloading eng.traineddata..."
13
+ wget -O "$TESSDATA_DIR/eng.traineddata" "https://github.com/tesseract-ocr/tessdata/raw/main/eng.traineddata"
14
+ fi
15
+
16
+ if [ ! -f "$TESSDATA_DIR/osd.traineddata" ]; then
17
+ echo "Downloading osd.traineddata..."
18
+ wget -O "$TESSDATA_DIR/osd.traineddata" "https://github.com/tesseract-ocr/tessdata/raw/main/osd.traineddata"
19
+ fi
20
+
21
+ # Set TESSDATA_PREFIX
22
+ export TESSDATA_PREFIX="$TESSDATA_DIR"
23
+ echo "TESSDATA_PREFIX=${TESSDATA_PREFIX}" >> /etc/environment
24
+
25
+ # Test Tesseract functionality
26
+ echo "Testing Tesseract..."
27
+ echo "Hello World" > test.png
28
+ tesseract test.png stdout
29
+ rm test.png
30
+
31
+ # Print Tesseract version and available languages
32
+ echo "Tesseract version:"
33
+ tesseract --version
34
+ echo "Available languages:"
35
+ tesseract --list-langs