Spanicin committed on
Commit
2992d69
·
verified ·
1 Parent(s): 9dcceec

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  from pdf2image import convert_from_bytes, convert_from_path
2
  from PIL import Image
3
  import numpy as np
@@ -29,7 +30,7 @@ data_ready = False # Flag to check if extraction is complete
29
  lock = threading.Lock() # Lock to manage concurrent access
30
  extracted_texts = {}
31
 
32
-
33
  ocr_tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
34
  ocr_model = AutoModel.from_pretrained(
35
  'ucaslcl/GOT-OCR2_0', trust_remote_code=True,
 
1
+ import os
2
  from pdf2image import convert_from_bytes, convert_from_path
3
  from PIL import Image
4
  import numpy as np
 
30
  lock = threading.Lock() # Lock to manage concurrent access
31
  extracted_texts = {}
32
 
33
+ os.environ["HF_HOME"] = "/app/cache"
34
  ocr_tokenizer = AutoTokenizer.from_pretrained('ucaslcl/GOT-OCR2_0', trust_remote_code=True)
35
  ocr_model = AutoModel.from_pretrained(
36
  'ucaslcl/GOT-OCR2_0', trust_remote_code=True,