gagan3012 committed
Commit
3eb719d
1 Parent(s): af367a2

Update app.py

Files changed (1)
  1. app.py +21 -3
app.py CHANGED
@@ -1,7 +1,7 @@
 import streamlit as st
 from streamlit_cropper import st_cropper
 from PIL import Image
-from transformers import TrOCRProcessor, VisionEncoderDecoderModel, DonutProcessor
+from transformers import TrOCRProcessor, VisionEncoderDecoderModel, DonutProcessor, NougatProcessor
 import torch
 import re
 import pytesseract
@@ -58,6 +58,24 @@ def predict_english(img, model_name="naver-clova-ix/donut-base-finetuned-cord-v2
     sequence = re.sub(r"<.*?>", "", sequence).strip()
     return sequence
 
+def predict_nougat(img, model_name="facebook/nougat-small"):
+    processor = NougatProcessor.from_pretrained(model_name)
+    model = VisionEncoderDecoderModel.from_pretrained(model_name)
+    image = img.convert("RGB")
+    pixel_values = processor(image, return_tensors="pt").pixel_values
+
+    # autoregressively generate the transcription (up to 1500 new tokens)
+    outputs = model.generate(
+        pixel_values.to(device),
+        min_length=1,
+        max_new_tokens=1500,
+        bad_words_ids=[[processor.tokenizer.unk_token_id]],
+    )
+
+    page_sequence = processor.batch_decode(outputs, skip_special_tokens=True)[0]
+    page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False)
+    return page_sequence
+
 def predict_tesseract(img):
     text = pytesseract.image_to_string(Image.open(img))
     return text
@@ -96,7 +114,7 @@ Lng = st.sidebar.selectbox(label="Language", options=[
 
 Models = {
     "Arabic": "Qalam",
-    "English": "Donut",
+    "English": "Nougat",
     "French": "Tesseract",
     "Korean": "Donut",
     "Chinese": "Donut"
@@ -138,7 +156,7 @@ if img_file:
         text_file = BytesIO(ocr_text.encode())
         st.download_button('Download Text', text_file, file_name='ocr_text.txt')
     elif Lng == "English":
-        ocr_text = predict_english(cropped_img)
+        ocr_text = predict_nougat(cropped_img)
         st.subheader(f"OCR Results for {Lng}")
         st.write(ocr_text)
         text_file = BytesIO(ocr_text.encode())
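
For reference, a minimal standalone sketch of the Nougat path added in this commit, runnable outside Streamlit. The checkpoint name and generation settings match the diff; the `device` setup and the example image path are assumptions (app.py defines `device` elsewhere):

import torch
from PIL import Image
from transformers import NougatProcessor, VisionEncoderDecoderModel

# Assumption: app.py defines `device` elsewhere; recreate it for a standalone run.
device = "cuda" if torch.cuda.is_available() else "cpu"

def predict_nougat(img, model_name="facebook/nougat-small"):
    processor = NougatProcessor.from_pretrained(model_name)
    model = VisionEncoderDecoderModel.from_pretrained(model_name).to(device)
    pixel_values = processor(img.convert("RGB"), return_tensors="pt").pixel_values
    outputs = model.generate(
        pixel_values.to(device),
        min_length=1,
        max_new_tokens=1500,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
    )
    text = processor.batch_decode(outputs, skip_special_tokens=True)[0]
    return processor.post_process_generation(text, fix_markdown=False)

if __name__ == "__main__":
    # "sample_page.png" is a hypothetical input image, not part of the repo.
    print(predict_nougat(Image.open("sample_page.png")))

Unlike the diffed function, this sketch also moves the model to `device`, so pixel values and weights end up on the same device when a GPU is available; the committed code only moves the pixel values.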