Spaces:

gagan3012
/

QalamV0.2

Runtime error

App Files Files Community

gagan3012 committed on Oct 26, 2023

Commit

20757be

1 Parent(s): b524f07

Update app.py

Browse files

Files changed (1) hide show

app.py +50 -30

app.py CHANGED Viewed

@@ -9,7 +9,21 @@ from io import BytesIO
 import openai
 import requests
 from nougat.dataset.rasterize import rasterize_paper
 def predict_arabic(img, model_name="UBC-NLP/Qalam"):
   # if img is None:
@@ -61,22 +75,25 @@ def predict_english(img, model_name="naver-clova-ix/donut-base-finetuned-cord-v2
     sequence = re.sub(r"<.*?>", "", sequence).strip()
     return sequence
 def predict_nougat(img, model_name="facebook/nougat-small"):
     # Transcribe one page image: load the processor and model named by
     # `model_name`, convert `img` to RGB, run generation on the CPU and
     # return the first decoded sequence.
-    device="cpu"
     processor = NougatProcessor.from_pretrained(model_name)
     model = VisionEncoderDecoderModel.from_pretrained(model_name)
     image = img.convert("RGB")
-    pixel_values = processor(image, return_tensors="pt", data_format="channels_first").pixel_values
     # generate transcription (capped at 1500 new tokens via max_new_tokens)
     outputs = model.generate(
-      pixel_values.to(device),
-      min_length=1,
-      max_new_tokens=1500,
-      bad_words_ids=[[processor.tokenizer.unk_token_id]],
     )
-    page_sequence = processor.batch_decode(outputs, skip_special_tokens=True)[0]
     # Markdown post-processing is currently disabled:
     # page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False)
     return page_sequence
@@ -91,20 +108,23 @@ def inference_nougat(pdf_file, pdf_link):
     else:
         file_name = pdf_file.name
         pdf_name = pdf_file.name.split('/')[-1].split('.')[0]
     images = rasterize_paper(file_name, return_pil=True)
     sequence = ""
     # infer for every page and concat
     for image in images:
         sequence += predict_nougat(image)
-    content = sequence.replace(r'\(', '$').replace(r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
     return content
 def predict_tesseract(img):
     """Run Tesseract OCR on an image and return the recognised text.

     Args:
         img: Anything ``Image.open`` accepts (a path or a file-like object).

     Returns:
         The string produced by ``pytesseract.image_to_string``.
     """
     # Open the image in a context manager so the underlying file handle is
     # released as soon as OCR finishes instead of waiting for garbage
     # collection.
     with Image.open(img) as image:
         text = pytesseract.image_to_string(image)
     return text
 st.set_option('deprecation.showfileUploaderEncoding', False)
 st.set_page_config(
@@ -123,7 +143,8 @@ st.set_page_config(
 st.header("Qalam: A Multilingual OCR System")
 st.sidebar.header("Configuration and Image Upload")
 st.sidebar.subheader("Adjust Image Enhancement Options")
-img_file = st.sidebar.file_uploader(label='Upload a file', type=['png', 'jpg', "pdf"])
 input_file = st.sidebar.text_input("Enter the file URL")
 realtime_update = st.sidebar.checkbox(label="Update in Real Time", value=True)
 # box_color = st.sidebar.color_picker(label="Box Color", value='#0000FF')
@@ -145,12 +166,11 @@ Models = {
     "Korean": "Donut",
     "Chinese": "Donut"
 }
-st.sidebar.markdown(f"### Selected Model: {Models[Lng]}")
-if img_file:
     cropped_img = Image.open(img_file)
     if not realtime_update:
         st.write("Double click to save crop")
@@ -160,19 +180,19 @@ if img_file:
     #     st.subheader("Input: Upload and Crop Your Image")
     # # Get a cropped image from the frontend
     #     cropped_img = st_cropper(
-    #         img,
-    #         realtime_update=realtime_update,
     #         box_color="#FF0000",
     #         aspect_ratio=aspect_ratio,
     #         should_resize_image=True,
     #     )
     # with col2:
     # # Manipulate cropped image at will
     #     st.subheader("Output: Preview and Analyze")
     #     # _ = cropped_img.thumbnail((150, 150))
     #     st.image(cropped_img)
     button = st.sidebar.button("Run OCR")
     if button:
@@ -191,9 +211,10 @@ if img_file:
         st.subheader(f"OCR Results for {Lng}")
         st.write(ocr_text)
         text_file = BytesIO(ocr_text.encode())
-        st.download_button('Download Text', text_file, file_name='ocr_text.txt')
-elif input_file is not "":
     button = st.sidebar.button("Run OCR")
     if button:
@@ -202,26 +223,26 @@ elif input_file is not "":
             st.subheader(f"OCR Results for the PDF file")
             st.write(ocr_text)
             text_file = BytesIO(ocr_text.encode())
-            st.download_button('Download Text', text_file, file_name='ocr_text.txt')
         # openai.api_key = ""
         # if "openai_model" not in st.session_state:
         #     st.session_state["openai_model"] = "gpt-3.5-turbo"
         # if "messages" not in st.session_state:
         #     st.session_state.messages = []
         # for message in st.session_state.messages:
         #     with st.chat_message(message["role"]):
         #         st.markdown(message["content"])
         # if prompt := st.chat_input("How can I help?"):
         #     st.session_state.messages.append({"role": "user", "content": ocr_text + prompt})
         #     with st.chat_message("user"):
         #         st.markdown(prompt)
         #     with st.chat_message("assistant"):
         #         message_placeholder = st.empty()
         #         full_response = ""
@@ -237,4 +258,3 @@ elif input_file is not "":
         #             message_placeholder.markdown(full_response + "▌")
         #         message_placeholder.markdown(full_response)
         #     st.session_state.messages.append({"role": "assistant", "content": full_response})

 import openai
 import requests
 from nougat.dataset.rasterize import rasterize_paper
+import uuid
+import os
def get_pdf(pdf_link, timeout=30):
    """Download a PDF into the current working directory.

    Args:
        pdf_link: URL of the PDF to fetch.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` can block indefinitely.

    Returns:
        Path of the freshly written file; the name embeds a random UUID so
        repeated downloads never overwrite each other.

    Raises:
        RuntimeError: If the server answers with anything other than
            HTTP 200. No file is written in that case, so returning the
            path would hand the caller a file that does not exist.
    """
    unique_filename = f"{os.getcwd()}/downloaded_paper_{uuid.uuid4().hex}.pdf"
    response = requests.get(pdf_link, timeout=timeout)
    if response.status_code != 200:
        raise RuntimeError(
            f"Failed to download the PDF. "
            f"(HTTP {response.status_code} for {pdf_link})"
        )
    with open(unique_filename, 'wb') as pdf_file:
        pdf_file.write(response.content)
    print("PDF downloaded successfully.")
    return unique_filename
 def predict_arabic(img, model_name="UBC-NLP/Qalam"):
   # if img is None:
     sequence = re.sub(r"<.*?>", "", sequence).strip()
     return sequence
 def predict_nougat(img, model_name="facebook/nougat-small"):
     """Transcribe a single page image with a Nougat checkpoint.

     Args:
         img: Page image; it must provide ``convert("RGB")``.
         model_name: Checkpoint identifier passed to ``from_pretrained`` for
             both the processor and the model.

     Returns:
         The first decoded sequence, with special tokens stripped.
     """
     device = "cuda" if torch.cuda.is_available() else "cpu"
     # NOTE(review): the processor and model are re-loaded on every call;
     # callers that loop over pages pay that cost once per page.
     processor = NougatProcessor.from_pretrained(model_name)
     model = VisionEncoderDecoderModel.from_pretrained(model_name)
     # The inputs are moved to `device` below, so the weights have to live on
     # the same device; otherwise generation fails with a device mismatch
     # whenever CUDA is available.
     model.to(device)
     image = img.convert("RGB")
     pixel_values = processor(image, return_tensors="pt",
                              data_format="channels_first").pixel_values
     # Generate the transcription, capped at 1500 new tokens; the unknown
     # token is banned from the output.
     outputs = model.generate(
         pixel_values.to(device),
         min_length=1,
         max_new_tokens=1500,
         bad_words_ids=[[processor.tokenizer.unk_token_id]],
     )
     page_sequence = processor.batch_decode(
         outputs, skip_special_tokens=True)[0]
     # Markdown post-processing is currently disabled:
     # page_sequence = processor.post_process_generation(page_sequence, fix_markdown=False)
     return page_sequence
     else:
         file_name = pdf_file.name
         pdf_name = pdf_file.name.split('/')[-1].split('.')[0]
     images = rasterize_paper(file_name, return_pil=True)
     sequence = ""
     # infer for every page and concat
     for image in images:
         sequence += predict_nougat(image)
+    content = sequence.replace(r'\(', '$').replace(
+        r'\)', '$').replace(r'\[', '$$').replace(r'\]', '$$')
     return content
 def predict_tesseract(img):
     """Run Tesseract OCR on an image and return the recognised text.

     Args:
         img: Anything ``Image.open`` accepts (a path or a file-like object).

     Returns:
         The string produced by ``pytesseract.image_to_string``.
     """
     # Open the image in a context manager so the underlying file handle is
     # released as soon as OCR finishes instead of waiting for garbage
     # collection.
     with Image.open(img) as image:
         text = pytesseract.image_to_string(image)
     return text
 st.set_option('deprecation.showfileUploaderEncoding', False)
 st.set_page_config(
 st.header("Qalam: A Multilingual OCR System")
 st.sidebar.header("Configuration and Image Upload")
 st.sidebar.subheader("Adjust Image Enhancement Options")
+img_file = st.sidebar.file_uploader(
+    label='Upload a file', type=['png', 'jpg', "pdf"])
 input_file = st.sidebar.text_input("Enter the file URL")
 realtime_update = st.sidebar.checkbox(label="Update in Real Time", value=True)
 # box_color = st.sidebar.color_picker(label="Box Color", value='#0000FF')
     "Korean": "Donut",
     "Chinese": "Donut"
 }
+st.sidebar.markdown(f"### Selected Model: {Models[Lng]}")
+if not img_file.endswith(".pdf"):
     cropped_img = Image.open(img_file)
     if not realtime_update:
         st.write("Double click to save crop")
     #     st.subheader("Input: Upload and Crop Your Image")
     # # Get a cropped image from the frontend
     #     cropped_img = st_cropper(
+    #         img,
+    #         realtime_update=realtime_update,
     #         box_color="#FF0000",
     #         aspect_ratio=aspect_ratio,
     #         should_resize_image=True,
     #     )
     # with col2:
     # # Manipulate cropped image at will
     #     st.subheader("Output: Preview and Analyze")
     #     # _ = cropped_img.thumbnail((150, 150))
     #     st.image(cropped_img)
     button = st.sidebar.button("Run OCR")
     if button:
         st.subheader(f"OCR Results for {Lng}")
         st.write(ocr_text)
         text_file = BytesIO(ocr_text.encode())
+        st.download_button('Download Text', text_file,
+                           file_name='ocr_text.txt')
+elif input_file is not "" or img_file.endswith(".pdf"):
     button = st.sidebar.button("Run OCR")
     if button:
             st.subheader(f"OCR Results for the PDF file")
             st.write(ocr_text)
             text_file = BytesIO(ocr_text.encode())
+            st.download_button('Download Text', text_file,
+                               file_name='ocr_text.txt')
         # openai.api_key = ""
         # if "openai_model" not in st.session_state:
         #     st.session_state["openai_model"] = "gpt-3.5-turbo"
         # if "messages" not in st.session_state:
         #     st.session_state.messages = []
         # for message in st.session_state.messages:
         #     with st.chat_message(message["role"]):
         #         st.markdown(message["content"])
         # if prompt := st.chat_input("How can I help?"):
         #     st.session_state.messages.append({"role": "user", "content": ocr_text + prompt})
         #     with st.chat_message("user"):
         #         st.markdown(prompt)
         #     with st.chat_message("assistant"):
         #         message_placeholder = st.empty()
         #         full_response = ""
         #             message_placeholder.markdown(full_response + "▌")
         #         message_placeholder.markdown(full_response)
         #     st.session_state.messages.append({"role": "assistant", "content": full_response})