Spaces:

MawaredHR
/

Vision_tester

Running

Daemontatox committed on Jan 27

Commit

2ebf628

verified ·

1 Parent(s): 2ba178a

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer , AutoModel,Qwen2VLForConditionalGeneration, AutoModelForImageTextToText
 from qwen_vl_utils import process_vision_info
 from PIL import Image
 import requests
@@ -18,8 +18,8 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 # Load model and processor
-ckpt ="mistral-community/pixtral-12b"
-model = AutoModelForImageTextToText.from_pretrained(ckpt, torch_dtype=torch.bfloat16,trust_remote_code=True).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt,trust_remote_code=True)
 class DocumentState:
@@ -49,7 +49,7 @@ def process_pdf_file(file_path):
                 if page_text.strip():
                     text += f"Page {page_num + 1}:\n{page_text}\n\n"
-                zoom = 2.5
                 mat = fitz.Matrix(zoom, zoom)
                 pix = page.get_pixmap(matrix=mat, alpha=False)
                 img_data = pix.tobytes("png")

+from transformers import MllamaForConditionalGeneration, AutoProcessor, TextIteratorStreamer , AutoModel,Qwen2VLForConditionalGeneration, AutoModelForImageTextToText , Qwen2_5_VLForConditionalGeneration
 from qwen_vl_utils import process_vision_info
 from PIL import Image
 import requests
 logger = logging.getLogger(__name__)
 # Load model and processor
+ckpt ="Qwen/Qwen2.5-VL-7B-Instruct"
+model = Qwen2_5_VLForConditionalGeneration.from_pretrained(ckpt, torch_dtype=torch.bfloat16,trust_remote_code=True).to("cuda")
 processor = AutoProcessor.from_pretrained(ckpt,trust_remote_code=True)
 class DocumentState:
                 if page_text.strip():
                     text += f"Page {page_num + 1}:\n{page_text}\n\n"
+                zoom = 3
                 mat = fitz.Matrix(zoom, zoom)
                 pix = page.get_pixmap(matrix=mat, alpha=False)
                 img_data = pix.tobytes("png")