"""Extract the fields of an STNK document image with a vision-language chat model.

The prompt asks for the fields of a "Surat Tanda Nomor Kendaraan Bermotor"
document and for the answer as JSON together with per-key reasoning.
"""

import os

import numpy as np
import torch
import torchvision.transforms as T
from decord import VideoReader, cpu
from PIL import Image
from torchvision.transforms.functional import InterpolationMode
from transformers import AutoModel, AutoTokenizer

IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)

# Prompt sent verbatim to the model.
# NOTE(review): every value in the JSON template starts with " OR empty OR null";
# a placeholder for the extracted value may have been lost before "OR" -- confirm.
_STNK_EXTRACTION_PROMPT = """\n**Instruction:** Analyze the image to extract values for the specified keys. Use the detailed descriptions below to determine the correct value for each key. Handle missing or ambiguous data as instructed.

---

### Keys and Descriptions

1. **`surat_tanda_nomor_kendaraan_bermotor`**
   - **Extract**: The value of the field labeled as "Surat Tanda Nomor Kendaraan Bermotor" and this is titel.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

2. **`tempat_tanggal`**
   - **Extract**: The location and date from the top right corner of the document.
   - **Note**: This field does not have a title such as "Tempat - Tanggal."
   - **Format**: `"CITY, DD MMM YYYY"` (e.g., `"JAKARTA, 07 DES 2018"`).
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

3. **`no`**
   - **Extract**: The value in the "NO" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

4. **`nomor_registrasi`**
   - **Extract**: The "NOMOR REGISTRASI" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

5. **`nama_pemilik`**
   - **Extract**: The "NAMA PEMILIK" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

6. **`alamat`**
   - **Extract**: The "ALAMAT" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

7. **`merk`**
   - **Extract**: The "MERK" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

8. **`type`**
   - **Extract**: The "TYPE" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

9. **`jenis`**
   - **Extract**: The "JENIS" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

10. **`model`**
   - **Extract**: The "MODEL" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

11. **`tahun_pembuatan`**
   - **Extract**: The "TAHUN PEMBUATAN" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

12. **`isi_silinder_daya_listrik`**
   - **Extract**: The "ISI SILINDER / DAYA LISTRIK" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

13. **`nomor_rangka`**
   - **Extract**: The "NOMOR RANGKA" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

14. **`nomor_mesin`**
   - **Extract**: The "NOMOR MESIN" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

15. **`nik_tdp_nie_kitas_kitap`**
   - **Extract**: The "NIK/TDP/NIE/KITAS/KITAP" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

16. **`warna`**
   - **Extract**: The "WARNA" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

17. **`bahan_bakar`**
   - **Extract**: The "BAHAN BAKAR" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

18. **`warna_tnkb`**
   - **Extract**: The "WARNA TNKB" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

19. **`tahun_registrasi`**
   - **Extract**: The "TAHUN REGISTRASI" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

20. **`nomor_bpkb`**
   - **Extract**: The "NOMOR BPKB" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

21. **`kode_lokasi`**
   - **Extract**: The "KODE LOKASI" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

22. **`no_urut_pendaftaran`**
   - **Extract**: The "NO URUT PENDAFTARAN" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

23. **`berlaku_sampai`**
   - **Extract**: The "BERLAKU SAMPAI" field.
   - **If the Field is Absent**: `"null"`
   - **If the Field is Present but No Value is Provided**: `"empty"`

24. **`qr_code`**
   - **Extract**: The value encoded in the QR code, if present.
   - **If No QR Code is Found**: `"null"`
   - **If a QR Code is Present but Contains No Data**: `"empty"`

---

### Output Format

```json
{
  "surat_tanda_nomor_kendaraan_bermotor": " OR empty OR null",
  "tempat_tanggal": " OR empty OR null",
  "no": " OR empty OR null",
  "nomor_registrasi": " OR empty OR null",
  "nama_pemilik": " OR empty OR null",
  "alamat": " OR empty OR null",
  "merk": " OR empty OR null",
  "type": " OR empty OR null",
  "jenis": " OR empty OR null",
  "model": " OR empty OR null",
  "tahun_pembuatan": " OR empty OR null",
  "isi_silinder_daya_listrik": " OR empty OR null",
  "nomor_rangka": " OR empty OR null",
  "nomor_mesin": " OR empty OR null",
  "nik_tdp_nie_kitas_kitap": " OR empty OR null",
  "warna": " OR empty OR null",
  "bahan_bakar": " OR empty OR null",
  "warna_tnkb": " OR empty OR null",
  "tahun_registrasi": " OR empty OR null",
  "nomor_bpkb": " OR empty OR null",
  "kode_lokasi": " OR empty OR null",
  "no_urut_pendaftaran": " OR empty OR null",
  "berlaku_sampai": " OR empty OR null",
  "qr_code" : " OR empty OR null"
}
```

### Reasoning Process
For each key, explain your reasoning:
Indicate whether the field was present.
Justify the extracted value or the use of "null" or "empty" as per the conditions.

Return Output:
Generate a JSON object:
{
  "reasoning": "reasoning for each key",
  "output JSON": "key-value pairs"
}

---
"""


def build_transform(input_size):
    """Build a resize-and-normalize pipeline for square model inputs.

    Args:
        input_size: Target height and width, in pixels, of the resized image.

    Returns:
        A ``torchvision.transforms.Compose`` that converts the image to RGB
        when needed, resizes it to ``(input_size, input_size)`` with bicubic
        interpolation, converts it to a tensor and normalizes it with
        ``IMAGENET_MEAN`` / ``IMAGENET_STD``.
    """
    return T.Compose([
        T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),
        T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])


def preprocess_for_model(image):
    """Prepares an image for the OpenGVLab model.

    Args:
        image: A PIL image, or any other input ``T.ToTensor`` accepts.

    Returns:
        The normalized image tensor with a leading batch dimension of size 1.
        The image is not resized.
    """
    # Normalize below uses three-channel statistics, so PIL images in other
    # modes (RGBA, L, P, ...) must be brought to RGB first.
    if isinstance(image, Image.Image) and image.mode != 'RGB':
        image = image.convert('RGB')

    to_normalized_tensor = T.Compose([
        T.ToTensor(),
        T.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
    ])
    return to_normalized_tensor(image).unsqueeze(0)  # add batch dimension


def load_image(image_file):
    """Load an image and turn it into a batched, normalized tensor.

    Args:
        image_file: A filesystem path (``str`` or ``os.PathLike``) to an image,
            or an already-loaded image accepted by ``preprocess_for_model``.

    Returns:
        The tensor produced by ``preprocess_for_model``.
    """
    if isinstance(image_file, (str, os.PathLike)):
        # convert() returns a fully loaded copy, so the file handle can be
        # released as soon as the with-block exits.
        with Image.open(image_file) as opened:
            image = opened.convert('RGB')
    else:
        image = image_file
    return preprocess_for_model(image)


def main(image_path, model, tokenizer):
    """Ask the model to extract the STNK fields from one image.

    Args:
        image_path: Path to the image, or an already-loaded image.
        model: Object exposing ``chat(tokenizer, pixel_values, question,
            generation_config)``.
        tokenizer: Tokenizer handed through to ``model.chat``.

    Returns:
        A transcript string of the form
        ``"User: <prompt>\\nAssistant: <response>"``.
    """
    pixel_values = load_image(image_path).to(torch.float32).to("cpu")
    generation_config = dict(max_new_tokens=1024, do_sample=True)
    question = _STNK_EXTRACTION_PROMPT

    print("Before requesting model................................................................................")
    response = model.chat(tokenizer, pixel_values, question, generation_config)
    print("After requesting model................................................................................",response)
    return (f'User: {question}\nAssistant: {response}')