import { AutoProcessor, Qwen2VLForConditionalGeneration, RawImage } from "@huggingface/transformers";
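// Example image shown when the user clicks the "example" button.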
const EXAMPLE_URL = "https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/transformers/tasks/car.jpg";
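// UI elements. These ids (including 'container', used to grey out the UI
// while the model loads) are assumed to be defined in the accompanying HTML.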
const container = document.getElementById('container');
const exampleButton = document.getElementById('example');
const promptInput = document.querySelector('input[type="text"]');
const status = document.getElementById('status');
const thumb = document.getElementById('thumb');
const uploadInput = document.getElementById('upload');
const form = document.getElementById('form');
const output = document.getElementById('llm-output');
let currentImage = '';
let currentQuery = '';
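// ONNX export of Qwen2-VL-2B-Instruct, runnable in-browser via Transformers.js.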
const model_id = "onnx-community/Qwen2-VL-2B-Instruct";
let processor;
let model;
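// Download the processor and model once at startup, then enable the form.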
async function initializeSessions() {
status.textContent = 'Loading model...';
container.classList.add('disabled');
processor = await AutoProcessor.from_pretrained(model_id);
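  // 'q4f16' selects the 4-bit quantized (fp16) ONNX weights; 'webgpu' requires
  // a WebGPU-capable browser (e.g. recent Chrome/Edge).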
model = await Qwen2VLForConditionalGeneration.from_pretrained(model_id, { dtype: 'q4f16', device: 'webgpu' });
status.textContent = 'Ready';
status.classList.add('ready');
uploadInput.disabled = false;
promptInput.disabled = false;
container.classList.remove('disabled');
}
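// Run a single image+prompt query, reflecting progress and errors in the status line.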
async function handleQuery(imageUrl, query) {
  try {
    status.textContent = 'Analyzing...';
    await imageTextToText(imageUrl, query, (out) => {
      console.log({ out });
      output.textContent = out;
    });
    status.textContent = 'Ready';
  } catch (err) {
    status.textContent = 'Error processing request';
    console.error(err);
  } finally {
    // Re-enable the controls so another query can be run.
    promptInput.disabled = false;
    uploadInput.disabled = false;
  }
}
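// Full pipeline: load and resize the image, build the chat prompt, run
// generation, and decode only the newly generated tokens. `cb` receives the
// decoded answer.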
export async function imageTextToText(
imagePath,
query,
cb,
) {
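  // Resize to 448x448 to keep the vision encoder's input (and memory use) fixed.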
const image = await (await RawImage.read(imagePath)).resize(448, 448);
const conversation = [
{
role: "user",
content: [
{ type: "image" },
{ type: "text", text: query, },
],
images: [image],
},
];
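  // Render the conversation with the model's chat template, appending the
  // assistant generation prompt.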
const text = processor.apply_chat_template(conversation, { add_generation_prompt: true });
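  // Tokenize the prompt and preprocess the image into model-ready tensors.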
const inputs = await processor(text, image);
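  // Generate up to 128 new tokens autoregressively.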
const outputs = await model.generate({
...inputs,
max_new_tokens: 128,
});
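  // Slice off the prompt tokens so only the newly generated ones are decoded.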
const decoded = processor.batch_decode(
outputs.slice(null, [inputs.input_ids.dims.at(-1), null]),
{ skip_special_tokens: true },
);
  // batch_decode returns one string per sequence; this demo generates a single sequence.
  const answer = decoded[0];
  cb(answer);
  return answer;
}
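// Scale the thumbnail to fit inside a 320px box while keeping the aspect ratio.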
async function updatePreview(url) {
const image = await RawImage.fromURL(url);
const ar = image.width / image.height;
const [cw, ch] = (ar > 1) ? [320, 320 / ar] : [320 * ar, 320];
thumb.style.width = `${cw}px`;
thumb.style.height = `${ch}px`;
thumb.style.backgroundImage = `url(${url})`;
thumb.innerHTML = '';
}
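// Kick off model loading immediately (top-level await is valid in ES modules).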
await initializeSessions();
// UI Event Handlers
exampleButton.addEventListener('click', (e) => {
e.preventDefault();
currentImage = EXAMPLE_URL;
updatePreview(currentImage);
});
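// Read the uploaded file as a data: URL so it works both with RawImage and as
// the CSS background of the thumbnail.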
uploadInput.addEventListener('change', (e) => {
const file = e.target.files[0];
if (!file) return;
const reader = new FileReader();
reader.onload = (e2) => {
currentImage = e2.target.result;
updatePreview(currentImage);
};
reader.readAsDataURL(file);
});
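// Keep the current query in sync with the text field ('input' fires after the
// value updates, unlike the deprecated 'keypress').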
promptInput.addEventListener('input', (e) => {
currentQuery = e.target.value;
});
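// Require both an image and a prompt before running; disable inputs while the
// model is busy (they are re-enabled in handleQuery's finally block).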
form.addEventListener('submit', (e) => {
e.preventDefault();
if (!currentImage || !currentQuery) {
status.textContent = 'Please select an image and type a prompt';
} else {
promptInput.disabled = true;
uploadInput.disabled = true;
handleQuery(currentImage, currentQuery);
}
});