import streamlit as st
from PIL import Image
from ultralytics import YOLO
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
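
# Launch locally with: streamlit run app.py
# (the filename "app.py" is a placeholder; use this script's actual name)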

# Load YOLO model
yolo_model = YOLO('runs/detect/makkah-yolo/weights/best.pt')  # Replace with your relative path or uploaded model if needed

# Load ALLaM in 8-bit to reduce memory
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
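# Note: load_in_8bit relies on the bitsandbytes package and needs a CUDA-capable GPU.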
llm_model = AutoModelForCausalLM.from_pretrained(
    "ALLaM-AI/ALLaM-7B-Instruct-preview",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("ALLaM-AI/ALLaM-7B-Instruct-preview", trust_remote_code=True)
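
# Tip (an addition, not in the original script): wrapping the two model loads in
# functions decorated with @st.cache_resource would stop Streamlit from reloading
# them on every rerun.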

# Streamlit App UI
st.title("🔎🕋 Makkah Landmark Detection + Arabic Cultural Explanation")
st.write("Upload an image to detect landmarks in Makkah and receive a cultural explanation in Arabic using ALLaM.")

uploaded_file = st.file_uploader("📷 Upload an image...", type=["jpg", "jpeg", "png"])

if uploaded_file:
    img = Image.open(uploaded_file)
    st.image(img, caption="Uploaded Image", use_column_width=True)

    # Save to a temporary file for YOLO; convert to RGB first, since PNG
    # uploads may carry an alpha channel that JPEG cannot store
    temp_image_path = "temp_uploaded_image.jpg"
    img.convert("RGB").save(temp_image_path)

    with st.spinner("🔍 Detecting landmarks..."):
        # conf=0.25 keeps only detections with at least 25% confidence
        results = yolo_model.predict(temp_image_path, conf=0.25, save=False)

    # Map each predicted class id back to its human-readable class name
    class_names = results[0].names
    class_ids = results[0].boxes.cls
    detected_classes = [class_names[int(cls_id)] for cls_id in class_ids]

    if detected_classes:
        landmarks = '، '.join(detected_classes)
        # Arabic for: "Please discuss the significance of the following
        # landmarks in Makkah al-Mukarramah: {landmarks}."
        user_prompt = f"من فضلك، تحدث عن أهمية المعالم التالية في مكة المكرمة: {landmarks}."

        st.write("📜 **Arabic Prompt:**")
        st.info(user_prompt)

        with st.spinner("🤖 Generating Arabic explanation..."):
            messages = [{"role": "user", "content": user_prompt}]
            # add_generation_prompt=True appends the assistant turn marker so the
            # model answers the question instead of continuing the user message
            prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(prompt_text, return_tensors='pt', return_token_type_ids=False)
            # Move inputs to wherever device_map="auto" placed the model,
            # rather than assuming 'cuda' is available
            inputs = {k: v.to(llm_model.device) for k, v in inputs.items()}

            # Sampling settings: moderate temperature with top-k/top-p filtering
            # for fluent but non-repetitive Arabic text
            response = llm_model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.6
            )

            # Decode only the newly generated tokens; decoding the full sequence
            # would echo the prompt back into the displayed answer
            new_tokens = response[:, inputs['input_ids'].shape[1]:]
            output_text = tokenizer.batch_decode(new_tokens, skip_special_tokens=True)[0]
            st.write("🕌 **Explanation in Arabic:**")
            st.success(output_text)
    else:
        st.warning("🚫 No landmarks detected in this image.")
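
    # Optional cleanup (an addition, not in the original flow): remove the
    # temporary upload once detection and generation have finished.
    import os
    if os.path.exists(temp_image_path):
        os.remove(temp_image_path)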