# MakkahLens / app.py
import streamlit as st
from PIL import Image
from ultralytics import YOLO
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
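import os  # stdlib; used at the end to remove the temporary upload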
# Load the YOLO landmark detector
yolo_model = YOLO('runs/detect/makkah-yolo/weights/best.pt')  # replace with the path to your own trained weights if needed
# Load ALLaM in 8-bit to reduce memory
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
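# 8-bit loading depends on the bitsandbytes package and, in its classic builds,
# a CUDA-capable GPU; without one, drop quantization_config below.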
llm_model = AutoModelForCausalLM.from_pretrained(
    "ALLaM-AI/ALLaM-7B-Instruct-preview",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("ALLaM-AI/ALLaM-7B-Instruct-preview", trust_remote_code=True)
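
# Tip: in a long-running Space, wrapping the two loaders above in functions
# decorated with @st.cache_resource keeps Streamlit from reloading both models
# on every script rerun.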
# Streamlit App UI
st.title("🔎🕋 Makkah Landmark Detection + Arabic Cultural Explanation")
st.write("Upload an image to detect landmarks in Makkah and receive a cultural explanation in Arabic using ALLaM.")
uploaded_file = st.file_uploader("📷 Upload an image...", type=["jpg", "jpeg", "png"])
if uploaded_file:
    img = Image.open(uploaded_file)
    st.image(img, caption="Uploaded Image", use_container_width=True)  # use_column_width is deprecated
    temp_image_path = "temp_uploaded_image.jpg"
    img.convert("RGB").save(temp_image_path)  # JPEG cannot store alpha, so convert first

    with st.spinner("🔍 Detecting landmarks..."):
        results = yolo_model.predict(temp_image_path, conf=0.25, save=False)

    class_names = results[0].names  # maps class id -> class name
    class_ids = results[0].boxes.cls
    detected_classes = [class_names[int(cls_id)] for cls_id in class_ids]
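
    # Optional extra: render YOLO's own annotated output. Results.plot() returns
    # the image with boxes drawn as a BGR NumPy array, so the channel order is
    # passed explicitly to Streamlit.
    st.image(results[0].plot(), caption="Detected landmarks", channels="BGR", use_container_width=True)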
    if detected_classes:
        # Join the names with the Arabic comma "،" and build the prompt.
        landmarks = '، '.join(detected_classes)
        # English gloss: "Please discuss the importance of the following
        # landmarks in Makkah al-Mukarramah: {landmarks}."
        user_prompt = f"من فضلك، تحدث عن أهمية المعالم التالية في مكة المكرمة: {landmarks}."
        st.write("📜 **Arabic Prompt:**")
        st.info(user_prompt)
        with st.spinner("🤖 Generating Arabic explanation..."):
            messages = [{"role": "user", "content": user_prompt}]
            prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(prompt_text, return_tensors='pt', return_token_type_ids=False)
            # device_map="auto" decides placement, so send inputs to the model's
            # device instead of hard-coding 'cuda'
            inputs = {k: v.to(llm_model.device) for k, v in inputs.items()}
            response = llm_model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.6
            )
            # Decode only the newly generated tokens; decoding the full sequence
            # would echo the prompt back into the answer.
            output_text = tokenizer.decode(response[0][inputs["input_ids"].shape[1]:], skip_special_tokens=True)

        st.write("🕌 **Explanation in Arabic:**")
        st.success(output_text)
    else:
        st.warning("🚫 No landmarks detected in this image.")
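
    # Housekeeping: remove the temporary image once detection and generation are done.
    os.remove(temp_image_path)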