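"""Streamlit demo: detect Makkah landmarks with YOLO and generate an Arabic
cultural explanation with the ALLaM-7B-Instruct model."""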
import streamlit as st
from PIL import Image
from ultralytics import YOLO
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
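# Cache the heavy models with st.cache_resource so they are loaded once per
# process rather than on every Streamlit rerun.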
@st.cache_resource
def load_models():
    # YOLO detector -- replace with your relative path or uploaded model if needed
    yolo = YOLO('runs/detect/makkah-yolo/weights/best.pt')
    # Load ALLaM in 8-bit to reduce memory
    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
    llm = AutoModelForCausalLM.from_pretrained(
        "ALLaM-AI/ALLaM-7B-Instruct-preview",
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
    tok = AutoTokenizer.from_pretrained(
        "ALLaM-AI/ALLaM-7B-Instruct-preview", trust_remote_code=True
    )
    return yolo, llm, tok

yolo_model, llm_model, tokenizer = load_models()
# Streamlit App UI
st.title("๐๐ Makkah Landmark Detection + Arabic Cultural Explanation")
st.write("Upload an image to detect landmarks in Makkah and receive a cultural explanation in Arabic using ALLaM.")
uploaded_file = st.file_uploader("Upload an image...", type=["jpg", "jpeg", "png"])
if uploaded_file:
    img = Image.open(uploaded_file)
    st.image(img, caption="Uploaded Image", use_container_width=True)

    # Convert to RGB so PNG uploads with an alpha channel can be saved as JPEG
    temp_image_path = "temp_uploaded_image.jpg"
    img.convert("RGB").save(temp_image_path)
with st.spinner("๐ Detecting landmarks..."):
results = yolo_model.predict(temp_image_path, conf=0.25, save=False)
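    # results[0].names maps class ids to label strings; boxes.cls holds the
    # class id of each detected box.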
    detections = results[0].names
    boxes = results[0].boxes.cls
    detected_classes = [detections[int(cls_id)] for cls_id in boxes]
    if detected_classes:
        landmarks = '، '.join(detected_classes)  # joined with an Arabic comma
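        # Prompt translates to: "Please talk about the importance of the
        # following landmarks in Makkah Al-Mukarramah: {landmarks}."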
user_prompt = f"ู
ู ูุถููุ ุชุญุฏุซ ุนู ุฃูู
ูุฉ ุงูู
ุนุงูู
ุงูุชุงููุฉ ูู ู
ูุฉ ุงูู
ูุฑู
ุฉ: {landmarks}."
st.write("๐ **Arabic Prompt:**")
st.info(user_prompt)
with st.spinner("๐ค Generating Arabic explanation..."):
messages = [{"role": "user", "content": user_prompt}]
inputs = tokenizer.apply_chat_template(messages, tokenize=False)
inputs = tokenizer(inputs, return_tensors='pt', return_token_type_ids=False)
inputs = {k: v.to('cuda') for k, v in inputs.items()}
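            # Sampling (top-k/top-p with moderate temperature) keeps the
            # explanation varied without drifting off-topic.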
            response = llm_model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.6,
            )
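            # generate() returns the prompt followed by the completion, so
            # decode only the newly generated tokens.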
            new_tokens = response[0][inputs['input_ids'].shape[1]:]
            output_text = tokenizer.decode(new_tokens, skip_special_tokens=True)

        st.write("**Explanation in Arabic:**")
        st.success(output_text)
    else:
        st.warning("No landmarks detected in this image.")