import streamlit as st
from PIL import Image
from ultralytics import YOLO
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
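
# The imports above imply these packages (assumed environment, not stated in the
# original): streamlit, pillow, ultralytics, transformers, torch, plus
# accelerate (for device_map="auto") and bitsandbytes (for 8-bit loading).
# pip install streamlit pillow ultralytics transformers torch accelerate bitsandbytes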

# Load YOLO model
yolo_model = YOLO('runs/detect/makkah-yolo/weights/best.pt')  # Replace with your relative path or uploaded model if needed
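# To smoke-test the app without the custom weights, a stock checkpoint such as
# YOLO('yolov8n.pt') also works; Ultralytics downloads it automatically.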

# Load ALLaM in 8-bit to reduce memory
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
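# If 8-bit is still too large for the available GPU, a 4-bit NF4 config is a
# common alternative (a sketch using standard BitsAndBytesConfig options, not
# part of the original app):
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )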
llm_model = AutoModelForCausalLM.from_pretrained(
    "ALLaM-AI/ALLaM-7B-Instruct-preview",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("ALLaM-AI/ALLaM-7B-Instruct-preview", trust_remote_code=True)
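
# Streamlit reruns this script on every interaction, so both models are reloaded
# each time as written. A minimal sketch of the usual fix, assuming the loading
# code above is moved into a cached factory:
#
# @st.cache_resource
# def load_models():
#     detector = YOLO('runs/detect/makkah-yolo/weights/best.pt')
#     llm = AutoModelForCausalLM.from_pretrained(...)   # as above
#     tok = AutoTokenizer.from_pretrained(...)          # as above
#     return detector, llm, tok
#
# yolo_model, llm_model, tokenizer = load_models()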

# Streamlit App UI
st.title("Makkah Landmark Detection + Arabic Cultural Explanation")
st.write("Upload an image to detect landmarks in Makkah and receive a cultural explanation in Arabic using ALLaM.")

uploaded_file = st.file_uploader("📷 Upload an image...", type=["jpg", "jpeg", "png"])

if uploaded_file:
    img = Image.open(uploaded_file)
    st.image(img, caption="Uploaded Image", use_column_width=True)

    # Save a temporary copy for YOLO; convert to RGB first so PNG uploads with
    # an alpha channel can be written out as JPEG
    temp_image_path = "temp_uploaded_image.jpg"
    img.convert("RGB").save(temp_image_path)

    with st.spinner("Detecting landmarks..."):
        results = yolo_model.predict(temp_image_path, conf=0.25, save=False)  # conf=0.25 drops low-confidence boxes
        detections = results[0].names                 # dict: class id -> class name
        boxes = results[0].boxes.cls                  # detected class ids, one entry per box
        detected_classes = [detections[int(cls_id)] for cls_id in boxes]
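        # Optional (not in the original app): results[0].plot() returns the image
        # with boxes drawn as a BGR numpy array, so reversing the channel axis
        # converts it for display:
        # st.image(results[0].plot()[..., ::-1], caption="Detected landmarks")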

    if detected_classes:
        # Join the class names with the Arabic comma and ask ALLaM about them.
        # The prompt reads: "Please talk about the importance of the following
        # landmarks in Makkah: {landmarks}."
        landmarks = '، '.join(detected_classes)
        user_prompt = f"من فضلك، تحدث عن أهمية المعالم التالية في مكة المكرمة: {landmarks}."
        st.write("**Arabic Prompt:**")
        st.info(user_prompt)

        with st.spinner("🤖 Generating Arabic explanation..."):
            messages = [{"role": "user", "content": user_prompt}]
            # add_generation_prompt=True appends the assistant cue so the model
            # answers instead of continuing the user turn
            prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(prompt_text, return_tensors='pt', return_token_type_ids=False)
            inputs = {k: v.to(llm_model.device) for k, v in inputs.items()}  # match the device chosen by device_map
            response = llm_model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.6
            )
            output_text = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
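            # Note (assumption, not in the original app): generate() returns the
            # prompt tokens followed by the new ones, so output_text still starts
            # with the prompt. To show only the model's answer, decode just the
            # newly generated tokens:
            # new_tokens = response[0][inputs['input_ids'].shape[1]:]
            # output_text = tokenizer.decode(new_tokens, skip_special_tokens=True)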
st.write("๐ **Explanation in Arabic:**") | |
st.success(output_text) | |
    else:
        st.warning("🚫 No landmarks detected in this image.")