"""Streamlit app: detect Makkah landmarks with YOLO and explain them in Arabic.

Flow: the user uploads an image, a fine-tuned YOLO model detects landmarks,
and the detected class names are inserted into an Arabic prompt that is
answered by the ALLaM-7B-Instruct model.
"""

import streamlit as st
from PIL import Image
from ultralytics import YOLO
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# Replace with your relative path or uploaded model if needed
YOLO_WEIGHTS_PATH = 'runs/detect/makkah-yolo/weights/best.pt'
LLM_MODEL_ID = "ALLaM-AI/ALLaM-7B-Instruct-preview"


@st.cache_resource
def _load_yolo_model():
    """Load the YOLO detector once per server process.

    Streamlit re-executes this script on every widget interaction; caching
    the model as a resource avoids re-reading the weights on each rerun.
    """
    return YOLO(YOLO_WEIGHTS_PATH)


@st.cache_resource
def _load_llm():
    """Load ALLaM (8-bit quantized) and its tokenizer once per server process.

    Returns:
        A ``(model, tokenizer)`` tuple.
    """
    # Load ALLaM in 8-bit to reduce memory
    bnb_config = BitsAndBytesConfig(load_in_8bit=True)
    model = AutoModelForCausalLM.from_pretrained(
        LLM_MODEL_ID,
        quantization_config=bnb_config,
        device_map="auto",
        trust_remote_code=True,
    )
    tok = AutoTokenizer.from_pretrained(LLM_MODEL_ID, trust_remote_code=True)
    return model, tok


# Module-level handles keep the same names the script has always exposed.
yolo_model = _load_yolo_model()
llm_model, tokenizer = _load_llm()


def _detect_landmarks(image):
    """Return the unique landmark class names detected in ``image``.

    Args:
        image: A PIL image in RGB mode.

    Returns:
        Class names in first-seen order, without duplicates, so a landmark
        detected in several boxes is only mentioned once in the prompt.
    """
    results = yolo_model.predict(image, conf=0.25, save=False)
    class_names = results[0].names
    class_ids = results[0].boxes.cls
    labels = [class_names[int(cls_id)] for cls_id in class_ids]
    # dict.fromkeys de-duplicates while preserving insertion order.
    return list(dict.fromkeys(labels))


def _generate_explanation(user_prompt):
    """Generate ALLaM's answer to ``user_prompt`` and return only the answer.

    Args:
        user_prompt: The Arabic question sent as the single user turn.

    Returns:
        The decoded text of the newly generated tokens (prompt excluded).
    """
    messages = [{"role": "user", "content": user_prompt}]
    chat_text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(chat_text, return_tensors='pt', return_token_type_ids=False)
    # device_map="auto" decides placement; follow the model instead of
    # hard-coding 'cuda'.
    inputs = {k: v.to(llm_model.device) for k, v in inputs.items()}

    response = llm_model.generate(
        **inputs,
        max_new_tokens=512,
        do_sample=True,
        top_k=50,
        top_p=0.95,
        temperature=0.6,
    )

    # generate() returns prompt + continuation for causal LMs; slice off the
    # prompt so the user does not see their own question echoed back.
    prompt_length = inputs["input_ids"].shape[1]
    new_tokens = response[0][prompt_length:]
    return tokenizer.decode(new_tokens, skip_special_tokens=True).strip()


# Streamlit App UI
st.title("🔎🕋 Makkah Landmark Detection + Arabic Cultural Explanation")
st.write("Upload an image to detect landmarks in Makkah and receive a cultural explanation in Arabic using ALLaM.")

uploaded_file = st.file_uploader("📷 Upload an image...", type=["jpg", "jpeg", "png"])

if uploaded_file:
    img = Image.open(uploaded_file)
    st.image(img, caption="Uploaded Image", use_column_width=True)

    # Feed the image to YOLO in memory. Converting to RGB handles PNGs with
    # an alpha channel or palette, and avoiding a fixed temp file removes a
    # collision between concurrent sessions.
    rgb_img = img.convert("RGB")

    with st.spinner("🔍 Detecting landmarks..."):
        detected_classes = _detect_landmarks(rgb_img)

    if detected_classes:
        landmarks = '، '.join(detected_classes)
        user_prompt = f"من فضلك، تحدث عن أهمية المعالم التالية في مكة المكرمة: {landmarks}."

        st.write("📜 **Arabic Prompt:**")
        st.info(user_prompt)

        with st.spinner("🤖 Generating Arabic explanation..."):
            output_text = _generate_explanation(user_prompt)

        st.write("🕌 **Explanation in Arabic:**")
        st.success(output_text)
    else:
        st.warning("🚫 No landmarks detected in this image.")