import streamlit as st
from PIL import Image
from ultralytics import YOLO
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch
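
# The imports above imply these packages (assumed environment, not stated in the
# original): streamlit, pillow, ultralytics, transformers, torch, plus
# accelerate (for device_map="auto") and bitsandbytes (for 8-bit loading).
# pip install streamlit pillow ultralytics transformers torch accelerate bitsandbytes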

# Load YOLO model
yolo_model = YOLO('runs/detect/makkah-yolo/weights/best.pt')  # Replace with your relative path or uploaded model if needed
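# To smoke-test the app without the custom weights, a stock checkpoint such as
# YOLO('yolov8n.pt') also works; Ultralytics downloads it automatically.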

# Load ALLaM in 8-bit to reduce memory
bnb_config = BitsAndBytesConfig(load_in_8bit=True)
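# If 8-bit is still too large for the available GPU, a 4-bit NF4 config is a
# common alternative (a sketch using standard BitsAndBytesConfig options, not
# part of the original app):
# bnb_config = BitsAndBytesConfig(
#     load_in_4bit=True,
#     bnb_4bit_quant_type="nf4",
#     bnb_4bit_compute_dtype=torch.float16,
# )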
llm_model = AutoModelForCausalLM.from_pretrained(
    "ALLaM-AI/ALLaM-7B-Instruct-preview",
    quantization_config=bnb_config,
    device_map="auto",
    trust_remote_code=True
)
tokenizer = AutoTokenizer.from_pretrained("ALLaM-AI/ALLaM-7B-Instruct-preview", trust_remote_code=True)
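
# Streamlit reruns this script on every interaction, so both models are reloaded
# each time as written. A minimal sketch of the usual fix, assuming the loading
# code above is moved into a cached factory:
#
# @st.cache_resource
# def load_models():
#     detector = YOLO('runs/detect/makkah-yolo/weights/best.pt')
#     llm = AutoModelForCausalLM.from_pretrained(...)   # as above
#     tok = AutoTokenizer.from_pretrained(...)          # as above
#     return detector, llm, tok
#
# yolo_model, llm_model, tokenizer = load_models()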

# Streamlit App UI
st.title("Makkah Landmark Detection + Arabic Cultural Explanation")
st.write("Upload an image to detect landmarks in Makkah and receive a cultural explanation in Arabic using ALLaM.")

uploaded_file = st.file_uploader("📷 Upload an image...", type=["jpg", "jpeg", "png"])

if uploaded_file:
    img = Image.open(uploaded_file)
    st.image(img, caption="Uploaded Image", use_column_width=True)

    # Save a temporary copy for YOLO; convert to RGB first so PNG uploads with
    # an alpha channel can be written out as JPEG
    temp_image_path = "temp_uploaded_image.jpg"
    img.convert("RGB").save(temp_image_path)

    with st.spinner("Detecting landmarks..."):
        results = yolo_model.predict(temp_image_path, conf=0.25, save=False)  # conf=0.25 drops low-confidence boxes
        detections = results[0].names                 # dict: class id -> class name
        boxes = results[0].boxes.cls                  # detected class ids, one entry per box
        detected_classes = [detections[int(cls_id)] for cls_id in boxes]
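        # Optional (not in the original app): results[0].plot() returns the image
        # with boxes drawn as a BGR numpy array, so reversing the channel axis
        # converts it for display:
        # st.image(results[0].plot()[..., ::-1], caption="Detected landmarks")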

    if detected_classes:
        # Join the class names with the Arabic comma and ask ALLaM about them.
        # The prompt reads: "Please talk about the importance of the following
        # landmarks in Makkah: {landmarks}."
        landmarks = '، '.join(detected_classes)
        user_prompt = f"من فضلك، تحدث عن أهمية المعالم التالية في مكة المكرمة: {landmarks}."
        st.write("**Arabic Prompt:**")
        st.info(user_prompt)

        with st.spinner("🤖 Generating Arabic explanation..."):
            messages = [{"role": "user", "content": user_prompt}]
            # add_generation_prompt=True appends the assistant cue so the model
            # answers instead of continuing the user turn
            prompt_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
            inputs = tokenizer(prompt_text, return_tensors='pt', return_token_type_ids=False)
            inputs = {k: v.to(llm_model.device) for k, v in inputs.items()}  # match the device chosen by device_map
            response = llm_model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                top_k=50,
                top_p=0.95,
                temperature=0.6
            )
            output_text = tokenizer.batch_decode(response, skip_special_tokens=True)[0]
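            # Note (assumption, not in the original app): generate() returns the
            # prompt tokens followed by the new ones, so output_text still starts
            # with the prompt. To show only the model's answer, decode just the
            # newly generated tokens:
            # new_tokens = response[0][inputs['input_ids'].shape[1]:]
            # output_text = tokenizer.decode(new_tokens, skip_special_tokens=True)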
st.write("๐ **Explanation in Arabic:**") | |
st.success(output_text) | |
    else:
        st.warning("🚫 No landmarks detected in this image.")