# Source: Hugging Face Space by "hackerbyhobby", commit aaede28 ("added text to voice", unverified), ~13.4 kB.
# (The scraped page chrome — "raw / history blame" — was converted into this comment so the file parses.)
import gradio as gr
import pytesseract
from PIL import Image
from transformers import pipeline
import re
from langdetect import detect
from deep_translator import GoogleTranslator
import openai
import os
import io
import requests
import json
# For text-to-speech
from gtts import gTTS
# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")
# Translator instance
translator = GoogleTranslator(source="auto", target="es")
# 1. Load separate keywords for SMiShing and Other Scam (assumed in English)
with open("smishing_keywords.txt", "r", encoding="utf-8") as f:
SMISHING_KEYWORDS = [line.strip().lower() for line in f if line.strip()]
with open("other_scam_keywords.txt", "r", encoding="utf-8") as f:
OTHER_SCAM_KEYWORDS = [line.strip().lower() for line in f if line.strip()]
# 2. Zero-Shot Classification Pipeline
model_name = "joeddav/xlm-roberta-large-xnli"
classifier = pipeline("zero-shot-classification", model=model_name)
CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
def tts_explanation(explanation: str, detected_lang: str):
    """
    Synthesize the explanation text to speech with gTTS.

    Spanish text (``detected_lang == "es"``) uses the Spanish voice on the
    default "com" domain; any other language falls back to English on the
    "co.uk" domain. gTTS offers no direct male/female voice selection —
    the TLD only approximates a different accent.

    Returns:
        io.BytesIO positioned at 0 containing MP3 audio, or an empty
        buffer if synthesis fails.
    """
    is_spanish = detected_lang == "es"
    voice_lang = "es" if is_spanish else "en"
    voice_tld = "com" if is_spanish else "co.uk"
    try:
        audio_buffer = io.BytesIO()
        speech = gTTS(text=explanation, lang=voice_lang, tld=voice_tld, slow=False)
        speech.write_to_fp(audio_buffer)
        audio_buffer.seek(0)
        return audio_buffer
    except Exception as err:
        # Best effort: log and hand back an empty buffer so the UI still renders.
        print("TTS generation error:", err)
        return io.BytesIO()
def get_keywords_by_language(text: str):
    """
    Pick the keyword lists matching the message's language.

    Detects the language from the first 200 characters (defaulting to "en"
    when detection fails). Spanish messages get the English keyword lists
    machine-translated to Spanish; everything else uses the English lists
    unchanged.

    Returns:
        (smishing_keywords, other_scam_keywords, detected_lang)
    """
    try:
        detected_lang = detect(text[:200])
    except Exception:
        # langdetect can fail on very short or ambiguous snippets.
        detected_lang = "en"

    if detected_lang != "es":
        return SMISHING_KEYWORDS, OTHER_SCAM_KEYWORDS, "en"

    translate = translator.translate
    smishing_es = [translate(kw).lower() for kw in SMISHING_KEYWORDS]
    other_scam_es = [translate(kw).lower() for kw in OTHER_SCAM_KEYWORDS]
    return smishing_es, other_scam_es, "es"
def boost_probabilities(probabilities: dict, text: str):
    """
    Adjust zero-shot probabilities with keyword and URL heuristics.

    Each matched SMiShing keyword adds 0.30 to SMiShing, each Other-Scam
    keyword adds 0.30 to Other Scam, and any URL-looking token adds a flat
    0.35 to SMiShing. The combined boost is subtracted from Legitimate,
    scores are clamped at zero, then renormalized to sum to 1.

    Returns:
        Dict with "SMiShing", "Other Scam", "Legitimate" scores plus the
        "detected_lang" reported by get_keywords_by_language.
    """
    haystack = text.lower()
    smishing_kws, other_kws, detected_lang = get_keywords_by_language(text)

    # Booleans sum as 0/1, giving a match count per keyword list.
    smishing_boost = 0.30 * sum(kw in haystack for kw in smishing_kws)
    other_boost = 0.30 * sum(kw in haystack for kw in other_kws)

    # URLs (explicit scheme or bare domain with a known TLD) are a strong
    # SMiShing signal.
    url_pattern = r"(https?://[^\s]+|\b[a-zA-Z0-9.-]+\.(?:com|net|org|edu|gov|mil|io|ai|co|info|biz|us|uk|de|fr|es|ru|jp|cn|in|au|ca|br|mx|it|nl|se|no|fi|ch|pl|kr|vn|id|tw|sg|hk)\b)"
    if re.findall(url_pattern, haystack):
        smishing_boost += 0.35

    scores = {
        "SMiShing": max(probabilities.get("SMiShing", 0.0) + smishing_boost, 0.0),
        "Other Scam": max(probabilities.get("Other Scam", 0.0) + other_boost, 0.0),
        "Legitimate": max(
            probabilities.get("Legitimate", 1.0) - (smishing_boost + other_boost), 0.0
        ),
    }

    total = sum(scores.values())
    if total > 0:
        scores = {label: value / total for label, value in scores.items()}
    else:
        # Degenerate case: everything clamped to zero — call it Legitimate.
        scores = {"SMiShing": 0.0, "Other Scam": 0.0, "Legitimate": 1.0}

    scores["detected_lang"] = detected_lang
    return scores
def query_llm_for_classification(raw_message: str) -> dict:
if not raw_message.strip():
return {"label": "Unknown", "reason": "No message provided to the LLM."}
system_prompt = (
"You are a cybersecurity expert. You will classify the user's message "
"as one of: SMiShing, Other Scam, or Legitimate. Provide a short reason. "
"Return only JSON with keys: label, reason."
)
user_prompt = f"Message: {raw_message}\nClassify it as SMiShing, Other Scam, or Legitimate."
try:
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.2
)
raw_reply = response["choices"][0]["message"]["content"].strip()
llm_result = json.loads(raw_reply)
if "label" not in llm_result or "reason" not in llm_result:
return {"label": "Unknown", "reason": f"Unexpected format: {raw_reply}"}
return llm_result
except Exception as e:
return {"label": "Unknown", "reason": f"LLM error: {e}"}
def incorporate_llm_label(boosted: dict, llm_label: str) -> dict:
    """
    Blend the LLM's label into the boosted probabilities.

    Adds a flat 0.2 to the class the LLM chose. The label is matched
    case/whitespace-insensitively so common LLM variants ("smishing",
    "SMISHING ") still count — an exact-match comparison silently dropped
    those. Negative scores are clamped to zero and the result renormalized
    to sum to 1; an unrecognized label therefore just renormalizes.

    Args:
        boosted: Scores keyed by "SMiShing" / "Other Scam" / "Legitimate".
        llm_label: Label string from the LLM (may be None or arbitrary text).

    Returns:
        A new dict; the caller's dict is left unmodified.
    """
    scores = dict(boosted)  # work on a copy, don't mutate the argument
    normalized = (llm_label or "").strip().lower()
    for label in ("SMiShing", "Other Scam", "Legitimate"):
        if normalized == label.lower():
            scores[label] = scores.get(label, 0.0) + 0.2
            break
    # Clamp, then renormalize so the three scores form a distribution.
    scores = {k: max(v, 0.0) for k, v in scores.items()}
    total = sum(scores.values())
    if total > 0:
        scores = {k: v / total for k, v in scores.items()}
    else:
        # Degenerate all-zero case: default to Legitimate.
        scores["Legitimate"] = 1.0
        scores["SMiShing"] = 0.0
        scores["Other Scam"] = 0.0
    return scores
def query_llm_for_explanation(
    text: str,
    final_label: str,
    final_conf: float,
    local_label: str,
    local_conf: float,
    llm_label: str,
    llm_reason: str,
    found_smishing: list,
    found_other_scam: list,
    found_urls: list,
    detected_lang: str
) -> str:
    """
    Ask gpt-3.5-turbo to write the user-facing explanation.

    The system prompt is Spanish when detected_lang == "es", English
    otherwise; the user message packs in the local/LLM/final verdicts and
    matched keywords/URLs as context.

    Returns:
        The explanation text, or an error sentence if the API call fails
        (never raises).
    """
    spanish_system = (
        "Eres un experto en ciberseguridad. Proporciona una explicación final al usuario en español. "
        "Combina la clasificación local, la clasificación LLM y la etiqueta final en una sola explicación breve. "
        "No reveles el código interno ni el JSON bruto; simplemente da una breve explicación fácil de entender. "
        "Termina con la etiqueta final."
    )
    english_system = (
        "You are a cybersecurity expert providing a final explanation to the user in English. "
        "Combine the local classification, the LLM classification, and the final label "
        "into one concise explanation. Do not reveal internal code or raw JSON. "
        "End with a final statement of the final label."
    )
    system_prompt = spanish_system if detected_lang == "es" else english_system

    user_context = f"""
User Message:
{text}
Local Classification => Label: {local_label}, Confidence: {local_conf}
LLM Classification => Label: {llm_label}, Reason: {llm_reason}
Final Overall Label => {final_label} (confidence {final_conf})
Suspicious SMiShing Keywords => {found_smishing}
Suspicious Other Scam Keywords => {found_other_scam}
URLs => {found_urls}
"""
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_context}
            ],
            temperature=0.2
        )
        return response["choices"][0]["message"]["content"].strip()
    except Exception as e:
        return f"Could not generate final explanation due to error: {e}"
def smishing_detector(input_type, text, image):
    """
    Full classification pipeline for one message.

    Steps: gather the text (typed, or OCR'd from a screenshot), run the
    local zero-shot classifier, apply keyword/URL boosts, ask the LLM for
    an independent label, fold that label into the scores, then ask the LLM
    for a user-facing explanation.

    Args:
        input_type: "Text" to use `text`; anything else OCRs `image`.
        text: Pasted SMS text (may be None/empty).
        image: PIL image of a screenshot (may be None).

    Returns:
        Dict with the classification, confidences, matched keywords/URLs,
        LLM verdict, and final explanation — keys consumed by the UI.
    """
    # --- Collect the text to classify ---
    if input_type == "Text":
        combined_text = text.strip() if text else ""
    else:
        combined_text = ""
        if image is not None:
            # OCR with both Spanish and English language data loaded.
            combined_text = pytesseract.image_to_string(image, lang="spa+eng").strip()
    if not combined_text:
        # Nothing to classify — return a placeholder result the UI can show.
        # (This early return has no "detected_language" key; the caller uses
        # .get() with a default, so that's safe.)
        return {
            "text_used_for_classification": "(none)",
            "label": "No text provided",
            "confidence": 0.0,
            "keywords_found": [],
            "urls_found": [],
            "llm_label": "Unknown",
            "llm_reason": "No text to analyze",
            "final_explanation": "No text provided"
        }
    # --- Local zero-shot classification over the three candidate labels ---
    local_result = classifier(
        sequences=combined_text,
        candidate_labels=CANDIDATE_LABELS,
        hypothesis_template="This message is {}."
    )
    original_probs = {k: float(v) for k, v in zip(local_result["labels"], local_result["scores"])}
    # Heuristic keyword/URL boosting; also detects the message language.
    boosted = boost_probabilities(original_probs, combined_text)
    detected_lang = boosted.pop("detected_lang", "en")
    for k in boosted:
        boosted[k] = float(boosted[k])
    local_label = max(boosted, key=boosted.get)
    local_conf = round(boosted[local_label], 3)
    # --- Independent LLM opinion, blended into the boosted scores ---
    llm_classification = query_llm_for_classification(combined_text)
    llm_label = llm_classification.get("label", "Unknown")
    llm_reason = llm_classification.get("reason", "No reason provided")
    boosted = incorporate_llm_label(boosted, llm_label)
    final_label = max(boosted, key=boosted.get)
    final_confidence = round(boosted[final_label], 3)
    # --- Re-derive matched keywords/URLs for display ---
    # NOTE(review): get_keywords_by_language runs again here (it already ran
    # inside boost_probabilities), so Spanish input translates the keyword
    # lists twice — duplicate work, not a correctness issue.
    lower_text = combined_text.lower()
    smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)
    # Same URL pattern used by boost_probabilities.
    found_urls = re.findall(
        r"(https?://[^\s]+|\b[a-zA-Z0-9.-]+\.(?:com|net|org|edu|gov|mil|io|ai|co|info|biz|us|uk|de|fr|es|ru|jp|cn|in|au|ca|br|mx|it|nl|se|no|fi|ch|pl|kr|vn|id|tw|sg|hk)\b)",
        lower_text
    )
    found_smishing = [kw for kw in smishing_keys if kw in lower_text]
    found_other_scam = [kw for kw in scam_keys if kw in lower_text]
    # --- LLM-written, user-facing explanation in the detected language ---
    final_explanation = query_llm_for_explanation(
        text=combined_text,
        final_label=final_label,
        final_conf=final_confidence,
        local_label=local_label,
        local_conf=local_conf,
        llm_label=llm_label,
        llm_reason=llm_reason,
        found_smishing=found_smishing,
        found_other_scam=found_other_scam,
        found_urls=found_urls,
        detected_lang=detected_lang
    )
    return {
        "detected_language": detected_lang,
        "text_used_for_classification": combined_text,
        "original_probabilities": {k: round(v, 3) for k, v in original_probs.items()},
        # Only the top pre-LLM label/confidence is echoed here, not the full
        # distribution.
        "boosted_probabilities_before_llm": {local_label: local_conf},
        "llm_label": llm_label,
        "llm_reason": llm_reason,
        "boosted_probabilities_after_llm": {k: round(v, 3) for k, v in boosted.items()},
        "label": final_label,
        "confidence": final_confidence,
        "smishing_keywords_found": found_smishing,
        "other_scam_keywords_found": found_other_scam,
        "urls_found": found_urls,
        "final_explanation": final_explanation,
    }
###
# Combined function to produce both text (JSON) and TTS audio
###
def classify_and_tts(input_type, text, image):
    """
    Classify the message, then voice the explanation.

    Runs smishing_detector and feeds its final explanation (in the detected
    language, defaulting to English) through tts_explanation.

    Returns:
        (classification dict, BytesIO of MP3 audio) — the pair of outputs
        wired to the JSON and Audio components in the UI.
    """
    result = smishing_detector(input_type, text, image)
    audio_data = tts_explanation(
        result["final_explanation"],
        result.get("detected_language", "en"),
    )
    return result, audio_data
def toggle_inputs(choice):
    """Show the textbox when "Text" is selected, the image uploader otherwise."""
    show_text = choice == "Text"
    return gr.update(visible=show_text), gr.update(visible=not show_text)
# --- Gradio UI wiring ---------------------------------------------------
# NOTE(review): indentation was lost in the source paste; this reconstruction
# places only the radio inside the Row — confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("## SMiShing & Scam Detector with LLM-Enhanced Logic + TTS Explanation")
    with gr.Row():
        # Selector that decides which of the two inputs below is visible.
        input_type = gr.Radio(
            choices=["Text", "Screenshot"],
            value="Text",
            label="Choose Input Type"
        )
    text_input = gr.Textbox(
        lines=3,
        label="Paste Suspicious SMS Text",
        placeholder="Type or paste the message here...",
        visible=True
    )
    image_input = gr.Image(
        type="pil",
        label="Upload Screenshot",
        visible=False
    )
    # Swap visibility of the two inputs whenever the radio changes.
    input_type.change(
        fn=toggle_inputs,
        inputs=input_type,
        outputs=[text_input, image_input],
        queue=False
    )
    analyze_btn = gr.Button("Classify")
    # We'll show the classification JSON + TTS audio.
    output_json = gr.JSON(label="Classification Result")
    audio_output = gr.Audio(label="TTS Explanation")
    # classify_and_tts returns (dict_result, audio_data), matching the two
    # output components above.
    analyze_btn.click(
        fn=classify_and_tts,
        inputs=[input_type, text_input, image_input],
        outputs=[output_json, audio_output]
    )

if __name__ == "__main__":
    # LLM calls degrade to "Unknown"/error strings without a key; warn early.
    if not openai.api_key:
        print("WARNING: OPENAI_API_KEY not set. LLM calls will fail or be skipped.")
    demo.launch()