# Source: Hugging Face Space by "hackerbyhobby", commit aaede28 ("added text to voice", unverified), ~13.4 kB.
# (The scraped page chrome — "raw / history blame" — was converted into this comment so the file parses.)
import gradio as gr
import pytesseract
from PIL import Image
from transformers import pipeline
import re
from langdetect import detect
from deep_translator import GoogleTranslator
import openai
import os
import io
import requests
import json
# For text-to-speech
from gtts import gTTS
# Set your OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")
# Translator instance
translator = GoogleTranslator(source="auto", target="es")
# 1. Load separate keywords for SMiShing and Other Scam (assumed in English)
with open("smishing_keywords.txt", "r", encoding="utf-8") as f:
SMISHING_KEYWORDS = [line.strip().lower() for line in f if line.strip()]
with open("other_scam_keywords.txt", "r", encoding="utf-8") as f:
OTHER_SCAM_KEYWORDS = [line.strip().lower() for line in f if line.strip()]
# 2. Zero-Shot Classification Pipeline
model_name = "joeddav/xlm-roberta-large-xnli"
classifier = pipeline("zero-shot-classification", model=model_name)
CANDIDATE_LABELS = ["SMiShing", "Other Scam", "Legitimate"]
def tts_explanation(explanation: str, detected_lang: str):
    """
    Synthesize the explanation text to speech with gTTS.

    Spanish text (``detected_lang == "es"``) uses the Spanish voice on the
    default "com" domain; any other language falls back to English on the
    "co.uk" domain. gTTS offers no direct male/female voice selection —
    the TLD only approximates a different accent.

    Returns:
        io.BytesIO positioned at 0 containing MP3 audio, or an empty
        buffer if synthesis fails.
    """
    is_spanish = detected_lang == "es"
    voice_lang = "es" if is_spanish else "en"
    voice_tld = "com" if is_spanish else "co.uk"
    try:
        audio_buffer = io.BytesIO()
        speech = gTTS(text=explanation, lang=voice_lang, tld=voice_tld, slow=False)
        speech.write_to_fp(audio_buffer)
        audio_buffer.seek(0)
        return audio_buffer
    except Exception as err:
        # Best effort: log and hand back an empty buffer so the UI still renders.
        print("TTS generation error:", err)
        return io.BytesIO()
def get_keywords_by_language(text: str):
    """
    Pick the keyword lists matching the message's language.

    Detects the language from the first 200 characters (defaulting to "en"
    when detection fails). Spanish messages get the English keyword lists
    machine-translated to Spanish; everything else uses the English lists
    unchanged.

    Returns:
        (smishing_keywords, other_scam_keywords, detected_lang)
    """
    try:
        detected_lang = detect(text[:200])
    except Exception:
        # langdetect can fail on very short or ambiguous snippets.
        detected_lang = "en"

    if detected_lang != "es":
        return SMISHING_KEYWORDS, OTHER_SCAM_KEYWORDS, "en"

    translate = translator.translate
    smishing_es = [translate(kw).lower() for kw in SMISHING_KEYWORDS]
    other_scam_es = [translate(kw).lower() for kw in OTHER_SCAM_KEYWORDS]
    return smishing_es, other_scam_es, "es"
def boost_probabilities(probabilities: dict, text: str):
    """
    Adjust zero-shot probabilities with keyword and URL heuristics.

    Each matched SMiShing keyword adds 0.30 to SMiShing, each Other-Scam
    keyword adds 0.30 to Other Scam, and any URL-looking token adds a flat
    0.35 to SMiShing. The combined boost is subtracted from Legitimate,
    scores are clamped at zero, then renormalized to sum to 1.

    Returns:
        Dict with "SMiShing", "Other Scam", "Legitimate" scores plus the
        "detected_lang" reported by get_keywords_by_language.
    """
    haystack = text.lower()
    smishing_kws, other_kws, detected_lang = get_keywords_by_language(text)

    # Booleans sum as 0/1, giving a match count per keyword list.
    smishing_boost = 0.30 * sum(kw in haystack for kw in smishing_kws)
    other_boost = 0.30 * sum(kw in haystack for kw in other_kws)

    # URLs (explicit scheme or bare domain with a known TLD) are a strong
    # SMiShing signal.
    url_pattern = r"(https?://[^\s]+|\b[a-zA-Z0-9.-]+\.(?:com|net|org|edu|gov|mil|io|ai|co|info|biz|us|uk|de|fr|es|ru|jp|cn|in|au|ca|br|mx|it|nl|se|no|fi|ch|pl|kr|vn|id|tw|sg|hk)\b)"
    if re.findall(url_pattern, haystack):
        smishing_boost += 0.35

    scores = {
        "SMiShing": max(probabilities.get("SMiShing", 0.0) + smishing_boost, 0.0),
        "Other Scam": max(probabilities.get("Other Scam", 0.0) + other_boost, 0.0),
        "Legitimate": max(
            probabilities.get("Legitimate", 1.0) - (smishing_boost + other_boost), 0.0
        ),
    }

    total = sum(scores.values())
    if total > 0:
        scores = {label: value / total for label, value in scores.items()}
    else:
        # Degenerate case: everything clamped to zero — call it Legitimate.
        scores = {"SMiShing": 0.0, "Other Scam": 0.0, "Legitimate": 1.0}

    scores["detected_lang"] = detected_lang
    return scores
def query_llm_for_classification(raw_message: str) -> dict:
if not raw_message.strip():
return {"label": "Unknown", "reason": "No message provided to the LLM."}
system_prompt = (
"You are a cybersecurity expert. You will classify the user's message "
"as one of: SMiShing, Other Scam, or Legitimate. Provide a short reason. "
"Return only JSON with keys: label, reason."
)
user_prompt = f"Message: {raw_message}\nClassify it as SMiShing, Other Scam, or Legitimate."
try:
response = openai.ChatCompletion.create(
model="gpt-3.5-turbo",
messages=[
{"role": "system", "content": system_prompt},
{"role": "user", "content": user_prompt}
],
temperature=0.2
)
raw_reply = response["choices"][0]["message"]["content"].strip()
llm_result = json.loads(raw_reply)
if "label" not in llm_result or "reason" not in llm_result:
return {"label": "Unknown", "reason": f"Unexpected format: {raw_reply}"}
return llm_result
except Exception as e:
return {"label": "Unknown", "reason": f"LLM error: {e}"}
def incorporate_llm_label(boosted: dict, llm_label: str) -> dict:
    """
    Blend the LLM's label into the boosted probabilities.

    Adds a flat 0.2 to the class the LLM chose. The label is matched
    case/whitespace-insensitively so common LLM variants ("smishing",
    "SMISHING ") still count — an exact-match comparison silently dropped
    those. Negative scores are clamped to zero and the result renormalized
    to sum to 1; an unrecognized label therefore just renormalizes.

    Args:
        boosted: Scores keyed by "SMiShing" / "Other Scam" / "Legitimate".
        llm_label: Label string from the LLM (may be None or arbitrary text).

    Returns:
        A new dict; the caller's dict is left unmodified.
    """
    scores = dict(boosted)  # work on a copy, don't mutate the argument
    normalized = (llm_label or "").strip().lower()
    for label in ("SMiShing", "Other Scam", "Legitimate"):
        if normalized == label.lower():
            scores[label] = scores.get(label, 0.0) + 0.2
            break
    # Clamp, then renormalize so the three scores form a distribution.
    scores = {k: max(v, 0.0) for k, v in scores.items()}
    total = sum(scores.values())
    if total > 0:
        scores = {k: v / total for k, v in scores.items()}
    else:
        # Degenerate all-zero case: default to Legitimate.
        scores["Legitimate"] = 1.0
        scores["SMiShing"] = 0.0
        scores["Other Scam"] = 0.0
    return scores
def query_llm_for_explanation(
    text: str,
    final_label: str,
    final_conf: float,
    local_label: str,
    local_conf: float,
    llm_label: str,
    llm_reason: str,
    found_smishing: list,
    found_other_scam: list,
    found_urls: list,
    detected_lang: str
) -> str:
    """
    Ask gpt-3.5-turbo to write the user-facing explanation.

    The system prompt is Spanish when detected_lang == "es", English
    otherwise; the user message packs in the local/LLM/final verdicts and
    matched keywords/URLs as context.

    Returns:
        The explanation text, or an error sentence if the API call fails
        (never raises).
    """
    spanish_system = (
        "Eres un experto en ciberseguridad. Proporciona una explicación final al usuario en español. "
        "Combina la clasificación local, la clasificación LLM y la etiqueta final en una sola explicación breve. "
        "No reveles el código interno ni el JSON bruto; simplemente da una breve explicación fácil de entender. "
        "Termina con la etiqueta final."
    )
    english_system = (
        "You are a cybersecurity expert providing a final explanation to the user in English. "
        "Combine the local classification, the LLM classification, and the final label "
        "into one concise explanation. Do not reveal internal code or raw JSON. "
        "End with a final statement of the final label."
    )
    system_prompt = spanish_system if detected_lang == "es" else english_system

    user_context = f"""
User Message:
{text}
Local Classification => Label: {local_label}, Confidence: {local_conf}
LLM Classification => Label: {llm_label}, Reason: {llm_reason}
Final Overall Label => {final_label} (confidence {final_conf})
Suspicious SMiShing Keywords => {found_smishing}
Suspicious Other Scam Keywords => {found_other_scam}
URLs => {found_urls}
"""
    try:
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_context}
            ],
            temperature=0.2
        )
        return response["choices"][0]["message"]["content"].strip()
    except Exception as e:
        return f"Could not generate final explanation due to error: {e}"
def smishing_detector(input_type, text, image):
    """
    Full classification pipeline for one message.

    Steps: gather the text (typed, or OCR'd from a screenshot), run the
    local zero-shot classifier, apply keyword/URL boosts, ask the LLM for
    an independent label, fold that label into the scores, then ask the LLM
    for a user-facing explanation.

    Args:
        input_type: "Text" to use `text`; anything else OCRs `image`.
        text: Pasted SMS text (may be None/empty).
        image: PIL image of a screenshot (may be None).

    Returns:
        Dict with the classification, confidences, matched keywords/URLs,
        LLM verdict, and final explanation — keys consumed by the UI.
    """
    # --- Collect the text to classify ---
    if input_type == "Text":
        combined_text = text.strip() if text else ""
    else:
        combined_text = ""
        if image is not None:
            # OCR with both Spanish and English language data loaded.
            combined_text = pytesseract.image_to_string(image, lang="spa+eng").strip()
    if not combined_text:
        # Nothing to classify — return a placeholder result the UI can show.
        # (This early return has no "detected_language" key; the caller uses
        # .get() with a default, so that's safe.)
        return {
            "text_used_for_classification": "(none)",
            "label": "No text provided",
            "confidence": 0.0,
            "keywords_found": [],
            "urls_found": [],
            "llm_label": "Unknown",
            "llm_reason": "No text to analyze",
            "final_explanation": "No text provided"
        }
    # --- Local zero-shot classification over the three candidate labels ---
    local_result = classifier(
        sequences=combined_text,
        candidate_labels=CANDIDATE_LABELS,
        hypothesis_template="This message is {}."
    )
    original_probs = {k: float(v) for k, v in zip(local_result["labels"], local_result["scores"])}
    # Heuristic keyword/URL boosting; also detects the message language.
    boosted = boost_probabilities(original_probs, combined_text)
    detected_lang = boosted.pop("detected_lang", "en")
    for k in boosted:
        boosted[k] = float(boosted[k])
    local_label = max(boosted, key=boosted.get)
    local_conf = round(boosted[local_label], 3)
    # --- Independent LLM opinion, blended into the boosted scores ---
    llm_classification = query_llm_for_classification(combined_text)
    llm_label = llm_classification.get("label", "Unknown")
    llm_reason = llm_classification.get("reason", "No reason provided")
    boosted = incorporate_llm_label(boosted, llm_label)
    final_label = max(boosted, key=boosted.get)
    final_confidence = round(boosted[final_label], 3)
    # --- Re-derive matched keywords/URLs for display ---
    # NOTE(review): get_keywords_by_language runs again here (it already ran
    # inside boost_probabilities), so Spanish input translates the keyword
    # lists twice — duplicate work, not a correctness issue.
    lower_text = combined_text.lower()
    smishing_keys, scam_keys, _ = get_keywords_by_language(combined_text)
    # Same URL pattern used by boost_probabilities.
    found_urls = re.findall(
        r"(https?://[^\s]+|\b[a-zA-Z0-9.-]+\.(?:com|net|org|edu|gov|mil|io|ai|co|info|biz|us|uk|de|fr|es|ru|jp|cn|in|au|ca|br|mx|it|nl|se|no|fi|ch|pl|kr|vn|id|tw|sg|hk)\b)",
        lower_text
    )
    found_smishing = [kw for kw in smishing_keys if kw in lower_text]
    found_other_scam = [kw for kw in scam_keys if kw in lower_text]
    # --- LLM-written, user-facing explanation in the detected language ---
    final_explanation = query_llm_for_explanation(
        text=combined_text,
        final_label=final_label,
        final_conf=final_confidence,
        local_label=local_label,
        local_conf=local_conf,
        llm_label=llm_label,
        llm_reason=llm_reason,
        found_smishing=found_smishing,
        found_other_scam=found_other_scam,
        found_urls=found_urls,
        detected_lang=detected_lang
    )
    return {
        "detected_language": detected_lang,
        "text_used_for_classification": combined_text,
        "original_probabilities": {k: round(v, 3) for k, v in original_probs.items()},
        # Only the top pre-LLM label/confidence is echoed here, not the full
        # distribution.
        "boosted_probabilities_before_llm": {local_label: local_conf},
        "llm_label": llm_label,
        "llm_reason": llm_reason,
        "boosted_probabilities_after_llm": {k: round(v, 3) for k, v in boosted.items()},
        "label": final_label,
        "confidence": final_confidence,
        "smishing_keywords_found": found_smishing,
        "other_scam_keywords_found": found_other_scam,
        "urls_found": found_urls,
        "final_explanation": final_explanation,
    }
###
# Combined function to produce both text (JSON) and TTS audio
###
def classify_and_tts(input_type, text, image):
    """
    Classify the message, then voice the explanation.

    Runs smishing_detector and feeds its final explanation (in the detected
    language, defaulting to English) through tts_explanation.

    Returns:
        (classification dict, BytesIO of MP3 audio) — the pair of outputs
        wired to the JSON and Audio components in the UI.
    """
    result = smishing_detector(input_type, text, image)
    audio_data = tts_explanation(
        result["final_explanation"],
        result.get("detected_language", "en"),
    )
    return result, audio_data
def toggle_inputs(choice):
    """Show the textbox when "Text" is selected, the image uploader otherwise."""
    show_text = choice == "Text"
    return gr.update(visible=show_text), gr.update(visible=not show_text)
# --- Gradio UI wiring ---------------------------------------------------
# NOTE(review): indentation was lost in the source paste; this reconstruction
# places only the radio inside the Row — confirm against the deployed layout.
with gr.Blocks() as demo:
    gr.Markdown("## SMiShing & Scam Detector with LLM-Enhanced Logic + TTS Explanation")
    with gr.Row():
        # Selector that decides which of the two inputs below is visible.
        input_type = gr.Radio(
            choices=["Text", "Screenshot"],
            value="Text",
            label="Choose Input Type"
        )
    text_input = gr.Textbox(
        lines=3,
        label="Paste Suspicious SMS Text",
        placeholder="Type or paste the message here...",
        visible=True
    )
    image_input = gr.Image(
        type="pil",
        label="Upload Screenshot",
        visible=False
    )
    # Swap visibility of the two inputs whenever the radio changes.
    input_type.change(
        fn=toggle_inputs,
        inputs=input_type,
        outputs=[text_input, image_input],
        queue=False
    )
    analyze_btn = gr.Button("Classify")
    # We'll show the classification JSON + TTS audio.
    output_json = gr.JSON(label="Classification Result")
    audio_output = gr.Audio(label="TTS Explanation")
    # classify_and_tts returns (dict_result, audio_data), matching the two
    # output components above.
    analyze_btn.click(
        fn=classify_and_tts,
        inputs=[input_type, text_input, image_input],
        outputs=[output_json, audio_output]
    )

if __name__ == "__main__":
    # LLM calls degrade to "Unknown"/error strings without a key; warn early.
    if not openai.api_key:
        print("WARNING: OPENAI_API_KEY not set. LLM calls will fail or be skipped.")
    demo.launch()