Spaces:
Runtime error
Runtime error
from fastai.vision.all import * | |
import gradio as gr | |
import requests | |
import base64 | |
from bs4 import BeautifulSoup | |
import os | |
# Restore the exported fastai learner once, at import time, so that every call
# to analyze() reuses the same in-memory model.
learn = load_learner('nsfw_model.pkl')

# Class names the model can predict, as stored in its DataLoaders vocabulary.
labels = learn.dls.vocab
def analyze(url):
    """Classify the images found at the given URL and report page safety.

    The page is downloaded, every ``<img>`` tag is scanned for attribute
    values that resolve to an http(s) image URL, and each image is passed to
    the module-level ``learn`` classifier. Scanning stops at the first image
    labelled ``"unsafe_searches"``.

    Args:
        url: Address of the web page to audit. ``http://`` is prepended when
            the value does not already start with ``http://`` or ``https://``.

    Returns:
        ``'NOT safe'`` if any image is classified as unsafe, otherwise
        ``'safe'``. Images that cannot be downloaded, decoded or classified
        are logged and skipped, so they never mark a page as unsafe.

    Raises:
        requests.RequestException: If the page itself cannot be fetched.
    """
    # Function-scope imports keep this block self-contained.
    import logging
    import tempfile
    from urllib.parse import urljoin, urlparse

    log = logging.getLogger(__name__)
    timeout = 10  # seconds; a stalled host must not hang the request forever
    image_exts = ('.jpg', '.jpeg', '.png', '.gif')

    # Make sure URL starts with http or https
    # TODO: confirm that the url points to a web page, and not some resource.
    if not url.startswith(('http://', 'https://')):
        url = 'http://' + url

    # Extract html and all img tags
    page = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(page.text, "html.parser")

    # Collect candidate image URLs from every string attribute (src, data-src,
    # ...). Each value is resolved against the final page URL so relative and
    # protocol-relative paths become fetchable, and the extension is checked
    # on the URL *path* so query strings ("a.jpg?w=100") do not hide it.
    # A dict is used as an insertion-ordered set to fetch each image only once.
    srcs = {}
    for img in soup.find_all("img"):
        for value in img.attrs.values():
            if not isinstance(value, str):
                continue
            try:
                absolute = urljoin(page.url, value.strip())
                parsed = urlparse(absolute)
            except ValueError:
                # Not something that can be parsed as a URL (e.g. odd alt text).
                continue
            if parsed.scheme not in ('http', 'https'):
                continue
            if parsed.path.lower().endswith(image_exts):
                srcs[absolute] = None

    # Get the images from the urls and classify.
    # If there is a single unsafe image, report it.
    for src_url in srcs:
        suffix = os.path.splitext(urlparse(src_url).path)[1].lower()
        temp = None
        try:
            response = requests.get(src_url, timeout=timeout)
            response.raise_for_status()
            # A unique temp file per image: concurrent requests must not
            # overwrite each other's data. It is closed before being re-read
            # by path so this also works on platforms that lock open files.
            with tempfile.NamedTemporaryFile(suffix=suffix, delete=False) as handler:
                temp = handler.name
                handler.write(response.content)
            is_nsfw, _, _ = learn.predict(PILImage.create(temp))
        except Exception:
            # Best effort: one broken image must not abort the whole audit,
            # but the failure is recorded instead of being silently dropped.
            log.warning("Skipping image %s", src_url, exc_info=True)
            continue
        finally:
            if temp is not None:
                try:
                    os.remove(temp)
                except OSError:
                    pass
        if is_nsfw == "unsafe_searches":
            return 'NOT safe'

    return 'safe'
# Heading shown at the top of the Gradio page.
title = "Website Safety Analyzer"

# Markdown blurb rendered under the title; the pieces are joined verbatim.
description = "".join([
    "**The internet is not safe for children**. Even if we know the 'bad' sites, social media is hard to regulate. \n",
    "This is step one in an attempt to solve that. An image classifier that audits every image at a URL. \n",
    "In this iteration, I classify sites with sexually explicit content as **'NOT safe'**. \n\n",
    "There is a long way to go with NLP for profanity, cyber-bullying, as well as CV for violence, substance abuse, etc. \n",
    "Another step will be to convert this into a browser extension/add-on. \n",
    "I welcome any help on this. π",
])

# Sample inputs offered in the UI.
examples = [
    'pornhub.com',
    'cnn.com',
    'xvideos.com',
    'www.pinterest.com',
]

# Forwarded to launch() to turn on request queueing.
enable_queue = True
# Build the web UI: one text box in (the URL), one text box out (the verdict
# string returned by analyze), plus the page copy and example inputs.
iface = gr.Interface(
    fn=analyze,
    inputs="text", outputs="text",
    title=title, description=description,
    examples=examples,
)

# Start serving the interface.
# NOTE(review): newer Gradio releases appear to have replaced the
# `enable_queue` launch argument with `iface.queue()` - confirm against the
# installed Gradio version before changing this call.
iface.launch(enable_queue=enable_queue)