Spaces:

wooj0216
/

deepfake-detection

Sleeping

App Files Files Community

deepfake-detection / app.py

wooj0216

ADD: demo

0a911cc about 2 months ago

raw

history blame

3.34 kB

	import gradio as gr
	import cv2
	from PIL import Image
	import torch
	import numpy as np

	from transformers import AutoImageProcessor, AutoProcessor, AutoModel, CLIPVisionModel
	from detection import detect_image, detect_video
	from model import LinearClassifier


	# Hub id of the CLIP vision backbone used by every detection type.
	_CLIP_CHECKPOINT = "openai/clip-vit-large-patch14"

	# Process-wide caches: without them every button click reloads the CLIP
	# backbone and re-reads the classifier checkpoint from disk.
	_clip_backbone_cache = {}
	_classifier_cache = {}


	def _load_clip_backbone():
	    """Return the (processor, CLIP vision model) pair, loading it on first use."""
	    if "backbone" not in _clip_backbone_cache:
	        processor = AutoProcessor.from_pretrained(_CLIP_CHECKPOINT)
	        clip_model = CLIPVisionModel.from_pretrained(
	            _CLIP_CHECKPOINT, output_attentions=True
	        )
	        _clip_backbone_cache["backbone"] = (processor, clip_model)
	    return _clip_backbone_cache["backbone"]


	def _load_classifier(detection_type):
	    """Return the linear head for *detection_type*, loading it on first use."""
	    if detection_type not in _classifier_cache:
	        device = torch.device("cpu")
	        model_path = f"pretrained_models/{detection_type}/clip_weights.pth"
	        checkpoint = torch.load(model_path, map_location="cpu")
	        # The classifier's input width is read from the saved weight matrix
	        # (its second axis) instead of being hard-coded.
	        input_dim = checkpoint["linear.weight"].shape[1]

	        detection_model = LinearClassifier(input_dim)
	        detection_model.load_state_dict(checkpoint)
	        _classifier_cache[detection_type] = detection_model.to(device)
	    return _classifier_cache[detection_type]


	def load_model(detection_type):
	    """Load the CLIP processor, CLIP vision model and detection classifier.

	    The CLIP backbone does not depend on ``detection_type`` and is loaded
	    once per process; the linear classifier is loaded once per detection
	    type. Later calls return the same objects instead of reloading them.

	    Args:
	        detection_type: Name of the sub-directory of ``pretrained_models``
	            that holds ``clip_weights.pth`` (the callers in this file pass
	            ``"facial"`` or ``"general"``).

	    Returns:
	        A ``(processor, clip_model, detection_model)`` tuple. All models
	        stay on the CPU.
	    """
	    # NOTE(review): the returned objects are now shared between requests;
	    # this assumes detect_image / detect_video only read from the models
	    # and do not mutate them - confirm against detection.py.
	    processor, clip_model = _load_clip_backbone()
	    detection_model = _load_classifier(detection_type)
	    return processor, clip_model, detection_model

	def process_image(image, detection_type):
	    """Run the image detector and return its score and attention map.

	    Args:
	        image: The image handed over by the UI (the image component in this
	            file is configured with ``type="pil"``).
	        detection_type: Detection type forwarded to ``load_model``.

	    Returns:
	        A ``(pred_score, attn_map)`` tuple taken from the ``"pred_score"``
	        and ``"attn_map"`` entries of the ``detect_image`` result.
	    """
	    # load_model yields (processor, clip_model, detection_model), which is
	    # exactly the argument order detect_image is called with.
	    models = load_model(detection_type)
	    outcome = detect_image(image, *models)
	    return outcome["pred_score"], outcome["attn_map"]

	def _read_video_frames(video):
	    """Decode every frame of *video* into a list of RGB PIL images."""
	    cap = cv2.VideoCapture(video)
	    frames = []
	    try:
	        while True:
	            ret, frame = cap.read()
	            if not ret:
	                # read() reports failure at end of stream or when the file
	                # could not be opened/decoded at all.
	                break
	            # OpenCV delivers BGR; convert to RGB before wrapping in PIL.
	            rgb_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
	            frames.append(Image.fromarray(rgb_frame))
	    finally:
	        # Release the capture handle even if a frame conversion fails.
	        cap.release()
	    return frames


	def process_video(video, detection_type):
	    """Run the video detector and return its score and attention map.

	    Args:
	        video: Path of the uploaded video, passed to ``cv2.VideoCapture``.
	        detection_type: Detection type forwarded to ``load_model``.

	    Returns:
	        A ``(pred_score, attn_map)`` tuple taken from the ``"pred_score"``
	        and ``"attn_map"`` entries of the ``detect_video`` result.

	    Raises:
	        gr.Error: If no frame could be decoded from ``video``.
	    """
	    # Decode first so an unreadable file fails before any model is loaded.
	    frames = _read_video_frames(video)
	    if not frames:
	        raise gr.Error("Could not read any frames from the uploaded video.")

	    processor, clip_model, detection_model = load_model(detection_type)
	    results = detect_video(frames, processor, clip_model, detection_model)
	    return results["pred_score"], results["attn_map"]

	def change_input(input_type):
	    """Toggle visibility of the image and video inputs.

	    Args:
	        input_type: Current value of the input-type radio button.

	    Returns:
	        Two Gradio updates, in the order (image input, video input):
	        ``"Image"`` shows the image input and hides the video input,
	        ``"Video"`` does the opposite, and any other value leaves both
	        components unchanged.
	    """
	    if input_type == "Image":
	        return gr.update(visible=True), gr.update(visible=False)
	    if input_type == "Video":
	        return gr.update(visible=False), gr.update(visible=True)
	    # The event is wired to two outputs, so a bare None would not match the
	    # expected number of return values; return two no-op updates instead.
	    return gr.update(), gr.update()


	def process_input(input_type, model_type, image, video):
	    """Dispatch the uploaded media to the image or video pipeline.

	    Args:
	        input_type: ``"Image"`` or ``"Video"``, as chosen in the UI.
	        model_type: ``"Facial"`` selects the facial detector; any other
	            value selects the general detector.
	        image: Uploaded image, or ``None`` when nothing was uploaded.
	        video: Uploaded video, or ``None`` when nothing was uploaded.

	    Returns:
	        The ``(pred_score, attn_map)`` pair of the selected pipeline, or
	        ``(None, None)`` when the media for the chosen input type is
	        missing or the input type is not recognised.
	    """
	    if model_type == "Facial":
	        detection_type = "facial"
	    else:
	        detection_type = "general"

	    wants_image = input_type == "Image"
	    wants_video = input_type == "Video"

	    if wants_image and image is not None:
	        return process_image(image, detection_type)
	    if wants_video and video is not None:
	        return process_video(video, detection_type)
	    # Nothing usable was supplied for the selected input type.
	    return None, None


	# Build the demo UI. Components are created in the same order as they are
	# laid out on the page, so the order of these statements matters.
	with gr.Blocks() as demo:
	    gr.Markdown("## Deepfake Detection : Facial / General")

	    # Selectors for the kind of media and the detector to run.
	    input_type = gr.Radio(
	        choices=["Image", "Video"],
	        label="Choose Input Type",
	        value="Image",
	    )
	    model_type = gr.Radio(
	        choices=["Facial", "General"],
	        label="Choose Model Type",
	        value="General",
	    )

	    # Only one of the two upload widgets is visible at a time; the image
	    # input is shown initially to match the default radio value.
	    image_input = gr.Image(type="pil", label="Upload Image", visible=True)
	    video_input = gr.Video(label="Upload Video", visible=False)

	    process_button = gr.Button("Run Model")

	    # Result widgets filled by process_input.
	    pred_score_output = gr.Textbox(label="Prediction Score")
	    attn_map_output = gr.Image(type="pil", label="Attention Map")

	    # Swap the visible upload widget whenever the input type changes.
	    input_type.change(
	        change_input,
	        [input_type],
	        [image_input, video_input],
	    )

	    # Run the selected detector on the uploaded media.
	    process_button.click(
	        process_input,
	        [input_type, model_type, image_input, video_input],
	        [pred_score_output, attn_map_output],
	    )

	if __name__ == "__main__":
	    demo.launch()